{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9998815423190064, "eval_steps": 500, "global_step": 25324, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.897178732897673e-05, "grad_norm": 5.6364933215880075, "learning_rate": 3.947887879984208e-07, "loss": 8.4087, "step": 1 }, { "epoch": 0.0003948589366448836, "grad_norm": 5.546487873129007, "learning_rate": 1.9739439399921044e-06, "loss": 8.5739, "step": 5 }, { "epoch": 0.0007897178732897672, "grad_norm": 5.741374363164821, "learning_rate": 3.947887879984209e-06, "loss": 8.515, "step": 10 }, { "epoch": 0.001184576809934651, "grad_norm": 5.5414673787823014, "learning_rate": 5.921831819976312e-06, "loss": 8.5093, "step": 15 }, { "epoch": 0.0015794357465795344, "grad_norm": 3.018107677804395, "learning_rate": 7.895775759968418e-06, "loss": 8.4252, "step": 20 }, { "epoch": 0.001974294683224418, "grad_norm": 4.385696121738529, "learning_rate": 9.869719699960521e-06, "loss": 8.3565, "step": 25 }, { "epoch": 0.002369153619869302, "grad_norm": 2.635538768576203, "learning_rate": 1.1843663639952625e-05, "loss": 8.2873, "step": 30 }, { "epoch": 0.002764012556514185, "grad_norm": 1.0638564409124187, "learning_rate": 1.381760757994473e-05, "loss": 8.2664, "step": 35 }, { "epoch": 0.003158871493159069, "grad_norm": 1.142267791518535, "learning_rate": 1.5791551519936835e-05, "loss": 8.1886, "step": 40 }, { "epoch": 0.0035537304298039526, "grad_norm": 0.7349486243297472, "learning_rate": 1.7765495459928937e-05, "loss": 8.2006, "step": 45 }, { "epoch": 0.003948589366448836, "grad_norm": 0.5184893822743845, "learning_rate": 1.9739439399921042e-05, "loss": 8.1215, "step": 50 }, { "epoch": 0.00434344830309372, "grad_norm": 0.8226116613898637, "learning_rate": 2.1713383339913147e-05, "loss": 8.0946, "step": 55 }, { "epoch": 0.004738307239738604, "grad_norm": 0.49010570416880445, "learning_rate": 2.368732727990525e-05, "loss": 7.956, "step": 60 }, { "epoch": 0.005133166176383487, "grad_norm": 0.40605112713198227, "learning_rate": 2.5661271219897354e-05, "loss": 8.0331, "step": 65 }, { "epoch": 0.00552802511302837, "grad_norm": 0.43306344431619953, "learning_rate": 2.763521515988946e-05, "loss": 7.9246, "step": 70 }, { "epoch": 0.005922884049673254, "grad_norm": 0.7994353712652111, "learning_rate": 2.9609159099881565e-05, "loss": 7.948, "step": 75 }, { "epoch": 0.006317742986318138, "grad_norm": 0.33822557904088063, "learning_rate": 3.158310303987367e-05, "loss": 7.8481, "step": 80 }, { "epoch": 0.0067126019229630214, "grad_norm": 0.46083410076889214, "learning_rate": 3.3557046979865775e-05, "loss": 7.8621, "step": 85 }, { "epoch": 0.007107460859607905, "grad_norm": 0.8415433669019611, "learning_rate": 3.5530990919857874e-05, "loss": 7.8981, "step": 90 }, { "epoch": 0.007502319796252789, "grad_norm": 0.8529917130280075, "learning_rate": 3.750493485984998e-05, "loss": 7.7077, "step": 95 }, { "epoch": 0.007897178732897673, "grad_norm": 2.12711393508725, "learning_rate": 3.9478878799842084e-05, "loss": 7.827, "step": 100 }, { "epoch": 0.008292037669542555, "grad_norm": 0.9143284795226546, "learning_rate": 4.145282273983419e-05, "loss": 7.7618, "step": 105 }, { "epoch": 0.00868689660618744, "grad_norm": 0.3719761245313438, "learning_rate": 4.3426766679826295e-05, "loss": 7.794, "step": 110 }, { "epoch": 0.009081755542832323, "grad_norm": 0.565392246746009, "learning_rate": 4.540071061981839e-05, "loss": 7.6851, "step": 115 }, { "epoch": 0.009476614479477207, "grad_norm": 1.03259972843582, "learning_rate": 4.73746545598105e-05, "loss": 7.7108, "step": 120 }, { "epoch": 0.00987147341612209, "grad_norm": 0.28305610153653443, "learning_rate": 4.9348598499802604e-05, "loss": 7.6629, "step": 125 }, { "epoch": 0.010266332352766973, "grad_norm": 1.0549619619075872, "learning_rate": 5.132254243979471e-05, "loss": 7.6565, "step": 130 }, { "epoch": 0.010661191289411858, "grad_norm": 0.604082623842012, "learning_rate": 5.329648637978682e-05, "loss": 7.5867, "step": 135 }, { "epoch": 0.01105605022605674, "grad_norm": 1.1954832943742413, "learning_rate": 5.527043031977892e-05, "loss": 7.5729, "step": 140 }, { "epoch": 0.011450909162701625, "grad_norm": 0.7811678279966684, "learning_rate": 5.7244374259771025e-05, "loss": 7.6641, "step": 145 }, { "epoch": 0.011845768099346508, "grad_norm": 0.5968996325656871, "learning_rate": 5.921831819976313e-05, "loss": 7.4971, "step": 150 }, { "epoch": 0.012240627035991393, "grad_norm": 0.6684704580332496, "learning_rate": 6.119226213975524e-05, "loss": 7.5324, "step": 155 }, { "epoch": 0.012635485972636275, "grad_norm": 0.6697151968355394, "learning_rate": 6.316620607974734e-05, "loss": 7.53, "step": 160 }, { "epoch": 0.01303034490928116, "grad_norm": 0.5313996254843999, "learning_rate": 6.514015001973945e-05, "loss": 7.5576, "step": 165 }, { "epoch": 0.013425203845926043, "grad_norm": 0.9140590861114964, "learning_rate": 6.711409395973155e-05, "loss": 7.5595, "step": 170 }, { "epoch": 0.013820062782570926, "grad_norm": 0.2791486813238053, "learning_rate": 6.908803789972364e-05, "loss": 7.5413, "step": 175 }, { "epoch": 0.01421492171921581, "grad_norm": 0.7009402252903231, "learning_rate": 7.106198183971575e-05, "loss": 7.5165, "step": 180 }, { "epoch": 0.014609780655860693, "grad_norm": 0.9455411599644917, "learning_rate": 7.303592577970785e-05, "loss": 7.5456, "step": 185 }, { "epoch": 0.015004639592505578, "grad_norm": 1.1927579408482192, "learning_rate": 7.500986971969996e-05, "loss": 7.4493, "step": 190 }, { "epoch": 0.01539949852915046, "grad_norm": 0.7292933790475317, "learning_rate": 7.698381365969206e-05, "loss": 7.4412, "step": 195 }, { "epoch": 0.015794357465795345, "grad_norm": 1.3755660992515326, "learning_rate": 7.895775759968417e-05, "loss": 7.4867, "step": 200 }, { "epoch": 0.01618921640244023, "grad_norm": 0.29904986139324147, "learning_rate": 8.093170153967627e-05, "loss": 7.4511, "step": 205 }, { "epoch": 0.01658407533908511, "grad_norm": 0.5318650826462576, "learning_rate": 8.290564547966838e-05, "loss": 7.3558, "step": 210 }, { "epoch": 0.016978934275729995, "grad_norm": 0.4689504444308271, "learning_rate": 8.487958941966048e-05, "loss": 7.3773, "step": 215 }, { "epoch": 0.01737379321237488, "grad_norm": 0.417313930568363, "learning_rate": 8.685353335965259e-05, "loss": 7.3628, "step": 220 }, { "epoch": 0.01776865214901976, "grad_norm": 0.506039868484003, "learning_rate": 8.882747729964468e-05, "loss": 7.3299, "step": 225 }, { "epoch": 0.018163511085664646, "grad_norm": 0.755797270599672, "learning_rate": 9.080142123963679e-05, "loss": 7.3605, "step": 230 }, { "epoch": 0.01855837002230953, "grad_norm": 0.3235540894484792, "learning_rate": 9.277536517962889e-05, "loss": 7.3782, "step": 235 }, { "epoch": 0.018953228958954415, "grad_norm": 0.5610188939142582, "learning_rate": 9.4749309119621e-05, "loss": 7.2025, "step": 240 }, { "epoch": 0.019348087895599296, "grad_norm": 0.7862699570513978, "learning_rate": 9.67232530596131e-05, "loss": 7.3493, "step": 245 }, { "epoch": 0.01974294683224418, "grad_norm": 0.33469324080979856, "learning_rate": 9.869719699960521e-05, "loss": 7.287, "step": 250 }, { "epoch": 0.020137805768889065, "grad_norm": 0.4727494364750123, "learning_rate": 0.00010067114093959731, "loss": 7.3691, "step": 255 }, { "epoch": 0.020532664705533946, "grad_norm": 0.3867827150360007, "learning_rate": 0.00010264508487958942, "loss": 7.2183, "step": 260 }, { "epoch": 0.02092752364217883, "grad_norm": 0.6219264776780915, "learning_rate": 0.00010461902881958154, "loss": 7.2853, "step": 265 }, { "epoch": 0.021322382578823715, "grad_norm": 0.1527098290088256, "learning_rate": 0.00010659297275957364, "loss": 7.2299, "step": 270 }, { "epoch": 0.0217172415154686, "grad_norm": 0.5458613828390525, "learning_rate": 0.00010856691669956573, "loss": 7.3953, "step": 275 }, { "epoch": 0.02211210045211348, "grad_norm": 0.8420711035526456, "learning_rate": 0.00011054086063955784, "loss": 7.2513, "step": 280 }, { "epoch": 0.022506959388758366, "grad_norm": 1.0786927582966943, "learning_rate": 0.00011251480457954994, "loss": 7.2341, "step": 285 }, { "epoch": 0.02290181832540325, "grad_norm": 0.6313157710460624, "learning_rate": 0.00011448874851954205, "loss": 7.3988, "step": 290 }, { "epoch": 0.023296677262048135, "grad_norm": 1.211292435501242, "learning_rate": 0.00011646269245953415, "loss": 7.2263, "step": 295 }, { "epoch": 0.023691536198693016, "grad_norm": 0.379316114906543, "learning_rate": 0.00011843663639952626, "loss": 7.0657, "step": 300 }, { "epoch": 0.0240863951353379, "grad_norm": 0.21706643140369852, "learning_rate": 0.00012041058033951837, "loss": 7.1802, "step": 305 }, { "epoch": 0.024481254071982785, "grad_norm": 0.4866118137681756, "learning_rate": 0.00012238452427951047, "loss": 7.2032, "step": 310 }, { "epoch": 0.024876113008627666, "grad_norm": 0.4511548668152491, "learning_rate": 0.00012435846821950256, "loss": 7.0437, "step": 315 }, { "epoch": 0.02527097194527255, "grad_norm": 0.4231569253142339, "learning_rate": 0.00012633241215949468, "loss": 7.2052, "step": 320 }, { "epoch": 0.025665830881917436, "grad_norm": 0.4166995095287679, "learning_rate": 0.00012830635609948677, "loss": 7.187, "step": 325 }, { "epoch": 0.02606068981856232, "grad_norm": 0.3315741739726114, "learning_rate": 0.0001302803000394789, "loss": 7.2189, "step": 330 }, { "epoch": 0.0264555487552072, "grad_norm": 0.6894051434371847, "learning_rate": 0.00013225424397947098, "loss": 7.2197, "step": 335 }, { "epoch": 0.026850407691852086, "grad_norm": 0.6903693888260329, "learning_rate": 0.0001342281879194631, "loss": 7.2968, "step": 340 }, { "epoch": 0.02724526662849697, "grad_norm": 0.24865143418668384, "learning_rate": 0.0001362021318594552, "loss": 7.1625, "step": 345 }, { "epoch": 0.02764012556514185, "grad_norm": 0.29185680499939926, "learning_rate": 0.00013817607579944729, "loss": 7.1936, "step": 350 }, { "epoch": 0.028034984501786736, "grad_norm": 0.2692479530782695, "learning_rate": 0.0001401500197394394, "loss": 7.247, "step": 355 }, { "epoch": 0.02842984343843162, "grad_norm": 0.8127936239764955, "learning_rate": 0.0001421239636794315, "loss": 7.1144, "step": 360 }, { "epoch": 0.028824702375076505, "grad_norm": 0.47766251800340614, "learning_rate": 0.00014409790761942361, "loss": 7.1236, "step": 365 }, { "epoch": 0.029219561311721386, "grad_norm": 0.4518857058487687, "learning_rate": 0.0001460718515594157, "loss": 7.1432, "step": 370 }, { "epoch": 0.02961442024836627, "grad_norm": 0.46813919890030603, "learning_rate": 0.00014804579549940782, "loss": 7.1191, "step": 375 }, { "epoch": 0.030009279185011156, "grad_norm": 0.31883142779747636, "learning_rate": 0.00015001973943939992, "loss": 7.1021, "step": 380 }, { "epoch": 0.03040413812165604, "grad_norm": 0.2706264149789679, "learning_rate": 0.00015199368337939204, "loss": 7.155, "step": 385 }, { "epoch": 0.03079899705830092, "grad_norm": 0.6940009274985549, "learning_rate": 0.00015396762731938413, "loss": 7.2149, "step": 390 }, { "epoch": 0.031193855994945806, "grad_norm": 0.8071611814268795, "learning_rate": 0.00015594157125937625, "loss": 7.2183, "step": 395 }, { "epoch": 0.03158871493159069, "grad_norm": 1.246275141871388, "learning_rate": 0.00015791551519936834, "loss": 7.0453, "step": 400 }, { "epoch": 0.031983573868235575, "grad_norm": 1.1366333544134617, "learning_rate": 0.00015988945913936046, "loss": 7.1189, "step": 405 }, { "epoch": 0.03237843280488046, "grad_norm": 0.8725730180594035, "learning_rate": 0.00016186340307935255, "loss": 7.1153, "step": 410 }, { "epoch": 0.03277329174152534, "grad_norm": 0.3397113176526181, "learning_rate": 0.00016383734701934467, "loss": 7.179, "step": 415 }, { "epoch": 0.03316815067817022, "grad_norm": 0.345855502356945, "learning_rate": 0.00016581129095933676, "loss": 7.1199, "step": 420 }, { "epoch": 0.033563009614815106, "grad_norm": 0.952950181676769, "learning_rate": 0.00016778523489932888, "loss": 7.1073, "step": 425 }, { "epoch": 0.03395786855145999, "grad_norm": 0.6432220149884589, "learning_rate": 0.00016975917883932097, "loss": 7.0535, "step": 430 }, { "epoch": 0.034352727488104876, "grad_norm": 0.909882752079194, "learning_rate": 0.0001717331227793131, "loss": 7.0902, "step": 435 }, { "epoch": 0.03474758642474976, "grad_norm": 0.524899452248505, "learning_rate": 0.00017370706671930518, "loss": 7.1282, "step": 440 }, { "epoch": 0.035142445361394645, "grad_norm": 0.5383974875940429, "learning_rate": 0.0001756810106592973, "loss": 7.058, "step": 445 }, { "epoch": 0.03553730429803952, "grad_norm": 0.3771772090065947, "learning_rate": 0.00017765495459928936, "loss": 6.9843, "step": 450 }, { "epoch": 0.03593216323468441, "grad_norm": 0.7633554590310165, "learning_rate": 0.00017962889853928148, "loss": 7.0695, "step": 455 }, { "epoch": 0.03632702217132929, "grad_norm": 0.5203825096585563, "learning_rate": 0.00018160284247927357, "loss": 7.0061, "step": 460 }, { "epoch": 0.036721881107974176, "grad_norm": 0.457122520787566, "learning_rate": 0.0001835767864192657, "loss": 7.0917, "step": 465 }, { "epoch": 0.03711674004461906, "grad_norm": 0.8401061255719732, "learning_rate": 0.00018555073035925778, "loss": 7.1282, "step": 470 }, { "epoch": 0.037511598981263945, "grad_norm": 0.3959587513463755, "learning_rate": 0.0001875246742992499, "loss": 7.2066, "step": 475 }, { "epoch": 0.03790645791790883, "grad_norm": 0.4486693669217552, "learning_rate": 0.000189498618239242, "loss": 7.1461, "step": 480 }, { "epoch": 0.03830131685455371, "grad_norm": 0.7621630599732567, "learning_rate": 0.0001914725621792341, "loss": 7.1314, "step": 485 }, { "epoch": 0.03869617579119859, "grad_norm": 0.229000855901464, "learning_rate": 0.0001934465061192262, "loss": 7.1284, "step": 490 }, { "epoch": 0.03909103472784348, "grad_norm": 0.4319340694695639, "learning_rate": 0.00019542045005921832, "loss": 7.1608, "step": 495 }, { "epoch": 0.03948589366448836, "grad_norm": 0.2460814438806878, "learning_rate": 0.00019739439399921041, "loss": 7.0823, "step": 500 }, { "epoch": 0.039880752601133246, "grad_norm": 0.4628650846954491, "learning_rate": 0.00019936833793920253, "loss": 7.1285, "step": 505 }, { "epoch": 0.04027561153777813, "grad_norm": 0.3451832815289159, "learning_rate": 0.00020134228187919463, "loss": 7.1413, "step": 510 }, { "epoch": 0.040670470474423015, "grad_norm": 0.16849388576247834, "learning_rate": 0.00020331622581918674, "loss": 7.1, "step": 515 }, { "epoch": 0.04106532941106789, "grad_norm": 0.20423193394227485, "learning_rate": 0.00020529016975917884, "loss": 6.9746, "step": 520 }, { "epoch": 0.04146018834771278, "grad_norm": 0.39346172627344084, "learning_rate": 0.00020726411369917095, "loss": 7.102, "step": 525 }, { "epoch": 0.04185504728435766, "grad_norm": 0.3660955583808101, "learning_rate": 0.00020923805763916307, "loss": 7.0735, "step": 530 }, { "epoch": 0.042249906221002546, "grad_norm": 0.18419128855454156, "learning_rate": 0.00021121200157915517, "loss": 7.0555, "step": 535 }, { "epoch": 0.04264476515764743, "grad_norm": 0.28181502694517, "learning_rate": 0.00021318594551914728, "loss": 7.0026, "step": 540 }, { "epoch": 0.043039624094292316, "grad_norm": 0.7486113596776681, "learning_rate": 0.00021515988945913938, "loss": 7.2017, "step": 545 }, { "epoch": 0.0434344830309372, "grad_norm": 0.23510567023133397, "learning_rate": 0.00021713383339913147, "loss": 7.1617, "step": 550 }, { "epoch": 0.043829341967582085, "grad_norm": 0.3817562014824304, "learning_rate": 0.00021910777733912356, "loss": 7.084, "step": 555 }, { "epoch": 0.04422420090422696, "grad_norm": 0.58844521615861, "learning_rate": 0.00022108172127911568, "loss": 7.0701, "step": 560 }, { "epoch": 0.04461905984087185, "grad_norm": 0.6601049853855961, "learning_rate": 0.00022305566521910777, "loss": 7.029, "step": 565 }, { "epoch": 0.04501391877751673, "grad_norm": 0.4934646462326718, "learning_rate": 0.0002250296091590999, "loss": 7.0442, "step": 570 }, { "epoch": 0.045408777714161616, "grad_norm": 0.44554734206927066, "learning_rate": 0.00022700355309909198, "loss": 6.9661, "step": 575 }, { "epoch": 0.0458036366508065, "grad_norm": 0.24513331564679455, "learning_rate": 0.0002289774970390841, "loss": 7.0662, "step": 580 }, { "epoch": 0.046198495587451385, "grad_norm": 0.2231357272401659, "learning_rate": 0.0002309514409790762, "loss": 7.113, "step": 585 }, { "epoch": 0.04659335452409627, "grad_norm": 0.5846066143743338, "learning_rate": 0.0002329253849190683, "loss": 7.0569, "step": 590 }, { "epoch": 0.04698821346074115, "grad_norm": 0.42200804638914724, "learning_rate": 0.0002348993288590604, "loss": 7.089, "step": 595 }, { "epoch": 0.04738307239738603, "grad_norm": 0.7349561145967988, "learning_rate": 0.00023687327279905252, "loss": 7.0087, "step": 600 }, { "epoch": 0.04777793133403092, "grad_norm": 0.3174336744410153, "learning_rate": 0.0002388472167390446, "loss": 7.0848, "step": 605 }, { "epoch": 0.0481727902706758, "grad_norm": 0.44383008675263885, "learning_rate": 0.00024082116067903673, "loss": 7.009, "step": 610 }, { "epoch": 0.048567649207320686, "grad_norm": 0.26023310480517936, "learning_rate": 0.00024279510461902882, "loss": 6.9455, "step": 615 }, { "epoch": 0.04896250814396557, "grad_norm": 0.2978669131927101, "learning_rate": 0.00024476904855902094, "loss": 7.2568, "step": 620 }, { "epoch": 0.049357367080610455, "grad_norm": 0.4497984109561941, "learning_rate": 0.00024674299249901303, "loss": 6.9709, "step": 625 }, { "epoch": 0.04975222601725533, "grad_norm": 0.3697479232635726, "learning_rate": 0.0002487169364390051, "loss": 6.9827, "step": 630 }, { "epoch": 0.05014708495390022, "grad_norm": 0.2660590651879891, "learning_rate": 0.0002506908803789972, "loss": 6.9068, "step": 635 }, { "epoch": 0.0505419438905451, "grad_norm": 0.43267306130119215, "learning_rate": 0.00025266482431898936, "loss": 6.974, "step": 640 }, { "epoch": 0.050936802827189986, "grad_norm": 0.5841010535306203, "learning_rate": 0.00025463876825898145, "loss": 7.0406, "step": 645 }, { "epoch": 0.05133166176383487, "grad_norm": 0.7844788345978493, "learning_rate": 0.00025661271219897354, "loss": 7.1016, "step": 650 }, { "epoch": 0.051726520700479756, "grad_norm": 0.544392119831974, "learning_rate": 0.00025858665613896564, "loss": 6.8638, "step": 655 }, { "epoch": 0.05212137963712464, "grad_norm": 0.339077844225145, "learning_rate": 0.0002605606000789578, "loss": 6.8686, "step": 660 }, { "epoch": 0.05251623857376952, "grad_norm": 0.20436135254406648, "learning_rate": 0.0002625345440189499, "loss": 6.9281, "step": 665 }, { "epoch": 0.0529110975104144, "grad_norm": 0.306406541433125, "learning_rate": 0.00026450848795894197, "loss": 6.9826, "step": 670 }, { "epoch": 0.05330595644705929, "grad_norm": 0.5251119771811662, "learning_rate": 0.00026648243189893406, "loss": 6.8635, "step": 675 }, { "epoch": 0.05370081538370417, "grad_norm": 0.9268633904330905, "learning_rate": 0.0002684563758389262, "loss": 6.9131, "step": 680 }, { "epoch": 0.054095674320349056, "grad_norm": 0.5922486615184307, "learning_rate": 0.0002704303197789183, "loss": 6.9227, "step": 685 }, { "epoch": 0.05449053325699394, "grad_norm": 1.0026535635396976, "learning_rate": 0.0002724042637189104, "loss": 6.7937, "step": 690 }, { "epoch": 0.054885392193638825, "grad_norm": 0.5266464170092442, "learning_rate": 0.0002743782076589025, "loss": 6.877, "step": 695 }, { "epoch": 0.0552802511302837, "grad_norm": 0.4691666132614931, "learning_rate": 0.00027635215159889457, "loss": 6.8476, "step": 700 }, { "epoch": 0.05567511006692859, "grad_norm": 0.288319636644655, "learning_rate": 0.0002783260955388867, "loss": 6.802, "step": 705 }, { "epoch": 0.05606996900357347, "grad_norm": 0.5488155692101869, "learning_rate": 0.0002803000394788788, "loss": 6.8509, "step": 710 }, { "epoch": 0.05646482794021836, "grad_norm": 0.5672391230938693, "learning_rate": 0.0002822739834188709, "loss": 6.9427, "step": 715 }, { "epoch": 0.05685968687686324, "grad_norm": 0.6202000715814174, "learning_rate": 0.000284247927358863, "loss": 6.8317, "step": 720 }, { "epoch": 0.057254545813508126, "grad_norm": 1.288531803942922, "learning_rate": 0.00028622187129885514, "loss": 6.9527, "step": 725 }, { "epoch": 0.05764940475015301, "grad_norm": 0.7205085069495126, "learning_rate": 0.00028819581523884723, "loss": 6.9069, "step": 730 }, { "epoch": 0.05804426368679789, "grad_norm": 0.3234831084497484, "learning_rate": 0.0002901697591788393, "loss": 6.887, "step": 735 }, { "epoch": 0.05843912262344277, "grad_norm": 0.23769113935766606, "learning_rate": 0.0002921437031188314, "loss": 6.7573, "step": 740 }, { "epoch": 0.05883398156008766, "grad_norm": 0.7735467626857055, "learning_rate": 0.00029411764705882356, "loss": 6.9878, "step": 745 }, { "epoch": 0.05922884049673254, "grad_norm": 0.6496348225163944, "learning_rate": 0.00029609159099881565, "loss": 6.8717, "step": 750 }, { "epoch": 0.059623699433377426, "grad_norm": 1.1928107466510738, "learning_rate": 0.00029806553493880774, "loss": 6.9452, "step": 755 }, { "epoch": 0.06001855837002231, "grad_norm": 0.3288895466778138, "learning_rate": 0.00030003947887879983, "loss": 6.8482, "step": 760 }, { "epoch": 0.060413417306667196, "grad_norm": 0.4969298360642689, "learning_rate": 0.000302013422818792, "loss": 6.9461, "step": 765 }, { "epoch": 0.06080827624331208, "grad_norm": 0.44304617893546344, "learning_rate": 0.00030398736675878407, "loss": 6.8042, "step": 770 }, { "epoch": 0.06120313517995696, "grad_norm": 0.38324414151736697, "learning_rate": 0.00030596131069877616, "loss": 6.6468, "step": 775 }, { "epoch": 0.06159799411660184, "grad_norm": 0.27470943464404096, "learning_rate": 0.00030793525463876825, "loss": 6.7684, "step": 780 }, { "epoch": 0.06199285305324673, "grad_norm": 0.8187266381372185, "learning_rate": 0.0003099091985787604, "loss": 6.7438, "step": 785 }, { "epoch": 0.06238771198989161, "grad_norm": 0.730927627674773, "learning_rate": 0.0003118831425187525, "loss": 6.655, "step": 790 }, { "epoch": 0.0627825709265365, "grad_norm": 0.8999640177009153, "learning_rate": 0.0003138570864587446, "loss": 6.6942, "step": 795 }, { "epoch": 0.06317742986318138, "grad_norm": 0.8278772934442582, "learning_rate": 0.0003158310303987367, "loss": 6.6254, "step": 800 }, { "epoch": 0.06357228879982627, "grad_norm": 0.4327386693420839, "learning_rate": 0.00031780497433872877, "loss": 6.7407, "step": 805 }, { "epoch": 0.06396714773647115, "grad_norm": 0.4573757512435893, "learning_rate": 0.0003197789182787209, "loss": 6.6757, "step": 810 }, { "epoch": 0.06436200667311603, "grad_norm": 0.8988064547272374, "learning_rate": 0.00032175286221871295, "loss": 6.452, "step": 815 }, { "epoch": 0.06475686560976092, "grad_norm": 0.465653797615005, "learning_rate": 0.0003237268061587051, "loss": 6.5672, "step": 820 }, { "epoch": 0.06515172454640579, "grad_norm": 0.4079566468842491, "learning_rate": 0.0003257007500986972, "loss": 6.5856, "step": 825 }, { "epoch": 0.06554658348305067, "grad_norm": 0.8804173899336221, "learning_rate": 0.00032767469403868933, "loss": 6.4256, "step": 830 }, { "epoch": 0.06594144241969556, "grad_norm": 0.47398578773150146, "learning_rate": 0.00032964863797868137, "loss": 6.4743, "step": 835 }, { "epoch": 0.06633630135634044, "grad_norm": 0.5270496302793023, "learning_rate": 0.0003316225819186735, "loss": 6.3291, "step": 840 }, { "epoch": 0.06673116029298533, "grad_norm": 0.420298015364841, "learning_rate": 0.0003335965258586656, "loss": 6.4112, "step": 845 }, { "epoch": 0.06712601922963021, "grad_norm": 0.6695481691587143, "learning_rate": 0.00033557046979865775, "loss": 6.3999, "step": 850 }, { "epoch": 0.0675208781662751, "grad_norm": 0.4274195443174277, "learning_rate": 0.0003375444137386498, "loss": 6.3147, "step": 855 }, { "epoch": 0.06791573710291998, "grad_norm": 0.3924992180758938, "learning_rate": 0.00033951835767864194, "loss": 6.2792, "step": 860 }, { "epoch": 0.06831059603956487, "grad_norm": 0.6028556356736195, "learning_rate": 0.00034149230161863403, "loss": 6.322, "step": 865 }, { "epoch": 0.06870545497620975, "grad_norm": 0.4778037549828151, "learning_rate": 0.0003434662455586262, "loss": 6.1941, "step": 870 }, { "epoch": 0.06910031391285464, "grad_norm": 0.4904634692925081, "learning_rate": 0.00034544018949861827, "loss": 6.1995, "step": 875 }, { "epoch": 0.06949517284949952, "grad_norm": 1.2450107094524352, "learning_rate": 0.00034741413343861036, "loss": 6.1547, "step": 880 }, { "epoch": 0.0698900317861444, "grad_norm": 0.5945635060628294, "learning_rate": 0.00034938807737860245, "loss": 6.0975, "step": 885 }, { "epoch": 0.07028489072278929, "grad_norm": 0.614113880452359, "learning_rate": 0.0003513620213185946, "loss": 6.166, "step": 890 }, { "epoch": 0.07067974965943417, "grad_norm": 0.6462131552234863, "learning_rate": 0.0003533359652585867, "loss": 6.3998, "step": 895 }, { "epoch": 0.07107460859607904, "grad_norm": 0.5145582366354633, "learning_rate": 0.0003553099091985787, "loss": 6.1673, "step": 900 }, { "epoch": 0.07146946753272393, "grad_norm": 0.6031624253048248, "learning_rate": 0.00035728385313857087, "loss": 6.2305, "step": 905 }, { "epoch": 0.07186432646936881, "grad_norm": 0.38542796309002775, "learning_rate": 0.00035925779707856296, "loss": 6.219, "step": 910 }, { "epoch": 0.0722591854060137, "grad_norm": 0.6389522302366885, "learning_rate": 0.0003612317410185551, "loss": 6.216, "step": 915 }, { "epoch": 0.07265404434265858, "grad_norm": 0.6188106695500818, "learning_rate": 0.00036320568495854715, "loss": 6.1989, "step": 920 }, { "epoch": 0.07304890327930347, "grad_norm": 0.5869605464868938, "learning_rate": 0.0003651796288985393, "loss": 6.054, "step": 925 }, { "epoch": 0.07344376221594835, "grad_norm": 0.46310907340067076, "learning_rate": 0.0003671535728385314, "loss": 5.9924, "step": 930 }, { "epoch": 0.07383862115259324, "grad_norm": 0.46705829976050045, "learning_rate": 0.00036912751677852353, "loss": 5.9444, "step": 935 }, { "epoch": 0.07423348008923812, "grad_norm": 0.4646621106392412, "learning_rate": 0.00037110146071851557, "loss": 5.9337, "step": 940 }, { "epoch": 0.074628339025883, "grad_norm": 0.4088990602920996, "learning_rate": 0.0003730754046585077, "loss": 5.9738, "step": 945 }, { "epoch": 0.07502319796252789, "grad_norm": 0.37262501145090265, "learning_rate": 0.0003750493485984998, "loss": 5.8892, "step": 950 }, { "epoch": 0.07541805689917278, "grad_norm": 0.2870924062991571, "learning_rate": 0.00037702329253849195, "loss": 5.9164, "step": 955 }, { "epoch": 0.07581291583581766, "grad_norm": 0.44233277166379914, "learning_rate": 0.000378997236478484, "loss": 5.9329, "step": 960 }, { "epoch": 0.07620777477246254, "grad_norm": 0.48402541092439033, "learning_rate": 0.00038097118041847613, "loss": 5.9978, "step": 965 }, { "epoch": 0.07660263370910741, "grad_norm": 0.3800976667965624, "learning_rate": 0.0003829451243584682, "loss": 5.7493, "step": 970 }, { "epoch": 0.0769974926457523, "grad_norm": 0.38152103486641, "learning_rate": 0.00038491906829846037, "loss": 5.915, "step": 975 }, { "epoch": 0.07739235158239718, "grad_norm": 0.2544042897726775, "learning_rate": 0.0003868930122384524, "loss": 5.8755, "step": 980 }, { "epoch": 0.07778721051904207, "grad_norm": 0.3415163712677125, "learning_rate": 0.00038886695617844455, "loss": 5.6586, "step": 985 }, { "epoch": 0.07818206945568695, "grad_norm": 0.3781325824337903, "learning_rate": 0.00039084090011843665, "loss": 5.8284, "step": 990 }, { "epoch": 0.07857692839233184, "grad_norm": 0.40611362332345846, "learning_rate": 0.0003928148440584288, "loss": 5.6845, "step": 995 }, { "epoch": 0.07897178732897672, "grad_norm": 0.47582117543468877, "learning_rate": 0.00039478878799842083, "loss": 5.7029, "step": 1000 }, { "epoch": 0.07936664626562161, "grad_norm": 0.3200349065940017, "learning_rate": 0.0003967627319384129, "loss": 5.5935, "step": 1005 }, { "epoch": 0.07976150520226649, "grad_norm": 0.37558852113400537, "learning_rate": 0.00039873667587840507, "loss": 5.5637, "step": 1010 }, { "epoch": 0.08015636413891138, "grad_norm": 0.5036930070126071, "learning_rate": 0.00040071061981839716, "loss": 5.6446, "step": 1015 }, { "epoch": 0.08055122307555626, "grad_norm": 0.5742079281001577, "learning_rate": 0.00040268456375838925, "loss": 5.7776, "step": 1020 }, { "epoch": 0.08094608201220115, "grad_norm": 0.4695301782582392, "learning_rate": 0.00040465850769838134, "loss": 5.7071, "step": 1025 }, { "epoch": 0.08134094094884603, "grad_norm": 0.4589126300737802, "learning_rate": 0.0004066324516383735, "loss": 5.6018, "step": 1030 }, { "epoch": 0.08173579988549091, "grad_norm": 0.31121419882338514, "learning_rate": 0.0004086063955783656, "loss": 5.6547, "step": 1035 }, { "epoch": 0.08213065882213579, "grad_norm": 0.4532534219174123, "learning_rate": 0.00041058033951835767, "loss": 5.6365, "step": 1040 }, { "epoch": 0.08252551775878067, "grad_norm": 0.5469095653585231, "learning_rate": 0.00041255428345834976, "loss": 5.588, "step": 1045 }, { "epoch": 0.08292037669542555, "grad_norm": 0.3819448310396733, "learning_rate": 0.0004145282273983419, "loss": 5.4685, "step": 1050 }, { "epoch": 0.08331523563207044, "grad_norm": 0.6078685274214377, "learning_rate": 0.000416502171338334, "loss": 5.643, "step": 1055 }, { "epoch": 0.08371009456871532, "grad_norm": 0.4664719596014047, "learning_rate": 0.00041847611527832615, "loss": 5.5799, "step": 1060 }, { "epoch": 0.08410495350536021, "grad_norm": 0.36418535532469976, "learning_rate": 0.0004204500592183182, "loss": 5.5605, "step": 1065 }, { "epoch": 0.08449981244200509, "grad_norm": 0.4203759165893138, "learning_rate": 0.00042242400315831033, "loss": 5.4586, "step": 1070 }, { "epoch": 0.08489467137864998, "grad_norm": 0.4011216836694698, "learning_rate": 0.0004243979470983024, "loss": 5.5585, "step": 1075 }, { "epoch": 0.08528953031529486, "grad_norm": 0.4087988387270972, "learning_rate": 0.00042637189103829457, "loss": 5.5921, "step": 1080 }, { "epoch": 0.08568438925193975, "grad_norm": 0.4090485537073061, "learning_rate": 0.0004283458349782866, "loss": 5.4642, "step": 1085 }, { "epoch": 0.08607924818858463, "grad_norm": 0.3622690537870435, "learning_rate": 0.00043031977891827875, "loss": 5.6926, "step": 1090 }, { "epoch": 0.08647410712522952, "grad_norm": 0.3509600893330048, "learning_rate": 0.00043229372285827084, "loss": 5.6076, "step": 1095 }, { "epoch": 0.0868689660618744, "grad_norm": 0.45895319268406476, "learning_rate": 0.00043426766679826293, "loss": 5.5776, "step": 1100 }, { "epoch": 0.08726382499851928, "grad_norm": 0.46360555622412886, "learning_rate": 0.000436241610738255, "loss": 5.4998, "step": 1105 }, { "epoch": 0.08765868393516417, "grad_norm": 0.5483639295790057, "learning_rate": 0.0004382155546782471, "loss": 5.5248, "step": 1110 }, { "epoch": 0.08805354287180904, "grad_norm": 0.6394774926096858, "learning_rate": 0.00044018949861823926, "loss": 5.7142, "step": 1115 }, { "epoch": 0.08844840180845392, "grad_norm": 0.40461947885948985, "learning_rate": 0.00044216344255823136, "loss": 5.6089, "step": 1120 }, { "epoch": 0.08884326074509881, "grad_norm": 0.37817328261244954, "learning_rate": 0.00044413738649822345, "loss": 5.477, "step": 1125 }, { "epoch": 0.0892381196817437, "grad_norm": 0.4513567122126569, "learning_rate": 0.00044611133043821554, "loss": 5.522, "step": 1130 }, { "epoch": 0.08963297861838858, "grad_norm": 0.4481733947148505, "learning_rate": 0.0004480852743782077, "loss": 5.443, "step": 1135 }, { "epoch": 0.09002783755503346, "grad_norm": 0.23167196539178603, "learning_rate": 0.0004500592183181998, "loss": 5.4841, "step": 1140 }, { "epoch": 0.09042269649167835, "grad_norm": 0.4780157829653467, "learning_rate": 0.00045203316225819187, "loss": 5.3796, "step": 1145 }, { "epoch": 0.09081755542832323, "grad_norm": 0.40201537865266335, "learning_rate": 0.00045400710619818396, "loss": 5.4761, "step": 1150 }, { "epoch": 0.09121241436496812, "grad_norm": 0.4132857236486611, "learning_rate": 0.0004559810501381761, "loss": 5.362, "step": 1155 }, { "epoch": 0.091607273301613, "grad_norm": 0.5168863305285437, "learning_rate": 0.0004579549940781682, "loss": 5.5047, "step": 1160 }, { "epoch": 0.09200213223825789, "grad_norm": 0.5130229692146594, "learning_rate": 0.0004599289380181603, "loss": 5.3309, "step": 1165 }, { "epoch": 0.09239699117490277, "grad_norm": 0.6589045661009468, "learning_rate": 0.0004619028819581524, "loss": 5.2951, "step": 1170 }, { "epoch": 0.09279185011154766, "grad_norm": 0.5157536093782933, "learning_rate": 0.0004638768258981445, "loss": 5.2793, "step": 1175 }, { "epoch": 0.09318670904819254, "grad_norm": 0.5381638063355653, "learning_rate": 0.0004658507698381366, "loss": 5.3723, "step": 1180 }, { "epoch": 0.09358156798483741, "grad_norm": 0.6446865260052062, "learning_rate": 0.0004678247137781287, "loss": 5.6301, "step": 1185 }, { "epoch": 0.0939764269214823, "grad_norm": 0.6087521976946316, "learning_rate": 0.0004697986577181208, "loss": 5.307, "step": 1190 }, { "epoch": 0.09437128585812718, "grad_norm": 0.34685992018766665, "learning_rate": 0.00047177260165811295, "loss": 5.3162, "step": 1195 }, { "epoch": 0.09476614479477206, "grad_norm": 0.44930910313418027, "learning_rate": 0.00047374654559810504, "loss": 5.3767, "step": 1200 }, { "epoch": 0.09516100373141695, "grad_norm": 0.36534550746205224, "learning_rate": 0.0004757204895380971, "loss": 5.2815, "step": 1205 }, { "epoch": 0.09555586266806183, "grad_norm": 0.5117846364836162, "learning_rate": 0.0004776944334780892, "loss": 5.2505, "step": 1210 }, { "epoch": 0.09595072160470672, "grad_norm": 0.39824942446018974, "learning_rate": 0.0004796683774180813, "loss": 5.2689, "step": 1215 }, { "epoch": 0.0963455805413516, "grad_norm": 0.2402869202094707, "learning_rate": 0.00048164232135807346, "loss": 5.2608, "step": 1220 }, { "epoch": 0.09674043947799649, "grad_norm": 0.4448882053999812, "learning_rate": 0.00048361626529806555, "loss": 5.262, "step": 1225 }, { "epoch": 0.09713529841464137, "grad_norm": 0.4206154973132127, "learning_rate": 0.00048559020923805764, "loss": 5.3984, "step": 1230 }, { "epoch": 0.09753015735128626, "grad_norm": 0.3427094638842304, "learning_rate": 0.00048756415317804974, "loss": 5.2747, "step": 1235 }, { "epoch": 0.09792501628793114, "grad_norm": 0.2720836169696419, "learning_rate": 0.0004895380971180419, "loss": 5.1758, "step": 1240 }, { "epoch": 0.09831987522457603, "grad_norm": 0.49694296372059854, "learning_rate": 0.000491512041058034, "loss": 5.2174, "step": 1245 }, { "epoch": 0.09871473416122091, "grad_norm": 0.2873550711297647, "learning_rate": 0.0004934859849980261, "loss": 5.1871, "step": 1250 }, { "epoch": 0.09910959309786578, "grad_norm": 0.21603048759389612, "learning_rate": 0.0004954599289380182, "loss": 5.0973, "step": 1255 }, { "epoch": 0.09950445203451067, "grad_norm": 0.2340670762343812, "learning_rate": 0.0004974338728780102, "loss": 5.1439, "step": 1260 }, { "epoch": 0.09989931097115555, "grad_norm": 0.3251826248226323, "learning_rate": 0.0004994078168180024, "loss": 5.1086, "step": 1265 }, { "epoch": 0.10029416990780043, "grad_norm": 0.48526736117923314, "learning_rate": 0.0005013817607579944, "loss": 5.0914, "step": 1270 }, { "epoch": 0.10068902884444532, "grad_norm": 0.4138709089029035, "learning_rate": 0.0005033557046979866, "loss": 5.1171, "step": 1275 }, { "epoch": 0.1010838877810902, "grad_norm": 0.4822162439253974, "learning_rate": 0.0005053296486379787, "loss": 5.024, "step": 1280 }, { "epoch": 0.10147874671773509, "grad_norm": 0.3363021334609675, "learning_rate": 0.0005073035925779708, "loss": 5.2199, "step": 1285 }, { "epoch": 0.10187360565437997, "grad_norm": 0.4229519999185117, "learning_rate": 0.0005092775365179629, "loss": 5.0435, "step": 1290 }, { "epoch": 0.10226846459102486, "grad_norm": 0.3262526944989459, "learning_rate": 0.000511251480457955, "loss": 5.1631, "step": 1295 }, { "epoch": 0.10266332352766974, "grad_norm": 0.6160694233337705, "learning_rate": 0.0005132254243979471, "loss": 5.2788, "step": 1300 }, { "epoch": 0.10305818246431463, "grad_norm": 0.38530194044276045, "learning_rate": 0.0005151993683379392, "loss": 5.2559, "step": 1305 }, { "epoch": 0.10345304140095951, "grad_norm": 0.33393651670317126, "learning_rate": 0.0005171733122779313, "loss": 5.1102, "step": 1310 }, { "epoch": 0.1038479003376044, "grad_norm": 0.4040062963703004, "learning_rate": 0.0005191472562179235, "loss": 5.0042, "step": 1315 }, { "epoch": 0.10424275927424928, "grad_norm": 0.34923577678774925, "learning_rate": 0.0005211212001579156, "loss": 5.2072, "step": 1320 }, { "epoch": 0.10463761821089416, "grad_norm": 0.33948022067872474, "learning_rate": 0.0005230951440979075, "loss": 4.9793, "step": 1325 }, { "epoch": 0.10503247714753904, "grad_norm": 0.2200400540356678, "learning_rate": 0.0005250690880378997, "loss": 5.0128, "step": 1330 }, { "epoch": 0.10542733608418392, "grad_norm": 0.3028888199597662, "learning_rate": 0.0005270430319778918, "loss": 5.1915, "step": 1335 }, { "epoch": 0.1058221950208288, "grad_norm": 0.3122224333258491, "learning_rate": 0.0005290169759178839, "loss": 5.0435, "step": 1340 }, { "epoch": 0.10621705395747369, "grad_norm": 0.30838784168256583, "learning_rate": 0.000530990919857876, "loss": 4.9936, "step": 1345 }, { "epoch": 0.10661191289411857, "grad_norm": 0.3844539134084894, "learning_rate": 0.0005329648637978681, "loss": 5.2069, "step": 1350 }, { "epoch": 0.10700677183076346, "grad_norm": 0.5176720360735406, "learning_rate": 0.0005349388077378603, "loss": 5.0015, "step": 1355 }, { "epoch": 0.10740163076740834, "grad_norm": 0.5000073276206142, "learning_rate": 0.0005369127516778524, "loss": 5.1263, "step": 1360 }, { "epoch": 0.10779648970405323, "grad_norm": 0.4675860571702226, "learning_rate": 0.0005388866956178444, "loss": 5.0648, "step": 1365 }, { "epoch": 0.10819134864069811, "grad_norm": 0.4387843877577823, "learning_rate": 0.0005408606395578366, "loss": 5.0702, "step": 1370 }, { "epoch": 0.108586207577343, "grad_norm": 0.2608299523894895, "learning_rate": 0.0005428345834978287, "loss": 5.1072, "step": 1375 }, { "epoch": 0.10898106651398788, "grad_norm": 0.34371837006466605, "learning_rate": 0.0005448085274378208, "loss": 5.0818, "step": 1380 }, { "epoch": 0.10937592545063277, "grad_norm": 0.4126287497776492, "learning_rate": 0.0005467824713778129, "loss": 5.1494, "step": 1385 }, { "epoch": 0.10977078438727765, "grad_norm": 0.2383233117698553, "learning_rate": 0.000548756415317805, "loss": 5.0046, "step": 1390 }, { "epoch": 0.11016564332392254, "grad_norm": 0.4335168089820936, "learning_rate": 0.0005507303592577972, "loss": 5.2954, "step": 1395 }, { "epoch": 0.1105605022605674, "grad_norm": 0.3643314871843505, "learning_rate": 0.0005527043031977891, "loss": 4.9044, "step": 1400 }, { "epoch": 0.11095536119721229, "grad_norm": 0.6413961839896176, "learning_rate": 0.0005546782471377812, "loss": 5.2179, "step": 1405 }, { "epoch": 0.11135022013385718, "grad_norm": 0.5368334164019187, "learning_rate": 0.0005566521910777734, "loss": 5.1059, "step": 1410 }, { "epoch": 0.11174507907050206, "grad_norm": 0.4708640695460941, "learning_rate": 0.0005586261350177655, "loss": 5.2294, "step": 1415 }, { "epoch": 0.11213993800714694, "grad_norm": 0.3409024627275843, "learning_rate": 0.0005606000789577576, "loss": 4.9452, "step": 1420 }, { "epoch": 0.11253479694379183, "grad_norm": 0.4549998378470075, "learning_rate": 0.0005625740228977497, "loss": 5.0055, "step": 1425 }, { "epoch": 0.11292965588043671, "grad_norm": 0.2647872488675996, "learning_rate": 0.0005645479668377418, "loss": 5.1681, "step": 1430 }, { "epoch": 0.1133245148170816, "grad_norm": 0.2667836791481634, "learning_rate": 0.000566521910777734, "loss": 4.9135, "step": 1435 }, { "epoch": 0.11371937375372648, "grad_norm": 0.37438748559230306, "learning_rate": 0.000568495854717726, "loss": 4.7973, "step": 1440 }, { "epoch": 0.11411423269037137, "grad_norm": 0.3838814057893405, "learning_rate": 0.0005704697986577181, "loss": 4.939, "step": 1445 }, { "epoch": 0.11450909162701625, "grad_norm": 0.3993175251665644, "learning_rate": 0.0005724437425977103, "loss": 5.1851, "step": 1450 }, { "epoch": 0.11490395056366114, "grad_norm": 0.23860216667820236, "learning_rate": 0.0005744176865377024, "loss": 4.7918, "step": 1455 }, { "epoch": 0.11529880950030602, "grad_norm": 0.24605144687160868, "learning_rate": 0.0005763916304776945, "loss": 5.0345, "step": 1460 }, { "epoch": 0.1156936684369509, "grad_norm": 0.366896631014013, "learning_rate": 0.0005783655744176865, "loss": 4.8976, "step": 1465 }, { "epoch": 0.11608852737359578, "grad_norm": 0.3300810354393011, "learning_rate": 0.0005803395183576786, "loss": 4.8611, "step": 1470 }, { "epoch": 0.11648338631024066, "grad_norm": 0.26216057038770324, "learning_rate": 0.0005823134622976708, "loss": 4.9389, "step": 1475 }, { "epoch": 0.11687824524688555, "grad_norm": 0.44125658985328114, "learning_rate": 0.0005842874062376628, "loss": 5.009, "step": 1480 }, { "epoch": 0.11727310418353043, "grad_norm": 0.4460591470429377, "learning_rate": 0.0005862613501776549, "loss": 5.0108, "step": 1485 }, { "epoch": 0.11766796312017531, "grad_norm": 0.29394175246657916, "learning_rate": 0.0005882352941176471, "loss": 4.7984, "step": 1490 }, { "epoch": 0.1180628220568202, "grad_norm": 0.3160742449398314, "learning_rate": 0.0005902092380576391, "loss": 4.7702, "step": 1495 }, { "epoch": 0.11845768099346508, "grad_norm": 0.35372623968780775, "learning_rate": 0.0005921831819976313, "loss": 4.827, "step": 1500 }, { "epoch": 0.11885253993010997, "grad_norm": 0.30380627251270753, "learning_rate": 0.0005941571259376234, "loss": 4.8013, "step": 1505 }, { "epoch": 0.11924739886675485, "grad_norm": 0.35578870286949194, "learning_rate": 0.0005961310698776155, "loss": 4.7189, "step": 1510 }, { "epoch": 0.11964225780339974, "grad_norm": 0.2493264590820699, "learning_rate": 0.0005981050138176076, "loss": 4.7841, "step": 1515 }, { "epoch": 0.12003711674004462, "grad_norm": 0.6969884860752366, "learning_rate": 0.0006000789577575997, "loss": 4.8647, "step": 1520 }, { "epoch": 0.1204319756766895, "grad_norm": 0.44892074776789986, "learning_rate": 0.0006020529016975918, "loss": 4.8749, "step": 1525 }, { "epoch": 0.12082683461333439, "grad_norm": 0.4672457656789041, "learning_rate": 0.000604026845637584, "loss": 4.8634, "step": 1530 }, { "epoch": 0.12122169354997928, "grad_norm": 0.2882904488659362, "learning_rate": 0.0006060007895775759, "loss": 4.9333, "step": 1535 }, { "epoch": 0.12161655248662416, "grad_norm": 0.5141908454283838, "learning_rate": 0.0006079747335175681, "loss": 4.9825, "step": 1540 }, { "epoch": 0.12201141142326903, "grad_norm": 0.41428310447282796, "learning_rate": 0.0006099486774575602, "loss": 4.9882, "step": 1545 }, { "epoch": 0.12240627035991392, "grad_norm": 0.40785190303925, "learning_rate": 0.0006119226213975523, "loss": 4.7024, "step": 1550 }, { "epoch": 0.1228011292965588, "grad_norm": 0.32292450583937765, "learning_rate": 0.0006138965653375444, "loss": 4.6701, "step": 1555 }, { "epoch": 0.12319598823320368, "grad_norm": 0.2836447303890162, "learning_rate": 0.0006158705092775365, "loss": 4.8447, "step": 1560 }, { "epoch": 0.12359084716984857, "grad_norm": 0.41450840359601965, "learning_rate": 0.0006178444532175287, "loss": 4.8764, "step": 1565 }, { "epoch": 0.12398570610649345, "grad_norm": 0.39131798049027733, "learning_rate": 0.0006198183971575208, "loss": 4.7784, "step": 1570 }, { "epoch": 0.12438056504313834, "grad_norm": 0.315113962171956, "learning_rate": 0.0006217923410975128, "loss": 4.7705, "step": 1575 }, { "epoch": 0.12477542397978322, "grad_norm": 0.4004275159962067, "learning_rate": 0.000623766285037505, "loss": 4.9853, "step": 1580 }, { "epoch": 0.1251702829164281, "grad_norm": 0.324812205130933, "learning_rate": 0.0006257402289774971, "loss": 4.6791, "step": 1585 }, { "epoch": 0.125565141853073, "grad_norm": 0.31999875921570775, "learning_rate": 0.0006277141729174892, "loss": 4.9313, "step": 1590 }, { "epoch": 0.12596000078971786, "grad_norm": 0.24036308040583124, "learning_rate": 0.0006296881168574813, "loss": 4.7828, "step": 1595 }, { "epoch": 0.12635485972636276, "grad_norm": 0.31228125506672355, "learning_rate": 0.0006316620607974733, "loss": 4.7694, "step": 1600 }, { "epoch": 0.12674971866300763, "grad_norm": 0.32454392579540564, "learning_rate": 0.0006336360047374655, "loss": 4.8395, "step": 1605 }, { "epoch": 0.12714457759965253, "grad_norm": 0.23230289122009745, "learning_rate": 0.0006356099486774575, "loss": 4.6793, "step": 1610 }, { "epoch": 0.1275394365362974, "grad_norm": 0.21522214939232653, "learning_rate": 0.0006375838926174496, "loss": 4.8028, "step": 1615 }, { "epoch": 0.1279342954729423, "grad_norm": 0.20744759015103256, "learning_rate": 0.0006395578365574418, "loss": 4.8135, "step": 1620 }, { "epoch": 0.12832915440958717, "grad_norm": 0.33435499320509476, "learning_rate": 0.0006415317804974339, "loss": 4.7892, "step": 1625 }, { "epoch": 0.12872401334623207, "grad_norm": 0.40296413755792543, "learning_rate": 0.0006435057244374259, "loss": 4.8773, "step": 1630 }, { "epoch": 0.12911887228287694, "grad_norm": 0.3904003998781514, "learning_rate": 0.0006454796683774181, "loss": 4.7804, "step": 1635 }, { "epoch": 0.12951373121952184, "grad_norm": 0.401706774911394, "learning_rate": 0.0006474536123174102, "loss": 4.921, "step": 1640 }, { "epoch": 0.1299085901561667, "grad_norm": 0.5045332985683723, "learning_rate": 0.0006494275562574024, "loss": 4.7535, "step": 1645 }, { "epoch": 0.13030344909281158, "grad_norm": 0.4009516168592568, "learning_rate": 0.0006514015001973944, "loss": 4.8195, "step": 1650 }, { "epoch": 0.13069830802945648, "grad_norm": 0.3347889295366546, "learning_rate": 0.0006533754441373865, "loss": 4.8747, "step": 1655 }, { "epoch": 0.13109316696610135, "grad_norm": 0.2754088211287113, "learning_rate": 0.0006553493880773787, "loss": 4.7244, "step": 1660 }, { "epoch": 0.13148802590274625, "grad_norm": 0.369984800950091, "learning_rate": 0.0006573233320173708, "loss": 4.7962, "step": 1665 }, { "epoch": 0.13188288483939112, "grad_norm": 0.2379217523753239, "learning_rate": 0.0006592972759573627, "loss": 4.688, "step": 1670 }, { "epoch": 0.13227774377603602, "grad_norm": 0.2593339762095257, "learning_rate": 0.0006612712198973549, "loss": 4.6821, "step": 1675 }, { "epoch": 0.1326726027126809, "grad_norm": 0.24074157336416632, "learning_rate": 0.000663245163837347, "loss": 4.5883, "step": 1680 }, { "epoch": 0.13306746164932579, "grad_norm": 0.34874559341229167, "learning_rate": 0.0006652191077773392, "loss": 4.7114, "step": 1685 }, { "epoch": 0.13346232058597066, "grad_norm": 0.5202647968450888, "learning_rate": 0.0006671930517173312, "loss": 4.982, "step": 1690 }, { "epoch": 0.13385717952261555, "grad_norm": 0.31676968328032207, "learning_rate": 0.0006691669956573233, "loss": 4.6246, "step": 1695 }, { "epoch": 0.13425203845926043, "grad_norm": 0.36833363610735487, "learning_rate": 0.0006711409395973155, "loss": 4.6356, "step": 1700 }, { "epoch": 0.13464689739590532, "grad_norm": 0.424557645216495, "learning_rate": 0.0006731148835373075, "loss": 4.6325, "step": 1705 }, { "epoch": 0.1350417563325502, "grad_norm": 0.2542689322714413, "learning_rate": 0.0006750888274772996, "loss": 4.5375, "step": 1710 }, { "epoch": 0.1354366152691951, "grad_norm": 0.22557270951015862, "learning_rate": 0.0006770627714172918, "loss": 4.5439, "step": 1715 }, { "epoch": 0.13583147420583996, "grad_norm": 0.32933408202962194, "learning_rate": 0.0006790367153572839, "loss": 4.7191, "step": 1720 }, { "epoch": 0.13622633314248483, "grad_norm": 0.19308555002892336, "learning_rate": 0.000681010659297276, "loss": 4.5488, "step": 1725 }, { "epoch": 0.13662119207912973, "grad_norm": 0.26516876951578666, "learning_rate": 0.0006829846032372681, "loss": 4.4126, "step": 1730 }, { "epoch": 0.1370160510157746, "grad_norm": 0.2526634910777325, "learning_rate": 0.0006849585471772602, "loss": 4.7143, "step": 1735 }, { "epoch": 0.1374109099524195, "grad_norm": 0.4908267253815443, "learning_rate": 0.0006869324911172524, "loss": 4.8489, "step": 1740 }, { "epoch": 0.13780576888906437, "grad_norm": 0.2914977480115424, "learning_rate": 0.0006889064350572443, "loss": 4.5042, "step": 1745 }, { "epoch": 0.13820062782570927, "grad_norm": 0.2868405341338058, "learning_rate": 0.0006908803789972365, "loss": 4.6109, "step": 1750 }, { "epoch": 0.13859548676235414, "grad_norm": 0.5330706346070812, "learning_rate": 0.0006928543229372286, "loss": 4.6145, "step": 1755 }, { "epoch": 0.13899034569899904, "grad_norm": 0.2803533973930595, "learning_rate": 0.0006948282668772207, "loss": 4.791, "step": 1760 }, { "epoch": 0.1393852046356439, "grad_norm": 0.3257459030763324, "learning_rate": 0.0006968022108172128, "loss": 4.5154, "step": 1765 }, { "epoch": 0.1397800635722888, "grad_norm": 0.26934055979374577, "learning_rate": 0.0006987761547572049, "loss": 4.5353, "step": 1770 }, { "epoch": 0.14017492250893368, "grad_norm": 0.2620714311080178, "learning_rate": 0.000700750098697197, "loss": 4.6054, "step": 1775 }, { "epoch": 0.14056978144557858, "grad_norm": 0.40741783186273384, "learning_rate": 0.0007027240426371892, "loss": 4.5923, "step": 1780 }, { "epoch": 0.14096464038222345, "grad_norm": 0.262350472716846, "learning_rate": 0.0007046979865771812, "loss": 4.5251, "step": 1785 }, { "epoch": 0.14135949931886835, "grad_norm": 0.26866333093762507, "learning_rate": 0.0007066719305171734, "loss": 4.5456, "step": 1790 }, { "epoch": 0.14175435825551322, "grad_norm": 0.2850813686561735, "learning_rate": 0.0007086458744571655, "loss": 4.5749, "step": 1795 }, { "epoch": 0.1421492171921581, "grad_norm": 0.2805995109317244, "learning_rate": 0.0007106198183971575, "loss": 4.6351, "step": 1800 }, { "epoch": 0.142544076128803, "grad_norm": 0.2665348959727934, "learning_rate": 0.0007125937623371497, "loss": 4.4927, "step": 1805 }, { "epoch": 0.14293893506544786, "grad_norm": 0.3817199191761178, "learning_rate": 0.0007145677062771417, "loss": 4.6891, "step": 1810 }, { "epoch": 0.14333379400209276, "grad_norm": 0.5009196354063228, "learning_rate": 0.0007165416502171338, "loss": 4.9201, "step": 1815 }, { "epoch": 0.14372865293873763, "grad_norm": 0.358329838407527, "learning_rate": 0.0007185155941571259, "loss": 4.6015, "step": 1820 }, { "epoch": 0.14412351187538253, "grad_norm": 0.31771914973334, "learning_rate": 0.000720489538097118, "loss": 4.3853, "step": 1825 }, { "epoch": 0.1445183708120274, "grad_norm": 0.5464677434635412, "learning_rate": 0.0007224634820371102, "loss": 4.7027, "step": 1830 }, { "epoch": 0.1449132297486723, "grad_norm": 0.3788353940795815, "learning_rate": 0.0007244374259771023, "loss": 4.5066, "step": 1835 }, { "epoch": 0.14530808868531717, "grad_norm": 0.3080867174211892, "learning_rate": 0.0007264113699170943, "loss": 4.699, "step": 1840 }, { "epoch": 0.14570294762196206, "grad_norm": 0.5450493460888692, "learning_rate": 0.0007283853138570865, "loss": 4.5943, "step": 1845 }, { "epoch": 0.14609780655860694, "grad_norm": 0.3999091147328748, "learning_rate": 0.0007303592577970786, "loss": 4.6554, "step": 1850 }, { "epoch": 0.14649266549525183, "grad_norm": 0.28645698058738983, "learning_rate": 0.0007323332017370707, "loss": 4.6309, "step": 1855 }, { "epoch": 0.1468875244318967, "grad_norm": 0.31265838654493133, "learning_rate": 0.0007343071456770628, "loss": 4.533, "step": 1860 }, { "epoch": 0.14728238336854158, "grad_norm": 0.2634997447880731, "learning_rate": 0.0007362810896170549, "loss": 4.544, "step": 1865 }, { "epoch": 0.14767724230518647, "grad_norm": 0.32227535259451023, "learning_rate": 0.0007382550335570471, "loss": 4.4826, "step": 1870 }, { "epoch": 0.14807210124183134, "grad_norm": 0.22347145736693788, "learning_rate": 0.0007402289774970392, "loss": 4.5779, "step": 1875 }, { "epoch": 0.14846696017847624, "grad_norm": 0.253397448607278, "learning_rate": 0.0007422029214370311, "loss": 4.5413, "step": 1880 }, { "epoch": 0.1488618191151211, "grad_norm": 0.2734035922863676, "learning_rate": 0.0007441768653770233, "loss": 4.4496, "step": 1885 }, { "epoch": 0.149256678051766, "grad_norm": 0.44049798705336063, "learning_rate": 0.0007461508093170154, "loss": 4.7918, "step": 1890 }, { "epoch": 0.14965153698841088, "grad_norm": 0.28528655853077023, "learning_rate": 0.0007481247532570074, "loss": 4.3549, "step": 1895 }, { "epoch": 0.15004639592505578, "grad_norm": 0.42991584027857743, "learning_rate": 0.0007500986971969996, "loss": 4.4546, "step": 1900 }, { "epoch": 0.15044125486170065, "grad_norm": 0.2566996277504709, "learning_rate": 0.0007520726411369917, "loss": 4.4376, "step": 1905 }, { "epoch": 0.15083611379834555, "grad_norm": 0.31796371821304414, "learning_rate": 0.0007540465850769839, "loss": 4.5751, "step": 1910 }, { "epoch": 0.15123097273499042, "grad_norm": 0.24766838818705045, "learning_rate": 0.0007560205290169759, "loss": 4.3845, "step": 1915 }, { "epoch": 0.15162583167163532, "grad_norm": 0.36266786283978286, "learning_rate": 0.000757994472956968, "loss": 4.9947, "step": 1920 }, { "epoch": 0.1520206906082802, "grad_norm": 0.24569326784795345, "learning_rate": 0.0007599684168969602, "loss": 4.6245, "step": 1925 }, { "epoch": 0.1524155495449251, "grad_norm": 0.2839997454201994, "learning_rate": 0.0007619423608369523, "loss": 4.4036, "step": 1930 }, { "epoch": 0.15281040848156996, "grad_norm": 0.30015585936058264, "learning_rate": 0.0007639163047769444, "loss": 4.5104, "step": 1935 }, { "epoch": 0.15320526741821483, "grad_norm": 0.5012980799392976, "learning_rate": 0.0007658902487169365, "loss": 4.8613, "step": 1940 }, { "epoch": 0.15360012635485973, "grad_norm": 0.31813489129835976, "learning_rate": 0.0007678641926569285, "loss": 4.5745, "step": 1945 }, { "epoch": 0.1539949852915046, "grad_norm": 0.2524278320250973, "learning_rate": 0.0007698381365969207, "loss": 4.5468, "step": 1950 }, { "epoch": 0.1543898442281495, "grad_norm": 0.2584214277469108, "learning_rate": 0.0007718120805369127, "loss": 4.5984, "step": 1955 }, { "epoch": 0.15478470316479437, "grad_norm": 0.29981646065793416, "learning_rate": 0.0007737860244769048, "loss": 4.7476, "step": 1960 }, { "epoch": 0.15517956210143927, "grad_norm": 0.2641774331266728, "learning_rate": 0.000775759968416897, "loss": 4.61, "step": 1965 }, { "epoch": 0.15557442103808414, "grad_norm": 0.2702147575505111, "learning_rate": 0.0007777339123568891, "loss": 4.561, "step": 1970 }, { "epoch": 0.15596927997472904, "grad_norm": 0.2369204507612005, "learning_rate": 0.0007797078562968812, "loss": 4.4701, "step": 1975 }, { "epoch": 0.1563641389113739, "grad_norm": 0.35635550257129356, "learning_rate": 0.0007816818002368733, "loss": 4.6504, "step": 1980 }, { "epoch": 0.1567589978480188, "grad_norm": 0.47170603991890286, "learning_rate": 0.0007836557441768654, "loss": 4.4583, "step": 1985 }, { "epoch": 0.15715385678466368, "grad_norm": 0.31365985686753906, "learning_rate": 0.0007856296881168576, "loss": 4.6068, "step": 1990 }, { "epoch": 0.15754871572130857, "grad_norm": 0.35542779004718855, "learning_rate": 0.0007876036320568496, "loss": 4.367, "step": 1995 }, { "epoch": 0.15794357465795344, "grad_norm": 0.3487480626168087, "learning_rate": 0.0007895775759968417, "loss": 4.4722, "step": 2000 }, { "epoch": 0.15833843359459834, "grad_norm": 0.31828684812992664, "learning_rate": 0.0007915515199368339, "loss": 4.4546, "step": 2005 }, { "epoch": 0.15873329253124321, "grad_norm": 0.1975697746095403, "learning_rate": 0.0007935254638768258, "loss": 4.4239, "step": 2010 }, { "epoch": 0.15912815146788808, "grad_norm": 0.2395943468615528, "learning_rate": 0.000795499407816818, "loss": 4.4112, "step": 2015 }, { "epoch": 0.15952301040453298, "grad_norm": 0.2280102867660075, "learning_rate": 0.0007974733517568101, "loss": 4.3954, "step": 2020 }, { "epoch": 0.15991786934117785, "grad_norm": 0.24737288368797364, "learning_rate": 0.0007994472956968022, "loss": 4.7205, "step": 2025 }, { "epoch": 0.16031272827782275, "grad_norm": 0.26338872250169587, "learning_rate": 0.0008014212396367943, "loss": 4.4582, "step": 2030 }, { "epoch": 0.16070758721446762, "grad_norm": 0.26572133536835063, "learning_rate": 0.0008033951835767864, "loss": 4.5537, "step": 2035 }, { "epoch": 0.16110244615111252, "grad_norm": 0.3544901686306195, "learning_rate": 0.0008053691275167785, "loss": 4.3974, "step": 2040 }, { "epoch": 0.1614973050877574, "grad_norm": 0.2973980542353721, "learning_rate": 0.0008073430714567707, "loss": 4.4285, "step": 2045 }, { "epoch": 0.1618921640244023, "grad_norm": 0.2756391803524288, "learning_rate": 0.0008093170153967627, "loss": 4.5561, "step": 2050 }, { "epoch": 0.16228702296104716, "grad_norm": 0.17381019421558885, "learning_rate": 0.0008112909593367549, "loss": 4.3937, "step": 2055 }, { "epoch": 0.16268188189769206, "grad_norm": 0.19057503547012677, "learning_rate": 0.000813264903276747, "loss": 4.4285, "step": 2060 }, { "epoch": 0.16307674083433693, "grad_norm": 0.19191511192363878, "learning_rate": 0.0008152388472167391, "loss": 4.6056, "step": 2065 }, { "epoch": 0.16347159977098183, "grad_norm": 0.22576029630270844, "learning_rate": 0.0008172127911567312, "loss": 4.333, "step": 2070 }, { "epoch": 0.1638664587076267, "grad_norm": 0.27567825980821425, "learning_rate": 0.0008191867350967233, "loss": 4.4868, "step": 2075 }, { "epoch": 0.16426131764427157, "grad_norm": 0.23721799432151183, "learning_rate": 0.0008211606790367153, "loss": 4.467, "step": 2080 }, { "epoch": 0.16465617658091647, "grad_norm": 0.20087811173734332, "learning_rate": 0.0008231346229767075, "loss": 4.2613, "step": 2085 }, { "epoch": 0.16505103551756134, "grad_norm": 0.17124281481344572, "learning_rate": 0.0008251085669166995, "loss": 4.3992, "step": 2090 }, { "epoch": 0.16544589445420624, "grad_norm": 0.30544727894048684, "learning_rate": 0.0008270825108566917, "loss": 4.4277, "step": 2095 }, { "epoch": 0.1658407533908511, "grad_norm": 0.26455826029586654, "learning_rate": 0.0008290564547966838, "loss": 4.3689, "step": 2100 }, { "epoch": 0.166235612327496, "grad_norm": 0.2670026920902779, "learning_rate": 0.0008310303987366758, "loss": 4.5091, "step": 2105 }, { "epoch": 0.16663047126414088, "grad_norm": 0.22708469121777902, "learning_rate": 0.000833004342676668, "loss": 4.3129, "step": 2110 }, { "epoch": 0.16702533020078578, "grad_norm": 0.24131239511618474, "learning_rate": 0.0008349782866166601, "loss": 4.3003, "step": 2115 }, { "epoch": 0.16742018913743065, "grad_norm": 0.30023201046275066, "learning_rate": 0.0008369522305566523, "loss": 4.4298, "step": 2120 }, { "epoch": 0.16781504807407555, "grad_norm": 0.3513573450004193, "learning_rate": 0.0008389261744966443, "loss": 4.4488, "step": 2125 }, { "epoch": 0.16820990701072042, "grad_norm": 0.26036788316545717, "learning_rate": 0.0008409001184366364, "loss": 4.4296, "step": 2130 }, { "epoch": 0.16860476594736531, "grad_norm": 0.3012584974099911, "learning_rate": 0.0008428740623766286, "loss": 4.4902, "step": 2135 }, { "epoch": 0.16899962488401019, "grad_norm": 0.1826439015209678, "learning_rate": 0.0008448480063166207, "loss": 4.262, "step": 2140 }, { "epoch": 0.16939448382065508, "grad_norm": 0.17821528601831474, "learning_rate": 0.0008468219502566126, "loss": 4.2557, "step": 2145 }, { "epoch": 0.16978934275729995, "grad_norm": 0.17904671639659353, "learning_rate": 0.0008487958941966048, "loss": 4.2965, "step": 2150 }, { "epoch": 0.17018420169394483, "grad_norm": 0.26488560502358777, "learning_rate": 0.0008507698381365969, "loss": 4.4597, "step": 2155 }, { "epoch": 0.17057906063058972, "grad_norm": 0.18749889046840848, "learning_rate": 0.0008527437820765891, "loss": 4.3533, "step": 2160 }, { "epoch": 0.1709739195672346, "grad_norm": 0.27161360843969085, "learning_rate": 0.0008547177260165811, "loss": 4.4747, "step": 2165 }, { "epoch": 0.1713687785038795, "grad_norm": 0.27399206251937563, "learning_rate": 0.0008566916699565732, "loss": 4.322, "step": 2170 }, { "epoch": 0.17176363744052436, "grad_norm": 0.2513749636118227, "learning_rate": 0.0008586656138965654, "loss": 4.5214, "step": 2175 }, { "epoch": 0.17215849637716926, "grad_norm": 0.19837814641800752, "learning_rate": 0.0008606395578365575, "loss": 4.4843, "step": 2180 }, { "epoch": 0.17255335531381413, "grad_norm": 0.2159167322619068, "learning_rate": 0.0008626135017765495, "loss": 4.5683, "step": 2185 }, { "epoch": 0.17294821425045903, "grad_norm": 0.2704292233859428, "learning_rate": 0.0008645874457165417, "loss": 4.41, "step": 2190 }, { "epoch": 0.1733430731871039, "grad_norm": 0.22647521214543578, "learning_rate": 0.0008665613896565338, "loss": 4.313, "step": 2195 }, { "epoch": 0.1737379321237488, "grad_norm": 0.29694247658350764, "learning_rate": 0.0008685353335965259, "loss": 4.4068, "step": 2200 }, { "epoch": 0.17413279106039367, "grad_norm": 0.3405716201004796, "learning_rate": 0.000870509277536518, "loss": 4.401, "step": 2205 }, { "epoch": 0.17452764999703857, "grad_norm": 0.25424638038348923, "learning_rate": 0.00087248322147651, "loss": 4.1341, "step": 2210 }, { "epoch": 0.17492250893368344, "grad_norm": 0.31684257270750815, "learning_rate": 0.0008744571654165023, "loss": 4.2878, "step": 2215 }, { "epoch": 0.17531736787032834, "grad_norm": 0.3092118647111602, "learning_rate": 0.0008764311093564942, "loss": 4.4607, "step": 2220 }, { "epoch": 0.1757122268069732, "grad_norm": 0.3056047349263378, "learning_rate": 0.0008784050532964863, "loss": 4.3831, "step": 2225 }, { "epoch": 0.17610708574361808, "grad_norm": 0.2437971851620714, "learning_rate": 0.0008803789972364785, "loss": 4.4364, "step": 2230 }, { "epoch": 0.17650194468026298, "grad_norm": 0.33874129052313456, "learning_rate": 0.0008823529411764706, "loss": 4.5624, "step": 2235 }, { "epoch": 0.17689680361690785, "grad_norm": 0.2933023385887184, "learning_rate": 0.0008843268851164627, "loss": 4.4657, "step": 2240 }, { "epoch": 0.17729166255355275, "grad_norm": 0.26967929763399856, "learning_rate": 0.0008863008290564548, "loss": 4.334, "step": 2245 }, { "epoch": 0.17768652149019762, "grad_norm": 0.3085407522620815, "learning_rate": 0.0008882747729964469, "loss": 4.2937, "step": 2250 }, { "epoch": 0.17808138042684252, "grad_norm": 0.24793661050947355, "learning_rate": 0.0008902487169364391, "loss": 4.2189, "step": 2255 }, { "epoch": 0.1784762393634874, "grad_norm": 0.2080061486716911, "learning_rate": 0.0008922226608764311, "loss": 4.4523, "step": 2260 }, { "epoch": 0.1788710983001323, "grad_norm": 0.3688793532475759, "learning_rate": 0.0008941966048164233, "loss": 4.4746, "step": 2265 }, { "epoch": 0.17926595723677716, "grad_norm": 0.30460837633172544, "learning_rate": 0.0008961705487564154, "loss": 4.1989, "step": 2270 }, { "epoch": 0.17966081617342206, "grad_norm": 0.2297915686727774, "learning_rate": 0.0008981444926964075, "loss": 4.1884, "step": 2275 }, { "epoch": 0.18005567511006693, "grad_norm": 0.34855785970974174, "learning_rate": 0.0009001184366363996, "loss": 4.3804, "step": 2280 }, { "epoch": 0.18045053404671182, "grad_norm": 0.34056526988374214, "learning_rate": 0.0009020923805763916, "loss": 4.4417, "step": 2285 }, { "epoch": 0.1808453929833567, "grad_norm": 0.22807981282953835, "learning_rate": 0.0009040663245163837, "loss": 4.1129, "step": 2290 }, { "epoch": 0.18124025192000157, "grad_norm": 0.4891507519714791, "learning_rate": 0.0009060402684563759, "loss": 4.7221, "step": 2295 }, { "epoch": 0.18163511085664646, "grad_norm": 0.2587271926815033, "learning_rate": 0.0009080142123963679, "loss": 4.3121, "step": 2300 }, { "epoch": 0.18202996979329134, "grad_norm": 0.3891812669048577, "learning_rate": 0.0009099881563363601, "loss": 4.5094, "step": 2305 }, { "epoch": 0.18242482872993623, "grad_norm": 0.18329158808660892, "learning_rate": 0.0009119621002763522, "loss": 4.1575, "step": 2310 }, { "epoch": 0.1828196876665811, "grad_norm": 0.24555663818718187, "learning_rate": 0.0009139360442163442, "loss": 4.4204, "step": 2315 }, { "epoch": 0.183214546603226, "grad_norm": 0.24592023735228344, "learning_rate": 0.0009159099881563364, "loss": 4.403, "step": 2320 }, { "epoch": 0.18360940553987087, "grad_norm": 0.3141295681083139, "learning_rate": 0.0009178839320963285, "loss": 4.3714, "step": 2325 }, { "epoch": 0.18400426447651577, "grad_norm": 0.20627829483625812, "learning_rate": 0.0009198578760363206, "loss": 4.2146, "step": 2330 }, { "epoch": 0.18439912341316064, "grad_norm": 0.1861943809912585, "learning_rate": 0.0009218318199763127, "loss": 4.2126, "step": 2335 }, { "epoch": 0.18479398234980554, "grad_norm": 0.1965141236744414, "learning_rate": 0.0009238057639163048, "loss": 4.1449, "step": 2340 }, { "epoch": 0.1851888412864504, "grad_norm": 0.19716411292652267, "learning_rate": 0.000925779707856297, "loss": 4.0487, "step": 2345 }, { "epoch": 0.1855837002230953, "grad_norm": 0.1781510914414413, "learning_rate": 0.000927753651796289, "loss": 4.2578, "step": 2350 }, { "epoch": 0.18597855915974018, "grad_norm": 0.2583766017231438, "learning_rate": 0.000929727595736281, "loss": 4.2639, "step": 2355 }, { "epoch": 0.18637341809638508, "grad_norm": 0.2107552463011324, "learning_rate": 0.0009317015396762732, "loss": 4.4145, "step": 2360 }, { "epoch": 0.18676827703302995, "grad_norm": 0.20055987951980064, "learning_rate": 0.0009336754836162653, "loss": 4.1734, "step": 2365 }, { "epoch": 0.18716313596967482, "grad_norm": 0.2292579010819858, "learning_rate": 0.0009356494275562574, "loss": 4.2578, "step": 2370 }, { "epoch": 0.18755799490631972, "grad_norm": 0.23827219564462024, "learning_rate": 0.0009376233714962495, "loss": 4.1495, "step": 2375 }, { "epoch": 0.1879528538429646, "grad_norm": 0.24393798378816584, "learning_rate": 0.0009395973154362416, "loss": 4.337, "step": 2380 }, { "epoch": 0.1883477127796095, "grad_norm": 0.18146809886689155, "learning_rate": 0.0009415712593762338, "loss": 4.2879, "step": 2385 }, { "epoch": 0.18874257171625436, "grad_norm": 0.24862113013809822, "learning_rate": 0.0009435452033162259, "loss": 4.2761, "step": 2390 }, { "epoch": 0.18913743065289926, "grad_norm": 0.2616084902249722, "learning_rate": 0.0009455191472562179, "loss": 4.3776, "step": 2395 }, { "epoch": 0.18953228958954413, "grad_norm": 0.23312365538500326, "learning_rate": 0.0009474930911962101, "loss": 4.166, "step": 2400 }, { "epoch": 0.18992714852618903, "grad_norm": 0.3646060353265127, "learning_rate": 0.0009494670351362022, "loss": 4.479, "step": 2405 }, { "epoch": 0.1903220074628339, "grad_norm": 0.24731121420479973, "learning_rate": 0.0009514409790761942, "loss": 4.2769, "step": 2410 }, { "epoch": 0.1907168663994788, "grad_norm": 0.22351500261128454, "learning_rate": 0.0009534149230161864, "loss": 4.2634, "step": 2415 }, { "epoch": 0.19111172533612367, "grad_norm": 0.25945169534214096, "learning_rate": 0.0009553888669561784, "loss": 4.3325, "step": 2420 }, { "epoch": 0.19150658427276857, "grad_norm": 0.22323262469697103, "learning_rate": 0.0009573628108961706, "loss": 4.4071, "step": 2425 }, { "epoch": 0.19190144320941344, "grad_norm": 0.21947164858513746, "learning_rate": 0.0009593367548361626, "loss": 4.4166, "step": 2430 }, { "epoch": 0.19229630214605833, "grad_norm": 0.21605900376737924, "learning_rate": 0.0009613106987761547, "loss": 4.2768, "step": 2435 }, { "epoch": 0.1926911610827032, "grad_norm": 0.41928892466708495, "learning_rate": 0.0009632846427161469, "loss": 4.3627, "step": 2440 }, { "epoch": 0.19308602001934808, "grad_norm": 0.2266194179692957, "learning_rate": 0.000965258586656139, "loss": 4.2561, "step": 2445 }, { "epoch": 0.19348087895599297, "grad_norm": 0.3206518795128694, "learning_rate": 0.0009672325305961311, "loss": 4.519, "step": 2450 }, { "epoch": 0.19387573789263784, "grad_norm": 0.18492799547972974, "learning_rate": 0.0009692064745361232, "loss": 4.0739, "step": 2455 }, { "epoch": 0.19427059682928274, "grad_norm": 0.2206556280210194, "learning_rate": 0.0009711804184761153, "loss": 4.1172, "step": 2460 }, { "epoch": 0.19466545576592761, "grad_norm": 0.2113410713731331, "learning_rate": 0.0009731543624161075, "loss": 4.0876, "step": 2465 }, { "epoch": 0.1950603147025725, "grad_norm": 0.2027196568791118, "learning_rate": 0.0009751283063560995, "loss": 4.1183, "step": 2470 }, { "epoch": 0.19545517363921738, "grad_norm": 0.18198273116947283, "learning_rate": 0.0009771022502960917, "loss": 4.0556, "step": 2475 }, { "epoch": 0.19585003257586228, "grad_norm": 0.19372358994750635, "learning_rate": 0.0009790761942360838, "loss": 4.005, "step": 2480 }, { "epoch": 0.19624489151250715, "grad_norm": 0.18142309779313281, "learning_rate": 0.0009810501381760759, "loss": 4.0837, "step": 2485 }, { "epoch": 0.19663975044915205, "grad_norm": 0.15125835439575053, "learning_rate": 0.000983024082116068, "loss": 4.2533, "step": 2490 }, { "epoch": 0.19703460938579692, "grad_norm": 0.15634558705999962, "learning_rate": 0.00098499802605606, "loss": 4.1946, "step": 2495 }, { "epoch": 0.19742946832244182, "grad_norm": 0.2648112209566831, "learning_rate": 0.0009869719699960521, "loss": 4.4834, "step": 2500 }, { "epoch": 0.1978243272590867, "grad_norm": 0.19581785636658405, "learning_rate": 0.0009889459139360442, "loss": 4.1069, "step": 2505 }, { "epoch": 0.19821918619573156, "grad_norm": 0.1741154628728696, "learning_rate": 0.0009909198578760363, "loss": 4.227, "step": 2510 }, { "epoch": 0.19861404513237646, "grad_norm": 0.16632533280862874, "learning_rate": 0.0009928938018160284, "loss": 4.401, "step": 2515 }, { "epoch": 0.19900890406902133, "grad_norm": 0.27309811042726745, "learning_rate": 0.0009948677457560205, "loss": 4.3492, "step": 2520 }, { "epoch": 0.19940376300566623, "grad_norm": 0.2669066613650968, "learning_rate": 0.0009968416896960126, "loss": 4.1787, "step": 2525 }, { "epoch": 0.1997986219423111, "grad_norm": 0.21034424573671034, "learning_rate": 0.000998815633636005, "loss": 4.1754, "step": 2530 }, { "epoch": 0.200193480878956, "grad_norm": 0.1797632794747558, "learning_rate": 0.0009999999809991523, "loss": 4.3721, "step": 2535 }, { "epoch": 0.20058833981560087, "grad_norm": 0.1889332083068049, "learning_rate": 0.0009999997672396304, "loss": 4.1299, "step": 2540 }, { "epoch": 0.20098319875224577, "grad_norm": 0.39218663746879995, "learning_rate": 0.000999999315969629, "loss": 4.1767, "step": 2545 }, { "epoch": 0.20137805768889064, "grad_norm": 0.27476177180032246, "learning_rate": 0.0009999986271893623, "loss": 4.2535, "step": 2550 }, { "epoch": 0.20177291662553554, "grad_norm": 0.21885762668005793, "learning_rate": 0.0009999977008991574, "loss": 4.2975, "step": 2555 }, { "epoch": 0.2021677755621804, "grad_norm": 0.2402034177205705, "learning_rate": 0.0009999965370994543, "loss": 4.0254, "step": 2560 }, { "epoch": 0.2025626344988253, "grad_norm": 0.1890476145346214, "learning_rate": 0.000999995135790806, "loss": 4.0102, "step": 2565 }, { "epoch": 0.20295749343547018, "grad_norm": 0.2604466249967968, "learning_rate": 0.000999993496973878, "loss": 4.2359, "step": 2570 }, { "epoch": 0.20335235237211507, "grad_norm": 0.2522284760308965, "learning_rate": 0.000999991620649449, "loss": 4.1089, "step": 2575 }, { "epoch": 0.20374721130875995, "grad_norm": 0.3260540812906151, "learning_rate": 0.0009999895068184097, "loss": 4.2311, "step": 2580 }, { "epoch": 0.20414207024540482, "grad_norm": 0.2619094710777399, "learning_rate": 0.000999987155481765, "loss": 4.1408, "step": 2585 }, { "epoch": 0.20453692918204971, "grad_norm": 0.20644660904922651, "learning_rate": 0.000999984566640631, "loss": 4.1328, "step": 2590 }, { "epoch": 0.20493178811869459, "grad_norm": 0.2489524347515891, "learning_rate": 0.0009999817402962384, "loss": 4.2289, "step": 2595 }, { "epoch": 0.20532664705533948, "grad_norm": 0.19592373264201876, "learning_rate": 0.0009999786764499292, "loss": 4.4387, "step": 2600 }, { "epoch": 0.20572150599198435, "grad_norm": 0.22105914848598646, "learning_rate": 0.0009999753751031588, "loss": 4.05, "step": 2605 }, { "epoch": 0.20611636492862925, "grad_norm": 0.2762341210328987, "learning_rate": 0.0009999718362574956, "loss": 4.2702, "step": 2610 }, { "epoch": 0.20651122386527412, "grad_norm": 0.1913848163799927, "learning_rate": 0.0009999680599146203, "loss": 4.0733, "step": 2615 }, { "epoch": 0.20690608280191902, "grad_norm": 0.18453923197124494, "learning_rate": 0.000999964046076327, "loss": 4.1485, "step": 2620 }, { "epoch": 0.2073009417385639, "grad_norm": 0.22446596867327187, "learning_rate": 0.0009999597947445225, "loss": 4.3085, "step": 2625 }, { "epoch": 0.2076958006752088, "grad_norm": 0.3289525746078777, "learning_rate": 0.0009999553059212261, "loss": 4.4096, "step": 2630 }, { "epoch": 0.20809065961185366, "grad_norm": 0.30718330805642946, "learning_rate": 0.00099995057960857, "loss": 4.2214, "step": 2635 }, { "epoch": 0.20848551854849856, "grad_norm": 0.20103242268349708, "learning_rate": 0.0009999456158087995, "loss": 4.0752, "step": 2640 }, { "epoch": 0.20888037748514343, "grad_norm": 0.27483290759325907, "learning_rate": 0.000999940414524272, "loss": 4.1463, "step": 2645 }, { "epoch": 0.20927523642178833, "grad_norm": 0.3728963137672497, "learning_rate": 0.000999934975757459, "loss": 4.0205, "step": 2650 }, { "epoch": 0.2096700953584332, "grad_norm": 0.2145748133385545, "learning_rate": 0.0009999292995109435, "loss": 4.0508, "step": 2655 }, { "epoch": 0.21006495429507807, "grad_norm": 0.21077079189679224, "learning_rate": 0.000999923385787422, "loss": 4.1016, "step": 2660 }, { "epoch": 0.21045981323172297, "grad_norm": 0.19180156264026116, "learning_rate": 0.0009999172345897037, "loss": 4.4145, "step": 2665 }, { "epoch": 0.21085467216836784, "grad_norm": 0.20018851046554165, "learning_rate": 0.0009999108459207103, "loss": 4.3418, "step": 2670 }, { "epoch": 0.21124953110501274, "grad_norm": 0.19739791513759142, "learning_rate": 0.0009999042197834768, "loss": 4.1187, "step": 2675 }, { "epoch": 0.2116443900416576, "grad_norm": 0.23988713761670594, "learning_rate": 0.0009998973561811506, "loss": 4.2698, "step": 2680 }, { "epoch": 0.2120392489783025, "grad_norm": 0.18970199269091675, "learning_rate": 0.0009998902551169922, "loss": 4.0426, "step": 2685 }, { "epoch": 0.21243410791494738, "grad_norm": 0.13694424146526268, "learning_rate": 0.0009998829165943747, "loss": 4.0107, "step": 2690 }, { "epoch": 0.21282896685159228, "grad_norm": 0.19600821535330684, "learning_rate": 0.000999875340616784, "loss": 4.0817, "step": 2695 }, { "epoch": 0.21322382578823715, "grad_norm": 0.18985891714990777, "learning_rate": 0.000999867527187819, "loss": 4.1214, "step": 2700 }, { "epoch": 0.21361868472488205, "grad_norm": 0.22923847469773778, "learning_rate": 0.000999859476311191, "loss": 4.0751, "step": 2705 }, { "epoch": 0.21401354366152692, "grad_norm": 0.2233313459825262, "learning_rate": 0.0009998511879907245, "loss": 4.0908, "step": 2710 }, { "epoch": 0.21440840259817182, "grad_norm": 0.18831260606567263, "learning_rate": 0.0009998426622303566, "loss": 4.1272, "step": 2715 }, { "epoch": 0.2148032615348167, "grad_norm": 0.20189269632969936, "learning_rate": 0.0009998338990341374, "loss": 3.8858, "step": 2720 }, { "epoch": 0.21519812047146156, "grad_norm": 0.3140219614718552, "learning_rate": 0.0009998248984062293, "loss": 4.1305, "step": 2725 }, { "epoch": 0.21559297940810646, "grad_norm": 0.21318317345291424, "learning_rate": 0.0009998156603509079, "loss": 4.1757, "step": 2730 }, { "epoch": 0.21598783834475133, "grad_norm": 0.22917409893322555, "learning_rate": 0.0009998061848725615, "loss": 3.9859, "step": 2735 }, { "epoch": 0.21638269728139622, "grad_norm": 0.2583398905599422, "learning_rate": 0.0009997964719756912, "loss": 4.0042, "step": 2740 }, { "epoch": 0.2167775562180411, "grad_norm": 0.20060359008480294, "learning_rate": 0.0009997865216649106, "loss": 4.0527, "step": 2745 }, { "epoch": 0.217172415154686, "grad_norm": 0.19764732240046584, "learning_rate": 0.0009997763339449465, "loss": 4.0813, "step": 2750 }, { "epoch": 0.21756727409133086, "grad_norm": 0.22693051438661882, "learning_rate": 0.0009997659088206384, "loss": 4.0605, "step": 2755 }, { "epoch": 0.21796213302797576, "grad_norm": 0.20684395253797125, "learning_rate": 0.000999755246296938, "loss": 4.0521, "step": 2760 }, { "epoch": 0.21835699196462063, "grad_norm": 0.22567534646830842, "learning_rate": 0.000999744346378911, "loss": 4.2688, "step": 2765 }, { "epoch": 0.21875185090126553, "grad_norm": 0.1910583206991681, "learning_rate": 0.0009997332090717344, "loss": 3.9922, "step": 2770 }, { "epoch": 0.2191467098379104, "grad_norm": 0.25642361133359665, "learning_rate": 0.000999721834380699, "loss": 4.2229, "step": 2775 }, { "epoch": 0.2195415687745553, "grad_norm": 0.20637378266796158, "learning_rate": 0.0009997102223112076, "loss": 4.1611, "step": 2780 }, { "epoch": 0.21993642771120017, "grad_norm": 0.22562513749581395, "learning_rate": 0.0009996983728687767, "loss": 4.1739, "step": 2785 }, { "epoch": 0.22033128664784507, "grad_norm": 0.25040217400685727, "learning_rate": 0.0009996862860590348, "loss": 4.1752, "step": 2790 }, { "epoch": 0.22072614558448994, "grad_norm": 0.19029973769717837, "learning_rate": 0.0009996739618877235, "loss": 4.0025, "step": 2795 }, { "epoch": 0.2211210045211348, "grad_norm": 0.16593964888743115, "learning_rate": 0.0009996614003606968, "loss": 4.0726, "step": 2800 }, { "epoch": 0.2215158634577797, "grad_norm": 0.19182479320289542, "learning_rate": 0.0009996486014839218, "loss": 4.0852, "step": 2805 }, { "epoch": 0.22191072239442458, "grad_norm": 0.19207278189151922, "learning_rate": 0.0009996355652634786, "loss": 3.9764, "step": 2810 }, { "epoch": 0.22230558133106948, "grad_norm": 0.2671632912133079, "learning_rate": 0.0009996222917055592, "loss": 4.167, "step": 2815 }, { "epoch": 0.22270044026771435, "grad_norm": 0.17562227643352135, "learning_rate": 0.000999608780816469, "loss": 4.068, "step": 2820 }, { "epoch": 0.22309529920435925, "grad_norm": 0.221319288722463, "learning_rate": 0.0009995950326026257, "loss": 4.0128, "step": 2825 }, { "epoch": 0.22349015814100412, "grad_norm": 0.1911245079561378, "learning_rate": 0.0009995810470705604, "loss": 4.1241, "step": 2830 }, { "epoch": 0.22388501707764902, "grad_norm": 0.21475014727267372, "learning_rate": 0.0009995668242269164, "loss": 4.0568, "step": 2835 }, { "epoch": 0.2242798760142939, "grad_norm": 0.1992065291251396, "learning_rate": 0.0009995523640784498, "loss": 3.8687, "step": 2840 }, { "epoch": 0.2246747349509388, "grad_norm": 0.17074167452888986, "learning_rate": 0.0009995376666320295, "loss": 4.0909, "step": 2845 }, { "epoch": 0.22506959388758366, "grad_norm": 0.21354747126212692, "learning_rate": 0.0009995227318946372, "loss": 4.0508, "step": 2850 }, { "epoch": 0.22546445282422856, "grad_norm": 0.20080355314551662, "learning_rate": 0.000999507559873367, "loss": 3.9206, "step": 2855 }, { "epoch": 0.22585931176087343, "grad_norm": 0.23148227263328264, "learning_rate": 0.0009994921505754259, "loss": 4.0185, "step": 2860 }, { "epoch": 0.22625417069751833, "grad_norm": 0.19349973546126858, "learning_rate": 0.0009994765040081338, "loss": 3.8886, "step": 2865 }, { "epoch": 0.2266490296341632, "grad_norm": 0.20452669453642533, "learning_rate": 0.0009994606201789234, "loss": 4.3817, "step": 2870 }, { "epoch": 0.22704388857080807, "grad_norm": 0.2470773440585612, "learning_rate": 0.0009994444990953392, "loss": 4.0693, "step": 2875 }, { "epoch": 0.22743874750745297, "grad_norm": 0.19369788016090198, "learning_rate": 0.00099942814076504, "loss": 4.0243, "step": 2880 }, { "epoch": 0.22783360644409784, "grad_norm": 0.13854852521828365, "learning_rate": 0.0009994115451957952, "loss": 3.8789, "step": 2885 }, { "epoch": 0.22822846538074273, "grad_norm": 0.18597911783144958, "learning_rate": 0.000999394712395489, "loss": 4.0149, "step": 2890 }, { "epoch": 0.2286233243173876, "grad_norm": 0.2507433096897054, "learning_rate": 0.0009993776423721167, "loss": 4.1398, "step": 2895 }, { "epoch": 0.2290181832540325, "grad_norm": 0.18005444679966293, "learning_rate": 0.0009993603351337874, "loss": 4.0844, "step": 2900 }, { "epoch": 0.22941304219067737, "grad_norm": 0.25352083978351164, "learning_rate": 0.0009993427906887222, "loss": 4.1118, "step": 2905 }, { "epoch": 0.22980790112732227, "grad_norm": 0.27235471456710036, "learning_rate": 0.0009993250090452551, "loss": 4.2309, "step": 2910 }, { "epoch": 0.23020276006396714, "grad_norm": 0.24437714552251041, "learning_rate": 0.0009993069902118326, "loss": 4.3291, "step": 2915 }, { "epoch": 0.23059761900061204, "grad_norm": 0.2119410548276697, "learning_rate": 0.0009992887341970145, "loss": 4.0054, "step": 2920 }, { "epoch": 0.2309924779372569, "grad_norm": 0.18913132222556872, "learning_rate": 0.0009992702410094722, "loss": 4.1169, "step": 2925 }, { "epoch": 0.2313873368739018, "grad_norm": 0.16736958086987755, "learning_rate": 0.0009992515106579905, "loss": 3.9842, "step": 2930 }, { "epoch": 0.23178219581054668, "grad_norm": 0.12942760953172253, "learning_rate": 0.000999232543151467, "loss": 4.0495, "step": 2935 }, { "epoch": 0.23217705474719155, "grad_norm": 0.15644370084380693, "learning_rate": 0.0009992133384989115, "loss": 3.9837, "step": 2940 }, { "epoch": 0.23257191368383645, "grad_norm": 0.12888856503421992, "learning_rate": 0.0009991938967094467, "loss": 3.8355, "step": 2945 }, { "epoch": 0.23296677262048132, "grad_norm": 0.2068423083449014, "learning_rate": 0.0009991742177923079, "loss": 3.9409, "step": 2950 }, { "epoch": 0.23336163155712622, "grad_norm": 0.1558488139842433, "learning_rate": 0.0009991543017568425, "loss": 3.9004, "step": 2955 }, { "epoch": 0.2337564904937711, "grad_norm": 0.15859054536234238, "learning_rate": 0.0009991341486125117, "loss": 4.2386, "step": 2960 }, { "epoch": 0.234151349430416, "grad_norm": 0.142729910708915, "learning_rate": 0.0009991137583688883, "loss": 3.9478, "step": 2965 }, { "epoch": 0.23454620836706086, "grad_norm": 0.2463632975522476, "learning_rate": 0.0009990931310356582, "loss": 4.0727, "step": 2970 }, { "epoch": 0.23494106730370576, "grad_norm": 0.15531193754406325, "learning_rate": 0.0009990722666226198, "loss": 3.7351, "step": 2975 }, { "epoch": 0.23533592624035063, "grad_norm": 0.15044864873021357, "learning_rate": 0.000999051165139684, "loss": 3.903, "step": 2980 }, { "epoch": 0.23573078517699553, "grad_norm": 0.20090611156383784, "learning_rate": 0.000999029826596875, "loss": 4.1417, "step": 2985 }, { "epoch": 0.2361256441136404, "grad_norm": 0.2097719735439394, "learning_rate": 0.0009990082510043284, "loss": 3.9603, "step": 2990 }, { "epoch": 0.2365205030502853, "grad_norm": 0.18144821978423367, "learning_rate": 0.0009989864383722932, "loss": 3.9695, "step": 2995 }, { "epoch": 0.23691536198693017, "grad_norm": 0.24813658425285254, "learning_rate": 0.0009989643887111312, "loss": 4.0374, "step": 3000 }, { "epoch": 0.23731022092357507, "grad_norm": 0.21827408494986048, "learning_rate": 0.0009989421020313162, "loss": 4.0434, "step": 3005 }, { "epoch": 0.23770507986021994, "grad_norm": 0.16068380503866173, "learning_rate": 0.0009989195783434348, "loss": 4.1223, "step": 3010 }, { "epoch": 0.2380999387968648, "grad_norm": 0.13535054175678118, "learning_rate": 0.0009988968176581866, "loss": 3.8287, "step": 3015 }, { "epoch": 0.2384947977335097, "grad_norm": 0.14238141679122363, "learning_rate": 0.0009988738199863828, "loss": 3.9992, "step": 3020 }, { "epoch": 0.23888965667015458, "grad_norm": 0.1499995402936881, "learning_rate": 0.0009988505853389482, "loss": 3.8481, "step": 3025 }, { "epoch": 0.23928451560679947, "grad_norm": 0.14368049902404836, "learning_rate": 0.0009988271137269197, "loss": 3.9707, "step": 3030 }, { "epoch": 0.23967937454344435, "grad_norm": 0.1719738126341041, "learning_rate": 0.0009988034051614466, "loss": 3.894, "step": 3035 }, { "epoch": 0.24007423348008924, "grad_norm": 0.16150066551455533, "learning_rate": 0.0009987794596537914, "loss": 4.0361, "step": 3040 }, { "epoch": 0.24046909241673411, "grad_norm": 0.2805486293447406, "learning_rate": 0.0009987552772153284, "loss": 4.0375, "step": 3045 }, { "epoch": 0.240863951353379, "grad_norm": 0.1876786377532791, "learning_rate": 0.0009987308578575447, "loss": 4.0449, "step": 3050 }, { "epoch": 0.24125881029002388, "grad_norm": 0.19632968414241092, "learning_rate": 0.0009987062015920402, "loss": 3.9249, "step": 3055 }, { "epoch": 0.24165366922666878, "grad_norm": 0.19518664009184644, "learning_rate": 0.0009986813084305272, "loss": 3.8633, "step": 3060 }, { "epoch": 0.24204852816331365, "grad_norm": 0.18137607468924388, "learning_rate": 0.0009986561783848302, "loss": 3.9094, "step": 3065 }, { "epoch": 0.24244338709995855, "grad_norm": 0.1726058725794767, "learning_rate": 0.0009986308114668867, "loss": 4.0467, "step": 3070 }, { "epoch": 0.24283824603660342, "grad_norm": 0.17330136208428318, "learning_rate": 0.0009986052076887467, "loss": 4.0527, "step": 3075 }, { "epoch": 0.24323310497324832, "grad_norm": 0.2593249749323006, "learning_rate": 0.000998579367062572, "loss": 4.0077, "step": 3080 }, { "epoch": 0.2436279639098932, "grad_norm": 0.2703284646410039, "learning_rate": 0.0009985532896006379, "loss": 3.9393, "step": 3085 }, { "epoch": 0.24402282284653806, "grad_norm": 0.2783975570270029, "learning_rate": 0.000998526975315332, "loss": 3.9743, "step": 3090 }, { "epoch": 0.24441768178318296, "grad_norm": 0.3159106911729843, "learning_rate": 0.0009985004242191533, "loss": 4.1359, "step": 3095 }, { "epoch": 0.24481254071982783, "grad_norm": 0.18710927413464867, "learning_rate": 0.0009984736363247146, "loss": 4.0396, "step": 3100 }, { "epoch": 0.24520739965647273, "grad_norm": 0.1773424606457227, "learning_rate": 0.000998446611644741, "loss": 4.0294, "step": 3105 }, { "epoch": 0.2456022585931176, "grad_norm": 0.4868990457122563, "learning_rate": 0.0009984193501920695, "loss": 3.9443, "step": 3110 }, { "epoch": 0.2459971175297625, "grad_norm": 0.18670698832609442, "learning_rate": 0.0009983918519796498, "loss": 4.0, "step": 3115 }, { "epoch": 0.24639197646640737, "grad_norm": 0.21662390795944905, "learning_rate": 0.0009983641170205441, "loss": 4.0154, "step": 3120 }, { "epoch": 0.24678683540305227, "grad_norm": 0.14749443306844862, "learning_rate": 0.0009983361453279275, "loss": 3.9127, "step": 3125 }, { "epoch": 0.24718169433969714, "grad_norm": 0.13867324460871247, "learning_rate": 0.0009983079369150865, "loss": 3.9722, "step": 3130 }, { "epoch": 0.24757655327634204, "grad_norm": 0.147447758118497, "learning_rate": 0.0009982794917954211, "loss": 4.0047, "step": 3135 }, { "epoch": 0.2479714122129869, "grad_norm": 0.18272732407304262, "learning_rate": 0.0009982508099824435, "loss": 3.9181, "step": 3140 }, { "epoch": 0.2483662711496318, "grad_norm": 0.17447173526948456, "learning_rate": 0.000998221891489778, "loss": 3.9996, "step": 3145 }, { "epoch": 0.24876113008627668, "grad_norm": 0.1703616152412051, "learning_rate": 0.0009981927363311612, "loss": 3.8562, "step": 3150 }, { "epoch": 0.24915598902292155, "grad_norm": 0.2091293543628589, "learning_rate": 0.0009981633445204428, "loss": 4.0964, "step": 3155 }, { "epoch": 0.24955084795956645, "grad_norm": 0.16562116071139216, "learning_rate": 0.0009981337160715844, "loss": 3.7737, "step": 3160 }, { "epoch": 0.24994570689621132, "grad_norm": 0.2169437889679153, "learning_rate": 0.00099810385099866, "loss": 4.0093, "step": 3165 }, { "epoch": 0.2503405658328562, "grad_norm": 0.15272767983656443, "learning_rate": 0.0009980737493158565, "loss": 4.0971, "step": 3170 }, { "epoch": 0.2507354247695011, "grad_norm": 0.18119319633487405, "learning_rate": 0.0009980434110374724, "loss": 3.9812, "step": 3175 }, { "epoch": 0.251130283706146, "grad_norm": 0.209178165130668, "learning_rate": 0.0009980128361779194, "loss": 3.9782, "step": 3180 }, { "epoch": 0.25152514264279086, "grad_norm": 0.18150510476852164, "learning_rate": 0.000997982024751721, "loss": 3.8403, "step": 3185 }, { "epoch": 0.2519200015794357, "grad_norm": 0.2017975292518101, "learning_rate": 0.0009979509767735133, "loss": 3.9162, "step": 3190 }, { "epoch": 0.25231486051608065, "grad_norm": 0.13102510750498308, "learning_rate": 0.0009979196922580448, "loss": 3.8493, "step": 3195 }, { "epoch": 0.2527097194527255, "grad_norm": 0.1945156798745024, "learning_rate": 0.0009978881712201764, "loss": 3.9671, "step": 3200 }, { "epoch": 0.2531045783893704, "grad_norm": 0.1396021316282331, "learning_rate": 0.000997856413674881, "loss": 3.6748, "step": 3205 }, { "epoch": 0.25349943732601526, "grad_norm": 0.14704720724635578, "learning_rate": 0.0009978244196372442, "loss": 3.7231, "step": 3210 }, { "epoch": 0.2538942962626602, "grad_norm": 0.1461166425372746, "learning_rate": 0.0009977921891224642, "loss": 4.1469, "step": 3215 }, { "epoch": 0.25428915519930506, "grad_norm": 0.17685863078948208, "learning_rate": 0.0009977597221458505, "loss": 3.9407, "step": 3220 }, { "epoch": 0.25468401413594993, "grad_norm": 0.1722531682870449, "learning_rate": 0.0009977270187228261, "loss": 3.84, "step": 3225 }, { "epoch": 0.2550788730725948, "grad_norm": 0.17330295945135674, "learning_rate": 0.0009976940788689259, "loss": 3.9073, "step": 3230 }, { "epoch": 0.2554737320092397, "grad_norm": 0.16500737404906649, "learning_rate": 0.0009976609025997966, "loss": 3.827, "step": 3235 }, { "epoch": 0.2558685909458846, "grad_norm": 0.16464615151904183, "learning_rate": 0.0009976274899311979, "loss": 3.8465, "step": 3240 }, { "epoch": 0.25626344988252947, "grad_norm": 0.13777906154938124, "learning_rate": 0.0009975938408790016, "loss": 3.7776, "step": 3245 }, { "epoch": 0.25665830881917434, "grad_norm": 0.16739637766654958, "learning_rate": 0.0009975599554591914, "loss": 3.7734, "step": 3250 }, { "epoch": 0.2570531677558192, "grad_norm": 0.21121297410344383, "learning_rate": 0.000997525833687864, "loss": 3.7235, "step": 3255 }, { "epoch": 0.25744802669246414, "grad_norm": 0.17468180732402783, "learning_rate": 0.0009974914755812275, "loss": 3.8124, "step": 3260 }, { "epoch": 0.257842885629109, "grad_norm": 0.18861223145653688, "learning_rate": 0.000997456881155603, "loss": 3.9049, "step": 3265 }, { "epoch": 0.2582377445657539, "grad_norm": 0.15326297145540654, "learning_rate": 0.000997422050427424, "loss": 3.8544, "step": 3270 }, { "epoch": 0.25863260350239875, "grad_norm": 0.16733323296873986, "learning_rate": 0.0009973869834132348, "loss": 3.8065, "step": 3275 }, { "epoch": 0.2590274624390437, "grad_norm": 0.16962502669573426, "learning_rate": 0.0009973516801296937, "loss": 3.8549, "step": 3280 }, { "epoch": 0.25942232137568855, "grad_norm": 0.1634666790194458, "learning_rate": 0.0009973161405935707, "loss": 3.9159, "step": 3285 }, { "epoch": 0.2598171803123334, "grad_norm": 0.1646808117231358, "learning_rate": 0.0009972803648217472, "loss": 3.9719, "step": 3290 }, { "epoch": 0.2602120392489783, "grad_norm": 0.1625070579185935, "learning_rate": 0.000997244352831218, "loss": 3.8763, "step": 3295 }, { "epoch": 0.26060689818562316, "grad_norm": 0.13681513556283526, "learning_rate": 0.0009972081046390891, "loss": 3.7871, "step": 3300 }, { "epoch": 0.2610017571222681, "grad_norm": 0.13122974411775284, "learning_rate": 0.0009971716202625796, "loss": 3.8676, "step": 3305 }, { "epoch": 0.26139661605891296, "grad_norm": 0.13600563746193411, "learning_rate": 0.00099713489971902, "loss": 3.6952, "step": 3310 }, { "epoch": 0.2617914749955578, "grad_norm": 0.14543533575693987, "learning_rate": 0.0009970979430258533, "loss": 3.8966, "step": 3315 }, { "epoch": 0.2621863339322027, "grad_norm": 0.13881603209798735, "learning_rate": 0.000997060750200635, "loss": 3.6469, "step": 3320 }, { "epoch": 0.2625811928688476, "grad_norm": 0.15926480793052, "learning_rate": 0.0009970233212610324, "loss": 3.6885, "step": 3325 }, { "epoch": 0.2629760518054925, "grad_norm": 0.1469128991917304, "learning_rate": 0.000996985656224825, "loss": 3.6634, "step": 3330 }, { "epoch": 0.26337091074213737, "grad_norm": 0.15675511153388014, "learning_rate": 0.0009969477551099045, "loss": 3.827, "step": 3335 }, { "epoch": 0.26376576967878224, "grad_norm": 0.3650380971577688, "learning_rate": 0.0009969096179342747, "loss": 4.0151, "step": 3340 }, { "epoch": 0.26416062861542716, "grad_norm": 0.1656373504581893, "learning_rate": 0.0009968712447160515, "loss": 3.6578, "step": 3345 }, { "epoch": 0.26455548755207203, "grad_norm": 0.17734371350890424, "learning_rate": 0.0009968326354734631, "loss": 3.7755, "step": 3350 }, { "epoch": 0.2649503464887169, "grad_norm": 0.1493208259959817, "learning_rate": 0.00099679379022485, "loss": 3.8766, "step": 3355 }, { "epoch": 0.2653452054253618, "grad_norm": 0.20896735220728385, "learning_rate": 0.0009967547089886637, "loss": 4.1062, "step": 3360 }, { "epoch": 0.2657400643620067, "grad_norm": 0.19069066632034493, "learning_rate": 0.0009967153917834692, "loss": 4.1459, "step": 3365 }, { "epoch": 0.26613492329865157, "grad_norm": 0.2250311491810841, "learning_rate": 0.0009966758386279432, "loss": 4.1861, "step": 3370 }, { "epoch": 0.26652978223529644, "grad_norm": 0.22838919371067523, "learning_rate": 0.0009966360495408738, "loss": 4.0176, "step": 3375 }, { "epoch": 0.2669246411719413, "grad_norm": 0.13404090442411593, "learning_rate": 0.0009965960245411618, "loss": 3.9551, "step": 3380 }, { "epoch": 0.2673195001085862, "grad_norm": 0.17422474051748701, "learning_rate": 0.0009965557636478202, "loss": 3.8456, "step": 3385 }, { "epoch": 0.2677143590452311, "grad_norm": 0.15377249261333542, "learning_rate": 0.0009965152668799735, "loss": 3.9896, "step": 3390 }, { "epoch": 0.268109217981876, "grad_norm": 0.18929340414638537, "learning_rate": 0.0009964745342568586, "loss": 3.8636, "step": 3395 }, { "epoch": 0.26850407691852085, "grad_norm": 0.12001498061266873, "learning_rate": 0.000996433565797824, "loss": 3.7239, "step": 3400 }, { "epoch": 0.2688989358551657, "grad_norm": 0.12164834503551232, "learning_rate": 0.0009963923615223314, "loss": 3.759, "step": 3405 }, { "epoch": 0.26929379479181065, "grad_norm": 0.15357201962966457, "learning_rate": 0.000996350921449953, "loss": 4.0009, "step": 3410 }, { "epoch": 0.2696886537284555, "grad_norm": 0.15435442012098238, "learning_rate": 0.0009963092456003741, "loss": 3.8907, "step": 3415 }, { "epoch": 0.2700835126651004, "grad_norm": 0.16189222929989966, "learning_rate": 0.0009962673339933914, "loss": 3.6924, "step": 3420 }, { "epoch": 0.27047837160174526, "grad_norm": 0.14882678582228295, "learning_rate": 0.0009962251866489139, "loss": 3.8269, "step": 3425 }, { "epoch": 0.2708732305383902, "grad_norm": 0.14247180871262832, "learning_rate": 0.0009961828035869621, "loss": 3.9469, "step": 3430 }, { "epoch": 0.27126808947503506, "grad_norm": 0.17143492700564716, "learning_rate": 0.0009961401848276696, "loss": 3.9015, "step": 3435 }, { "epoch": 0.2716629484116799, "grad_norm": 0.1566092600890232, "learning_rate": 0.0009960973303912806, "loss": 3.7019, "step": 3440 }, { "epoch": 0.2720578073483248, "grad_norm": 0.1437263068803601, "learning_rate": 0.0009960542402981522, "loss": 3.9429, "step": 3445 }, { "epoch": 0.27245266628496967, "grad_norm": 0.15729659750747046, "learning_rate": 0.0009960109145687528, "loss": 3.8061, "step": 3450 }, { "epoch": 0.2728475252216146, "grad_norm": 0.17948310000287523, "learning_rate": 0.0009959673532236634, "loss": 3.9025, "step": 3455 }, { "epoch": 0.27324238415825947, "grad_norm": 0.16684061059114147, "learning_rate": 0.0009959235562835762, "loss": 3.7949, "step": 3460 }, { "epoch": 0.27363724309490434, "grad_norm": 0.1694568550647538, "learning_rate": 0.000995879523769296, "loss": 3.7788, "step": 3465 }, { "epoch": 0.2740321020315492, "grad_norm": 0.1738756070121251, "learning_rate": 0.000995835255701739, "loss": 3.8397, "step": 3470 }, { "epoch": 0.27442696096819413, "grad_norm": 0.20948724634400248, "learning_rate": 0.0009957907521019336, "loss": 3.9944, "step": 3475 }, { "epoch": 0.274821819904839, "grad_norm": 0.1721710975897837, "learning_rate": 0.0009957460129910195, "loss": 4.0102, "step": 3480 }, { "epoch": 0.2752166788414839, "grad_norm": 0.184861009640931, "learning_rate": 0.0009957010383902494, "loss": 3.9546, "step": 3485 }, { "epoch": 0.27561153777812875, "grad_norm": 0.20976920443888114, "learning_rate": 0.0009956558283209867, "loss": 4.0081, "step": 3490 }, { "epoch": 0.27600639671477367, "grad_norm": 0.2753994357667805, "learning_rate": 0.0009956103828047075, "loss": 3.9275, "step": 3495 }, { "epoch": 0.27640125565141854, "grad_norm": 0.15708468727818045, "learning_rate": 0.000995564701862999, "loss": 3.7652, "step": 3500 }, { "epoch": 0.2767961145880634, "grad_norm": 0.1662491336101524, "learning_rate": 0.0009955187855175609, "loss": 3.8223, "step": 3505 }, { "epoch": 0.2771909735247083, "grad_norm": 0.16784811812989422, "learning_rate": 0.0009954726337902042, "loss": 3.9848, "step": 3510 }, { "epoch": 0.27758583246135315, "grad_norm": 0.21301683443887412, "learning_rate": 0.0009954262467028524, "loss": 4.2301, "step": 3515 }, { "epoch": 0.2779806913979981, "grad_norm": 0.20123595298274474, "learning_rate": 0.0009953796242775396, "loss": 3.8987, "step": 3520 }, { "epoch": 0.27837555033464295, "grad_norm": 0.19732594797788405, "learning_rate": 0.0009953327665364132, "loss": 3.8527, "step": 3525 }, { "epoch": 0.2787704092712878, "grad_norm": 0.1715019406828215, "learning_rate": 0.0009952856735017312, "loss": 3.764, "step": 3530 }, { "epoch": 0.2791652682079327, "grad_norm": 0.16589588902008454, "learning_rate": 0.000995238345195864, "loss": 3.9469, "step": 3535 }, { "epoch": 0.2795601271445776, "grad_norm": 0.19874496772220945, "learning_rate": 0.000995190781641293, "loss": 3.8533, "step": 3540 }, { "epoch": 0.2799549860812225, "grad_norm": 0.20677180485316785, "learning_rate": 0.0009951429828606129, "loss": 3.7364, "step": 3545 }, { "epoch": 0.28034984501786736, "grad_norm": 0.15945763447263414, "learning_rate": 0.000995094948876528, "loss": 3.7805, "step": 3550 }, { "epoch": 0.28074470395451223, "grad_norm": 0.15736406081995155, "learning_rate": 0.0009950466797118565, "loss": 3.6649, "step": 3555 }, { "epoch": 0.28113956289115716, "grad_norm": 0.15041125313524303, "learning_rate": 0.0009949981753895267, "loss": 3.7862, "step": 3560 }, { "epoch": 0.28153442182780203, "grad_norm": 0.20031299705180539, "learning_rate": 0.000994949435932579, "loss": 3.9027, "step": 3565 }, { "epoch": 0.2819292807644469, "grad_norm": 0.1765913580670778, "learning_rate": 0.0009949004613641664, "loss": 3.6855, "step": 3570 }, { "epoch": 0.28232413970109177, "grad_norm": 0.2607624827676873, "learning_rate": 0.000994851251707552, "loss": 3.92, "step": 3575 }, { "epoch": 0.2827189986377367, "grad_norm": 0.15104553727025452, "learning_rate": 0.000994801806986112, "loss": 3.992, "step": 3580 }, { "epoch": 0.28311385757438157, "grad_norm": 0.13245324762466462, "learning_rate": 0.0009947521272233334, "loss": 3.7912, "step": 3585 }, { "epoch": 0.28350871651102644, "grad_norm": 0.16513001313399256, "learning_rate": 0.0009947022124428156, "loss": 3.7, "step": 3590 }, { "epoch": 0.2839035754476713, "grad_norm": 0.15680892525081627, "learning_rate": 0.0009946520626682685, "loss": 3.7016, "step": 3595 }, { "epoch": 0.2842984343843162, "grad_norm": 0.19415141016255122, "learning_rate": 0.0009946016779235148, "loss": 3.8565, "step": 3600 }, { "epoch": 0.2846932933209611, "grad_norm": 0.14358797782196464, "learning_rate": 0.000994551058232488, "loss": 3.7182, "step": 3605 }, { "epoch": 0.285088152257606, "grad_norm": 0.14367626760736027, "learning_rate": 0.000994500203619234, "loss": 3.7577, "step": 3610 }, { "epoch": 0.28548301119425085, "grad_norm": 0.1698277174722452, "learning_rate": 0.0009944491141079094, "loss": 3.8031, "step": 3615 }, { "epoch": 0.2858778701308957, "grad_norm": 0.20588699334293245, "learning_rate": 0.000994397789722783, "loss": 3.7649, "step": 3620 }, { "epoch": 0.28627272906754064, "grad_norm": 0.1544123841794175, "learning_rate": 0.0009943462304882347, "loss": 3.7387, "step": 3625 }, { "epoch": 0.2866675880041855, "grad_norm": 0.23626910092699424, "learning_rate": 0.0009942944364287566, "loss": 4.0005, "step": 3630 }, { "epoch": 0.2870624469408304, "grad_norm": 0.21111171884900537, "learning_rate": 0.0009942424075689515, "loss": 4.0312, "step": 3635 }, { "epoch": 0.28745730587747526, "grad_norm": 0.20427882128514438, "learning_rate": 0.0009941901439335346, "loss": 3.8646, "step": 3640 }, { "epoch": 0.2878521648141202, "grad_norm": 0.17373856842976093, "learning_rate": 0.0009941376455473324, "loss": 3.7777, "step": 3645 }, { "epoch": 0.28824702375076505, "grad_norm": 0.23895488538641527, "learning_rate": 0.0009940849124352822, "loss": 4.0248, "step": 3650 }, { "epoch": 0.2886418826874099, "grad_norm": 0.15421307463518968, "learning_rate": 0.0009940319446224337, "loss": 3.6702, "step": 3655 }, { "epoch": 0.2890367416240548, "grad_norm": 0.15570624636148514, "learning_rate": 0.0009939787421339476, "loss": 3.8877, "step": 3660 }, { "epoch": 0.28943160056069966, "grad_norm": 0.14218759685154517, "learning_rate": 0.0009939253049950963, "loss": 3.7232, "step": 3665 }, { "epoch": 0.2898264594973446, "grad_norm": 0.13656408218635016, "learning_rate": 0.0009938716332312636, "loss": 3.5995, "step": 3670 }, { "epoch": 0.29022131843398946, "grad_norm": 0.11464275707088553, "learning_rate": 0.0009938177268679447, "loss": 3.7231, "step": 3675 }, { "epoch": 0.29061617737063433, "grad_norm": 0.21269058444657474, "learning_rate": 0.0009937635859307461, "loss": 3.896, "step": 3680 }, { "epoch": 0.2910110363072792, "grad_norm": 0.17570643136281364, "learning_rate": 0.000993709210445386, "loss": 3.8542, "step": 3685 }, { "epoch": 0.29140589524392413, "grad_norm": 0.21572941150163213, "learning_rate": 0.000993654600437694, "loss": 3.9625, "step": 3690 }, { "epoch": 0.291800754180569, "grad_norm": 0.1871461591498049, "learning_rate": 0.000993599755933611, "loss": 3.8992, "step": 3695 }, { "epoch": 0.29219561311721387, "grad_norm": 0.24067598199196444, "learning_rate": 0.000993544676959189, "loss": 3.8447, "step": 3700 }, { "epoch": 0.29259047205385874, "grad_norm": 0.20528614565601092, "learning_rate": 0.0009934893635405921, "loss": 3.759, "step": 3705 }, { "epoch": 0.29298533099050367, "grad_norm": 0.19622520575286873, "learning_rate": 0.0009934338157040951, "loss": 3.826, "step": 3710 }, { "epoch": 0.29338018992714854, "grad_norm": 0.1528895619956615, "learning_rate": 0.0009933780334760846, "loss": 3.7056, "step": 3715 }, { "epoch": 0.2937750488637934, "grad_norm": 0.14588606704949633, "learning_rate": 0.0009933220168830581, "loss": 3.7878, "step": 3720 }, { "epoch": 0.2941699078004383, "grad_norm": 0.15891764701336686, "learning_rate": 0.000993265765951625, "loss": 3.7694, "step": 3725 }, { "epoch": 0.29456476673708315, "grad_norm": 0.13814097887923682, "learning_rate": 0.0009932092807085052, "loss": 3.6923, "step": 3730 }, { "epoch": 0.2949596256737281, "grad_norm": 0.11555040590420351, "learning_rate": 0.0009931525611805306, "loss": 3.795, "step": 3735 }, { "epoch": 0.29535448461037295, "grad_norm": 0.1797264018151648, "learning_rate": 0.0009930956073946442, "loss": 3.7787, "step": 3740 }, { "epoch": 0.2957493435470178, "grad_norm": 0.1474105616208162, "learning_rate": 0.0009930384193779004, "loss": 3.7315, "step": 3745 }, { "epoch": 0.2961442024836627, "grad_norm": 0.1440413043000379, "learning_rate": 0.0009929809971574645, "loss": 3.7699, "step": 3750 }, { "epoch": 0.2965390614203076, "grad_norm": 0.13986907913847718, "learning_rate": 0.0009929233407606135, "loss": 3.7831, "step": 3755 }, { "epoch": 0.2969339203569525, "grad_norm": 0.25775251791781706, "learning_rate": 0.0009928654502147352, "loss": 3.8789, "step": 3760 }, { "epoch": 0.29732877929359736, "grad_norm": 0.17800068725298088, "learning_rate": 0.000992807325547329, "loss": 3.8727, "step": 3765 }, { "epoch": 0.2977236382302422, "grad_norm": 0.175218560727318, "learning_rate": 0.000992748966786005, "loss": 3.907, "step": 3770 }, { "epoch": 0.29811849716688715, "grad_norm": 0.19318092630271017, "learning_rate": 0.0009926903739584851, "loss": 3.8347, "step": 3775 }, { "epoch": 0.298513356103532, "grad_norm": 0.1552461054828742, "learning_rate": 0.0009926315470926025, "loss": 3.8349, "step": 3780 }, { "epoch": 0.2989082150401769, "grad_norm": 0.2853589929071522, "learning_rate": 0.0009925724862163007, "loss": 3.8329, "step": 3785 }, { "epoch": 0.29930307397682177, "grad_norm": 0.240810192949652, "learning_rate": 0.0009925131913576348, "loss": 3.9199, "step": 3790 }, { "epoch": 0.2996979329134667, "grad_norm": 0.20592587245318006, "learning_rate": 0.0009924536625447713, "loss": 4.003, "step": 3795 }, { "epoch": 0.30009279185011156, "grad_norm": 0.22062782149856794, "learning_rate": 0.000992393899805988, "loss": 3.9667, "step": 3800 }, { "epoch": 0.30048765078675643, "grad_norm": 0.19654916272401746, "learning_rate": 0.000992333903169673, "loss": 3.778, "step": 3805 }, { "epoch": 0.3008825097234013, "grad_norm": 0.18308558253562032, "learning_rate": 0.000992273672664326, "loss": 3.7778, "step": 3810 }, { "epoch": 0.3012773686600462, "grad_norm": 0.14492476794415785, "learning_rate": 0.000992213208318558, "loss": 3.7187, "step": 3815 }, { "epoch": 0.3016722275966911, "grad_norm": 0.22355581085351425, "learning_rate": 0.0009921525101610907, "loss": 3.9037, "step": 3820 }, { "epoch": 0.30206708653333597, "grad_norm": 0.17241843066732643, "learning_rate": 0.000992091578220757, "loss": 3.7559, "step": 3825 }, { "epoch": 0.30246194546998084, "grad_norm": 0.16078228141862277, "learning_rate": 0.0009920304125265009, "loss": 3.5328, "step": 3830 }, { "epoch": 0.3028568044066257, "grad_norm": 0.14310282292959925, "learning_rate": 0.0009919690131073775, "loss": 3.9038, "step": 3835 }, { "epoch": 0.30325166334327064, "grad_norm": 0.17894812128634444, "learning_rate": 0.0009919073799925526, "loss": 3.8313, "step": 3840 }, { "epoch": 0.3036465222799155, "grad_norm": 0.1657874738218434, "learning_rate": 0.0009918455132113034, "loss": 3.7373, "step": 3845 }, { "epoch": 0.3040413812165604, "grad_norm": 0.20112291442346733, "learning_rate": 0.0009917834127930179, "loss": 3.8218, "step": 3850 }, { "epoch": 0.30443624015320525, "grad_norm": 0.21427883505713163, "learning_rate": 0.000991721078767195, "loss": 3.8662, "step": 3855 }, { "epoch": 0.3048310990898502, "grad_norm": 0.28189892883430107, "learning_rate": 0.0009916585111634448, "loss": 3.9685, "step": 3860 }, { "epoch": 0.30522595802649505, "grad_norm": 0.21190558736148077, "learning_rate": 0.0009915957100114883, "loss": 3.8114, "step": 3865 }, { "epoch": 0.3056208169631399, "grad_norm": 0.19994463897893622, "learning_rate": 0.0009915326753411574, "loss": 3.8928, "step": 3870 }, { "epoch": 0.3060156758997848, "grad_norm": 0.16620503261089478, "learning_rate": 0.0009914694071823946, "loss": 3.6722, "step": 3875 }, { "epoch": 0.30641053483642966, "grad_norm": 0.18804623499104522, "learning_rate": 0.0009914059055652538, "loss": 3.8612, "step": 3880 }, { "epoch": 0.3068053937730746, "grad_norm": 0.15687208464038566, "learning_rate": 0.0009913421705198997, "loss": 4.0548, "step": 3885 }, { "epoch": 0.30720025270971946, "grad_norm": 0.20405290209061627, "learning_rate": 0.0009912782020766078, "loss": 3.6367, "step": 3890 }, { "epoch": 0.3075951116463643, "grad_norm": 0.1379402177333614, "learning_rate": 0.0009912140002657644, "loss": 3.8122, "step": 3895 }, { "epoch": 0.3079899705830092, "grad_norm": 0.1232226983428921, "learning_rate": 0.0009911495651178665, "loss": 3.6446, "step": 3900 }, { "epoch": 0.3083848295196541, "grad_norm": 0.17861546116283405, "learning_rate": 0.0009910848966635227, "loss": 3.6875, "step": 3905 }, { "epoch": 0.308779688456299, "grad_norm": 0.12206969389331228, "learning_rate": 0.0009910199949334513, "loss": 3.62, "step": 3910 }, { "epoch": 0.30917454739294387, "grad_norm": 0.1086586252401671, "learning_rate": 0.0009909548599584823, "loss": 3.7274, "step": 3915 }, { "epoch": 0.30956940632958874, "grad_norm": 0.20860518352406743, "learning_rate": 0.000990889491769556, "loss": 4.0628, "step": 3920 }, { "epoch": 0.30996426526623366, "grad_norm": 0.18309968223595605, "learning_rate": 0.0009908238903977244, "loss": 3.9276, "step": 3925 }, { "epoch": 0.31035912420287853, "grad_norm": 0.1460450863171261, "learning_rate": 0.0009907580558741484, "loss": 3.808, "step": 3930 }, { "epoch": 0.3107539831395234, "grad_norm": 0.15577619076128976, "learning_rate": 0.0009906919882301014, "loss": 3.7169, "step": 3935 }, { "epoch": 0.3111488420761683, "grad_norm": 0.16950749596549386, "learning_rate": 0.000990625687496967, "loss": 3.8158, "step": 3940 }, { "epoch": 0.31154370101281315, "grad_norm": 0.19996957085145717, "learning_rate": 0.0009905591537062394, "loss": 3.6409, "step": 3945 }, { "epoch": 0.31193855994945807, "grad_norm": 0.15183864649401743, "learning_rate": 0.0009904923868895234, "loss": 3.8506, "step": 3950 }, { "epoch": 0.31233341888610294, "grad_norm": 0.15458487065052104, "learning_rate": 0.0009904253870785349, "loss": 3.581, "step": 3955 }, { "epoch": 0.3127282778227478, "grad_norm": 0.14346236752304267, "learning_rate": 0.0009903581543051, "loss": 3.7021, "step": 3960 }, { "epoch": 0.3131231367593927, "grad_norm": 0.32163706072244314, "learning_rate": 0.0009902906886011555, "loss": 4.1592, "step": 3965 }, { "epoch": 0.3135179956960376, "grad_norm": 0.12995534627786376, "learning_rate": 0.0009902229899987497, "loss": 3.5697, "step": 3970 }, { "epoch": 0.3139128546326825, "grad_norm": 0.18683111622101228, "learning_rate": 0.0009901550585300403, "loss": 3.9906, "step": 3975 }, { "epoch": 0.31430771356932735, "grad_norm": 0.16916235420304057, "learning_rate": 0.0009900868942272962, "loss": 3.6923, "step": 3980 }, { "epoch": 0.3147025725059722, "grad_norm": 0.1341684635687523, "learning_rate": 0.0009900184971228974, "loss": 3.6516, "step": 3985 }, { "epoch": 0.31509743144261715, "grad_norm": 0.1270844530371833, "learning_rate": 0.0009899498672493335, "loss": 3.7832, "step": 3990 }, { "epoch": 0.315492290379262, "grad_norm": 0.15599796299625138, "learning_rate": 0.000989881004639205, "loss": 3.8316, "step": 3995 }, { "epoch": 0.3158871493159069, "grad_norm": 0.1579516487293539, "learning_rate": 0.0009898119093252238, "loss": 3.6155, "step": 4000 }, { "epoch": 0.31628200825255176, "grad_norm": 0.12987117087645325, "learning_rate": 0.0009897425813402108, "loss": 3.6611, "step": 4005 }, { "epoch": 0.3166768671891967, "grad_norm": 0.17915628352767843, "learning_rate": 0.000989673020717099, "loss": 3.8242, "step": 4010 }, { "epoch": 0.31707172612584156, "grad_norm": 0.12941516984374368, "learning_rate": 0.0009896032274889306, "loss": 3.9155, "step": 4015 }, { "epoch": 0.31746658506248643, "grad_norm": 0.21149399299185032, "learning_rate": 0.000989533201688859, "loss": 3.8796, "step": 4020 }, { "epoch": 0.3178614439991313, "grad_norm": 0.15481472108678462, "learning_rate": 0.0009894629433501486, "loss": 3.7476, "step": 4025 }, { "epoch": 0.31825630293577617, "grad_norm": 0.1487261358725805, "learning_rate": 0.0009893924525061727, "loss": 3.6975, "step": 4030 }, { "epoch": 0.3186511618724211, "grad_norm": 0.1605017876096822, "learning_rate": 0.000989321729190416, "loss": 3.6033, "step": 4035 }, { "epoch": 0.31904602080906597, "grad_norm": 0.17612399238880444, "learning_rate": 0.0009892507734364743, "loss": 3.8241, "step": 4040 }, { "epoch": 0.31944087974571084, "grad_norm": 0.15302779008795778, "learning_rate": 0.0009891795852780523, "loss": 3.7812, "step": 4045 }, { "epoch": 0.3198357386823557, "grad_norm": 0.30221156063987, "learning_rate": 0.0009891081647489665, "loss": 3.7905, "step": 4050 }, { "epoch": 0.32023059761900063, "grad_norm": 0.1963394724649312, "learning_rate": 0.0009890365118831426, "loss": 3.8632, "step": 4055 }, { "epoch": 0.3206254565556455, "grad_norm": 0.2565972943865063, "learning_rate": 0.0009889646267146176, "loss": 3.7058, "step": 4060 }, { "epoch": 0.3210203154922904, "grad_norm": 0.13918768294651251, "learning_rate": 0.0009888925092775384, "loss": 3.6699, "step": 4065 }, { "epoch": 0.32141517442893525, "grad_norm": 0.1626937500814368, "learning_rate": 0.0009888201596061623, "loss": 3.467, "step": 4070 }, { "epoch": 0.3218100333655802, "grad_norm": 0.1571779629113992, "learning_rate": 0.000988747577734857, "loss": 3.5419, "step": 4075 }, { "epoch": 0.32220489230222504, "grad_norm": 0.2157987216912809, "learning_rate": 0.0009886747636981004, "loss": 3.7208, "step": 4080 }, { "epoch": 0.3225997512388699, "grad_norm": 0.1649051382800758, "learning_rate": 0.0009886017175304808, "loss": 3.6869, "step": 4085 }, { "epoch": 0.3229946101755148, "grad_norm": 0.23720396366174534, "learning_rate": 0.0009885284392666962, "loss": 3.8722, "step": 4090 }, { "epoch": 0.32338946911215966, "grad_norm": 0.1566275992942821, "learning_rate": 0.0009884549289415556, "loss": 3.5167, "step": 4095 }, { "epoch": 0.3237843280488046, "grad_norm": 0.17713781643363505, "learning_rate": 0.0009883811865899783, "loss": 3.7613, "step": 4100 }, { "epoch": 0.32417918698544945, "grad_norm": 0.15519645195938125, "learning_rate": 0.000988307212246993, "loss": 3.8923, "step": 4105 }, { "epoch": 0.3245740459220943, "grad_norm": 0.18827032683202202, "learning_rate": 0.0009882330059477395, "loss": 3.9008, "step": 4110 }, { "epoch": 0.3249689048587392, "grad_norm": 0.13699439240827116, "learning_rate": 0.000988158567727467, "loss": 3.7057, "step": 4115 }, { "epoch": 0.3253637637953841, "grad_norm": 0.1891496720907779, "learning_rate": 0.0009880838976215355, "loss": 3.751, "step": 4120 }, { "epoch": 0.325758622732029, "grad_norm": 0.1369656089041444, "learning_rate": 0.0009880089956654148, "loss": 3.7672, "step": 4125 }, { "epoch": 0.32615348166867386, "grad_norm": 0.12770985575937824, "learning_rate": 0.0009879338618946848, "loss": 4.0007, "step": 4130 }, { "epoch": 0.32654834060531873, "grad_norm": 0.15961789345125038, "learning_rate": 0.0009878584963450357, "loss": 3.6333, "step": 4135 }, { "epoch": 0.32694319954196366, "grad_norm": 0.18060821052890952, "learning_rate": 0.0009877828990522677, "loss": 3.737, "step": 4140 }, { "epoch": 0.32733805847860853, "grad_norm": 0.13683020315731717, "learning_rate": 0.0009877070700522912, "loss": 3.9227, "step": 4145 }, { "epoch": 0.3277329174152534, "grad_norm": 0.125074923170578, "learning_rate": 0.0009876310093811266, "loss": 3.6839, "step": 4150 }, { "epoch": 0.32812777635189827, "grad_norm": 0.16714787784166818, "learning_rate": 0.0009875547170749043, "loss": 3.5929, "step": 4155 }, { "epoch": 0.32852263528854314, "grad_norm": 0.1632367972305905, "learning_rate": 0.0009874781931698646, "loss": 3.7166, "step": 4160 }, { "epoch": 0.32891749422518807, "grad_norm": 0.10839742692981494, "learning_rate": 0.000987401437702358, "loss": 3.5065, "step": 4165 }, { "epoch": 0.32931235316183294, "grad_norm": 0.16530327590520466, "learning_rate": 0.0009873244507088454, "loss": 3.6114, "step": 4170 }, { "epoch": 0.3297072120984778, "grad_norm": 0.1340898933667081, "learning_rate": 0.0009872472322258968, "loss": 3.794, "step": 4175 }, { "epoch": 0.3301020710351227, "grad_norm": 0.12997199069795043, "learning_rate": 0.0009871697822901927, "loss": 3.749, "step": 4180 }, { "epoch": 0.3304969299717676, "grad_norm": 0.1708117698442536, "learning_rate": 0.0009870921009385236, "loss": 3.732, "step": 4185 }, { "epoch": 0.3308917889084125, "grad_norm": 0.12224986112375191, "learning_rate": 0.0009870141882077897, "loss": 3.5716, "step": 4190 }, { "epoch": 0.33128664784505735, "grad_norm": 0.18822604906109336, "learning_rate": 0.000986936044135001, "loss": 3.6689, "step": 4195 }, { "epoch": 0.3316815067817022, "grad_norm": 0.1632829945732785, "learning_rate": 0.000986857668757278, "loss": 3.646, "step": 4200 }, { "epoch": 0.33207636571834714, "grad_norm": 0.16390508109245783, "learning_rate": 0.0009867790621118503, "loss": 3.7313, "step": 4205 }, { "epoch": 0.332471224654992, "grad_norm": 0.12710132766200433, "learning_rate": 0.000986700224236058, "loss": 3.8273, "step": 4210 }, { "epoch": 0.3328660835916369, "grad_norm": 0.15392533204876968, "learning_rate": 0.0009866211551673506, "loss": 3.8366, "step": 4215 }, { "epoch": 0.33326094252828176, "grad_norm": 0.15819236376850362, "learning_rate": 0.0009865418549432875, "loss": 3.9706, "step": 4220 }, { "epoch": 0.3336558014649267, "grad_norm": 0.17237662143771842, "learning_rate": 0.0009864623236015382, "loss": 3.7666, "step": 4225 }, { "epoch": 0.33405066040157155, "grad_norm": 0.1498610225342138, "learning_rate": 0.0009863825611798815, "loss": 3.6528, "step": 4230 }, { "epoch": 0.3344455193382164, "grad_norm": 0.10846757151751457, "learning_rate": 0.0009863025677162066, "loss": 3.6338, "step": 4235 }, { "epoch": 0.3348403782748613, "grad_norm": 0.12825639431703872, "learning_rate": 0.0009862223432485118, "loss": 3.72, "step": 4240 }, { "epoch": 0.33523523721150617, "grad_norm": 0.14582098307875518, "learning_rate": 0.0009861418878149056, "loss": 3.7205, "step": 4245 }, { "epoch": 0.3356300961481511, "grad_norm": 0.17730002469260997, "learning_rate": 0.0009860612014536058, "loss": 3.8531, "step": 4250 }, { "epoch": 0.33602495508479596, "grad_norm": 0.1486990102461357, "learning_rate": 0.0009859802842029405, "loss": 3.6007, "step": 4255 }, { "epoch": 0.33641981402144083, "grad_norm": 0.1640899826000651, "learning_rate": 0.0009858991361013467, "loss": 3.8293, "step": 4260 }, { "epoch": 0.3368146729580857, "grad_norm": 0.13711009966872323, "learning_rate": 0.0009858177571873718, "loss": 3.6475, "step": 4265 }, { "epoch": 0.33720953189473063, "grad_norm": 0.12873094599788754, "learning_rate": 0.0009857361474996725, "loss": 3.5606, "step": 4270 }, { "epoch": 0.3376043908313755, "grad_norm": 0.1691950167117618, "learning_rate": 0.0009856543070770147, "loss": 3.6766, "step": 4275 }, { "epoch": 0.33799924976802037, "grad_norm": 0.22266105086195573, "learning_rate": 0.0009855722359582747, "loss": 3.8781, "step": 4280 }, { "epoch": 0.33839410870466524, "grad_norm": 0.16765442601303174, "learning_rate": 0.000985489934182438, "loss": 3.6555, "step": 4285 }, { "epoch": 0.33878896764131017, "grad_norm": 0.2145599932122651, "learning_rate": 0.0009854074017885996, "loss": 3.7738, "step": 4290 }, { "epoch": 0.33918382657795504, "grad_norm": 0.1758577831939249, "learning_rate": 0.0009853246388159644, "loss": 3.7198, "step": 4295 }, { "epoch": 0.3395786855145999, "grad_norm": 0.14183377208925937, "learning_rate": 0.0009852416453038461, "loss": 3.5778, "step": 4300 }, { "epoch": 0.3399735444512448, "grad_norm": 0.14800543452386195, "learning_rate": 0.0009851584212916687, "loss": 3.6909, "step": 4305 }, { "epoch": 0.34036840338788965, "grad_norm": 0.18603723763069177, "learning_rate": 0.0009850749668189652, "loss": 3.6753, "step": 4310 }, { "epoch": 0.3407632623245346, "grad_norm": 0.19129678891874669, "learning_rate": 0.0009849912819253784, "loss": 3.7759, "step": 4315 }, { "epoch": 0.34115812126117945, "grad_norm": 0.14938085772595677, "learning_rate": 0.0009849073666506603, "loss": 3.7168, "step": 4320 }, { "epoch": 0.3415529801978243, "grad_norm": 0.1955027765400127, "learning_rate": 0.0009848232210346725, "loss": 3.8195, "step": 4325 }, { "epoch": 0.3419478391344692, "grad_norm": 0.15017137125908217, "learning_rate": 0.0009847388451173858, "loss": 3.5917, "step": 4330 }, { "epoch": 0.3423426980711141, "grad_norm": 0.11274456227696343, "learning_rate": 0.0009846542389388808, "loss": 3.6735, "step": 4335 }, { "epoch": 0.342737557007759, "grad_norm": 0.12010849726714043, "learning_rate": 0.000984569402539347, "loss": 3.4637, "step": 4340 }, { "epoch": 0.34313241594440386, "grad_norm": 0.12381384562313036, "learning_rate": 0.0009844843359590836, "loss": 3.2832, "step": 4345 }, { "epoch": 0.3435272748810487, "grad_norm": 0.14352524197379601, "learning_rate": 0.000984399039238499, "loss": 3.6485, "step": 4350 }, { "epoch": 0.34392213381769365, "grad_norm": 0.3236710802940003, "learning_rate": 0.0009843135124181109, "loss": 3.84, "step": 4355 }, { "epoch": 0.3443169927543385, "grad_norm": 0.14463174317676938, "learning_rate": 0.0009842277555385463, "loss": 4.03, "step": 4360 }, { "epoch": 0.3447118516909834, "grad_norm": 0.19470368391198184, "learning_rate": 0.0009841417686405418, "loss": 3.5217, "step": 4365 }, { "epoch": 0.34510671062762827, "grad_norm": 0.12525123706636143, "learning_rate": 0.0009840555517649427, "loss": 3.7097, "step": 4370 }, { "epoch": 0.34550156956427314, "grad_norm": 0.11043171303516439, "learning_rate": 0.0009839691049527042, "loss": 3.615, "step": 4375 }, { "epoch": 0.34589642850091806, "grad_norm": 0.15852330750831922, "learning_rate": 0.00098388242824489, "loss": 3.6421, "step": 4380 }, { "epoch": 0.34629128743756293, "grad_norm": 0.1504636249585656, "learning_rate": 0.0009837955216826734, "loss": 3.8083, "step": 4385 }, { "epoch": 0.3466861463742078, "grad_norm": 0.16088707372024685, "learning_rate": 0.000983708385307337, "loss": 3.6862, "step": 4390 }, { "epoch": 0.3470810053108527, "grad_norm": 0.15687259648951293, "learning_rate": 0.0009836210191602724, "loss": 3.68, "step": 4395 }, { "epoch": 0.3474758642474976, "grad_norm": 0.3852655272463381, "learning_rate": 0.0009835334232829805, "loss": 3.9523, "step": 4400 }, { "epoch": 0.34787072318414247, "grad_norm": 0.240639185252359, "learning_rate": 0.0009834455977170708, "loss": 3.6944, "step": 4405 }, { "epoch": 0.34826558212078734, "grad_norm": 0.16813881700771116, "learning_rate": 0.0009833575425042629, "loss": 3.6011, "step": 4410 }, { "epoch": 0.3486604410574322, "grad_norm": 0.3489000684773577, "learning_rate": 0.000983269257686384, "loss": 3.9028, "step": 4415 }, { "epoch": 0.34905529999407714, "grad_norm": 0.2371622622627103, "learning_rate": 0.0009831807433053725, "loss": 3.6914, "step": 4420 }, { "epoch": 0.349450158930722, "grad_norm": 0.16635703487290746, "learning_rate": 0.0009830919994032734, "loss": 3.6147, "step": 4425 }, { "epoch": 0.3498450178673669, "grad_norm": 0.14613789134887675, "learning_rate": 0.0009830030260222426, "loss": 3.5726, "step": 4430 }, { "epoch": 0.35023987680401175, "grad_norm": 0.12834874061280066, "learning_rate": 0.0009829138232045444, "loss": 3.7857, "step": 4435 }, { "epoch": 0.3506347357406567, "grad_norm": 0.14253818400737195, "learning_rate": 0.0009828243909925517, "loss": 3.7125, "step": 4440 }, { "epoch": 0.35102959467730155, "grad_norm": 0.14021733458360236, "learning_rate": 0.0009827347294287467, "loss": 3.6509, "step": 4445 }, { "epoch": 0.3514244536139464, "grad_norm": 0.12479276304517087, "learning_rate": 0.0009826448385557207, "loss": 3.7195, "step": 4450 }, { "epoch": 0.3518193125505913, "grad_norm": 0.1398538835888721, "learning_rate": 0.0009825547184161736, "loss": 3.824, "step": 4455 }, { "epoch": 0.35221417148723616, "grad_norm": 0.13375937076060115, "learning_rate": 0.0009824643690529147, "loss": 3.6467, "step": 4460 }, { "epoch": 0.3526090304238811, "grad_norm": 0.12380582401758528, "learning_rate": 0.0009823737905088616, "loss": 3.545, "step": 4465 }, { "epoch": 0.35300388936052596, "grad_norm": 0.16608418332324235, "learning_rate": 0.000982282982827041, "loss": 4.0149, "step": 4470 }, { "epoch": 0.35339874829717083, "grad_norm": 0.15546908333126044, "learning_rate": 0.0009821919460505887, "loss": 3.859, "step": 4475 }, { "epoch": 0.3537936072338157, "grad_norm": 0.11535881165485526, "learning_rate": 0.0009821006802227489, "loss": 3.6017, "step": 4480 }, { "epoch": 0.3541884661704606, "grad_norm": 0.17278370028526344, "learning_rate": 0.000982009185386875, "loss": 3.7733, "step": 4485 }, { "epoch": 0.3545833251071055, "grad_norm": 0.14780662383270973, "learning_rate": 0.0009819174615864285, "loss": 3.4859, "step": 4490 }, { "epoch": 0.35497818404375037, "grad_norm": 0.13827779713095478, "learning_rate": 0.0009818255088649807, "loss": 3.6402, "step": 4495 }, { "epoch": 0.35537304298039524, "grad_norm": 0.1237182957743857, "learning_rate": 0.0009817333272662107, "loss": 3.847, "step": 4500 }, { "epoch": 0.35576790191704016, "grad_norm": 0.14243708253074447, "learning_rate": 0.0009816409168339072, "loss": 3.648, "step": 4505 }, { "epoch": 0.35616276085368503, "grad_norm": 0.13075976196170305, "learning_rate": 0.0009815482776119667, "loss": 3.4717, "step": 4510 }, { "epoch": 0.3565576197903299, "grad_norm": 0.16957307991950485, "learning_rate": 0.0009814554096443948, "loss": 3.8095, "step": 4515 }, { "epoch": 0.3569524787269748, "grad_norm": 0.14896933312934577, "learning_rate": 0.0009813623129753058, "loss": 3.7836, "step": 4520 }, { "epoch": 0.35734733766361965, "grad_norm": 0.11980369865626143, "learning_rate": 0.0009812689876489227, "loss": 3.5947, "step": 4525 }, { "epoch": 0.3577421966002646, "grad_norm": 0.14369760244786414, "learning_rate": 0.000981175433709577, "loss": 3.6192, "step": 4530 }, { "epoch": 0.35813705553690944, "grad_norm": 0.13901283144345747, "learning_rate": 0.0009810816512017088, "loss": 3.8349, "step": 4535 }, { "epoch": 0.3585319144735543, "grad_norm": 0.21148987165375763, "learning_rate": 0.0009809876401698668, "loss": 3.568, "step": 4540 }, { "epoch": 0.3589267734101992, "grad_norm": 0.10950645311449285, "learning_rate": 0.0009808934006587079, "loss": 3.5659, "step": 4545 }, { "epoch": 0.3593216323468441, "grad_norm": 0.16007064378266042, "learning_rate": 0.0009807989327129982, "loss": 3.5732, "step": 4550 }, { "epoch": 0.359716491283489, "grad_norm": 0.1485542164969082, "learning_rate": 0.000980704236377612, "loss": 3.5423, "step": 4555 }, { "epoch": 0.36011135022013385, "grad_norm": 0.11650730747140937, "learning_rate": 0.0009806093116975318, "loss": 3.7593, "step": 4560 }, { "epoch": 0.3605062091567787, "grad_norm": 0.17737141350247118, "learning_rate": 0.000980514158717849, "loss": 3.6397, "step": 4565 }, { "epoch": 0.36090106809342365, "grad_norm": 0.1388427873644708, "learning_rate": 0.0009804187774837634, "loss": 3.7236, "step": 4570 }, { "epoch": 0.3612959270300685, "grad_norm": 0.15838364395680474, "learning_rate": 0.000980323168040583, "loss": 3.7392, "step": 4575 }, { "epoch": 0.3616907859667134, "grad_norm": 0.16621357057252367, "learning_rate": 0.0009802273304337242, "loss": 3.6859, "step": 4580 }, { "epoch": 0.36208564490335826, "grad_norm": 0.1908016464504105, "learning_rate": 0.000980131264708712, "loss": 3.7526, "step": 4585 }, { "epoch": 0.36248050384000313, "grad_norm": 0.1264995340277357, "learning_rate": 0.0009800349709111796, "loss": 3.6824, "step": 4590 }, { "epoch": 0.36287536277664806, "grad_norm": 0.1600761423572493, "learning_rate": 0.0009799384490868687, "loss": 3.5344, "step": 4595 }, { "epoch": 0.36327022171329293, "grad_norm": 0.14324087056745993, "learning_rate": 0.000979841699281629, "loss": 3.5254, "step": 4600 }, { "epoch": 0.3636650806499378, "grad_norm": 0.15461032412786196, "learning_rate": 0.0009797447215414191, "loss": 3.6684, "step": 4605 }, { "epoch": 0.36405993958658267, "grad_norm": 0.132539817604961, "learning_rate": 0.000979647515912305, "loss": 3.4271, "step": 4610 }, { "epoch": 0.3644547985232276, "grad_norm": 0.1151505947170085, "learning_rate": 0.0009795500824404618, "loss": 3.6926, "step": 4615 }, { "epoch": 0.36484965745987247, "grad_norm": 0.13084136227141135, "learning_rate": 0.0009794524211721725, "loss": 3.5747, "step": 4620 }, { "epoch": 0.36524451639651734, "grad_norm": 0.17207031147083543, "learning_rate": 0.0009793545321538277, "loss": 3.7099, "step": 4625 }, { "epoch": 0.3656393753331622, "grad_norm": 0.17484823019648427, "learning_rate": 0.0009792564154319275, "loss": 3.6664, "step": 4630 }, { "epoch": 0.36603423426980713, "grad_norm": 0.11559649473174631, "learning_rate": 0.0009791580710530787, "loss": 3.7269, "step": 4635 }, { "epoch": 0.366429093206452, "grad_norm": 0.14370608940366578, "learning_rate": 0.0009790594990639976, "loss": 3.5345, "step": 4640 }, { "epoch": 0.3668239521430969, "grad_norm": 0.1523135838977165, "learning_rate": 0.0009789606995115076, "loss": 3.6785, "step": 4645 }, { "epoch": 0.36721881107974175, "grad_norm": 0.16853343398034862, "learning_rate": 0.000978861672442541, "loss": 3.5444, "step": 4650 }, { "epoch": 0.3676136700163867, "grad_norm": 0.1481689964830751, "learning_rate": 0.000978762417904137, "loss": 3.5896, "step": 4655 }, { "epoch": 0.36800852895303154, "grad_norm": 0.1447707110984751, "learning_rate": 0.0009786629359434444, "loss": 3.515, "step": 4660 }, { "epoch": 0.3684033878896764, "grad_norm": 0.1338146512804964, "learning_rate": 0.000978563226607719, "loss": 3.5078, "step": 4665 }, { "epoch": 0.3687982468263213, "grad_norm": 0.13193917567216482, "learning_rate": 0.0009784632899443244, "loss": 3.6393, "step": 4670 }, { "epoch": 0.36919310576296616, "grad_norm": 0.1624009725752309, "learning_rate": 0.0009783631260007332, "loss": 3.6695, "step": 4675 }, { "epoch": 0.3695879646996111, "grad_norm": 0.13844304974493277, "learning_rate": 0.000978262734824525, "loss": 3.7599, "step": 4680 }, { "epoch": 0.36998282363625595, "grad_norm": 0.15895490710555044, "learning_rate": 0.000978162116463388, "loss": 3.577, "step": 4685 }, { "epoch": 0.3703776825729008, "grad_norm": 0.14577019337189007, "learning_rate": 0.000978061270965118, "loss": 3.395, "step": 4690 }, { "epoch": 0.3707725415095457, "grad_norm": 0.14916715672310463, "learning_rate": 0.0009779601983776187, "loss": 3.8378, "step": 4695 }, { "epoch": 0.3711674004461906, "grad_norm": 0.15961853401291942, "learning_rate": 0.0009778588987489018, "loss": 3.6637, "step": 4700 }, { "epoch": 0.3715622593828355, "grad_norm": 0.11970976483403588, "learning_rate": 0.0009777573721270864, "loss": 3.689, "step": 4705 }, { "epoch": 0.37195711831948036, "grad_norm": 0.36084710172340867, "learning_rate": 0.0009776556185604004, "loss": 3.9021, "step": 4710 }, { "epoch": 0.37235197725612523, "grad_norm": 0.22925905541108332, "learning_rate": 0.0009775536380971785, "loss": 3.7586, "step": 4715 }, { "epoch": 0.37274683619277016, "grad_norm": 0.18972669923656804, "learning_rate": 0.0009774514307858636, "loss": 3.8042, "step": 4720 }, { "epoch": 0.37314169512941503, "grad_norm": 0.1368140432866921, "learning_rate": 0.0009773489966750067, "loss": 3.9076, "step": 4725 }, { "epoch": 0.3735365540660599, "grad_norm": 0.15991955675028158, "learning_rate": 0.0009772463358132654, "loss": 3.4769, "step": 4730 }, { "epoch": 0.37393141300270477, "grad_norm": 0.18594436210662832, "learning_rate": 0.0009771434482494068, "loss": 3.5983, "step": 4735 }, { "epoch": 0.37432627193934964, "grad_norm": 0.14445573624662963, "learning_rate": 0.0009770403340323038, "loss": 3.6153, "step": 4740 }, { "epoch": 0.37472113087599457, "grad_norm": 0.12023122703254165, "learning_rate": 0.0009769369932109383, "loss": 3.6105, "step": 4745 }, { "epoch": 0.37511598981263944, "grad_norm": 0.17624284621815176, "learning_rate": 0.000976833425834399, "loss": 3.6016, "step": 4750 }, { "epoch": 0.3755108487492843, "grad_norm": 0.17165214222390185, "learning_rate": 0.0009767296319518831, "loss": 3.5604, "step": 4755 }, { "epoch": 0.3759057076859292, "grad_norm": 0.11164983100763168, "learning_rate": 0.0009766256116126948, "loss": 3.4488, "step": 4760 }, { "epoch": 0.3763005666225741, "grad_norm": 0.16728831160434943, "learning_rate": 0.0009765213648662457, "loss": 3.7532, "step": 4765 }, { "epoch": 0.376695425559219, "grad_norm": 0.15364982557485135, "learning_rate": 0.0009764168917620552, "loss": 3.61, "step": 4770 }, { "epoch": 0.37709028449586385, "grad_norm": 0.1284307897337875, "learning_rate": 0.0009763121923497504, "loss": 3.5599, "step": 4775 }, { "epoch": 0.3774851434325087, "grad_norm": 0.12131652656404662, "learning_rate": 0.0009762072666790658, "loss": 3.6893, "step": 4780 }, { "epoch": 0.37788000236915364, "grad_norm": 0.17252306649118074, "learning_rate": 0.000976102114799843, "loss": 3.8968, "step": 4785 }, { "epoch": 0.3782748613057985, "grad_norm": 0.1326509091802803, "learning_rate": 0.0009759967367620317, "loss": 3.6139, "step": 4790 }, { "epoch": 0.3786697202424434, "grad_norm": 0.1248257339938776, "learning_rate": 0.0009758911326156886, "loss": 3.4176, "step": 4795 }, { "epoch": 0.37906457917908826, "grad_norm": 0.12642386570873065, "learning_rate": 0.0009757853024109779, "loss": 3.7211, "step": 4800 }, { "epoch": 0.3794594381157331, "grad_norm": 0.11199665817044818, "learning_rate": 0.0009756792461981711, "loss": 3.5855, "step": 4805 }, { "epoch": 0.37985429705237805, "grad_norm": 0.1094268691716203, "learning_rate": 0.0009755729640276474, "loss": 3.6866, "step": 4810 }, { "epoch": 0.3802491559890229, "grad_norm": 0.13063925972961724, "learning_rate": 0.0009754664559498928, "loss": 3.6323, "step": 4815 }, { "epoch": 0.3806440149256678, "grad_norm": 0.14256642292508007, "learning_rate": 0.0009753597220155011, "loss": 3.5012, "step": 4820 }, { "epoch": 0.38103887386231267, "grad_norm": 0.1673340394624616, "learning_rate": 0.0009752527622751729, "loss": 3.6589, "step": 4825 }, { "epoch": 0.3814337327989576, "grad_norm": 0.18203180172544495, "learning_rate": 0.0009751455767797166, "loss": 3.3784, "step": 4830 }, { "epoch": 0.38182859173560246, "grad_norm": 0.13783426158960962, "learning_rate": 0.0009750381655800475, "loss": 3.6335, "step": 4835 }, { "epoch": 0.38222345067224733, "grad_norm": 0.18675394897044004, "learning_rate": 0.0009749305287271884, "loss": 3.6705, "step": 4840 }, { "epoch": 0.3826183096088922, "grad_norm": 0.15706879254717915, "learning_rate": 0.0009748226662722686, "loss": 3.6348, "step": 4845 }, { "epoch": 0.38301316854553713, "grad_norm": 0.1310449854494739, "learning_rate": 0.0009747145782665254, "loss": 3.7475, "step": 4850 }, { "epoch": 0.383408027482182, "grad_norm": 0.13122990274215793, "learning_rate": 0.0009746062647613029, "loss": 3.6197, "step": 4855 }, { "epoch": 0.38380288641882687, "grad_norm": 0.12451254787260982, "learning_rate": 0.0009744977258080522, "loss": 3.6196, "step": 4860 }, { "epoch": 0.38419774535547174, "grad_norm": 0.1325417071415291, "learning_rate": 0.0009743889614583316, "loss": 3.4856, "step": 4865 }, { "epoch": 0.38459260429211667, "grad_norm": 0.11869440026711958, "learning_rate": 0.0009742799717638066, "loss": 3.4556, "step": 4870 }, { "epoch": 0.38498746322876154, "grad_norm": 0.1390157056499002, "learning_rate": 0.0009741707567762494, "loss": 3.5504, "step": 4875 }, { "epoch": 0.3853823221654064, "grad_norm": 0.11384584147446816, "learning_rate": 0.0009740613165475396, "loss": 3.6133, "step": 4880 }, { "epoch": 0.3857771811020513, "grad_norm": 0.47689573261408236, "learning_rate": 0.0009739516511296637, "loss": 3.6402, "step": 4885 }, { "epoch": 0.38617204003869615, "grad_norm": 0.12805873688522668, "learning_rate": 0.0009738417605747149, "loss": 3.6905, "step": 4890 }, { "epoch": 0.3865668989753411, "grad_norm": 0.12342370181991338, "learning_rate": 0.0009737316449348935, "loss": 3.6794, "step": 4895 }, { "epoch": 0.38696175791198595, "grad_norm": 0.1166041660359602, "learning_rate": 0.000973621304262507, "loss": 3.5857, "step": 4900 }, { "epoch": 0.3873566168486308, "grad_norm": 0.15456982566531363, "learning_rate": 0.0009735107386099694, "loss": 3.6869, "step": 4905 }, { "epoch": 0.3877514757852757, "grad_norm": 0.14527952171832417, "learning_rate": 0.0009733999480298018, "loss": 3.6833, "step": 4910 }, { "epoch": 0.3881463347219206, "grad_norm": 0.13734005152275885, "learning_rate": 0.000973288932574632, "loss": 3.4831, "step": 4915 }, { "epoch": 0.3885411936585655, "grad_norm": 0.11758083723214122, "learning_rate": 0.0009731776922971947, "loss": 3.4885, "step": 4920 }, { "epoch": 0.38893605259521036, "grad_norm": 0.11982531271703072, "learning_rate": 0.0009730662272503315, "loss": 3.7851, "step": 4925 }, { "epoch": 0.38933091153185523, "grad_norm": 0.21960304288515706, "learning_rate": 0.0009729545374869905, "loss": 3.9984, "step": 4930 }, { "epoch": 0.38972577046850015, "grad_norm": 0.1480749128748313, "learning_rate": 0.0009728426230602267, "loss": 3.6416, "step": 4935 }, { "epoch": 0.390120629405145, "grad_norm": 0.12775402396888988, "learning_rate": 0.000972730484023202, "loss": 3.5303, "step": 4940 }, { "epoch": 0.3905154883417899, "grad_norm": 0.12865736502745817, "learning_rate": 0.0009726181204291848, "loss": 3.5339, "step": 4945 }, { "epoch": 0.39091034727843477, "grad_norm": 0.11928543154457094, "learning_rate": 0.0009725055323315501, "loss": 3.5637, "step": 4950 }, { "epoch": 0.39130520621507964, "grad_norm": 0.11690283260108525, "learning_rate": 0.0009723927197837794, "loss": 3.6692, "step": 4955 }, { "epoch": 0.39170006515172456, "grad_norm": 0.13054570475522564, "learning_rate": 0.0009722796828394616, "loss": 3.4602, "step": 4960 }, { "epoch": 0.39209492408836943, "grad_norm": 0.1281856086329582, "learning_rate": 0.000972166421552291, "loss": 3.6698, "step": 4965 }, { "epoch": 0.3924897830250143, "grad_norm": 0.14144811071773494, "learning_rate": 0.0009720529359760697, "loss": 3.5939, "step": 4970 }, { "epoch": 0.3928846419616592, "grad_norm": 0.1549005757270711, "learning_rate": 0.0009719392261647054, "loss": 3.4915, "step": 4975 }, { "epoch": 0.3932795008983041, "grad_norm": 0.1502941485657608, "learning_rate": 0.0009718252921722127, "loss": 3.813, "step": 4980 }, { "epoch": 0.393674359834949, "grad_norm": 0.12082940663856655, "learning_rate": 0.0009717111340527127, "loss": 3.5349, "step": 4985 }, { "epoch": 0.39406921877159384, "grad_norm": 0.19641011663414737, "learning_rate": 0.0009715967518604331, "loss": 3.7441, "step": 4990 }, { "epoch": 0.3944640777082387, "grad_norm": 0.12516377221394256, "learning_rate": 0.0009714821456497076, "loss": 3.653, "step": 4995 }, { "epoch": 0.39485893664488364, "grad_norm": 0.13062129432194472, "learning_rate": 0.0009713673154749767, "loss": 3.5743, "step": 5000 }, { "epoch": 0.3952537955815285, "grad_norm": 0.12831677716352763, "learning_rate": 0.0009712522613907872, "loss": 3.4423, "step": 5005 }, { "epoch": 0.3956486545181734, "grad_norm": 0.12052064626643007, "learning_rate": 0.0009711369834517921, "loss": 3.5724, "step": 5010 }, { "epoch": 0.39604351345481825, "grad_norm": 0.12520579226376027, "learning_rate": 0.0009710214817127509, "loss": 3.4452, "step": 5015 }, { "epoch": 0.3964383723914631, "grad_norm": 0.13876128817623384, "learning_rate": 0.0009709057562285295, "loss": 3.5706, "step": 5020 }, { "epoch": 0.39683323132810805, "grad_norm": 0.10790312576599231, "learning_rate": 0.0009707898070540999, "loss": 3.6207, "step": 5025 }, { "epoch": 0.3972280902647529, "grad_norm": 0.10731556663064264, "learning_rate": 0.0009706736342445403, "loss": 3.4466, "step": 5030 }, { "epoch": 0.3976229492013978, "grad_norm": 0.11925527227221021, "learning_rate": 0.0009705572378550354, "loss": 3.4119, "step": 5035 }, { "epoch": 0.39801780813804266, "grad_norm": 0.11237245119772887, "learning_rate": 0.0009704406179408758, "loss": 3.4442, "step": 5040 }, { "epoch": 0.3984126670746876, "grad_norm": 0.11103808068462935, "learning_rate": 0.0009703237745574585, "loss": 3.6278, "step": 5045 }, { "epoch": 0.39880752601133246, "grad_norm": 0.14707539664056105, "learning_rate": 0.0009702067077602866, "loss": 3.5229, "step": 5050 }, { "epoch": 0.39920238494797733, "grad_norm": 0.17830995816733813, "learning_rate": 0.0009700894176049693, "loss": 3.5035, "step": 5055 }, { "epoch": 0.3995972438846222, "grad_norm": 0.13325486872216868, "learning_rate": 0.000969971904147222, "loss": 3.524, "step": 5060 }, { "epoch": 0.3999921028212671, "grad_norm": 0.2189008780067487, "learning_rate": 0.0009698541674428658, "loss": 3.8612, "step": 5065 }, { "epoch": 0.400386961757912, "grad_norm": 0.20854305027059117, "learning_rate": 0.0009697362075478284, "loss": 3.5417, "step": 5070 }, { "epoch": 0.40078182069455687, "grad_norm": 0.22990064094382498, "learning_rate": 0.0009696180245181433, "loss": 3.7515, "step": 5075 }, { "epoch": 0.40117667963120174, "grad_norm": 0.24388381623870015, "learning_rate": 0.0009694996184099497, "loss": 3.8538, "step": 5080 }, { "epoch": 0.40157153856784666, "grad_norm": 0.15628904864598106, "learning_rate": 0.0009693809892794931, "loss": 3.6085, "step": 5085 }, { "epoch": 0.40196639750449153, "grad_norm": 0.15279729308958995, "learning_rate": 0.0009692621371831249, "loss": 3.5852, "step": 5090 }, { "epoch": 0.4023612564411364, "grad_norm": 0.16308282379672745, "learning_rate": 0.0009691430621773023, "loss": 3.6811, "step": 5095 }, { "epoch": 0.4027561153777813, "grad_norm": 0.17093717556991417, "learning_rate": 0.0009690237643185884, "loss": 3.7857, "step": 5100 }, { "epoch": 0.40315097431442615, "grad_norm": 0.14425228634556744, "learning_rate": 0.0009689042436636523, "loss": 3.4785, "step": 5105 }, { "epoch": 0.4035458332510711, "grad_norm": 0.098668012500979, "learning_rate": 0.000968784500269269, "loss": 3.5749, "step": 5110 }, { "epoch": 0.40394069218771594, "grad_norm": 0.28297435172923713, "learning_rate": 0.0009686645341923188, "loss": 3.7319, "step": 5115 }, { "epoch": 0.4043355511243608, "grad_norm": 0.11445590953497954, "learning_rate": 0.0009685443454897885, "loss": 3.4329, "step": 5120 }, { "epoch": 0.4047304100610057, "grad_norm": 0.15013174176637986, "learning_rate": 0.00096842393421877, "loss": 3.4739, "step": 5125 }, { "epoch": 0.4051252689976506, "grad_norm": 0.1405211490400228, "learning_rate": 0.0009683033004364612, "loss": 3.7556, "step": 5130 }, { "epoch": 0.4055201279342955, "grad_norm": 0.12730635259857798, "learning_rate": 0.0009681824442001658, "loss": 3.7495, "step": 5135 }, { "epoch": 0.40591498687094035, "grad_norm": 0.12074114766652302, "learning_rate": 0.0009680613655672932, "loss": 3.5578, "step": 5140 }, { "epoch": 0.4063098458075852, "grad_norm": 0.1317784364333637, "learning_rate": 0.000967940064595358, "loss": 3.5583, "step": 5145 }, { "epoch": 0.40670470474423015, "grad_norm": 0.11801874035562485, "learning_rate": 0.000967818541341981, "loss": 3.5859, "step": 5150 }, { "epoch": 0.407099563680875, "grad_norm": 0.13765151952102508, "learning_rate": 0.0009676967958648881, "loss": 3.528, "step": 5155 }, { "epoch": 0.4074944226175199, "grad_norm": 0.15563445699621484, "learning_rate": 0.0009675748282219113, "loss": 3.6978, "step": 5160 }, { "epoch": 0.40788928155416476, "grad_norm": 0.13237475714527938, "learning_rate": 0.0009674526384709876, "loss": 3.6734, "step": 5165 }, { "epoch": 0.40828414049080963, "grad_norm": 0.12353554106695883, "learning_rate": 0.0009673302266701597, "loss": 3.4649, "step": 5170 }, { "epoch": 0.40867899942745456, "grad_norm": 0.10705422300612526, "learning_rate": 0.0009672075928775758, "loss": 3.579, "step": 5175 }, { "epoch": 0.40907385836409943, "grad_norm": 0.1292508677315278, "learning_rate": 0.0009670847371514896, "loss": 3.4973, "step": 5180 }, { "epoch": 0.4094687173007443, "grad_norm": 0.12646617008148317, "learning_rate": 0.0009669616595502601, "loss": 3.5228, "step": 5185 }, { "epoch": 0.40986357623738917, "grad_norm": 0.11733632298711841, "learning_rate": 0.000966838360132352, "loss": 3.6073, "step": 5190 }, { "epoch": 0.4102584351740341, "grad_norm": 0.15404509417839654, "learning_rate": 0.0009667148389563349, "loss": 3.6548, "step": 5195 }, { "epoch": 0.41065329411067897, "grad_norm": 0.11868778739817668, "learning_rate": 0.000966591096080884, "loss": 3.448, "step": 5200 }, { "epoch": 0.41104815304732384, "grad_norm": 0.25024388661194974, "learning_rate": 0.0009664671315647797, "loss": 3.5922, "step": 5205 }, { "epoch": 0.4114430119839687, "grad_norm": 0.12684785693306716, "learning_rate": 0.0009663429454669081, "loss": 3.7115, "step": 5210 }, { "epoch": 0.41183787092061364, "grad_norm": 0.16435490987963025, "learning_rate": 0.0009662185378462601, "loss": 3.5312, "step": 5215 }, { "epoch": 0.4122327298572585, "grad_norm": 0.10934833086225951, "learning_rate": 0.0009660939087619316, "loss": 3.4232, "step": 5220 }, { "epoch": 0.4126275887939034, "grad_norm": 0.15957748102019795, "learning_rate": 0.0009659690582731244, "loss": 3.4022, "step": 5225 }, { "epoch": 0.41302244773054825, "grad_norm": 0.14044397763167923, "learning_rate": 0.0009658439864391452, "loss": 3.5307, "step": 5230 }, { "epoch": 0.4134173066671931, "grad_norm": 0.14584869868575803, "learning_rate": 0.0009657186933194055, "loss": 3.461, "step": 5235 }, { "epoch": 0.41381216560383804, "grad_norm": 0.11866206914846573, "learning_rate": 0.0009655931789734224, "loss": 3.4916, "step": 5240 }, { "epoch": 0.4142070245404829, "grad_norm": 0.12430752412380848, "learning_rate": 0.0009654674434608177, "loss": 3.5217, "step": 5245 }, { "epoch": 0.4146018834771278, "grad_norm": 0.12360362370286249, "learning_rate": 0.0009653414868413187, "loss": 3.448, "step": 5250 }, { "epoch": 0.41499674241377266, "grad_norm": 0.1175584489074594, "learning_rate": 0.000965215309174757, "loss": 3.3969, "step": 5255 }, { "epoch": 0.4153916013504176, "grad_norm": 0.11005085125878455, "learning_rate": 0.00096508891052107, "loss": 3.5049, "step": 5260 }, { "epoch": 0.41578646028706245, "grad_norm": 0.11100911914267908, "learning_rate": 0.0009649622909402998, "loss": 3.4827, "step": 5265 }, { "epoch": 0.4161813192237073, "grad_norm": 0.15274937519020007, "learning_rate": 0.0009648354504925934, "loss": 3.7582, "step": 5270 }, { "epoch": 0.4165761781603522, "grad_norm": 0.2302250393601021, "learning_rate": 0.0009647083892382022, "loss": 3.6537, "step": 5275 }, { "epoch": 0.4169710370969971, "grad_norm": 0.18111041092084168, "learning_rate": 0.0009645811072374837, "loss": 3.4217, "step": 5280 }, { "epoch": 0.417365896033642, "grad_norm": 0.12203106691619076, "learning_rate": 0.000964453604550899, "loss": 3.5305, "step": 5285 }, { "epoch": 0.41776075497028686, "grad_norm": 0.1464185609388867, "learning_rate": 0.0009643258812390147, "loss": 3.5885, "step": 5290 }, { "epoch": 0.41815561390693173, "grad_norm": 0.12069542731283435, "learning_rate": 0.0009641979373625025, "loss": 3.5324, "step": 5295 }, { "epoch": 0.41855047284357666, "grad_norm": 0.13272088193468573, "learning_rate": 0.0009640697729821378, "loss": 3.5302, "step": 5300 }, { "epoch": 0.41894533178022153, "grad_norm": 0.12808849912237213, "learning_rate": 0.0009639413881588017, "loss": 3.4606, "step": 5305 }, { "epoch": 0.4193401907168664, "grad_norm": 0.16508968226133738, "learning_rate": 0.0009638127829534799, "loss": 3.811, "step": 5310 }, { "epoch": 0.41973504965351127, "grad_norm": 0.1342991994726159, "learning_rate": 0.0009636839574272623, "loss": 3.466, "step": 5315 }, { "epoch": 0.42012990859015614, "grad_norm": 0.12848629588890836, "learning_rate": 0.0009635549116413438, "loss": 3.426, "step": 5320 }, { "epoch": 0.42052476752680107, "grad_norm": 0.11083271850938484, "learning_rate": 0.0009634256456570241, "loss": 3.4419, "step": 5325 }, { "epoch": 0.42091962646344594, "grad_norm": 0.21183149364125212, "learning_rate": 0.000963296159535707, "loss": 3.5282, "step": 5330 }, { "epoch": 0.4213144854000908, "grad_norm": 0.11101527069940144, "learning_rate": 0.0009631664533389012, "loss": 3.4752, "step": 5335 }, { "epoch": 0.4217093443367357, "grad_norm": 0.2579430667590828, "learning_rate": 0.0009630365271282202, "loss": 3.5937, "step": 5340 }, { "epoch": 0.4221042032733806, "grad_norm": 0.10963763515966736, "learning_rate": 0.0009629063809653813, "loss": 3.4328, "step": 5345 }, { "epoch": 0.4224990622100255, "grad_norm": 0.17081229487192712, "learning_rate": 0.0009627760149122069, "loss": 3.6245, "step": 5350 }, { "epoch": 0.42289392114667035, "grad_norm": 0.10996943724904924, "learning_rate": 0.0009626454290306235, "loss": 3.403, "step": 5355 }, { "epoch": 0.4232887800833152, "grad_norm": 0.10145505239779458, "learning_rate": 0.0009625146233826623, "loss": 3.5178, "step": 5360 }, { "epoch": 0.42368363901996015, "grad_norm": 0.10068857942697869, "learning_rate": 0.0009623835980304587, "loss": 3.4524, "step": 5365 }, { "epoch": 0.424078497956605, "grad_norm": 0.12629093613802658, "learning_rate": 0.0009622523530362525, "loss": 3.629, "step": 5370 }, { "epoch": 0.4244733568932499, "grad_norm": 0.13649527627223215, "learning_rate": 0.0009621208884623879, "loss": 3.3627, "step": 5375 }, { "epoch": 0.42486821582989476, "grad_norm": 0.16152444132001634, "learning_rate": 0.0009619892043713132, "loss": 3.4995, "step": 5380 }, { "epoch": 0.42526307476653963, "grad_norm": 0.15815751968029065, "learning_rate": 0.0009618573008255814, "loss": 3.9853, "step": 5385 }, { "epoch": 0.42565793370318455, "grad_norm": 0.11983633207519882, "learning_rate": 0.0009617251778878493, "loss": 3.4159, "step": 5390 }, { "epoch": 0.4260527926398294, "grad_norm": 0.10564199361547016, "learning_rate": 0.0009615928356208782, "loss": 3.5448, "step": 5395 }, { "epoch": 0.4264476515764743, "grad_norm": 0.11148420256463527, "learning_rate": 0.0009614602740875333, "loss": 3.2732, "step": 5400 }, { "epoch": 0.42684251051311917, "grad_norm": 0.10473328660851862, "learning_rate": 0.0009613274933507843, "loss": 3.5019, "step": 5405 }, { "epoch": 0.4272373694497641, "grad_norm": 0.32448462080133794, "learning_rate": 0.0009611944934737048, "loss": 3.4347, "step": 5410 }, { "epoch": 0.42763222838640896, "grad_norm": 0.13840598732314385, "learning_rate": 0.0009610612745194725, "loss": 3.5251, "step": 5415 }, { "epoch": 0.42802708732305383, "grad_norm": 0.11725004920813357, "learning_rate": 0.0009609278365513694, "loss": 3.669, "step": 5420 }, { "epoch": 0.4284219462596987, "grad_norm": 0.16382321928157748, "learning_rate": 0.0009607941796327813, "loss": 3.5629, "step": 5425 }, { "epoch": 0.42881680519634363, "grad_norm": 0.13928485136807905, "learning_rate": 0.0009606603038271978, "loss": 3.7671, "step": 5430 }, { "epoch": 0.4292116641329885, "grad_norm": 0.13378174503632512, "learning_rate": 0.0009605262091982131, "loss": 3.5303, "step": 5435 }, { "epoch": 0.4296065230696334, "grad_norm": 0.11231836577907789, "learning_rate": 0.0009603918958095248, "loss": 3.398, "step": 5440 }, { "epoch": 0.43000138200627824, "grad_norm": 0.11321175746846904, "learning_rate": 0.0009602573637249348, "loss": 3.3469, "step": 5445 }, { "epoch": 0.4303962409429231, "grad_norm": 0.11046769046533045, "learning_rate": 0.0009601226130083484, "loss": 3.5894, "step": 5450 }, { "epoch": 0.43079109987956804, "grad_norm": 0.13316802820573548, "learning_rate": 0.0009599876437237752, "loss": 3.5695, "step": 5455 }, { "epoch": 0.4311859588162129, "grad_norm": 0.12781952146698702, "learning_rate": 0.0009598524559353284, "loss": 3.3577, "step": 5460 }, { "epoch": 0.4315808177528578, "grad_norm": 0.11243326838219501, "learning_rate": 0.0009597170497072252, "loss": 3.5704, "step": 5465 }, { "epoch": 0.43197567668950265, "grad_norm": 0.10910792468255302, "learning_rate": 0.0009595814251037864, "loss": 3.4111, "step": 5470 }, { "epoch": 0.4323705356261476, "grad_norm": 0.1404840118957828, "learning_rate": 0.0009594455821894365, "loss": 3.4843, "step": 5475 }, { "epoch": 0.43276539456279245, "grad_norm": 0.14049296480324983, "learning_rate": 0.0009593095210287037, "loss": 3.4836, "step": 5480 }, { "epoch": 0.4331602534994373, "grad_norm": 0.2753395540095464, "learning_rate": 0.00095917324168622, "loss": 3.6474, "step": 5485 }, { "epoch": 0.4335551124360822, "grad_norm": 0.10698675753670986, "learning_rate": 0.000959036744226721, "loss": 3.5245, "step": 5490 }, { "epoch": 0.4339499713727271, "grad_norm": 0.12379995055348542, "learning_rate": 0.0009589000287150459, "loss": 3.6735, "step": 5495 }, { "epoch": 0.434344830309372, "grad_norm": 0.11417771511330888, "learning_rate": 0.0009587630952161373, "loss": 3.3447, "step": 5500 }, { "epoch": 0.43473968924601686, "grad_norm": 0.10971598768659094, "learning_rate": 0.0009586259437950417, "loss": 3.385, "step": 5505 }, { "epoch": 0.43513454818266173, "grad_norm": 0.14663695182937367, "learning_rate": 0.0009584885745169089, "loss": 3.5426, "step": 5510 }, { "epoch": 0.43552940711930666, "grad_norm": 0.15456931013793182, "learning_rate": 0.0009583509874469922, "loss": 3.5775, "step": 5515 }, { "epoch": 0.4359242660559515, "grad_norm": 0.13940035596018485, "learning_rate": 0.0009582131826506484, "loss": 3.5752, "step": 5520 }, { "epoch": 0.4363191249925964, "grad_norm": 0.13743382007169394, "learning_rate": 0.0009580751601933375, "loss": 3.6227, "step": 5525 }, { "epoch": 0.43671398392924127, "grad_norm": 0.12830883154389106, "learning_rate": 0.0009579369201406234, "loss": 3.4227, "step": 5530 }, { "epoch": 0.43710884286588614, "grad_norm": 0.12767682330953992, "learning_rate": 0.0009577984625581728, "loss": 3.2802, "step": 5535 }, { "epoch": 0.43750370180253106, "grad_norm": 0.11883163615262535, "learning_rate": 0.0009576597875117561, "loss": 3.4784, "step": 5540 }, { "epoch": 0.43789856073917593, "grad_norm": 0.11839753801881062, "learning_rate": 0.0009575208950672469, "loss": 3.4382, "step": 5545 }, { "epoch": 0.4382934196758208, "grad_norm": 0.1156922487971146, "learning_rate": 0.0009573817852906219, "loss": 3.444, "step": 5550 }, { "epoch": 0.4386882786124657, "grad_norm": 0.15965505820384585, "learning_rate": 0.0009572424582479616, "loss": 3.5916, "step": 5555 }, { "epoch": 0.4390831375491106, "grad_norm": 0.22735645311816519, "learning_rate": 0.0009571029140054487, "loss": 3.7389, "step": 5560 }, { "epoch": 0.4394779964857555, "grad_norm": 0.1068908656643703, "learning_rate": 0.0009569631526293701, "loss": 3.4173, "step": 5565 }, { "epoch": 0.43987285542240034, "grad_norm": 0.12064391983695523, "learning_rate": 0.0009568231741861152, "loss": 3.5634, "step": 5570 }, { "epoch": 0.4402677143590452, "grad_norm": 0.12761526223334219, "learning_rate": 0.0009566829787421769, "loss": 3.5702, "step": 5575 }, { "epoch": 0.44066257329569014, "grad_norm": 0.12296559053817359, "learning_rate": 0.0009565425663641509, "loss": 3.5406, "step": 5580 }, { "epoch": 0.441057432232335, "grad_norm": 0.11763170935322605, "learning_rate": 0.000956401937118736, "loss": 3.6709, "step": 5585 }, { "epoch": 0.4414522911689799, "grad_norm": 0.14153951179803415, "learning_rate": 0.0009562610910727343, "loss": 3.5733, "step": 5590 }, { "epoch": 0.44184715010562475, "grad_norm": 0.123845376431762, "learning_rate": 0.0009561200282930505, "loss": 3.4619, "step": 5595 }, { "epoch": 0.4422420090422696, "grad_norm": 0.11657377607121533, "learning_rate": 0.0009559787488466922, "loss": 3.5236, "step": 5600 }, { "epoch": 0.44263686797891455, "grad_norm": 0.11138978622943728, "learning_rate": 0.0009558372528007704, "loss": 3.4055, "step": 5605 }, { "epoch": 0.4430317269155594, "grad_norm": 0.1341762562266986, "learning_rate": 0.0009556955402224987, "loss": 3.4654, "step": 5610 }, { "epoch": 0.4434265858522043, "grad_norm": 0.1416116307570291, "learning_rate": 0.0009555536111791936, "loss": 3.4443, "step": 5615 }, { "epoch": 0.44382144478884916, "grad_norm": 0.11408207075901862, "learning_rate": 0.0009554114657382742, "loss": 3.5863, "step": 5620 }, { "epoch": 0.4442163037254941, "grad_norm": 0.14429878879988972, "learning_rate": 0.000955269103967263, "loss": 3.6707, "step": 5625 }, { "epoch": 0.44461116266213896, "grad_norm": 0.12421997416598096, "learning_rate": 0.0009551265259337842, "loss": 3.449, "step": 5630 }, { "epoch": 0.44500602159878383, "grad_norm": 0.1311365442737691, "learning_rate": 0.0009549837317055661, "loss": 3.5505, "step": 5635 }, { "epoch": 0.4454008805354287, "grad_norm": 0.17160014643296542, "learning_rate": 0.0009548407213504385, "loss": 3.4945, "step": 5640 }, { "epoch": 0.4457957394720736, "grad_norm": 0.19628254570763382, "learning_rate": 0.0009546974949363344, "loss": 3.7169, "step": 5645 }, { "epoch": 0.4461905984087185, "grad_norm": 0.12451053453079002, "learning_rate": 0.0009545540525312897, "loss": 3.2421, "step": 5650 }, { "epoch": 0.44658545734536337, "grad_norm": 0.1586312640144639, "learning_rate": 0.0009544103942034421, "loss": 3.5, "step": 5655 }, { "epoch": 0.44698031628200824, "grad_norm": 0.14006810118725546, "learning_rate": 0.0009542665200210328, "loss": 3.603, "step": 5660 }, { "epoch": 0.4473751752186531, "grad_norm": 0.15054896758818975, "learning_rate": 0.0009541224300524049, "loss": 3.5287, "step": 5665 }, { "epoch": 0.44777003415529804, "grad_norm": 0.1272138114491271, "learning_rate": 0.0009539781243660042, "loss": 3.5682, "step": 5670 }, { "epoch": 0.4481648930919429, "grad_norm": 0.11640916951336358, "learning_rate": 0.0009538336030303788, "loss": 3.5757, "step": 5675 }, { "epoch": 0.4485597520285878, "grad_norm": 0.1380761557254188, "learning_rate": 0.0009536888661141796, "loss": 3.6345, "step": 5680 }, { "epoch": 0.44895461096523265, "grad_norm": 0.11491486415856221, "learning_rate": 0.0009535439136861597, "loss": 3.5086, "step": 5685 }, { "epoch": 0.4493494699018776, "grad_norm": 0.10808050322192728, "learning_rate": 0.0009533987458151744, "loss": 3.6317, "step": 5690 }, { "epoch": 0.44974432883852244, "grad_norm": 0.16368053972020594, "learning_rate": 0.0009532533625701816, "loss": 3.3552, "step": 5695 }, { "epoch": 0.4501391877751673, "grad_norm": 0.11486360015634241, "learning_rate": 0.0009531077640202416, "loss": 3.6558, "step": 5700 }, { "epoch": 0.4505340467118122, "grad_norm": 0.11377876607878833, "learning_rate": 0.0009529619502345165, "loss": 3.5683, "step": 5705 }, { "epoch": 0.4509289056484571, "grad_norm": 0.22534757552756587, "learning_rate": 0.0009528159212822712, "loss": 3.2638, "step": 5710 }, { "epoch": 0.451323764585102, "grad_norm": 0.13377051999471204, "learning_rate": 0.0009526696772328723, "loss": 3.4487, "step": 5715 }, { "epoch": 0.45171862352174685, "grad_norm": 0.15366330267613568, "learning_rate": 0.000952523218155789, "loss": 3.5668, "step": 5720 }, { "epoch": 0.4521134824583917, "grad_norm": 0.150198867268635, "learning_rate": 0.0009523765441205922, "loss": 3.5924, "step": 5725 }, { "epoch": 0.45250834139503665, "grad_norm": 0.12430396176764388, "learning_rate": 0.0009522296551969557, "loss": 3.3771, "step": 5730 }, { "epoch": 0.4529032003316815, "grad_norm": 0.15118971611199025, "learning_rate": 0.0009520825514546544, "loss": 3.5375, "step": 5735 }, { "epoch": 0.4532980592683264, "grad_norm": 0.1510550144614886, "learning_rate": 0.0009519352329635658, "loss": 3.4565, "step": 5740 }, { "epoch": 0.45369291820497126, "grad_norm": 0.12875484479091406, "learning_rate": 0.0009517876997936695, "loss": 3.3948, "step": 5745 }, { "epoch": 0.45408777714161613, "grad_norm": 0.11867321738926671, "learning_rate": 0.0009516399520150463, "loss": 3.3959, "step": 5750 }, { "epoch": 0.45448263607826106, "grad_norm": 0.12067162763877234, "learning_rate": 0.0009514919896978802, "loss": 3.343, "step": 5755 }, { "epoch": 0.45487749501490593, "grad_norm": 0.10833687519232879, "learning_rate": 0.0009513438129124562, "loss": 3.4189, "step": 5760 }, { "epoch": 0.4552723539515508, "grad_norm": 0.15886951487287782, "learning_rate": 0.0009511954217291613, "loss": 3.7381, "step": 5765 }, { "epoch": 0.45566721288819567, "grad_norm": 0.15056284449364815, "learning_rate": 0.0009510468162184845, "loss": 3.4383, "step": 5770 }, { "epoch": 0.4560620718248406, "grad_norm": 0.1282223020168303, "learning_rate": 0.0009508979964510167, "loss": 3.3917, "step": 5775 }, { "epoch": 0.45645693076148547, "grad_norm": 0.12312748767474993, "learning_rate": 0.0009507489624974503, "loss": 3.4283, "step": 5780 }, { "epoch": 0.45685178969813034, "grad_norm": 0.1265277647265853, "learning_rate": 0.0009505997144285797, "loss": 3.6563, "step": 5785 }, { "epoch": 0.4572466486347752, "grad_norm": 0.16168169290475634, "learning_rate": 0.0009504502523153004, "loss": 3.4479, "step": 5790 }, { "epoch": 0.45764150757142014, "grad_norm": 0.1351804646152564, "learning_rate": 0.000950300576228611, "loss": 3.458, "step": 5795 }, { "epoch": 0.458036366508065, "grad_norm": 0.1269547989541292, "learning_rate": 0.0009501506862396101, "loss": 3.3631, "step": 5800 }, { "epoch": 0.4584312254447099, "grad_norm": 0.15064366536364027, "learning_rate": 0.0009500005824194987, "loss": 3.6868, "step": 5805 }, { "epoch": 0.45882608438135475, "grad_norm": 0.11674431524764338, "learning_rate": 0.0009498502648395794, "loss": 3.3669, "step": 5810 }, { "epoch": 0.4592209433179996, "grad_norm": 0.15177062722971119, "learning_rate": 0.0009496997335712565, "loss": 3.6701, "step": 5815 }, { "epoch": 0.45961580225464455, "grad_norm": 0.10151182757071123, "learning_rate": 0.0009495489886860351, "loss": 3.395, "step": 5820 }, { "epoch": 0.4600106611912894, "grad_norm": 0.11515964642954295, "learning_rate": 0.0009493980302555225, "loss": 3.3552, "step": 5825 }, { "epoch": 0.4604055201279343, "grad_norm": 0.11005442519669963, "learning_rate": 0.0009492468583514269, "loss": 3.4883, "step": 5830 }, { "epoch": 0.46080037906457916, "grad_norm": 0.15879218703520742, "learning_rate": 0.0009490954730455584, "loss": 3.4053, "step": 5835 }, { "epoch": 0.4611952380012241, "grad_norm": 0.12063803746189852, "learning_rate": 0.0009489438744098282, "loss": 3.4804, "step": 5840 }, { "epoch": 0.46159009693786895, "grad_norm": 0.1470347390015456, "learning_rate": 0.0009487920625162487, "loss": 3.6406, "step": 5845 }, { "epoch": 0.4619849558745138, "grad_norm": 0.12738743099360864, "learning_rate": 0.0009486400374369339, "loss": 3.7172, "step": 5850 }, { "epoch": 0.4623798148111587, "grad_norm": 0.09252078047697121, "learning_rate": 0.0009484877992440988, "loss": 3.288, "step": 5855 }, { "epoch": 0.4627746737478036, "grad_norm": 0.12395083460010481, "learning_rate": 0.00094833534801006, "loss": 3.6499, "step": 5860 }, { "epoch": 0.4631695326844485, "grad_norm": 0.1717996033916852, "learning_rate": 0.0009481826838072348, "loss": 3.7114, "step": 5865 }, { "epoch": 0.46356439162109336, "grad_norm": 0.1285317048849269, "learning_rate": 0.0009480298067081421, "loss": 3.4929, "step": 5870 }, { "epoch": 0.46395925055773823, "grad_norm": 0.22674596822019466, "learning_rate": 0.0009478767167854018, "loss": 3.4076, "step": 5875 }, { "epoch": 0.4643541094943831, "grad_norm": 0.12857615927471353, "learning_rate": 0.0009477234141117347, "loss": 3.3598, "step": 5880 }, { "epoch": 0.46474896843102803, "grad_norm": 0.12269135826747406, "learning_rate": 0.0009475698987599628, "loss": 3.3801, "step": 5885 }, { "epoch": 0.4651438273676729, "grad_norm": 0.16212543733594792, "learning_rate": 0.0009474161708030094, "loss": 3.4632, "step": 5890 }, { "epoch": 0.4655386863043178, "grad_norm": 0.17028928708779945, "learning_rate": 0.0009472622303138982, "loss": 3.4103, "step": 5895 }, { "epoch": 0.46593354524096264, "grad_norm": 0.11909595911560038, "learning_rate": 0.0009471080773657545, "loss": 3.4794, "step": 5900 }, { "epoch": 0.46632840417760757, "grad_norm": 0.12551880078270378, "learning_rate": 0.000946953712031804, "loss": 3.6021, "step": 5905 }, { "epoch": 0.46672326311425244, "grad_norm": 0.11386502873719062, "learning_rate": 0.0009467991343853737, "loss": 3.2813, "step": 5910 }, { "epoch": 0.4671181220508973, "grad_norm": 0.11681097427942101, "learning_rate": 0.0009466443444998909, "loss": 3.5452, "step": 5915 }, { "epoch": 0.4675129809875422, "grad_norm": 0.11188759175197102, "learning_rate": 0.0009464893424488845, "loss": 3.4012, "step": 5920 }, { "epoch": 0.4679078399241871, "grad_norm": 0.09896642864858803, "learning_rate": 0.0009463341283059834, "loss": 3.3997, "step": 5925 }, { "epoch": 0.468302698860832, "grad_norm": 0.15436265966918708, "learning_rate": 0.0009461787021449179, "loss": 3.306, "step": 5930 }, { "epoch": 0.46869755779747685, "grad_norm": 0.12462093950518503, "learning_rate": 0.0009460230640395186, "loss": 3.4968, "step": 5935 }, { "epoch": 0.4690924167341217, "grad_norm": 0.11195897380959213, "learning_rate": 0.0009458672140637168, "loss": 3.3708, "step": 5940 }, { "epoch": 0.46948727567076665, "grad_norm": 0.10526160337041691, "learning_rate": 0.0009457111522915447, "loss": 3.2431, "step": 5945 }, { "epoch": 0.4698821346074115, "grad_norm": 0.0986281823825257, "learning_rate": 0.0009455548787971348, "loss": 3.6152, "step": 5950 }, { "epoch": 0.4702769935440564, "grad_norm": 0.10201101911760913, "learning_rate": 0.0009453983936547205, "loss": 3.525, "step": 5955 }, { "epoch": 0.47067185248070126, "grad_norm": 0.13584841280448984, "learning_rate": 0.0009452416969386355, "loss": 3.458, "step": 5960 }, { "epoch": 0.47106671141734613, "grad_norm": 0.13009015432392323, "learning_rate": 0.0009450847887233139, "loss": 3.3883, "step": 5965 }, { "epoch": 0.47146157035399106, "grad_norm": 0.15315467985040057, "learning_rate": 0.0009449276690832904, "loss": 3.4994, "step": 5970 }, { "epoch": 0.4718564292906359, "grad_norm": 0.13632762628474204, "learning_rate": 0.0009447703380932004, "loss": 3.5207, "step": 5975 }, { "epoch": 0.4722512882272808, "grad_norm": 0.17628793832700929, "learning_rate": 0.0009446127958277795, "loss": 3.5674, "step": 5980 }, { "epoch": 0.47264614716392567, "grad_norm": 0.12313679126861624, "learning_rate": 0.0009444550423618632, "loss": 3.5712, "step": 5985 }, { "epoch": 0.4730410061005706, "grad_norm": 0.13987906557785926, "learning_rate": 0.0009442970777703881, "loss": 3.303, "step": 5990 }, { "epoch": 0.47343586503721546, "grad_norm": 0.14078231389170148, "learning_rate": 0.0009441389021283906, "loss": 3.4238, "step": 5995 }, { "epoch": 0.47383072397386033, "grad_norm": 0.10426184767093276, "learning_rate": 0.0009439805155110077, "loss": 3.5089, "step": 6000 }, { "epoch": 0.4742255829105052, "grad_norm": 0.1511499970514765, "learning_rate": 0.0009438219179934759, "loss": 3.4652, "step": 6005 }, { "epoch": 0.47462044184715013, "grad_norm": 0.11470578605328695, "learning_rate": 0.0009436631096511328, "loss": 3.4658, "step": 6010 }, { "epoch": 0.475015300783795, "grad_norm": 0.14006321979587105, "learning_rate": 0.0009435040905594156, "loss": 3.5199, "step": 6015 }, { "epoch": 0.4754101597204399, "grad_norm": 0.10925705468503309, "learning_rate": 0.0009433448607938618, "loss": 3.4889, "step": 6020 }, { "epoch": 0.47580501865708474, "grad_norm": 0.2260321200136147, "learning_rate": 0.0009431854204301089, "loss": 3.5826, "step": 6025 }, { "epoch": 0.4761998775937296, "grad_norm": 0.18387769379903057, "learning_rate": 0.0009430257695438942, "loss": 3.4785, "step": 6030 }, { "epoch": 0.47659473653037454, "grad_norm": 0.13589256185563872, "learning_rate": 0.0009428659082110556, "loss": 3.3786, "step": 6035 }, { "epoch": 0.4769895954670194, "grad_norm": 0.14009043462939424, "learning_rate": 0.0009427058365075305, "loss": 3.4125, "step": 6040 }, { "epoch": 0.4773844544036643, "grad_norm": 0.12242670820201189, "learning_rate": 0.0009425455545093562, "loss": 3.5003, "step": 6045 }, { "epoch": 0.47777931334030915, "grad_norm": 0.14215248892171667, "learning_rate": 0.0009423850622926703, "loss": 3.5012, "step": 6050 }, { "epoch": 0.4781741722769541, "grad_norm": 0.1427591346929786, "learning_rate": 0.0009422243599337098, "loss": 3.5256, "step": 6055 }, { "epoch": 0.47856903121359895, "grad_norm": 0.13421371261925918, "learning_rate": 0.0009420634475088118, "loss": 3.3829, "step": 6060 }, { "epoch": 0.4789638901502438, "grad_norm": 0.13959332325936138, "learning_rate": 0.0009419023250944131, "loss": 3.5359, "step": 6065 }, { "epoch": 0.4793587490868887, "grad_norm": 0.10635256196782368, "learning_rate": 0.0009417409927670503, "loss": 3.6205, "step": 6070 }, { "epoch": 0.4797536080235336, "grad_norm": 0.11365819250443669, "learning_rate": 0.0009415794506033595, "loss": 3.3877, "step": 6075 }, { "epoch": 0.4801484669601785, "grad_norm": 0.09864595788471107, "learning_rate": 0.000941417698680077, "loss": 3.5536, "step": 6080 }, { "epoch": 0.48054332589682336, "grad_norm": 0.13850510650653045, "learning_rate": 0.000941255737074038, "loss": 3.5128, "step": 6085 }, { "epoch": 0.48093818483346823, "grad_norm": 0.1405778017759027, "learning_rate": 0.000941093565862178, "loss": 3.719, "step": 6090 }, { "epoch": 0.4813330437701131, "grad_norm": 0.14053991143913744, "learning_rate": 0.0009409311851215316, "loss": 3.5298, "step": 6095 }, { "epoch": 0.481727902706758, "grad_norm": 0.10912823170520661, "learning_rate": 0.0009407685949292331, "loss": 3.3719, "step": 6100 }, { "epoch": 0.4821227616434029, "grad_norm": 0.4018708333525831, "learning_rate": 0.0009406057953625163, "loss": 3.4263, "step": 6105 }, { "epoch": 0.48251762058004777, "grad_norm": 0.11889901144189229, "learning_rate": 0.0009404427864987143, "loss": 3.4018, "step": 6110 }, { "epoch": 0.48291247951669264, "grad_norm": 0.11298858384816915, "learning_rate": 0.0009402795684152601, "loss": 3.3482, "step": 6115 }, { "epoch": 0.48330733845333756, "grad_norm": 0.1201951701270992, "learning_rate": 0.0009401161411896854, "loss": 3.4051, "step": 6120 }, { "epoch": 0.48370219738998244, "grad_norm": 0.23650971992915087, "learning_rate": 0.000939952504899622, "loss": 3.5204, "step": 6125 }, { "epoch": 0.4840970563266273, "grad_norm": 0.35023837236574523, "learning_rate": 0.0009397886596228003, "loss": 3.5504, "step": 6130 }, { "epoch": 0.4844919152632722, "grad_norm": 0.12745515761082432, "learning_rate": 0.0009396246054370501, "loss": 3.5673, "step": 6135 }, { "epoch": 0.4848867741999171, "grad_norm": 0.10300161534568979, "learning_rate": 0.000939460342420301, "loss": 3.5306, "step": 6140 }, { "epoch": 0.485281633136562, "grad_norm": 0.16995572016001173, "learning_rate": 0.0009392958706505813, "loss": 3.2948, "step": 6145 }, { "epoch": 0.48567649207320684, "grad_norm": 0.11875563364251275, "learning_rate": 0.0009391311902060184, "loss": 3.259, "step": 6150 }, { "epoch": 0.4860713510098517, "grad_norm": 0.3176894756182711, "learning_rate": 0.0009389663011648392, "loss": 3.5142, "step": 6155 }, { "epoch": 0.48646620994649664, "grad_norm": 0.17561287439337034, "learning_rate": 0.0009388012036053695, "loss": 3.6116, "step": 6160 }, { "epoch": 0.4868610688831415, "grad_norm": 0.12831691285190225, "learning_rate": 0.0009386358976060339, "loss": 3.4153, "step": 6165 }, { "epoch": 0.4872559278197864, "grad_norm": 0.12484895227858175, "learning_rate": 0.0009384703832453565, "loss": 3.6379, "step": 6170 }, { "epoch": 0.48765078675643125, "grad_norm": 0.1217316377827978, "learning_rate": 0.0009383046606019601, "loss": 3.3086, "step": 6175 }, { "epoch": 0.4880456456930761, "grad_norm": 0.18442326206153814, "learning_rate": 0.0009381387297545663, "loss": 3.3432, "step": 6180 }, { "epoch": 0.48844050462972105, "grad_norm": 0.1192623360519064, "learning_rate": 0.0009379725907819958, "loss": 3.3607, "step": 6185 }, { "epoch": 0.4888353635663659, "grad_norm": 0.12673297414870188, "learning_rate": 0.0009378062437631684, "loss": 3.5832, "step": 6190 }, { "epoch": 0.4892302225030108, "grad_norm": 0.14091069132941475, "learning_rate": 0.0009376396887771022, "loss": 3.4579, "step": 6195 }, { "epoch": 0.48962508143965566, "grad_norm": 0.11332597186834982, "learning_rate": 0.0009374729259029142, "loss": 3.3985, "step": 6200 }, { "epoch": 0.4900199403763006, "grad_norm": 0.13038883565363749, "learning_rate": 0.0009373059552198206, "loss": 3.3941, "step": 6205 }, { "epoch": 0.49041479931294546, "grad_norm": 0.1597803152774343, "learning_rate": 0.000937138776807136, "loss": 3.5206, "step": 6210 }, { "epoch": 0.49080965824959033, "grad_norm": 0.2656910982626759, "learning_rate": 0.0009369713907442735, "loss": 3.255, "step": 6215 }, { "epoch": 0.4912045171862352, "grad_norm": 0.15143318103347078, "learning_rate": 0.0009368037971107452, "loss": 3.3341, "step": 6220 }, { "epoch": 0.4915993761228801, "grad_norm": 0.12509652187979767, "learning_rate": 0.0009366359959861615, "loss": 3.7298, "step": 6225 }, { "epoch": 0.491994235059525, "grad_norm": 0.15980783779712565, "learning_rate": 0.0009364679874502316, "loss": 3.4712, "step": 6230 }, { "epoch": 0.49238909399616987, "grad_norm": 0.09937887279485734, "learning_rate": 0.0009362997715827628, "loss": 3.3143, "step": 6235 }, { "epoch": 0.49278395293281474, "grad_norm": 0.13415335888236732, "learning_rate": 0.0009361313484636617, "loss": 3.7006, "step": 6240 }, { "epoch": 0.4931788118694596, "grad_norm": 0.20282303557462053, "learning_rate": 0.0009359627181729325, "loss": 3.3946, "step": 6245 }, { "epoch": 0.49357367080610454, "grad_norm": 0.15731962892561024, "learning_rate": 0.0009357938807906783, "loss": 3.24, "step": 6250 }, { "epoch": 0.4939685297427494, "grad_norm": 0.2568126053036941, "learning_rate": 0.0009356248363971003, "loss": 3.4174, "step": 6255 }, { "epoch": 0.4943633886793943, "grad_norm": 0.14089009148755935, "learning_rate": 0.0009354555850724984, "loss": 3.4429, "step": 6260 }, { "epoch": 0.49475824761603915, "grad_norm": 0.1253165730713488, "learning_rate": 0.0009352861268972704, "loss": 3.4398, "step": 6265 }, { "epoch": 0.4951531065526841, "grad_norm": 0.11109646961804451, "learning_rate": 0.0009351164619519123, "loss": 3.3665, "step": 6270 }, { "epoch": 0.49554796548932895, "grad_norm": 0.2249446063691891, "learning_rate": 0.0009349465903170191, "loss": 3.504, "step": 6275 }, { "epoch": 0.4959428244259738, "grad_norm": 0.11478736246501606, "learning_rate": 0.000934776512073283, "loss": 3.6974, "step": 6280 }, { "epoch": 0.4963376833626187, "grad_norm": 0.14717702263037627, "learning_rate": 0.0009346062273014948, "loss": 3.518, "step": 6285 }, { "epoch": 0.4967325422992636, "grad_norm": 0.13814445927009095, "learning_rate": 0.0009344357360825435, "loss": 3.5915, "step": 6290 }, { "epoch": 0.4971274012359085, "grad_norm": 0.11165718526977805, "learning_rate": 0.0009342650384974161, "loss": 3.5506, "step": 6295 }, { "epoch": 0.49752226017255335, "grad_norm": 0.12251720446745744, "learning_rate": 0.0009340941346271972, "loss": 3.5237, "step": 6300 }, { "epoch": 0.4979171191091982, "grad_norm": 0.11501811860012083, "learning_rate": 0.0009339230245530702, "loss": 3.5471, "step": 6305 }, { "epoch": 0.4983119780458431, "grad_norm": 0.10695205751876372, "learning_rate": 0.0009337517083563158, "loss": 3.53, "step": 6310 }, { "epoch": 0.498706836982488, "grad_norm": 0.10550215276775271, "learning_rate": 0.0009335801861183129, "loss": 3.4574, "step": 6315 }, { "epoch": 0.4991016959191329, "grad_norm": 0.1117824917063947, "learning_rate": 0.0009334084579205381, "loss": 3.1542, "step": 6320 }, { "epoch": 0.49949655485577776, "grad_norm": 0.10059015073843078, "learning_rate": 0.000933236523844566, "loss": 3.426, "step": 6325 }, { "epoch": 0.49989141379242263, "grad_norm": 0.1132094332917733, "learning_rate": 0.0009330643839720687, "loss": 3.3055, "step": 6330 }, { "epoch": 0.5002862727290676, "grad_norm": 0.1096583141379342, "learning_rate": 0.0009328920383848168, "loss": 3.4998, "step": 6335 }, { "epoch": 0.5006811316657124, "grad_norm": 0.19698151890481091, "learning_rate": 0.0009327194871646776, "loss": 3.537, "step": 6340 }, { "epoch": 0.5010759906023573, "grad_norm": 0.10391961442725302, "learning_rate": 0.0009325467303936168, "loss": 3.4154, "step": 6345 }, { "epoch": 0.5014708495390022, "grad_norm": 0.11319856422415744, "learning_rate": 0.0009323737681536976, "loss": 3.2242, "step": 6350 }, { "epoch": 0.501865708475647, "grad_norm": 0.1866721791281448, "learning_rate": 0.0009322006005270805, "loss": 3.4637, "step": 6355 }, { "epoch": 0.502260567412292, "grad_norm": 0.18575798959685152, "learning_rate": 0.0009320272275960239, "loss": 3.3706, "step": 6360 }, { "epoch": 0.5026554263489369, "grad_norm": 0.10987443018555872, "learning_rate": 0.0009318536494428839, "loss": 3.5515, "step": 6365 }, { "epoch": 0.5030502852855817, "grad_norm": 0.12808707423712212, "learning_rate": 0.0009316798661501134, "loss": 3.4638, "step": 6370 }, { "epoch": 0.5034451442222266, "grad_norm": 0.1432868659800917, "learning_rate": 0.0009315058778002631, "loss": 3.5837, "step": 6375 }, { "epoch": 0.5038400031588715, "grad_norm": 0.13729601561240667, "learning_rate": 0.0009313316844759815, "loss": 3.4745, "step": 6380 }, { "epoch": 0.5042348620955164, "grad_norm": 0.1027558881720779, "learning_rate": 0.0009311572862600139, "loss": 3.2562, "step": 6385 }, { "epoch": 0.5046297210321613, "grad_norm": 0.1157349817262516, "learning_rate": 0.0009309826832352033, "loss": 3.5564, "step": 6390 }, { "epoch": 0.5050245799688061, "grad_norm": 0.1236984434501114, "learning_rate": 0.0009308078754844896, "loss": 3.1667, "step": 6395 }, { "epoch": 0.505419438905451, "grad_norm": 0.09583284554512439, "learning_rate": 0.0009306328630909103, "loss": 3.2795, "step": 6400 }, { "epoch": 0.5058142978420959, "grad_norm": 0.15115546495108925, "learning_rate": 0.0009304576461376001, "loss": 3.2448, "step": 6405 }, { "epoch": 0.5062091567787408, "grad_norm": 0.12803962073710673, "learning_rate": 0.0009302822247077906, "loss": 3.4903, "step": 6410 }, { "epoch": 0.5066040157153857, "grad_norm": 0.11360369854031048, "learning_rate": 0.0009301065988848108, "loss": 3.4668, "step": 6415 }, { "epoch": 0.5069988746520305, "grad_norm": 0.12058282932788171, "learning_rate": 0.0009299307687520867, "loss": 3.58, "step": 6420 }, { "epoch": 0.5073937335886755, "grad_norm": 0.10051544257107414, "learning_rate": 0.0009297547343931412, "loss": 3.3895, "step": 6425 }, { "epoch": 0.5077885925253204, "grad_norm": 0.1276059074781447, "learning_rate": 0.0009295784958915945, "loss": 3.3758, "step": 6430 }, { "epoch": 0.5081834514619652, "grad_norm": 0.11735421684680813, "learning_rate": 0.0009294020533311636, "loss": 3.3451, "step": 6435 }, { "epoch": 0.5085783103986101, "grad_norm": 0.0963964562791748, "learning_rate": 0.0009292254067956624, "loss": 3.2757, "step": 6440 }, { "epoch": 0.5089731693352549, "grad_norm": 0.16644921294853773, "learning_rate": 0.0009290485563690017, "loss": 3.413, "step": 6445 }, { "epoch": 0.5093680282718999, "grad_norm": 0.1460755239809692, "learning_rate": 0.0009288715021351893, "loss": 3.2782, "step": 6450 }, { "epoch": 0.5097628872085448, "grad_norm": 0.12612743080178082, "learning_rate": 0.0009286942441783296, "loss": 3.1727, "step": 6455 }, { "epoch": 0.5101577461451896, "grad_norm": 0.13751373361193214, "learning_rate": 0.0009285167825826239, "loss": 3.5521, "step": 6460 }, { "epoch": 0.5105526050818345, "grad_norm": 0.13601849127388488, "learning_rate": 0.0009283391174323705, "loss": 3.5753, "step": 6465 }, { "epoch": 0.5109474640184793, "grad_norm": 0.11969049255906472, "learning_rate": 0.0009281612488119637, "loss": 3.2431, "step": 6470 }, { "epoch": 0.5113423229551243, "grad_norm": 0.11317629865623943, "learning_rate": 0.0009279831768058951, "loss": 3.5482, "step": 6475 }, { "epoch": 0.5117371818917692, "grad_norm": 0.10597404970635523, "learning_rate": 0.0009278049014987527, "loss": 3.3549, "step": 6480 }, { "epoch": 0.512132040828414, "grad_norm": 0.13567081196973568, "learning_rate": 0.0009276264229752207, "loss": 3.3282, "step": 6485 }, { "epoch": 0.5125268997650589, "grad_norm": 0.14044438045481392, "learning_rate": 0.0009274477413200807, "loss": 3.6906, "step": 6490 }, { "epoch": 0.5129217587017039, "grad_norm": 0.15281150946006955, "learning_rate": 0.0009272688566182099, "loss": 3.5586, "step": 6495 }, { "epoch": 0.5133166176383487, "grad_norm": 0.14558963712244113, "learning_rate": 0.0009270897689545825, "loss": 3.3715, "step": 6500 }, { "epoch": 0.5137114765749936, "grad_norm": 0.24740582454419752, "learning_rate": 0.0009269104784142688, "loss": 3.5974, "step": 6505 }, { "epoch": 0.5141063355116384, "grad_norm": 0.253837824453036, "learning_rate": 0.0009267309850824357, "loss": 3.5765, "step": 6510 }, { "epoch": 0.5145011944482833, "grad_norm": 0.13042170138724618, "learning_rate": 0.0009265512890443464, "loss": 3.469, "step": 6515 }, { "epoch": 0.5148960533849283, "grad_norm": 0.12815885193730117, "learning_rate": 0.0009263713903853602, "loss": 3.4733, "step": 6520 }, { "epoch": 0.5152909123215731, "grad_norm": 0.17388622142743873, "learning_rate": 0.0009261912891909325, "loss": 3.5031, "step": 6525 }, { "epoch": 0.515685771258218, "grad_norm": 0.12518136033850938, "learning_rate": 0.0009260109855466158, "loss": 3.5384, "step": 6530 }, { "epoch": 0.5160806301948628, "grad_norm": 0.1229827383124711, "learning_rate": 0.0009258304795380578, "loss": 3.2063, "step": 6535 }, { "epoch": 0.5164754891315078, "grad_norm": 0.13581842001085756, "learning_rate": 0.0009256497712510026, "loss": 3.2884, "step": 6540 }, { "epoch": 0.5168703480681527, "grad_norm": 0.12497677866282254, "learning_rate": 0.0009254688607712907, "loss": 3.4882, "step": 6545 }, { "epoch": 0.5172652070047975, "grad_norm": 0.10946738807928899, "learning_rate": 0.0009252877481848583, "loss": 3.5543, "step": 6550 }, { "epoch": 0.5176600659414424, "grad_norm": 0.1300617387193341, "learning_rate": 0.0009251064335777376, "loss": 3.4519, "step": 6555 }, { "epoch": 0.5180549248780874, "grad_norm": 0.10005554645692002, "learning_rate": 0.0009249249170360569, "loss": 3.4482, "step": 6560 }, { "epoch": 0.5184497838147322, "grad_norm": 0.1323831129481947, "learning_rate": 0.0009247431986460406, "loss": 3.4209, "step": 6565 }, { "epoch": 0.5188446427513771, "grad_norm": 0.15820982721084328, "learning_rate": 0.0009245612784940087, "loss": 3.5991, "step": 6570 }, { "epoch": 0.5192395016880219, "grad_norm": 0.11574886445779146, "learning_rate": 0.000924379156666377, "loss": 3.5068, "step": 6575 }, { "epoch": 0.5196343606246668, "grad_norm": 0.10470317168527488, "learning_rate": 0.0009241968332496575, "loss": 3.3468, "step": 6580 }, { "epoch": 0.5200292195613118, "grad_norm": 0.18209923301841388, "learning_rate": 0.0009240143083304573, "loss": 3.3091, "step": 6585 }, { "epoch": 0.5204240784979566, "grad_norm": 0.12006538780449474, "learning_rate": 0.0009238315819954799, "loss": 3.3378, "step": 6590 }, { "epoch": 0.5208189374346015, "grad_norm": 0.11559478047461348, "learning_rate": 0.0009236486543315241, "loss": 3.2086, "step": 6595 }, { "epoch": 0.5212137963712463, "grad_norm": 0.11652091015195065, "learning_rate": 0.0009234655254254843, "loss": 3.3725, "step": 6600 }, { "epoch": 0.5216086553078912, "grad_norm": 0.11401149882690727, "learning_rate": 0.0009232821953643509, "loss": 3.2182, "step": 6605 }, { "epoch": 0.5220035142445362, "grad_norm": 0.10111006095137617, "learning_rate": 0.0009230986642352092, "loss": 3.5048, "step": 6610 }, { "epoch": 0.522398373181181, "grad_norm": 0.09603104929692138, "learning_rate": 0.0009229149321252405, "loss": 3.3156, "step": 6615 }, { "epoch": 0.5227932321178259, "grad_norm": 0.09496565232610818, "learning_rate": 0.0009227309991217216, "loss": 3.386, "step": 6620 }, { "epoch": 0.5231880910544708, "grad_norm": 0.11197855897679476, "learning_rate": 0.0009225468653120245, "loss": 3.4604, "step": 6625 }, { "epoch": 0.5235829499911157, "grad_norm": 0.11100051994219415, "learning_rate": 0.0009223625307836165, "loss": 3.3907, "step": 6630 }, { "epoch": 0.5239778089277606, "grad_norm": 0.10221294046310132, "learning_rate": 0.0009221779956240606, "loss": 3.3242, "step": 6635 }, { "epoch": 0.5243726678644054, "grad_norm": 0.11014910398432808, "learning_rate": 0.0009219932599210148, "loss": 3.5075, "step": 6640 }, { "epoch": 0.5247675268010503, "grad_norm": 0.12102256038047961, "learning_rate": 0.0009218083237622326, "loss": 3.3668, "step": 6645 }, { "epoch": 0.5251623857376952, "grad_norm": 0.11275980767360046, "learning_rate": 0.0009216231872355622, "loss": 3.4275, "step": 6650 }, { "epoch": 0.5255572446743401, "grad_norm": 0.09366406355682672, "learning_rate": 0.000921437850428948, "loss": 3.3522, "step": 6655 }, { "epoch": 0.525952103610985, "grad_norm": 0.09738057099589985, "learning_rate": 0.0009212523134304284, "loss": 3.2559, "step": 6660 }, { "epoch": 0.5263469625476298, "grad_norm": 0.10554327166049861, "learning_rate": 0.0009210665763281376, "loss": 3.368, "step": 6665 }, { "epoch": 0.5267418214842747, "grad_norm": 0.09102407055483429, "learning_rate": 0.0009208806392103048, "loss": 3.1721, "step": 6670 }, { "epoch": 0.5271366804209197, "grad_norm": 0.1004136859568684, "learning_rate": 0.0009206945021652537, "loss": 3.3238, "step": 6675 }, { "epoch": 0.5275315393575645, "grad_norm": 0.09718951234320003, "learning_rate": 0.0009205081652814037, "loss": 3.3705, "step": 6680 }, { "epoch": 0.5279263982942094, "grad_norm": 0.10824753998094246, "learning_rate": 0.0009203216286472686, "loss": 3.4039, "step": 6685 }, { "epoch": 0.5283212572308543, "grad_norm": 0.0973408647371045, "learning_rate": 0.0009201348923514572, "loss": 3.1929, "step": 6690 }, { "epoch": 0.5287161161674991, "grad_norm": 0.1009041155551718, "learning_rate": 0.0009199479564826732, "loss": 3.3026, "step": 6695 }, { "epoch": 0.5291109751041441, "grad_norm": 0.10254694355782691, "learning_rate": 0.0009197608211297153, "loss": 3.1971, "step": 6700 }, { "epoch": 0.5295058340407889, "grad_norm": 0.11339084503634436, "learning_rate": 0.0009195734863814766, "loss": 3.3831, "step": 6705 }, { "epoch": 0.5299006929774338, "grad_norm": 0.12117371515278175, "learning_rate": 0.0009193859523269449, "loss": 3.493, "step": 6710 }, { "epoch": 0.5302955519140787, "grad_norm": 0.16920087790444466, "learning_rate": 0.0009191982190552033, "loss": 3.4237, "step": 6715 }, { "epoch": 0.5306904108507235, "grad_norm": 0.1325329977560005, "learning_rate": 0.0009190102866554286, "loss": 3.5288, "step": 6720 }, { "epoch": 0.5310852697873685, "grad_norm": 0.1125786478411503, "learning_rate": 0.0009188221552168931, "loss": 3.271, "step": 6725 }, { "epoch": 0.5314801287240134, "grad_norm": 0.11866173348596455, "learning_rate": 0.0009186338248289627, "loss": 3.1845, "step": 6730 }, { "epoch": 0.5318749876606582, "grad_norm": 0.14778102930953615, "learning_rate": 0.0009184452955810989, "loss": 3.6286, "step": 6735 }, { "epoch": 0.5322698465973031, "grad_norm": 0.12020726188837705, "learning_rate": 0.0009182565675628567, "loss": 3.3181, "step": 6740 }, { "epoch": 0.532664705533948, "grad_norm": 0.15437309688383533, "learning_rate": 0.0009180676408638862, "loss": 3.4618, "step": 6745 }, { "epoch": 0.5330595644705929, "grad_norm": 0.12344946705826332, "learning_rate": 0.0009178785155739312, "loss": 3.2988, "step": 6750 }, { "epoch": 0.5334544234072378, "grad_norm": 0.10977694899840709, "learning_rate": 0.0009176891917828305, "loss": 3.5503, "step": 6755 }, { "epoch": 0.5338492823438826, "grad_norm": 0.10483734033177393, "learning_rate": 0.0009174996695805168, "loss": 3.3425, "step": 6760 }, { "epoch": 0.5342441412805276, "grad_norm": 0.11169251341152647, "learning_rate": 0.0009173099490570172, "loss": 3.4447, "step": 6765 }, { "epoch": 0.5346390002171724, "grad_norm": 0.11047805366749221, "learning_rate": 0.0009171200303024529, "loss": 3.2236, "step": 6770 }, { "epoch": 0.5350338591538173, "grad_norm": 0.13218630671039025, "learning_rate": 0.0009169299134070395, "loss": 3.4193, "step": 6775 }, { "epoch": 0.5354287180904622, "grad_norm": 0.1281484779981502, "learning_rate": 0.0009167395984610864, "loss": 3.4045, "step": 6780 }, { "epoch": 0.535823577027107, "grad_norm": 0.12029979582365677, "learning_rate": 0.0009165490855549973, "loss": 3.3329, "step": 6785 }, { "epoch": 0.536218435963752, "grad_norm": 0.12121193351039476, "learning_rate": 0.0009163583747792698, "loss": 3.3104, "step": 6790 }, { "epoch": 0.5366132949003969, "grad_norm": 0.12108757453369738, "learning_rate": 0.0009161674662244956, "loss": 3.562, "step": 6795 }, { "epoch": 0.5370081538370417, "grad_norm": 0.11066569573009463, "learning_rate": 0.0009159763599813605, "loss": 3.5523, "step": 6800 }, { "epoch": 0.5374030127736866, "grad_norm": 0.11428364306276721, "learning_rate": 0.0009157850561406436, "loss": 3.3884, "step": 6805 }, { "epoch": 0.5377978717103314, "grad_norm": 0.12104636570866069, "learning_rate": 0.0009155935547932183, "loss": 3.1923, "step": 6810 }, { "epoch": 0.5381927306469764, "grad_norm": 0.38561942180055064, "learning_rate": 0.0009154018560300523, "loss": 3.1678, "step": 6815 }, { "epoch": 0.5385875895836213, "grad_norm": 0.11314054725335385, "learning_rate": 0.0009152099599422063, "loss": 3.2001, "step": 6820 }, { "epoch": 0.5389824485202661, "grad_norm": 0.11987313636471568, "learning_rate": 0.0009150178666208348, "loss": 3.4829, "step": 6825 }, { "epoch": 0.539377307456911, "grad_norm": 0.14062735605228782, "learning_rate": 0.0009148255761571864, "loss": 3.4188, "step": 6830 }, { "epoch": 0.5397721663935559, "grad_norm": 0.1243663973782642, "learning_rate": 0.0009146330886426031, "loss": 3.3406, "step": 6835 }, { "epoch": 0.5401670253302008, "grad_norm": 0.1628967836431002, "learning_rate": 0.0009144404041685206, "loss": 3.3141, "step": 6840 }, { "epoch": 0.5405618842668457, "grad_norm": 0.10393901460991785, "learning_rate": 0.0009142475228264681, "loss": 3.2418, "step": 6845 }, { "epoch": 0.5409567432034905, "grad_norm": 0.16163013055731443, "learning_rate": 0.0009140544447080682, "loss": 3.5205, "step": 6850 }, { "epoch": 0.5413516021401354, "grad_norm": 0.11572951922916018, "learning_rate": 0.0009138611699050372, "loss": 3.2504, "step": 6855 }, { "epoch": 0.5417464610767804, "grad_norm": 0.26030299211269164, "learning_rate": 0.0009136676985091847, "loss": 3.2341, "step": 6860 }, { "epoch": 0.5421413200134252, "grad_norm": 0.12829008684369145, "learning_rate": 0.0009134740306124137, "loss": 3.2966, "step": 6865 }, { "epoch": 0.5425361789500701, "grad_norm": 0.11344890897239711, "learning_rate": 0.0009132801663067207, "loss": 3.4385, "step": 6870 }, { "epoch": 0.5429310378867149, "grad_norm": 0.1141061013547324, "learning_rate": 0.000913086105684195, "loss": 3.3571, "step": 6875 }, { "epoch": 0.5433258968233599, "grad_norm": 0.1314605158168725, "learning_rate": 0.00091289184883702, "loss": 3.3298, "step": 6880 }, { "epoch": 0.5437207557600048, "grad_norm": 0.11129914618899105, "learning_rate": 0.0009126973958574713, "loss": 3.1627, "step": 6885 }, { "epoch": 0.5441156146966496, "grad_norm": 0.10205061102007207, "learning_rate": 0.0009125027468379186, "loss": 3.2427, "step": 6890 }, { "epoch": 0.5445104736332945, "grad_norm": 0.10741264937863199, "learning_rate": 0.000912307901870824, "loss": 3.3249, "step": 6895 }, { "epoch": 0.5449053325699393, "grad_norm": 0.0971635914024379, "learning_rate": 0.0009121128610487431, "loss": 3.2664, "step": 6900 }, { "epoch": 0.5453001915065843, "grad_norm": 0.1312522091867945, "learning_rate": 0.0009119176244643246, "loss": 3.3278, "step": 6905 }, { "epoch": 0.5456950504432292, "grad_norm": 0.11539612572020427, "learning_rate": 0.0009117221922103097, "loss": 3.4444, "step": 6910 }, { "epoch": 0.546089909379874, "grad_norm": 0.0955361115642794, "learning_rate": 0.0009115265643795332, "loss": 3.2649, "step": 6915 }, { "epoch": 0.5464847683165189, "grad_norm": 0.11340877207116883, "learning_rate": 0.0009113307410649221, "loss": 3.6267, "step": 6920 }, { "epoch": 0.5468796272531639, "grad_norm": 0.1266302440770175, "learning_rate": 0.0009111347223594968, "loss": 3.4383, "step": 6925 }, { "epoch": 0.5472744861898087, "grad_norm": 0.1279859802847328, "learning_rate": 0.0009109385083563704, "loss": 3.2386, "step": 6930 }, { "epoch": 0.5476693451264536, "grad_norm": 0.10995677696713761, "learning_rate": 0.0009107420991487488, "loss": 3.3695, "step": 6935 }, { "epoch": 0.5480642040630984, "grad_norm": 0.19811921443495828, "learning_rate": 0.0009105454948299301, "loss": 3.5753, "step": 6940 }, { "epoch": 0.5484590629997433, "grad_norm": 0.15790056743012182, "learning_rate": 0.0009103486954933058, "loss": 3.2535, "step": 6945 }, { "epoch": 0.5488539219363883, "grad_norm": 0.1520329306470509, "learning_rate": 0.0009101517012323599, "loss": 3.4248, "step": 6950 }, { "epoch": 0.5492487808730331, "grad_norm": 0.17407228587701454, "learning_rate": 0.0009099545121406687, "loss": 3.4442, "step": 6955 }, { "epoch": 0.549643639809678, "grad_norm": 0.16968509976029253, "learning_rate": 0.000909757128311901, "loss": 3.3341, "step": 6960 }, { "epoch": 0.5500384987463228, "grad_norm": 0.155866627693342, "learning_rate": 0.0009095595498398187, "loss": 3.5336, "step": 6965 }, { "epoch": 0.5504333576829677, "grad_norm": 0.11153265431775708, "learning_rate": 0.0009093617768182755, "loss": 3.3983, "step": 6970 }, { "epoch": 0.5508282166196127, "grad_norm": 0.10595634435658581, "learning_rate": 0.0009091638093412178, "loss": 3.3033, "step": 6975 }, { "epoch": 0.5512230755562575, "grad_norm": 0.15060509774312775, "learning_rate": 0.0009089656475026844, "loss": 3.4391, "step": 6980 }, { "epoch": 0.5516179344929024, "grad_norm": 0.12024647595018752, "learning_rate": 0.0009087672913968062, "loss": 3.363, "step": 6985 }, { "epoch": 0.5520127934295473, "grad_norm": 0.1807835436644723, "learning_rate": 0.0009085687411178068, "loss": 3.4052, "step": 6990 }, { "epoch": 0.5524076523661922, "grad_norm": 0.13989467801175595, "learning_rate": 0.0009083699967600017, "loss": 3.5172, "step": 6995 }, { "epoch": 0.5528025113028371, "grad_norm": 0.1005725133779783, "learning_rate": 0.0009081710584177985, "loss": 3.298, "step": 7000 }, { "epoch": 0.5531973702394819, "grad_norm": 0.1202346114291381, "learning_rate": 0.0009079719261856975, "loss": 3.6207, "step": 7005 }, { "epoch": 0.5535922291761268, "grad_norm": 0.12159370360208707, "learning_rate": 0.0009077726001582904, "loss": 3.386, "step": 7010 }, { "epoch": 0.5539870881127718, "grad_norm": 0.12499839330765894, "learning_rate": 0.0009075730804302614, "loss": 3.3268, "step": 7015 }, { "epoch": 0.5543819470494166, "grad_norm": 0.12125594317557381, "learning_rate": 0.0009073733670963865, "loss": 3.3947, "step": 7020 }, { "epoch": 0.5547768059860615, "grad_norm": 0.13092980822264388, "learning_rate": 0.000907173460251534, "loss": 3.3006, "step": 7025 }, { "epoch": 0.5551716649227063, "grad_norm": 0.09834677564981027, "learning_rate": 0.0009069733599906636, "loss": 3.38, "step": 7030 }, { "epoch": 0.5555665238593512, "grad_norm": 0.09256176028013008, "learning_rate": 0.0009067730664088274, "loss": 3.2681, "step": 7035 }, { "epoch": 0.5559613827959962, "grad_norm": 0.10616013852465107, "learning_rate": 0.0009065725796011691, "loss": 3.2312, "step": 7040 }, { "epoch": 0.556356241732641, "grad_norm": 0.09218027986878417, "learning_rate": 0.0009063718996629238, "loss": 3.1767, "step": 7045 }, { "epoch": 0.5567511006692859, "grad_norm": 0.09598630165492752, "learning_rate": 0.0009061710266894192, "loss": 3.4776, "step": 7050 }, { "epoch": 0.5571459596059308, "grad_norm": 0.11503661180017617, "learning_rate": 0.0009059699607760739, "loss": 3.3627, "step": 7055 }, { "epoch": 0.5575408185425756, "grad_norm": 0.10380357990054476, "learning_rate": 0.0009057687020183986, "loss": 3.1883, "step": 7060 }, { "epoch": 0.5579356774792206, "grad_norm": 0.12212216750572004, "learning_rate": 0.0009055672505119956, "loss": 3.4628, "step": 7065 }, { "epoch": 0.5583305364158654, "grad_norm": 0.11837159350083429, "learning_rate": 0.0009053656063525584, "loss": 3.4009, "step": 7070 }, { "epoch": 0.5587253953525103, "grad_norm": 0.12662817538363114, "learning_rate": 0.0009051637696358722, "loss": 3.2958, "step": 7075 }, { "epoch": 0.5591202542891552, "grad_norm": 0.09950044076472574, "learning_rate": 0.000904961740457814, "loss": 3.2807, "step": 7080 }, { "epoch": 0.5595151132258, "grad_norm": 0.10198702478179109, "learning_rate": 0.0009047595189143518, "loss": 3.4039, "step": 7085 }, { "epoch": 0.559909972162445, "grad_norm": 0.09729414384237896, "learning_rate": 0.0009045571051015451, "loss": 3.3833, "step": 7090 }, { "epoch": 0.5603048310990898, "grad_norm": 0.12644139681732516, "learning_rate": 0.0009043544991155446, "loss": 3.5402, "step": 7095 }, { "epoch": 0.5606996900357347, "grad_norm": 0.13033792789356033, "learning_rate": 0.0009041517010525927, "loss": 3.5446, "step": 7100 }, { "epoch": 0.5610945489723796, "grad_norm": 0.19728226666999576, "learning_rate": 0.0009039487110090226, "loss": 3.3146, "step": 7105 }, { "epoch": 0.5614894079090245, "grad_norm": 0.1131686831362473, "learning_rate": 0.000903745529081259, "loss": 3.202, "step": 7110 }, { "epoch": 0.5618842668456694, "grad_norm": 0.10903381583000864, "learning_rate": 0.0009035421553658176, "loss": 3.2223, "step": 7115 }, { "epoch": 0.5622791257823143, "grad_norm": 0.12084025778097807, "learning_rate": 0.000903338589959305, "loss": 3.2718, "step": 7120 }, { "epoch": 0.5626739847189591, "grad_norm": 0.12117048297005933, "learning_rate": 0.0009031348329584195, "loss": 3.5933, "step": 7125 }, { "epoch": 0.5630688436556041, "grad_norm": 0.14586484285628587, "learning_rate": 0.0009029308844599495, "loss": 3.3322, "step": 7130 }, { "epoch": 0.5634637025922489, "grad_norm": 0.18933066384338176, "learning_rate": 0.0009027267445607752, "loss": 3.4092, "step": 7135 }, { "epoch": 0.5638585615288938, "grad_norm": 0.1289951752200751, "learning_rate": 0.0009025224133578674, "loss": 3.5303, "step": 7140 }, { "epoch": 0.5642534204655387, "grad_norm": 0.13794378047695124, "learning_rate": 0.0009023178909482874, "loss": 3.1673, "step": 7145 }, { "epoch": 0.5646482794021835, "grad_norm": 0.13911589345891312, "learning_rate": 0.000902113177429188, "loss": 3.4955, "step": 7150 }, { "epoch": 0.5650431383388285, "grad_norm": 0.10711306031363664, "learning_rate": 0.0009019082728978123, "loss": 3.3263, "step": 7155 }, { "epoch": 0.5654379972754734, "grad_norm": 0.1312259946722742, "learning_rate": 0.0009017031774514945, "loss": 3.3929, "step": 7160 }, { "epoch": 0.5658328562121182, "grad_norm": 0.12869981107739217, "learning_rate": 0.0009014978911876591, "loss": 3.3473, "step": 7165 }, { "epoch": 0.5662277151487631, "grad_norm": 0.10986788885403674, "learning_rate": 0.0009012924142038212, "loss": 3.2857, "step": 7170 }, { "epoch": 0.566622574085408, "grad_norm": 0.1094115590145832, "learning_rate": 0.0009010867465975873, "loss": 3.1341, "step": 7175 }, { "epoch": 0.5670174330220529, "grad_norm": 0.23146909444609648, "learning_rate": 0.0009008808884666534, "loss": 3.5835, "step": 7180 }, { "epoch": 0.5674122919586978, "grad_norm": 0.12237920566737188, "learning_rate": 0.0009006748399088067, "loss": 3.5112, "step": 7185 }, { "epoch": 0.5678071508953426, "grad_norm": 0.09026837703334327, "learning_rate": 0.0009004686010219244, "loss": 3.4709, "step": 7190 }, { "epoch": 0.5682020098319875, "grad_norm": 0.1550898690622976, "learning_rate": 0.0009002621719039745, "loss": 3.5319, "step": 7195 }, { "epoch": 0.5685968687686324, "grad_norm": 0.12548938219795475, "learning_rate": 0.0009000555526530154, "loss": 3.3162, "step": 7200 }, { "epoch": 0.5689917277052773, "grad_norm": 0.10422114550191557, "learning_rate": 0.0008998487433671952, "loss": 3.4105, "step": 7205 }, { "epoch": 0.5693865866419222, "grad_norm": 0.1326541372102074, "learning_rate": 0.0008996417441447529, "loss": 3.3082, "step": 7210 }, { "epoch": 0.569781445578567, "grad_norm": 0.1664512644001425, "learning_rate": 0.0008994345550840178, "loss": 3.7126, "step": 7215 }, { "epoch": 0.570176304515212, "grad_norm": 0.1594930312999086, "learning_rate": 0.0008992271762834086, "loss": 3.316, "step": 7220 }, { "epoch": 0.5705711634518569, "grad_norm": 0.10228392702694235, "learning_rate": 0.0008990196078414348, "loss": 3.3702, "step": 7225 }, { "epoch": 0.5709660223885017, "grad_norm": 0.11707163853080745, "learning_rate": 0.0008988118498566958, "loss": 3.2188, "step": 7230 }, { "epoch": 0.5713608813251466, "grad_norm": 0.10103326462377551, "learning_rate": 0.0008986039024278811, "loss": 3.3766, "step": 7235 }, { "epoch": 0.5717557402617914, "grad_norm": 0.11678935199578869, "learning_rate": 0.0008983957656537703, "loss": 3.335, "step": 7240 }, { "epoch": 0.5721505991984364, "grad_norm": 0.15218652950786526, "learning_rate": 0.0008981874396332323, "loss": 3.2648, "step": 7245 }, { "epoch": 0.5725454581350813, "grad_norm": 0.10017381546925663, "learning_rate": 0.0008979789244652268, "loss": 3.3574, "step": 7250 }, { "epoch": 0.5729403170717261, "grad_norm": 0.17499325935332163, "learning_rate": 0.0008977702202488027, "loss": 3.373, "step": 7255 }, { "epoch": 0.573335176008371, "grad_norm": 0.11343572142920581, "learning_rate": 0.0008975613270830992, "loss": 3.4416, "step": 7260 }, { "epoch": 0.5737300349450158, "grad_norm": 0.16015858283411485, "learning_rate": 0.0008973522450673447, "loss": 3.291, "step": 7265 }, { "epoch": 0.5741248938816608, "grad_norm": 0.10252369169443598, "learning_rate": 0.0008971429743008576, "loss": 3.3391, "step": 7270 }, { "epoch": 0.5745197528183057, "grad_norm": 0.1173165044157149, "learning_rate": 0.0008969335148830461, "loss": 3.2949, "step": 7275 }, { "epoch": 0.5749146117549505, "grad_norm": 0.19035855263969742, "learning_rate": 0.0008967238669134077, "loss": 3.5836, "step": 7280 }, { "epoch": 0.5753094706915954, "grad_norm": 0.13139730159286006, "learning_rate": 0.0008965140304915298, "loss": 3.2747, "step": 7285 }, { "epoch": 0.5757043296282404, "grad_norm": 0.1343065810120146, "learning_rate": 0.000896304005717089, "loss": 3.3717, "step": 7290 }, { "epoch": 0.5760991885648852, "grad_norm": 0.09887020426020408, "learning_rate": 0.0008960937926898516, "loss": 3.2569, "step": 7295 }, { "epoch": 0.5764940475015301, "grad_norm": 0.122156093018175, "learning_rate": 0.0008958833915096731, "loss": 3.3195, "step": 7300 }, { "epoch": 0.5768889064381749, "grad_norm": 0.17101702468281485, "learning_rate": 0.0008956728022764986, "loss": 3.3883, "step": 7305 }, { "epoch": 0.5772837653748198, "grad_norm": 0.11370093065243649, "learning_rate": 0.0008954620250903627, "loss": 3.3517, "step": 7310 }, { "epoch": 0.5776786243114648, "grad_norm": 0.11043255799800651, "learning_rate": 0.0008952510600513886, "loss": 3.3312, "step": 7315 }, { "epoch": 0.5780734832481096, "grad_norm": 0.11768444297436391, "learning_rate": 0.0008950399072597895, "loss": 3.3557, "step": 7320 }, { "epoch": 0.5784683421847545, "grad_norm": 0.11288094067333997, "learning_rate": 0.0008948285668158671, "loss": 3.3745, "step": 7325 }, { "epoch": 0.5788632011213993, "grad_norm": 0.0879923693189446, "learning_rate": 0.0008946170388200129, "loss": 3.3377, "step": 7330 }, { "epoch": 0.5792580600580443, "grad_norm": 0.11272064867001788, "learning_rate": 0.0008944053233727071, "loss": 3.2266, "step": 7335 }, { "epoch": 0.5796529189946892, "grad_norm": 0.14369580446664112, "learning_rate": 0.0008941934205745189, "loss": 3.2404, "step": 7340 }, { "epoch": 0.580047777931334, "grad_norm": 0.11787433843963412, "learning_rate": 0.0008939813305261069, "loss": 3.3683, "step": 7345 }, { "epoch": 0.5804426368679789, "grad_norm": 0.11486144484341698, "learning_rate": 0.0008937690533282179, "loss": 3.375, "step": 7350 }, { "epoch": 0.5808374958046238, "grad_norm": 0.1241811742755171, "learning_rate": 0.0008935565890816886, "loss": 3.3093, "step": 7355 }, { "epoch": 0.5812323547412687, "grad_norm": 0.1289546215978727, "learning_rate": 0.0008933439378874435, "loss": 3.2098, "step": 7360 }, { "epoch": 0.5816272136779136, "grad_norm": 0.1514643361375765, "learning_rate": 0.0008931310998464969, "loss": 3.5212, "step": 7365 }, { "epoch": 0.5820220726145584, "grad_norm": 0.14097198793205176, "learning_rate": 0.000892918075059951, "loss": 3.404, "step": 7370 }, { "epoch": 0.5824169315512033, "grad_norm": 0.11824130359741998, "learning_rate": 0.0008927048636289974, "loss": 3.2007, "step": 7375 }, { "epoch": 0.5828117904878483, "grad_norm": 0.09347347610589661, "learning_rate": 0.0008924914656549158, "loss": 3.2454, "step": 7380 }, { "epoch": 0.5832066494244931, "grad_norm": 0.10363508524746995, "learning_rate": 0.0008922778812390748, "loss": 3.1938, "step": 7385 }, { "epoch": 0.583601508361138, "grad_norm": 0.10974543480316645, "learning_rate": 0.0008920641104829316, "loss": 3.2096, "step": 7390 }, { "epoch": 0.5839963672977828, "grad_norm": 0.12033576358481422, "learning_rate": 0.0008918501534880319, "loss": 3.4113, "step": 7395 }, { "epoch": 0.5843912262344277, "grad_norm": 0.12509545795556729, "learning_rate": 0.0008916360103560097, "loss": 3.384, "step": 7400 }, { "epoch": 0.5847860851710727, "grad_norm": 0.09061075913007674, "learning_rate": 0.0008914216811885874, "loss": 3.3711, "step": 7405 }, { "epoch": 0.5851809441077175, "grad_norm": 0.11379578239314363, "learning_rate": 0.0008912071660875761, "loss": 3.3492, "step": 7410 }, { "epoch": 0.5855758030443624, "grad_norm": 0.14179275686933895, "learning_rate": 0.0008909924651548749, "loss": 3.4801, "step": 7415 }, { "epoch": 0.5859706619810073, "grad_norm": 0.17392213771187973, "learning_rate": 0.0008907775784924714, "loss": 3.45, "step": 7420 }, { "epoch": 0.5863655209176521, "grad_norm": 0.5512070706312587, "learning_rate": 0.0008905625062024412, "loss": 3.3644, "step": 7425 }, { "epoch": 0.5867603798542971, "grad_norm": 0.12705224716486066, "learning_rate": 0.0008903472483869483, "loss": 3.1805, "step": 7430 }, { "epoch": 0.5871552387909419, "grad_norm": 0.11495227503215372, "learning_rate": 0.0008901318051482446, "loss": 3.4299, "step": 7435 }, { "epoch": 0.5875500977275868, "grad_norm": 0.11534872435346419, "learning_rate": 0.0008899161765886703, "loss": 3.526, "step": 7440 }, { "epoch": 0.5879449566642317, "grad_norm": 0.17522167723163398, "learning_rate": 0.0008897003628106535, "loss": 3.4263, "step": 7445 }, { "epoch": 0.5883398156008766, "grad_norm": 0.10419706732145544, "learning_rate": 0.0008894843639167103, "loss": 3.4119, "step": 7450 }, { "epoch": 0.5887346745375215, "grad_norm": 0.09896684132556323, "learning_rate": 0.0008892681800094447, "loss": 3.2681, "step": 7455 }, { "epoch": 0.5891295334741663, "grad_norm": 0.13996340474518473, "learning_rate": 0.0008890518111915487, "loss": 3.4167, "step": 7460 }, { "epoch": 0.5895243924108112, "grad_norm": 0.10298020744366863, "learning_rate": 0.0008888352575658021, "loss": 3.3597, "step": 7465 }, { "epoch": 0.5899192513474562, "grad_norm": 0.6689737086464751, "learning_rate": 0.0008886185192350725, "loss": 3.5378, "step": 7470 }, { "epoch": 0.590314110284101, "grad_norm": 0.13167984244001923, "learning_rate": 0.0008884015963023151, "loss": 3.4641, "step": 7475 }, { "epoch": 0.5907089692207459, "grad_norm": 0.13483800633792997, "learning_rate": 0.0008881844888705728, "loss": 3.3587, "step": 7480 }, { "epoch": 0.5911038281573908, "grad_norm": 0.14822052824478948, "learning_rate": 0.0008879671970429765, "loss": 3.1167, "step": 7485 }, { "epoch": 0.5914986870940356, "grad_norm": 0.13575687215853105, "learning_rate": 0.0008877497209227441, "loss": 3.4362, "step": 7490 }, { "epoch": 0.5918935460306806, "grad_norm": 0.13097440181116554, "learning_rate": 0.0008875320606131816, "loss": 3.279, "step": 7495 }, { "epoch": 0.5922884049673254, "grad_norm": 0.17244765155903072, "learning_rate": 0.0008873142162176822, "loss": 3.4378, "step": 7500 }, { "epoch": 0.5926832639039703, "grad_norm": 0.11657575019589372, "learning_rate": 0.0008870961878397266, "loss": 3.2843, "step": 7505 }, { "epoch": 0.5930781228406152, "grad_norm": 0.1006258829071874, "learning_rate": 0.0008868779755828828, "loss": 3.3504, "step": 7510 }, { "epoch": 0.59347298177726, "grad_norm": 0.1207315891247049, "learning_rate": 0.0008866595795508065, "loss": 3.4395, "step": 7515 }, { "epoch": 0.593867840713905, "grad_norm": 0.14318154117785292, "learning_rate": 0.00088644099984724, "loss": 3.3796, "step": 7520 }, { "epoch": 0.5942626996505498, "grad_norm": 0.11144769832836732, "learning_rate": 0.0008862222365760138, "loss": 3.0948, "step": 7525 }, { "epoch": 0.5946575585871947, "grad_norm": 0.12823671313327814, "learning_rate": 0.0008860032898410448, "loss": 3.4877, "step": 7530 }, { "epoch": 0.5950524175238396, "grad_norm": 0.11266601698832349, "learning_rate": 0.0008857841597463373, "loss": 3.3134, "step": 7535 }, { "epoch": 0.5954472764604845, "grad_norm": 0.1163484554281188, "learning_rate": 0.0008855648463959828, "loss": 3.5618, "step": 7540 }, { "epoch": 0.5958421353971294, "grad_norm": 0.12723222339884088, "learning_rate": 0.0008853453498941599, "loss": 3.3915, "step": 7545 }, { "epoch": 0.5962369943337743, "grad_norm": 0.09546006521857027, "learning_rate": 0.000885125670345134, "loss": 3.213, "step": 7550 }, { "epoch": 0.5966318532704191, "grad_norm": 0.1040482517853813, "learning_rate": 0.0008849058078532574, "loss": 3.1321, "step": 7555 }, { "epoch": 0.597026712207064, "grad_norm": 0.15398330098630042, "learning_rate": 0.0008846857625229696, "loss": 3.4151, "step": 7560 }, { "epoch": 0.5974215711437089, "grad_norm": 0.09110006696906835, "learning_rate": 0.0008844655344587968, "loss": 3.1765, "step": 7565 }, { "epoch": 0.5978164300803538, "grad_norm": 0.11144319454278358, "learning_rate": 0.0008842451237653519, "loss": 3.2045, "step": 7570 }, { "epoch": 0.5982112890169987, "grad_norm": 0.1279275397467783, "learning_rate": 0.0008840245305473349, "loss": 3.521, "step": 7575 }, { "epoch": 0.5986061479536435, "grad_norm": 0.14562862946211005, "learning_rate": 0.0008838037549095319, "loss": 3.2932, "step": 7580 }, { "epoch": 0.5990010068902885, "grad_norm": 0.09199004169313933, "learning_rate": 0.0008835827969568162, "loss": 3.1323, "step": 7585 }, { "epoch": 0.5993958658269334, "grad_norm": 0.10750033983391066, "learning_rate": 0.0008833616567941473, "loss": 3.2221, "step": 7590 }, { "epoch": 0.5997907247635782, "grad_norm": 0.11893341981755191, "learning_rate": 0.0008831403345265718, "loss": 3.1562, "step": 7595 }, { "epoch": 0.6001855837002231, "grad_norm": 0.11332193259335324, "learning_rate": 0.0008829188302592223, "loss": 3.2847, "step": 7600 }, { "epoch": 0.6005804426368679, "grad_norm": 0.17741538023967637, "learning_rate": 0.000882697144097318, "loss": 3.2617, "step": 7605 }, { "epoch": 0.6009753015735129, "grad_norm": 0.11660298283134948, "learning_rate": 0.0008824752761461644, "loss": 3.6359, "step": 7610 }, { "epoch": 0.6013701605101578, "grad_norm": 0.12472606528836078, "learning_rate": 0.0008822532265111539, "loss": 3.2721, "step": 7615 }, { "epoch": 0.6017650194468026, "grad_norm": 0.22119962925820133, "learning_rate": 0.0008820309952977641, "loss": 3.5683, "step": 7620 }, { "epoch": 0.6021598783834475, "grad_norm": 0.12304401382199097, "learning_rate": 0.0008818085826115601, "loss": 3.3508, "step": 7625 }, { "epoch": 0.6025547373200923, "grad_norm": 0.1290278226368501, "learning_rate": 0.0008815859885581925, "loss": 3.204, "step": 7630 }, { "epoch": 0.6029495962567373, "grad_norm": 0.11532877588447188, "learning_rate": 0.0008813632132433982, "loss": 3.327, "step": 7635 }, { "epoch": 0.6033444551933822, "grad_norm": 0.15718461722948207, "learning_rate": 0.000881140256773, "loss": 3.2448, "step": 7640 }, { "epoch": 0.603739314130027, "grad_norm": 0.10946175454362236, "learning_rate": 0.0008809171192529073, "loss": 3.3033, "step": 7645 }, { "epoch": 0.6041341730666719, "grad_norm": 0.10511355709376481, "learning_rate": 0.0008806938007891148, "loss": 3.2067, "step": 7650 }, { "epoch": 0.6045290320033169, "grad_norm": 0.10015972427122877, "learning_rate": 0.0008804703014877036, "loss": 3.198, "step": 7655 }, { "epoch": 0.6049238909399617, "grad_norm": 0.10164936464318328, "learning_rate": 0.0008802466214548407, "loss": 3.3438, "step": 7660 }, { "epoch": 0.6053187498766066, "grad_norm": 0.12039452535007963, "learning_rate": 0.0008800227607967788, "loss": 3.2805, "step": 7665 }, { "epoch": 0.6057136088132514, "grad_norm": 0.12217415816827536, "learning_rate": 0.0008797987196198562, "loss": 3.3246, "step": 7670 }, { "epoch": 0.6061084677498964, "grad_norm": 0.09200403006735242, "learning_rate": 0.0008795744980304979, "loss": 3.1856, "step": 7675 }, { "epoch": 0.6065033266865413, "grad_norm": 0.09912320134267495, "learning_rate": 0.0008793500961352131, "loss": 3.2098, "step": 7680 }, { "epoch": 0.6068981856231861, "grad_norm": 0.09910145980282707, "learning_rate": 0.000879125514040598, "loss": 3.5476, "step": 7685 }, { "epoch": 0.607293044559831, "grad_norm": 0.08784104035887523, "learning_rate": 0.0008789007518533336, "loss": 3.3497, "step": 7690 }, { "epoch": 0.6076879034964758, "grad_norm": 0.11202952177696532, "learning_rate": 0.0008786758096801867, "loss": 3.3051, "step": 7695 }, { "epoch": 0.6080827624331208, "grad_norm": 0.10485974658805328, "learning_rate": 0.0008784506876280096, "loss": 3.1643, "step": 7700 }, { "epoch": 0.6084776213697657, "grad_norm": 0.1136978776028882, "learning_rate": 0.0008782253858037404, "loss": 3.2337, "step": 7705 }, { "epoch": 0.6088724803064105, "grad_norm": 0.12535326080583284, "learning_rate": 0.0008779999043144015, "loss": 3.3338, "step": 7710 }, { "epoch": 0.6092673392430554, "grad_norm": 0.10784200845509842, "learning_rate": 0.0008777742432671021, "loss": 3.3812, "step": 7715 }, { "epoch": 0.6096621981797004, "grad_norm": 0.1057684517759512, "learning_rate": 0.0008775484027690357, "loss": 3.4178, "step": 7720 }, { "epoch": 0.6100570571163452, "grad_norm": 0.09795750200540014, "learning_rate": 0.0008773223829274812, "loss": 3.206, "step": 7725 }, { "epoch": 0.6104519160529901, "grad_norm": 0.10112925354491199, "learning_rate": 0.0008770961838498031, "loss": 3.3743, "step": 7730 }, { "epoch": 0.6108467749896349, "grad_norm": 0.1640308063513589, "learning_rate": 0.0008768698056434506, "loss": 3.3592, "step": 7735 }, { "epoch": 0.6112416339262798, "grad_norm": 0.10466024728770032, "learning_rate": 0.0008766432484159579, "loss": 3.329, "step": 7740 }, { "epoch": 0.6116364928629248, "grad_norm": 0.11025316617789968, "learning_rate": 0.0008764165122749448, "loss": 3.4136, "step": 7745 }, { "epoch": 0.6120313517995696, "grad_norm": 0.14530974663228993, "learning_rate": 0.0008761895973281157, "loss": 3.2725, "step": 7750 }, { "epoch": 0.6124262107362145, "grad_norm": 0.13935784648216276, "learning_rate": 0.0008759625036832599, "loss": 3.6183, "step": 7755 }, { "epoch": 0.6128210696728593, "grad_norm": 0.11470972362598507, "learning_rate": 0.0008757352314482518, "loss": 3.4981, "step": 7760 }, { "epoch": 0.6132159286095042, "grad_norm": 0.1092392894975075, "learning_rate": 0.0008755077807310505, "loss": 3.1287, "step": 7765 }, { "epoch": 0.6136107875461492, "grad_norm": 0.18942542877731303, "learning_rate": 0.0008752801516397001, "loss": 3.1626, "step": 7770 }, { "epoch": 0.614005646482794, "grad_norm": 0.10871942066406064, "learning_rate": 0.0008750523442823289, "loss": 3.2442, "step": 7775 }, { "epoch": 0.6144005054194389, "grad_norm": 0.10431424260820664, "learning_rate": 0.0008748243587671503, "loss": 3.4254, "step": 7780 }, { "epoch": 0.6147953643560838, "grad_norm": 0.10081981044594512, "learning_rate": 0.0008745961952024623, "loss": 3.2168, "step": 7785 }, { "epoch": 0.6151902232927287, "grad_norm": 0.1419247734958077, "learning_rate": 0.0008743678536966476, "loss": 3.2031, "step": 7790 }, { "epoch": 0.6155850822293736, "grad_norm": 0.12053866734452039, "learning_rate": 0.0008741393343581731, "loss": 3.3851, "step": 7795 }, { "epoch": 0.6159799411660184, "grad_norm": 0.1297658170570488, "learning_rate": 0.0008739106372955902, "loss": 3.0431, "step": 7800 }, { "epoch": 0.6163748001026633, "grad_norm": 0.10671776170688302, "learning_rate": 0.0008736817626175349, "loss": 3.309, "step": 7805 }, { "epoch": 0.6167696590393082, "grad_norm": 0.09817299645907214, "learning_rate": 0.0008734527104327277, "loss": 3.1616, "step": 7810 }, { "epoch": 0.6171645179759531, "grad_norm": 0.11429064355448286, "learning_rate": 0.0008732234808499731, "loss": 3.1303, "step": 7815 }, { "epoch": 0.617559376912598, "grad_norm": 0.10183270812852781, "learning_rate": 0.0008729940739781601, "loss": 3.3117, "step": 7820 }, { "epoch": 0.6179542358492428, "grad_norm": 0.09189289575614379, "learning_rate": 0.0008727644899262617, "loss": 3.1153, "step": 7825 }, { "epoch": 0.6183490947858877, "grad_norm": 0.11061605135109881, "learning_rate": 0.0008725347288033352, "loss": 3.1385, "step": 7830 }, { "epoch": 0.6187439537225327, "grad_norm": 0.0921707879178463, "learning_rate": 0.0008723047907185222, "loss": 3.0996, "step": 7835 }, { "epoch": 0.6191388126591775, "grad_norm": 0.10154423634913082, "learning_rate": 0.0008720746757810478, "loss": 3.3223, "step": 7840 }, { "epoch": 0.6195336715958224, "grad_norm": 0.12035563734065186, "learning_rate": 0.0008718443841002217, "loss": 3.3486, "step": 7845 }, { "epoch": 0.6199285305324673, "grad_norm": 0.09956539347947949, "learning_rate": 0.0008716139157854374, "loss": 3.2837, "step": 7850 }, { "epoch": 0.6203233894691121, "grad_norm": 0.10464832752794762, "learning_rate": 0.0008713832709461722, "loss": 3.2269, "step": 7855 }, { "epoch": 0.6207182484057571, "grad_norm": 0.09203630464767192, "learning_rate": 0.000871152449691987, "loss": 3.275, "step": 7860 }, { "epoch": 0.6211131073424019, "grad_norm": 0.10994347644786445, "learning_rate": 0.0008709214521325271, "loss": 3.353, "step": 7865 }, { "epoch": 0.6215079662790468, "grad_norm": 0.1009729214125113, "learning_rate": 0.0008706902783775213, "loss": 3.405, "step": 7870 }, { "epoch": 0.6219028252156917, "grad_norm": 0.11170924321418213, "learning_rate": 0.0008704589285367816, "loss": 3.2462, "step": 7875 }, { "epoch": 0.6222976841523365, "grad_norm": 0.11191307062759996, "learning_rate": 0.0008702274027202044, "loss": 3.1142, "step": 7880 }, { "epoch": 0.6226925430889815, "grad_norm": 0.11242433437823107, "learning_rate": 0.0008699957010377693, "loss": 3.2718, "step": 7885 }, { "epoch": 0.6230874020256263, "grad_norm": 0.09403048099874292, "learning_rate": 0.0008697638235995395, "loss": 3.4432, "step": 7890 }, { "epoch": 0.6234822609622712, "grad_norm": 0.10792827599867545, "learning_rate": 0.0008695317705156618, "loss": 3.1635, "step": 7895 }, { "epoch": 0.6238771198989161, "grad_norm": 0.10737384030988864, "learning_rate": 0.0008692995418963662, "loss": 3.3632, "step": 7900 }, { "epoch": 0.624271978835561, "grad_norm": 0.12135116109418084, "learning_rate": 0.0008690671378519663, "loss": 3.3816, "step": 7905 }, { "epoch": 0.6246668377722059, "grad_norm": 0.11844924505171844, "learning_rate": 0.0008688345584928587, "loss": 3.2678, "step": 7910 }, { "epoch": 0.6250616967088508, "grad_norm": 0.15218363382363653, "learning_rate": 0.000868601803929524, "loss": 3.3026, "step": 7915 }, { "epoch": 0.6254565556454956, "grad_norm": 0.13604117933926216, "learning_rate": 0.0008683688742725249, "loss": 3.3033, "step": 7920 }, { "epoch": 0.6258514145821406, "grad_norm": 0.13653398678568404, "learning_rate": 0.0008681357696325086, "loss": 3.236, "step": 7925 }, { "epoch": 0.6262462735187854, "grad_norm": 0.15196132727801842, "learning_rate": 0.0008679024901202042, "loss": 3.1969, "step": 7930 }, { "epoch": 0.6266411324554303, "grad_norm": 0.09651002456937212, "learning_rate": 0.0008676690358464247, "loss": 3.3167, "step": 7935 }, { "epoch": 0.6270359913920752, "grad_norm": 0.12699685860996374, "learning_rate": 0.0008674354069220659, "loss": 3.4452, "step": 7940 }, { "epoch": 0.62743085032872, "grad_norm": 0.10599478920606417, "learning_rate": 0.0008672016034581062, "loss": 3.2562, "step": 7945 }, { "epoch": 0.627825709265365, "grad_norm": 0.10017507962417749, "learning_rate": 0.0008669676255656074, "loss": 3.1795, "step": 7950 }, { "epoch": 0.6282205682020098, "grad_norm": 0.1258759015998286, "learning_rate": 0.0008667334733557138, "loss": 3.4737, "step": 7955 }, { "epoch": 0.6286154271386547, "grad_norm": 0.10629252591713735, "learning_rate": 0.0008664991469396528, "loss": 3.3518, "step": 7960 }, { "epoch": 0.6290102860752996, "grad_norm": 0.15454207177454346, "learning_rate": 0.0008662646464287345, "loss": 3.2647, "step": 7965 }, { "epoch": 0.6294051450119444, "grad_norm": 0.1201304570998593, "learning_rate": 0.0008660299719343513, "loss": 3.299, "step": 7970 }, { "epoch": 0.6298000039485894, "grad_norm": 0.11828024086486734, "learning_rate": 0.0008657951235679788, "loss": 3.1088, "step": 7975 }, { "epoch": 0.6301948628852343, "grad_norm": 0.11521072851377591, "learning_rate": 0.0008655601014411749, "loss": 3.2945, "step": 7980 }, { "epoch": 0.6305897218218791, "grad_norm": 0.11764440946118156, "learning_rate": 0.00086532490566558, "loss": 3.3015, "step": 7985 }, { "epoch": 0.630984580758524, "grad_norm": 0.09695496073403817, "learning_rate": 0.0008650895363529173, "loss": 3.2222, "step": 7990 }, { "epoch": 0.6313794396951689, "grad_norm": 0.16101159602081974, "learning_rate": 0.0008648539936149918, "loss": 3.1516, "step": 7995 }, { "epoch": 0.6317742986318138, "grad_norm": 0.11610678204812419, "learning_rate": 0.0008646182775636917, "loss": 3.191, "step": 8000 }, { "epoch": 0.6321691575684587, "grad_norm": 0.09048334431775275, "learning_rate": 0.0008643823883109869, "loss": 3.3472, "step": 8005 }, { "epoch": 0.6325640165051035, "grad_norm": 0.10171195934800754, "learning_rate": 0.0008641463259689297, "loss": 3.088, "step": 8010 }, { "epoch": 0.6329588754417484, "grad_norm": 0.11289063687008508, "learning_rate": 0.0008639100906496551, "loss": 3.0632, "step": 8015 }, { "epoch": 0.6333537343783934, "grad_norm": 0.08546644208005494, "learning_rate": 0.0008636736824653797, "loss": 3.2468, "step": 8020 }, { "epoch": 0.6337485933150382, "grad_norm": 0.134744098903084, "learning_rate": 0.0008634371015284021, "loss": 3.335, "step": 8025 }, { "epoch": 0.6341434522516831, "grad_norm": 0.10458215670983823, "learning_rate": 0.0008632003479511035, "loss": 3.1004, "step": 8030 }, { "epoch": 0.6345383111883279, "grad_norm": 0.10205869082494994, "learning_rate": 0.000862963421845947, "loss": 3.1731, "step": 8035 }, { "epoch": 0.6349331701249729, "grad_norm": 0.09451690228766968, "learning_rate": 0.0008627263233254774, "loss": 3.2641, "step": 8040 }, { "epoch": 0.6353280290616178, "grad_norm": 0.0891208032450903, "learning_rate": 0.0008624890525023214, "loss": 3.2304, "step": 8045 }, { "epoch": 0.6357228879982626, "grad_norm": 0.09756464677115245, "learning_rate": 0.0008622516094891877, "loss": 3.166, "step": 8050 }, { "epoch": 0.6361177469349075, "grad_norm": 0.10241737670061611, "learning_rate": 0.000862013994398867, "loss": 3.1128, "step": 8055 }, { "epoch": 0.6365126058715523, "grad_norm": 0.10867554377206097, "learning_rate": 0.0008617762073442313, "loss": 3.2049, "step": 8060 }, { "epoch": 0.6369074648081973, "grad_norm": 0.10445311800192254, "learning_rate": 0.0008615382484382344, "loss": 3.2491, "step": 8065 }, { "epoch": 0.6373023237448422, "grad_norm": 0.09710088366690878, "learning_rate": 0.0008613001177939118, "loss": 3.2944, "step": 8070 }, { "epoch": 0.637697182681487, "grad_norm": 0.1034072679189127, "learning_rate": 0.0008610618155243809, "loss": 3.2766, "step": 8075 }, { "epoch": 0.6380920416181319, "grad_norm": 0.14707372591075538, "learning_rate": 0.0008608233417428401, "loss": 3.2416, "step": 8080 }, { "epoch": 0.6384869005547769, "grad_norm": 0.09825854731455239, "learning_rate": 0.0008605846965625696, "loss": 3.1849, "step": 8085 }, { "epoch": 0.6388817594914217, "grad_norm": 0.1045991800434775, "learning_rate": 0.0008603458800969309, "loss": 3.2728, "step": 8090 }, { "epoch": 0.6392766184280666, "grad_norm": 0.10587544099979272, "learning_rate": 0.0008601068924593667, "loss": 3.2679, "step": 8095 }, { "epoch": 0.6396714773647114, "grad_norm": 0.09680890508610646, "learning_rate": 0.0008598677337634015, "loss": 3.1833, "step": 8100 }, { "epoch": 0.6400663363013563, "grad_norm": 0.10453833177025224, "learning_rate": 0.0008596284041226405, "loss": 3.2008, "step": 8105 }, { "epoch": 0.6404611952380013, "grad_norm": 0.11528071965116299, "learning_rate": 0.0008593889036507705, "loss": 3.281, "step": 8110 }, { "epoch": 0.6408560541746461, "grad_norm": 0.11414041386987149, "learning_rate": 0.000859149232461559, "loss": 3.256, "step": 8115 }, { "epoch": 0.641250913111291, "grad_norm": 0.10444708006960075, "learning_rate": 0.0008589093906688554, "loss": 3.267, "step": 8120 }, { "epoch": 0.6416457720479358, "grad_norm": 0.22041842700055314, "learning_rate": 0.000858669378386589, "loss": 3.3225, "step": 8125 }, { "epoch": 0.6420406309845808, "grad_norm": 0.10941189196917668, "learning_rate": 0.0008584291957287716, "loss": 3.2905, "step": 8130 }, { "epoch": 0.6424354899212257, "grad_norm": 0.09575940163432925, "learning_rate": 0.000858188842809494, "loss": 3.0744, "step": 8135 }, { "epoch": 0.6428303488578705, "grad_norm": 0.09600062313669966, "learning_rate": 0.0008579483197429297, "loss": 3.1193, "step": 8140 }, { "epoch": 0.6432252077945154, "grad_norm": 0.10731221506880077, "learning_rate": 0.0008577076266433319, "loss": 3.3734, "step": 8145 }, { "epoch": 0.6436200667311603, "grad_norm": 0.13709782894184802, "learning_rate": 0.000857466763625035, "loss": 3.1812, "step": 8150 }, { "epoch": 0.6440149256678052, "grad_norm": 0.11668214301786081, "learning_rate": 0.0008572257308024542, "loss": 3.4153, "step": 8155 }, { "epoch": 0.6444097846044501, "grad_norm": 0.10044422029191183, "learning_rate": 0.000856984528290085, "loss": 3.1579, "step": 8160 }, { "epoch": 0.6448046435410949, "grad_norm": 0.16091139216573072, "learning_rate": 0.0008567431562025036, "loss": 3.6872, "step": 8165 }, { "epoch": 0.6451995024777398, "grad_norm": 0.12371963764589791, "learning_rate": 0.0008565016146543672, "loss": 3.152, "step": 8170 }, { "epoch": 0.6455943614143848, "grad_norm": 0.10098317896531495, "learning_rate": 0.0008562599037604128, "loss": 3.3123, "step": 8175 }, { "epoch": 0.6459892203510296, "grad_norm": 0.10719554095936523, "learning_rate": 0.0008560180236354584, "loss": 3.3104, "step": 8180 }, { "epoch": 0.6463840792876745, "grad_norm": 0.14299078573992674, "learning_rate": 0.0008557759743944022, "loss": 3.5327, "step": 8185 }, { "epoch": 0.6467789382243193, "grad_norm": 0.15089687312349223, "learning_rate": 0.0008555337561522227, "loss": 3.1946, "step": 8190 }, { "epoch": 0.6471737971609642, "grad_norm": 0.09635901549173206, "learning_rate": 0.0008552913690239785, "loss": 3.3444, "step": 8195 }, { "epoch": 0.6475686560976092, "grad_norm": 0.09580493441595973, "learning_rate": 0.0008550488131248087, "loss": 3.3097, "step": 8200 }, { "epoch": 0.647963515034254, "grad_norm": 0.08992794856079742, "learning_rate": 0.0008548060885699327, "loss": 3.0696, "step": 8205 }, { "epoch": 0.6483583739708989, "grad_norm": 0.12346530824920907, "learning_rate": 0.0008545631954746495, "loss": 3.0778, "step": 8210 }, { "epoch": 0.6487532329075438, "grad_norm": 0.11362819717415076, "learning_rate": 0.0008543201339543387, "loss": 3.1743, "step": 8215 }, { "epoch": 0.6491480918441886, "grad_norm": 0.14368296267205108, "learning_rate": 0.0008540769041244596, "loss": 3.2924, "step": 8220 }, { "epoch": 0.6495429507808336, "grad_norm": 0.140488778550219, "learning_rate": 0.0008538335061005516, "loss": 3.2148, "step": 8225 }, { "epoch": 0.6499378097174784, "grad_norm": 0.11330219274064782, "learning_rate": 0.0008535899399982336, "loss": 3.6879, "step": 8230 }, { "epoch": 0.6503326686541233, "grad_norm": 0.10972022161728928, "learning_rate": 0.0008533462059332051, "loss": 3.3981, "step": 8235 }, { "epoch": 0.6507275275907682, "grad_norm": 0.09213793654560902, "learning_rate": 0.0008531023040212447, "loss": 3.0561, "step": 8240 }, { "epoch": 0.651122386527413, "grad_norm": 0.13860667274640784, "learning_rate": 0.000852858234378211, "loss": 3.3989, "step": 8245 }, { "epoch": 0.651517245464058, "grad_norm": 0.15515816383177605, "learning_rate": 0.0008526139971200421, "loss": 3.1034, "step": 8250 }, { "epoch": 0.6519121044007028, "grad_norm": 0.10154172678737847, "learning_rate": 0.0008523695923627562, "loss": 3.164, "step": 8255 }, { "epoch": 0.6523069633373477, "grad_norm": 0.13635564632146285, "learning_rate": 0.0008521250202224505, "loss": 2.9994, "step": 8260 }, { "epoch": 0.6527018222739926, "grad_norm": 0.11832645823523798, "learning_rate": 0.000851880280815302, "loss": 3.1503, "step": 8265 }, { "epoch": 0.6530966812106375, "grad_norm": 0.09608803646326824, "learning_rate": 0.0008516353742575671, "loss": 3.199, "step": 8270 }, { "epoch": 0.6534915401472824, "grad_norm": 0.20355644273340723, "learning_rate": 0.0008513903006655817, "loss": 3.3365, "step": 8275 }, { "epoch": 0.6538863990839273, "grad_norm": 0.11309365199253996, "learning_rate": 0.0008511450601557608, "loss": 3.2476, "step": 8280 }, { "epoch": 0.6542812580205721, "grad_norm": 0.10944313494458446, "learning_rate": 0.0008508996528445989, "loss": 3.1252, "step": 8285 }, { "epoch": 0.6546761169572171, "grad_norm": 0.10933559309704752, "learning_rate": 0.0008506540788486696, "loss": 3.4071, "step": 8290 }, { "epoch": 0.6550709758938619, "grad_norm": 0.11504144121064659, "learning_rate": 0.000850408338284626, "loss": 3.244, "step": 8295 }, { "epoch": 0.6554658348305068, "grad_norm": 0.1000285271749103, "learning_rate": 0.0008501624312691996, "loss": 3.2571, "step": 8300 }, { "epoch": 0.6558606937671517, "grad_norm": 0.15312012234671665, "learning_rate": 0.0008499163579192019, "loss": 3.2975, "step": 8305 }, { "epoch": 0.6562555527037965, "grad_norm": 0.10363912228217008, "learning_rate": 0.0008496701183515228, "loss": 3.1859, "step": 8310 }, { "epoch": 0.6566504116404415, "grad_norm": 0.11596155295576463, "learning_rate": 0.0008494237126831312, "loss": 3.0843, "step": 8315 }, { "epoch": 0.6570452705770863, "grad_norm": 0.11867115675364476, "learning_rate": 0.0008491771410310752, "loss": 3.2652, "step": 8320 }, { "epoch": 0.6574401295137312, "grad_norm": 0.08663805030439675, "learning_rate": 0.0008489304035124814, "loss": 3.2974, "step": 8325 }, { "epoch": 0.6578349884503761, "grad_norm": 0.09375839883661172, "learning_rate": 0.0008486835002445554, "loss": 3.1948, "step": 8330 }, { "epoch": 0.658229847387021, "grad_norm": 0.111496326296684, "learning_rate": 0.0008484364313445816, "loss": 3.2121, "step": 8335 }, { "epoch": 0.6586247063236659, "grad_norm": 0.11024673272616052, "learning_rate": 0.0008481891969299227, "loss": 2.9667, "step": 8340 }, { "epoch": 0.6590195652603108, "grad_norm": 0.361030672093046, "learning_rate": 0.0008479417971180205, "loss": 3.4116, "step": 8345 }, { "epoch": 0.6594144241969556, "grad_norm": 0.14464201507006244, "learning_rate": 0.0008476942320263951, "loss": 3.2765, "step": 8350 }, { "epoch": 0.6598092831336005, "grad_norm": 0.1583252479318777, "learning_rate": 0.0008474465017726451, "loss": 3.2017, "step": 8355 }, { "epoch": 0.6602041420702454, "grad_norm": 0.11543978466387399, "learning_rate": 0.0008471986064744477, "loss": 3.3925, "step": 8360 }, { "epoch": 0.6605990010068903, "grad_norm": 0.14173529237606883, "learning_rate": 0.0008469505462495585, "loss": 3.2798, "step": 8365 }, { "epoch": 0.6609938599435352, "grad_norm": 0.09937467222444146, "learning_rate": 0.0008467023212158111, "loss": 3.3432, "step": 8370 }, { "epoch": 0.66138871888018, "grad_norm": 0.11385952709155642, "learning_rate": 0.0008464539314911177, "loss": 3.3726, "step": 8375 }, { "epoch": 0.661783577816825, "grad_norm": 0.12151048189876258, "learning_rate": 0.0008462053771934689, "loss": 3.3474, "step": 8380 }, { "epoch": 0.6621784367534698, "grad_norm": 0.10394578258160118, "learning_rate": 0.0008459566584409331, "loss": 3.3071, "step": 8385 }, { "epoch": 0.6625732956901147, "grad_norm": 0.10717263280919447, "learning_rate": 0.000845707775351657, "loss": 3.1648, "step": 8390 }, { "epoch": 0.6629681546267596, "grad_norm": 0.13172772408292044, "learning_rate": 0.0008454587280438653, "loss": 3.1768, "step": 8395 }, { "epoch": 0.6633630135634044, "grad_norm": 0.24624264959354314, "learning_rate": 0.000845209516635861, "loss": 3.0567, "step": 8400 }, { "epoch": 0.6637578725000494, "grad_norm": 0.10382783314398486, "learning_rate": 0.0008449601412460244, "loss": 3.2856, "step": 8405 }, { "epoch": 0.6641527314366943, "grad_norm": 0.09999742680558005, "learning_rate": 0.0008447106019928144, "loss": 3.3153, "step": 8410 }, { "epoch": 0.6645475903733391, "grad_norm": 0.09708535746429113, "learning_rate": 0.0008444608989947671, "loss": 3.1176, "step": 8415 }, { "epoch": 0.664942449309984, "grad_norm": 0.09384433963189001, "learning_rate": 0.000844211032370497, "loss": 3.3424, "step": 8420 }, { "epoch": 0.6653373082466288, "grad_norm": 0.12140819158051185, "learning_rate": 0.0008439610022386959, "loss": 3.3056, "step": 8425 }, { "epoch": 0.6657321671832738, "grad_norm": 0.10352079794140742, "learning_rate": 0.0008437108087181336, "loss": 3.2419, "step": 8430 }, { "epoch": 0.6661270261199187, "grad_norm": 0.10086030366459975, "learning_rate": 0.0008434604519276572, "loss": 3.2047, "step": 8435 }, { "epoch": 0.6665218850565635, "grad_norm": 0.11034506676941166, "learning_rate": 0.0008432099319861915, "loss": 3.2653, "step": 8440 }, { "epoch": 0.6669167439932084, "grad_norm": 0.09738178161252967, "learning_rate": 0.0008429592490127386, "loss": 3.1873, "step": 8445 }, { "epoch": 0.6673116029298534, "grad_norm": 0.09143762531603193, "learning_rate": 0.0008427084031263784, "loss": 3.0621, "step": 8450 }, { "epoch": 0.6677064618664982, "grad_norm": 0.10929527726428313, "learning_rate": 0.000842457394446268, "loss": 3.1375, "step": 8455 }, { "epoch": 0.6681013208031431, "grad_norm": 0.09258160057023435, "learning_rate": 0.0008422062230916418, "loss": 3.1586, "step": 8460 }, { "epoch": 0.6684961797397879, "grad_norm": 0.10646217402259393, "learning_rate": 0.0008419548891818116, "loss": 3.5694, "step": 8465 }, { "epoch": 0.6688910386764328, "grad_norm": 0.13109287107977363, "learning_rate": 0.0008417033928361661, "loss": 3.2009, "step": 8470 }, { "epoch": 0.6692858976130778, "grad_norm": 0.16658639691426166, "learning_rate": 0.0008414517341741717, "loss": 3.3575, "step": 8475 }, { "epoch": 0.6696807565497226, "grad_norm": 0.11863531845398505, "learning_rate": 0.0008411999133153714, "loss": 3.4309, "step": 8480 }, { "epoch": 0.6700756154863675, "grad_norm": 0.1755654282318166, "learning_rate": 0.0008409479303793854, "loss": 3.2665, "step": 8485 }, { "epoch": 0.6704704744230123, "grad_norm": 0.1263114194496404, "learning_rate": 0.0008406957854859112, "loss": 3.1639, "step": 8490 }, { "epoch": 0.6708653333596573, "grad_norm": 0.11760125462331349, "learning_rate": 0.0008404434787547228, "loss": 3.4133, "step": 8495 }, { "epoch": 0.6712601922963022, "grad_norm": 0.1808930954922336, "learning_rate": 0.0008401910103056711, "loss": 3.4752, "step": 8500 }, { "epoch": 0.671655051232947, "grad_norm": 0.09643626768383609, "learning_rate": 0.0008399383802586841, "loss": 3.0605, "step": 8505 }, { "epoch": 0.6720499101695919, "grad_norm": 0.1260923876539727, "learning_rate": 0.0008396855887337664, "loss": 3.4402, "step": 8510 }, { "epoch": 0.6724447691062369, "grad_norm": 0.15134745597037103, "learning_rate": 0.0008394326358509993, "loss": 3.459, "step": 8515 }, { "epoch": 0.6728396280428817, "grad_norm": 0.11714570194382691, "learning_rate": 0.0008391795217305409, "loss": 3.4327, "step": 8520 }, { "epoch": 0.6732344869795266, "grad_norm": 0.11281863141111667, "learning_rate": 0.0008389262464926256, "loss": 3.0981, "step": 8525 }, { "epoch": 0.6736293459161714, "grad_norm": 0.10617478431460332, "learning_rate": 0.0008386728102575647, "loss": 3.3724, "step": 8530 }, { "epoch": 0.6740242048528163, "grad_norm": 0.1292975236055261, "learning_rate": 0.0008384192131457456, "loss": 3.245, "step": 8535 }, { "epoch": 0.6744190637894613, "grad_norm": 0.10670534655770013, "learning_rate": 0.0008381654552776323, "loss": 3.1942, "step": 8540 }, { "epoch": 0.6748139227261061, "grad_norm": 0.0991727418637046, "learning_rate": 0.0008379115367737653, "loss": 3.1656, "step": 8545 }, { "epoch": 0.675208781662751, "grad_norm": 0.10580642181053504, "learning_rate": 0.0008376574577547613, "loss": 3.1038, "step": 8550 }, { "epoch": 0.6756036405993958, "grad_norm": 0.10395571282866913, "learning_rate": 0.0008374032183413129, "loss": 3.2166, "step": 8555 }, { "epoch": 0.6759984995360407, "grad_norm": 0.11762819408469276, "learning_rate": 0.0008371488186541896, "loss": 3.2523, "step": 8560 }, { "epoch": 0.6763933584726857, "grad_norm": 0.09987613629449463, "learning_rate": 0.0008368942588142363, "loss": 3.246, "step": 8565 }, { "epoch": 0.6767882174093305, "grad_norm": 0.10128123617802409, "learning_rate": 0.0008366395389423746, "loss": 3.0956, "step": 8570 }, { "epoch": 0.6771830763459754, "grad_norm": 0.09929252524450333, "learning_rate": 0.0008363846591596016, "loss": 2.9289, "step": 8575 }, { "epoch": 0.6775779352826203, "grad_norm": 0.08738888667103484, "learning_rate": 0.0008361296195869908, "loss": 3.0987, "step": 8580 }, { "epoch": 0.6779727942192652, "grad_norm": 0.09544255353355105, "learning_rate": 0.0008358744203456912, "loss": 3.177, "step": 8585 }, { "epoch": 0.6783676531559101, "grad_norm": 0.0896858502697625, "learning_rate": 0.000835619061556928, "loss": 3.1023, "step": 8590 }, { "epoch": 0.6787625120925549, "grad_norm": 0.10294857362703758, "learning_rate": 0.0008353635433420019, "loss": 3.2353, "step": 8595 }, { "epoch": 0.6791573710291998, "grad_norm": 0.1287285370017612, "learning_rate": 0.0008351078658222897, "loss": 3.3976, "step": 8600 }, { "epoch": 0.6795522299658447, "grad_norm": 0.10669443201356113, "learning_rate": 0.0008348520291192434, "loss": 3.0776, "step": 8605 }, { "epoch": 0.6799470889024896, "grad_norm": 0.10080075060114345, "learning_rate": 0.0008345960333543909, "loss": 3.2163, "step": 8610 }, { "epoch": 0.6803419478391345, "grad_norm": 0.09755301806510612, "learning_rate": 0.0008343398786493357, "loss": 3.1041, "step": 8615 }, { "epoch": 0.6807368067757793, "grad_norm": 0.15729407174782342, "learning_rate": 0.0008340835651257565, "loss": 3.1163, "step": 8620 }, { "epoch": 0.6811316657124242, "grad_norm": 0.14979963960878667, "learning_rate": 0.0008338270929054081, "loss": 3.1649, "step": 8625 }, { "epoch": 0.6815265246490692, "grad_norm": 0.09384227675808207, "learning_rate": 0.0008335704621101197, "loss": 3.2984, "step": 8630 }, { "epoch": 0.681921383585714, "grad_norm": 0.16157443308557204, "learning_rate": 0.0008333136728617967, "loss": 3.1643, "step": 8635 }, { "epoch": 0.6823162425223589, "grad_norm": 0.09536272187552021, "learning_rate": 0.0008330567252824193, "loss": 3.2044, "step": 8640 }, { "epoch": 0.6827111014590038, "grad_norm": 0.10265594797607996, "learning_rate": 0.000832799619494043, "loss": 3.2019, "step": 8645 }, { "epoch": 0.6831059603956486, "grad_norm": 0.12129097891000684, "learning_rate": 0.0008325423556187988, "loss": 3.1536, "step": 8650 }, { "epoch": 0.6835008193322936, "grad_norm": 0.1428479135046388, "learning_rate": 0.0008322849337788921, "loss": 3.3588, "step": 8655 }, { "epoch": 0.6838956782689384, "grad_norm": 0.0979337126468216, "learning_rate": 0.0008320273540966038, "loss": 3.1255, "step": 8660 }, { "epoch": 0.6842905372055833, "grad_norm": 0.09396873870766928, "learning_rate": 0.00083176961669429, "loss": 3.0593, "step": 8665 }, { "epoch": 0.6846853961422282, "grad_norm": 0.21056026100038083, "learning_rate": 0.0008315117216943811, "loss": 3.081, "step": 8670 }, { "epoch": 0.685080255078873, "grad_norm": 0.1738857752677988, "learning_rate": 0.0008312536692193828, "loss": 3.5131, "step": 8675 }, { "epoch": 0.685475114015518, "grad_norm": 0.116578729516499, "learning_rate": 0.0008309954593918756, "loss": 2.9725, "step": 8680 }, { "epoch": 0.6858699729521628, "grad_norm": 0.09694530815605854, "learning_rate": 0.0008307370923345145, "loss": 3.3016, "step": 8685 }, { "epoch": 0.6862648318888077, "grad_norm": 0.10655765658057892, "learning_rate": 0.0008304785681700293, "loss": 3.2227, "step": 8690 }, { "epoch": 0.6866596908254526, "grad_norm": 0.11387383798660831, "learning_rate": 0.0008302198870212246, "loss": 3.1872, "step": 8695 }, { "epoch": 0.6870545497620975, "grad_norm": 0.09142688801389243, "learning_rate": 0.0008299610490109792, "loss": 3.0557, "step": 8700 }, { "epoch": 0.6874494086987424, "grad_norm": 0.09467235920761823, "learning_rate": 0.0008297020542622469, "loss": 3.1291, "step": 8705 }, { "epoch": 0.6878442676353873, "grad_norm": 0.1008984467615193, "learning_rate": 0.0008294429028980556, "loss": 3.1208, "step": 8710 }, { "epoch": 0.6882391265720321, "grad_norm": 0.13150228695479144, "learning_rate": 0.0008291835950415077, "loss": 3.0908, "step": 8715 }, { "epoch": 0.688633985508677, "grad_norm": 0.20246543186347446, "learning_rate": 0.0008289241308157796, "loss": 3.0792, "step": 8720 }, { "epoch": 0.6890288444453219, "grad_norm": 0.10176579756152923, "learning_rate": 0.0008286645103441229, "loss": 3.1355, "step": 8725 }, { "epoch": 0.6894237033819668, "grad_norm": 0.09758852937418612, "learning_rate": 0.0008284047337498622, "loss": 3.2599, "step": 8730 }, { "epoch": 0.6898185623186117, "grad_norm": 0.09802826011337314, "learning_rate": 0.0008281448011563975, "loss": 3.2798, "step": 8735 }, { "epoch": 0.6902134212552565, "grad_norm": 0.10022259407111145, "learning_rate": 0.0008278847126872018, "loss": 3.266, "step": 8740 }, { "epoch": 0.6906082801919015, "grad_norm": 0.09401494871192057, "learning_rate": 0.0008276244684658228, "loss": 3.0725, "step": 8745 }, { "epoch": 0.6910031391285463, "grad_norm": 0.11674167060979421, "learning_rate": 0.0008273640686158821, "loss": 3.1991, "step": 8750 }, { "epoch": 0.6913979980651912, "grad_norm": 0.11558918781117869, "learning_rate": 0.0008271035132610751, "loss": 3.3279, "step": 8755 }, { "epoch": 0.6917928570018361, "grad_norm": 0.1074293874216869, "learning_rate": 0.0008268428025251708, "loss": 3.2375, "step": 8760 }, { "epoch": 0.6921877159384809, "grad_norm": 0.10832841652859307, "learning_rate": 0.0008265819365320128, "loss": 3.1966, "step": 8765 }, { "epoch": 0.6925825748751259, "grad_norm": 0.1120944579047623, "learning_rate": 0.000826320915405518, "loss": 3.1497, "step": 8770 }, { "epoch": 0.6929774338117708, "grad_norm": 0.09543644264205484, "learning_rate": 0.0008260597392696765, "loss": 3.1772, "step": 8775 }, { "epoch": 0.6933722927484156, "grad_norm": 0.08980345178228973, "learning_rate": 0.0008257984082485527, "loss": 3.1139, "step": 8780 }, { "epoch": 0.6937671516850605, "grad_norm": 0.11342651053988363, "learning_rate": 0.0008255369224662845, "loss": 3.3245, "step": 8785 }, { "epoch": 0.6941620106217053, "grad_norm": 0.11452228564033505, "learning_rate": 0.000825275282047083, "loss": 3.1326, "step": 8790 }, { "epoch": 0.6945568695583503, "grad_norm": 0.1276240552076, "learning_rate": 0.000825013487115233, "loss": 3.1481, "step": 8795 }, { "epoch": 0.6949517284949952, "grad_norm": 0.12344312136428486, "learning_rate": 0.0008247515377950929, "loss": 3.177, "step": 8800 }, { "epoch": 0.69534658743164, "grad_norm": 0.1489628215482825, "learning_rate": 0.0008244894342110938, "loss": 3.2631, "step": 8805 }, { "epoch": 0.6957414463682849, "grad_norm": 0.14764961542774577, "learning_rate": 0.0008242271764877407, "loss": 3.033, "step": 8810 }, { "epoch": 0.6961363053049298, "grad_norm": 0.10847054422445301, "learning_rate": 0.0008239647647496113, "loss": 3.2107, "step": 8815 }, { "epoch": 0.6965311642415747, "grad_norm": 0.1009638318225479, "learning_rate": 0.000823702199121357, "loss": 3.146, "step": 8820 }, { "epoch": 0.6969260231782196, "grad_norm": 0.08851621360754736, "learning_rate": 0.0008234394797277019, "loss": 3.3563, "step": 8825 }, { "epoch": 0.6973208821148644, "grad_norm": 0.11682118986464445, "learning_rate": 0.0008231766066934434, "loss": 3.2641, "step": 8830 }, { "epoch": 0.6977157410515094, "grad_norm": 0.11786581260727502, "learning_rate": 0.0008229135801434515, "loss": 3.3042, "step": 8835 }, { "epoch": 0.6981105999881543, "grad_norm": 0.11243400813269441, "learning_rate": 0.0008226504002026698, "loss": 3.378, "step": 8840 }, { "epoch": 0.6985054589247991, "grad_norm": 0.09891505661472931, "learning_rate": 0.000822387066996114, "loss": 3.1432, "step": 8845 }, { "epoch": 0.698900317861444, "grad_norm": 0.09678230564578898, "learning_rate": 0.000822123580648873, "loss": 3.1365, "step": 8850 }, { "epoch": 0.6992951767980888, "grad_norm": 0.0974119026073569, "learning_rate": 0.0008218599412861085, "loss": 3.3529, "step": 8855 }, { "epoch": 0.6996900357347338, "grad_norm": 0.10278431640889767, "learning_rate": 0.0008215961490330547, "loss": 3.1467, "step": 8860 }, { "epoch": 0.7000848946713787, "grad_norm": 0.10582543500080568, "learning_rate": 0.0008213322040150186, "loss": 3.2041, "step": 8865 }, { "epoch": 0.7004797536080235, "grad_norm": 0.10699956902930448, "learning_rate": 0.0008210681063573795, "loss": 3.0611, "step": 8870 }, { "epoch": 0.7008746125446684, "grad_norm": 0.09598668276639663, "learning_rate": 0.0008208038561855895, "loss": 3.0451, "step": 8875 }, { "epoch": 0.7012694714813134, "grad_norm": 0.1354466637207538, "learning_rate": 0.0008205394536251731, "loss": 3.3255, "step": 8880 }, { "epoch": 0.7016643304179582, "grad_norm": 0.10910903170034271, "learning_rate": 0.0008202748988017271, "loss": 3.2341, "step": 8885 }, { "epoch": 0.7020591893546031, "grad_norm": 0.10861355626506568, "learning_rate": 0.0008200101918409206, "loss": 3.0992, "step": 8890 }, { "epoch": 0.7024540482912479, "grad_norm": 0.10402024662370056, "learning_rate": 0.0008197453328684948, "loss": 3.3043, "step": 8895 }, { "epoch": 0.7028489072278928, "grad_norm": 0.10794669143554325, "learning_rate": 0.0008194803220102635, "loss": 3.1218, "step": 8900 }, { "epoch": 0.7032437661645378, "grad_norm": 0.10873130997265774, "learning_rate": 0.0008192151593921127, "loss": 3.0542, "step": 8905 }, { "epoch": 0.7036386251011826, "grad_norm": 0.09478581599520013, "learning_rate": 0.0008189498451399999, "loss": 3.035, "step": 8910 }, { "epoch": 0.7040334840378275, "grad_norm": 0.11534583426293812, "learning_rate": 0.0008186843793799553, "loss": 3.0426, "step": 8915 }, { "epoch": 0.7044283429744723, "grad_norm": 0.09937648275934526, "learning_rate": 0.0008184187622380805, "loss": 3.1198, "step": 8920 }, { "epoch": 0.7048232019111172, "grad_norm": 0.13423893383305865, "learning_rate": 0.0008181529938405493, "loss": 3.0226, "step": 8925 }, { "epoch": 0.7052180608477622, "grad_norm": 0.1777529644701129, "learning_rate": 0.0008178870743136076, "loss": 3.0179, "step": 8930 }, { "epoch": 0.705612919784407, "grad_norm": 0.09627192448074401, "learning_rate": 0.0008176210037835725, "loss": 3.0808, "step": 8935 }, { "epoch": 0.7060077787210519, "grad_norm": 0.08187712275009684, "learning_rate": 0.000817354782376833, "loss": 3.0408, "step": 8940 }, { "epoch": 0.7064026376576968, "grad_norm": 0.1466707709877353, "learning_rate": 0.0008170884102198504, "loss": 3.0828, "step": 8945 }, { "epoch": 0.7067974965943417, "grad_norm": 0.12672492205028107, "learning_rate": 0.0008168218874391567, "loss": 3.1034, "step": 8950 }, { "epoch": 0.7071923555309866, "grad_norm": 0.11057891216227578, "learning_rate": 0.000816555214161356, "loss": 3.1173, "step": 8955 }, { "epoch": 0.7075872144676314, "grad_norm": 0.09713334567304839, "learning_rate": 0.000816288390513124, "loss": 3.0621, "step": 8960 }, { "epoch": 0.7079820734042763, "grad_norm": 0.08469169809187842, "learning_rate": 0.000816021416621207, "loss": 3.11, "step": 8965 }, { "epoch": 0.7083769323409213, "grad_norm": 0.09479971138848471, "learning_rate": 0.000815754292612424, "loss": 3.197, "step": 8970 }, { "epoch": 0.7087717912775661, "grad_norm": 0.09559588061413934, "learning_rate": 0.0008154870186136639, "loss": 3.1626, "step": 8975 }, { "epoch": 0.709166650214211, "grad_norm": 0.1023931958878223, "learning_rate": 0.0008152195947518879, "loss": 3.0183, "step": 8980 }, { "epoch": 0.7095615091508558, "grad_norm": 0.0884453026501553, "learning_rate": 0.0008149520211541278, "loss": 3.0657, "step": 8985 }, { "epoch": 0.7099563680875007, "grad_norm": 0.17527985537263616, "learning_rate": 0.0008146842979474869, "loss": 3.1037, "step": 8990 }, { "epoch": 0.7103512270241457, "grad_norm": 0.11290424673924779, "learning_rate": 0.0008144164252591393, "loss": 3.2594, "step": 8995 }, { "epoch": 0.7107460859607905, "grad_norm": 0.11642475296686029, "learning_rate": 0.00081414840321633, "loss": 3.191, "step": 9000 }, { "epoch": 0.7111409448974354, "grad_norm": 0.08687380857119008, "learning_rate": 0.0008138802319463756, "loss": 3.1212, "step": 9005 }, { "epoch": 0.7115358038340803, "grad_norm": 0.08174512630649615, "learning_rate": 0.0008136119115766627, "loss": 2.9007, "step": 9010 }, { "epoch": 0.7119306627707251, "grad_norm": 0.09328392674968032, "learning_rate": 0.0008133434422346495, "loss": 3.1418, "step": 9015 }, { "epoch": 0.7123255217073701, "grad_norm": 0.10082273918735227, "learning_rate": 0.0008130748240478645, "loss": 3.331, "step": 9020 }, { "epoch": 0.7127203806440149, "grad_norm": 0.14223941743133175, "learning_rate": 0.0008128060571439069, "loss": 3.3669, "step": 9025 }, { "epoch": 0.7131152395806598, "grad_norm": 0.09807209717021953, "learning_rate": 0.0008125371416504467, "loss": 3.1096, "step": 9030 }, { "epoch": 0.7135100985173047, "grad_norm": 0.09838141298186402, "learning_rate": 0.0008122680776952245, "loss": 2.9629, "step": 9035 }, { "epoch": 0.7139049574539496, "grad_norm": 0.1235064095268593, "learning_rate": 0.0008119988654060515, "loss": 3.1875, "step": 9040 }, { "epoch": 0.7142998163905945, "grad_norm": 0.11419161177655116, "learning_rate": 0.0008117295049108091, "loss": 3.1593, "step": 9045 }, { "epoch": 0.7146946753272393, "grad_norm": 0.09447797705506736, "learning_rate": 0.0008114599963374493, "loss": 3.1227, "step": 9050 }, { "epoch": 0.7150895342638842, "grad_norm": 0.11507382761353004, "learning_rate": 0.0008111903398139944, "loss": 3.2038, "step": 9055 }, { "epoch": 0.7154843932005291, "grad_norm": 0.13473694156633426, "learning_rate": 0.0008109205354685367, "loss": 3.2224, "step": 9060 }, { "epoch": 0.715879252137174, "grad_norm": 0.12247131266877864, "learning_rate": 0.0008106505834292395, "loss": 3.3395, "step": 9065 }, { "epoch": 0.7162741110738189, "grad_norm": 0.13320786775248633, "learning_rate": 0.0008103804838243354, "loss": 3.1013, "step": 9070 }, { "epoch": 0.7166689700104638, "grad_norm": 0.11088469922443828, "learning_rate": 0.0008101102367821273, "loss": 3.0624, "step": 9075 }, { "epoch": 0.7170638289471086, "grad_norm": 0.09267133916583013, "learning_rate": 0.0008098398424309885, "loss": 3.1544, "step": 9080 }, { "epoch": 0.7174586878837536, "grad_norm": 0.11601792039588486, "learning_rate": 0.0008095693008993619, "loss": 3.1407, "step": 9085 }, { "epoch": 0.7178535468203984, "grad_norm": 0.09629901222837871, "learning_rate": 0.0008092986123157607, "loss": 2.9926, "step": 9090 }, { "epoch": 0.7182484057570433, "grad_norm": 0.10755206967450764, "learning_rate": 0.0008090277768087674, "loss": 3.0937, "step": 9095 }, { "epoch": 0.7186432646936882, "grad_norm": 0.15437737577601576, "learning_rate": 0.0008087567945070349, "loss": 3.3727, "step": 9100 }, { "epoch": 0.719038123630333, "grad_norm": 0.1401740926080587, "learning_rate": 0.0008084856655392852, "loss": 3.4358, "step": 9105 }, { "epoch": 0.719432982566978, "grad_norm": 0.12402005965317608, "learning_rate": 0.0008082143900343106, "loss": 3.1198, "step": 9110 }, { "epoch": 0.7198278415036228, "grad_norm": 0.12394111288042259, "learning_rate": 0.0008079429681209727, "loss": 3.2665, "step": 9115 }, { "epoch": 0.7202227004402677, "grad_norm": 0.1245877336600898, "learning_rate": 0.0008076713999282024, "loss": 3.1081, "step": 9120 }, { "epoch": 0.7206175593769126, "grad_norm": 0.17844054068499465, "learning_rate": 0.0008073996855850005, "loss": 3.2069, "step": 9125 }, { "epoch": 0.7210124183135574, "grad_norm": 0.1295212600282966, "learning_rate": 0.000807127825220437, "loss": 3.4079, "step": 9130 }, { "epoch": 0.7214072772502024, "grad_norm": 0.11651679059885829, "learning_rate": 0.0008068558189636515, "loss": 3.145, "step": 9135 }, { "epoch": 0.7218021361868473, "grad_norm": 0.10762446895945733, "learning_rate": 0.0008065836669438526, "loss": 3.2718, "step": 9140 }, { "epoch": 0.7221969951234921, "grad_norm": 0.11426099988518745, "learning_rate": 0.0008063113692903184, "loss": 3.1126, "step": 9145 }, { "epoch": 0.722591854060137, "grad_norm": 0.17815897613947163, "learning_rate": 0.000806038926132396, "loss": 2.9886, "step": 9150 }, { "epoch": 0.7229867129967819, "grad_norm": 0.12433214897146372, "learning_rate": 0.0008057663375995015, "loss": 3.2732, "step": 9155 }, { "epoch": 0.7233815719334268, "grad_norm": 0.09570039308494391, "learning_rate": 0.0008054936038211203, "loss": 3.1608, "step": 9160 }, { "epoch": 0.7237764308700717, "grad_norm": 0.10542553519956868, "learning_rate": 0.0008052207249268069, "loss": 3.0293, "step": 9165 }, { "epoch": 0.7241712898067165, "grad_norm": 0.10648967624938128, "learning_rate": 0.0008049477010461843, "loss": 3.011, "step": 9170 }, { "epoch": 0.7245661487433614, "grad_norm": 0.10361384698954103, "learning_rate": 0.0008046745323089449, "loss": 2.9138, "step": 9175 }, { "epoch": 0.7249610076800063, "grad_norm": 0.08966240667318452, "learning_rate": 0.0008044012188448493, "loss": 2.9845, "step": 9180 }, { "epoch": 0.7253558666166512, "grad_norm": 0.10170173120158874, "learning_rate": 0.0008041277607837275, "loss": 3.0313, "step": 9185 }, { "epoch": 0.7257507255532961, "grad_norm": 0.10681516736218404, "learning_rate": 0.0008038541582554777, "loss": 2.9819, "step": 9190 }, { "epoch": 0.7261455844899409, "grad_norm": 0.13443894732657896, "learning_rate": 0.0008035804113900671, "loss": 3.2932, "step": 9195 }, { "epoch": 0.7265404434265859, "grad_norm": 0.10440836210542352, "learning_rate": 0.000803306520317531, "loss": 3.1611, "step": 9200 }, { "epoch": 0.7269353023632308, "grad_norm": 0.09304969771820393, "learning_rate": 0.0008030324851679735, "loss": 3.054, "step": 9205 }, { "epoch": 0.7273301612998756, "grad_norm": 0.10498098118863916, "learning_rate": 0.0008027583060715672, "loss": 3.1456, "step": 9210 }, { "epoch": 0.7277250202365205, "grad_norm": 0.10823827035818123, "learning_rate": 0.0008024839831585528, "loss": 2.9724, "step": 9215 }, { "epoch": 0.7281198791731653, "grad_norm": 0.14048092393662412, "learning_rate": 0.00080220951655924, "loss": 3.3381, "step": 9220 }, { "epoch": 0.7285147381098103, "grad_norm": 0.11426670504000548, "learning_rate": 0.0008019349064040056, "loss": 3.2341, "step": 9225 }, { "epoch": 0.7289095970464552, "grad_norm": 0.12119181446136346, "learning_rate": 0.0008016601528232955, "loss": 2.9956, "step": 9230 }, { "epoch": 0.7293044559831, "grad_norm": 0.1341964406254254, "learning_rate": 0.0008013852559476236, "loss": 3.1979, "step": 9235 }, { "epoch": 0.7296993149197449, "grad_norm": 0.09834013103339934, "learning_rate": 0.0008011102159075717, "loss": 2.9703, "step": 9240 }, { "epoch": 0.7300941738563897, "grad_norm": 0.11913375478952315, "learning_rate": 0.0008008350328337895, "loss": 3.2289, "step": 9245 }, { "epoch": 0.7304890327930347, "grad_norm": 0.1072545063636982, "learning_rate": 0.0008005597068569948, "loss": 3.0462, "step": 9250 }, { "epoch": 0.7308838917296796, "grad_norm": 0.10026814689609027, "learning_rate": 0.0008002842381079734, "loss": 3.1446, "step": 9255 }, { "epoch": 0.7312787506663244, "grad_norm": 0.1106230247487028, "learning_rate": 0.0008000086267175788, "loss": 3.1244, "step": 9260 }, { "epoch": 0.7316736096029693, "grad_norm": 0.12756202527622934, "learning_rate": 0.000799732872816732, "loss": 3.3407, "step": 9265 }, { "epoch": 0.7320684685396143, "grad_norm": 0.10630704069278561, "learning_rate": 0.0007994569765364222, "loss": 3.0712, "step": 9270 }, { "epoch": 0.7324633274762591, "grad_norm": 0.11530745242739368, "learning_rate": 0.0007991809380077059, "loss": 3.1844, "step": 9275 }, { "epoch": 0.732858186412904, "grad_norm": 0.10152023789431308, "learning_rate": 0.0007989047573617072, "loss": 2.9653, "step": 9280 }, { "epoch": 0.7332530453495488, "grad_norm": 0.09135581118614226, "learning_rate": 0.000798628434729618, "loss": 3.1226, "step": 9285 }, { "epoch": 0.7336479042861938, "grad_norm": 0.11111224103746768, "learning_rate": 0.000798351970242697, "loss": 3.3425, "step": 9290 }, { "epoch": 0.7340427632228387, "grad_norm": 0.18663872700192435, "learning_rate": 0.0007980753640322709, "loss": 3.0955, "step": 9295 }, { "epoch": 0.7344376221594835, "grad_norm": 0.10663975295307175, "learning_rate": 0.0007977986162297335, "loss": 3.0195, "step": 9300 }, { "epoch": 0.7348324810961284, "grad_norm": 0.10443470700255178, "learning_rate": 0.0007975217269665459, "loss": 3.0767, "step": 9305 }, { "epoch": 0.7352273400327733, "grad_norm": 0.11522295111590326, "learning_rate": 0.0007972446963742364, "loss": 3.0271, "step": 9310 }, { "epoch": 0.7356221989694182, "grad_norm": 0.09313799096186881, "learning_rate": 0.0007969675245844001, "loss": 3.0653, "step": 9315 }, { "epoch": 0.7360170579060631, "grad_norm": 0.22807512931575244, "learning_rate": 0.0007966902117287, "loss": 3.0396, "step": 9320 }, { "epoch": 0.7364119168427079, "grad_norm": 0.10090111955867526, "learning_rate": 0.000796412757938865, "loss": 3.1464, "step": 9325 }, { "epoch": 0.7368067757793528, "grad_norm": 0.10578995799673667, "learning_rate": 0.000796135163346692, "loss": 3.2582, "step": 9330 }, { "epoch": 0.7372016347159978, "grad_norm": 0.17859117192118598, "learning_rate": 0.0007958574280840441, "loss": 3.1125, "step": 9335 }, { "epoch": 0.7375964936526426, "grad_norm": 0.11327427393953952, "learning_rate": 0.0007955795522828514, "loss": 3.021, "step": 9340 }, { "epoch": 0.7379913525892875, "grad_norm": 0.11279775228035403, "learning_rate": 0.0007953015360751108, "loss": 3.2474, "step": 9345 }, { "epoch": 0.7383862115259323, "grad_norm": 0.13304392002055626, "learning_rate": 0.000795023379592886, "loss": 3.1115, "step": 9350 }, { "epoch": 0.7387810704625772, "grad_norm": 0.13705460614936402, "learning_rate": 0.0007947450829683072, "loss": 2.9665, "step": 9355 }, { "epoch": 0.7391759293992222, "grad_norm": 0.13002471266482502, "learning_rate": 0.000794466646333571, "loss": 3.2417, "step": 9360 }, { "epoch": 0.739570788335867, "grad_norm": 0.09247561384335724, "learning_rate": 0.0007941880698209408, "loss": 3.1071, "step": 9365 }, { "epoch": 0.7399656472725119, "grad_norm": 0.10218563012563217, "learning_rate": 0.0007939093535627465, "loss": 3.1731, "step": 9370 }, { "epoch": 0.7403605062091568, "grad_norm": 0.1307064633986457, "learning_rate": 0.0007936304976913842, "loss": 3.3689, "step": 9375 }, { "epoch": 0.7407553651458016, "grad_norm": 0.09882793131645894, "learning_rate": 0.0007933515023393161, "loss": 3.0034, "step": 9380 }, { "epoch": 0.7411502240824466, "grad_norm": 0.09727547271563654, "learning_rate": 0.0007930723676390711, "loss": 3.1462, "step": 9385 }, { "epoch": 0.7415450830190914, "grad_norm": 0.1236092794069037, "learning_rate": 0.000792793093723244, "loss": 3.1313, "step": 9390 }, { "epoch": 0.7419399419557363, "grad_norm": 0.10265805715215626, "learning_rate": 0.0007925136807244959, "loss": 3.0652, "step": 9395 }, { "epoch": 0.7423348008923812, "grad_norm": 0.10919610654461266, "learning_rate": 0.0007922341287755539, "loss": 2.9433, "step": 9400 }, { "epoch": 0.7427296598290261, "grad_norm": 0.10916220257941396, "learning_rate": 0.0007919544380092109, "loss": 3.0799, "step": 9405 }, { "epoch": 0.743124518765671, "grad_norm": 0.17999245371129435, "learning_rate": 0.0007916746085583264, "loss": 3.1821, "step": 9410 }, { "epoch": 0.7435193777023158, "grad_norm": 0.11618002781723266, "learning_rate": 0.0007913946405558248, "loss": 3.2386, "step": 9415 }, { "epoch": 0.7439142366389607, "grad_norm": 0.10584512062204268, "learning_rate": 0.0007911145341346973, "loss": 3.4946, "step": 9420 }, { "epoch": 0.7443090955756057, "grad_norm": 0.09167581749413752, "learning_rate": 0.0007908342894279998, "loss": 2.9843, "step": 9425 }, { "epoch": 0.7447039545122505, "grad_norm": 0.09501979452491172, "learning_rate": 0.000790553906568855, "loss": 3.0238, "step": 9430 }, { "epoch": 0.7450988134488954, "grad_norm": 0.11422456414815087, "learning_rate": 0.0007902733856904505, "loss": 3.1049, "step": 9435 }, { "epoch": 0.7454936723855403, "grad_norm": 0.14914502520992226, "learning_rate": 0.0007899927269260395, "loss": 3.149, "step": 9440 }, { "epoch": 0.7458885313221851, "grad_norm": 0.1390589204654697, "learning_rate": 0.0007897119304089412, "loss": 2.9384, "step": 9445 }, { "epoch": 0.7462833902588301, "grad_norm": 0.11670965864647175, "learning_rate": 0.0007894309962725397, "loss": 3.1114, "step": 9450 }, { "epoch": 0.7466782491954749, "grad_norm": 0.09477313837343833, "learning_rate": 0.0007891499246502845, "loss": 2.9763, "step": 9455 }, { "epoch": 0.7470731081321198, "grad_norm": 0.08659463691951791, "learning_rate": 0.000788868715675691, "loss": 3.0668, "step": 9460 }, { "epoch": 0.7474679670687647, "grad_norm": 0.13352426243220483, "learning_rate": 0.0007885873694823389, "loss": 3.2965, "step": 9465 }, { "epoch": 0.7478628260054095, "grad_norm": 0.09290819474623012, "learning_rate": 0.0007883058862038741, "loss": 3.0994, "step": 9470 }, { "epoch": 0.7482576849420545, "grad_norm": 0.1312583771930028, "learning_rate": 0.0007880242659740067, "loss": 3.3642, "step": 9475 }, { "epoch": 0.7486525438786993, "grad_norm": 0.14093058328249564, "learning_rate": 0.0007877425089265126, "loss": 3.3631, "step": 9480 }, { "epoch": 0.7490474028153442, "grad_norm": 0.12281586999126344, "learning_rate": 0.0007874606151952321, "loss": 3.0236, "step": 9485 }, { "epoch": 0.7494422617519891, "grad_norm": 0.09164034729151299, "learning_rate": 0.0007871785849140709, "loss": 3.0708, "step": 9490 }, { "epoch": 0.749837120688634, "grad_norm": 0.10116938502804207, "learning_rate": 0.0007868964182169993, "loss": 3.1658, "step": 9495 }, { "epoch": 0.7502319796252789, "grad_norm": 0.09987265813010522, "learning_rate": 0.0007866141152380523, "loss": 3.0321, "step": 9500 }, { "epoch": 0.7506268385619238, "grad_norm": 0.09574773751122605, "learning_rate": 0.0007863316761113301, "loss": 3.2323, "step": 9505 }, { "epoch": 0.7510216974985686, "grad_norm": 0.09982206502916122, "learning_rate": 0.0007860491009709971, "loss": 3.2043, "step": 9510 }, { "epoch": 0.7514165564352135, "grad_norm": 0.10671605668574079, "learning_rate": 0.0007857663899512823, "loss": 3.0687, "step": 9515 }, { "epoch": 0.7518114153718584, "grad_norm": 0.17753745429768256, "learning_rate": 0.0007854835431864798, "loss": 3.2777, "step": 9520 }, { "epoch": 0.7522062743085033, "grad_norm": 0.10357971052537891, "learning_rate": 0.0007852005608109475, "loss": 2.9484, "step": 9525 }, { "epoch": 0.7526011332451482, "grad_norm": 0.11354637615307656, "learning_rate": 0.0007849174429591082, "loss": 3.2745, "step": 9530 }, { "epoch": 0.752995992181793, "grad_norm": 0.11770768565765577, "learning_rate": 0.0007846341897654486, "loss": 3.0199, "step": 9535 }, { "epoch": 0.753390851118438, "grad_norm": 0.1109789541360703, "learning_rate": 0.0007843508013645203, "loss": 3.2463, "step": 9540 }, { "epoch": 0.7537857100550828, "grad_norm": 0.13159538461705558, "learning_rate": 0.0007840672778909386, "loss": 3.1403, "step": 9545 }, { "epoch": 0.7541805689917277, "grad_norm": 0.11585921850406664, "learning_rate": 0.0007837836194793834, "loss": 3.0262, "step": 9550 }, { "epoch": 0.7545754279283726, "grad_norm": 0.2309303572244641, "learning_rate": 0.0007834998262645981, "loss": 3.0124, "step": 9555 }, { "epoch": 0.7549702868650174, "grad_norm": 0.15230421033854544, "learning_rate": 0.0007832158983813906, "loss": 3.0075, "step": 9560 }, { "epoch": 0.7553651458016624, "grad_norm": 0.15056947345727065, "learning_rate": 0.0007829318359646329, "loss": 3.3426, "step": 9565 }, { "epoch": 0.7557600047383073, "grad_norm": 0.09960219938754827, "learning_rate": 0.0007826476391492602, "loss": 2.9879, "step": 9570 }, { "epoch": 0.7561548636749521, "grad_norm": 0.13069950198284094, "learning_rate": 0.0007823633080702725, "loss": 3.1046, "step": 9575 }, { "epoch": 0.756549722611597, "grad_norm": 0.2200909078376061, "learning_rate": 0.0007820788428627328, "loss": 3.0275, "step": 9580 }, { "epoch": 0.7569445815482418, "grad_norm": 0.11399713393431962, "learning_rate": 0.0007817942436617682, "loss": 3.0646, "step": 9585 }, { "epoch": 0.7573394404848868, "grad_norm": 0.12911850056372542, "learning_rate": 0.0007815095106025693, "loss": 3.1403, "step": 9590 }, { "epoch": 0.7577342994215317, "grad_norm": 0.170427919692669, "learning_rate": 0.0007812246438203904, "loss": 3.2055, "step": 9595 }, { "epoch": 0.7581291583581765, "grad_norm": 0.11929528230220134, "learning_rate": 0.0007809396434505492, "loss": 3.0678, "step": 9600 }, { "epoch": 0.7585240172948214, "grad_norm": 0.13969509580988337, "learning_rate": 0.0007806545096284268, "loss": 3.0601, "step": 9605 }, { "epoch": 0.7589188762314663, "grad_norm": 0.13451516346648051, "learning_rate": 0.0007803692424894679, "loss": 3.0992, "step": 9610 }, { "epoch": 0.7593137351681112, "grad_norm": 0.11629585527265475, "learning_rate": 0.0007800838421691806, "loss": 3.0522, "step": 9615 }, { "epoch": 0.7597085941047561, "grad_norm": 0.10425897615450905, "learning_rate": 0.000779798308803136, "loss": 2.9918, "step": 9620 }, { "epoch": 0.7601034530414009, "grad_norm": 0.12073283987789056, "learning_rate": 0.0007795126425269683, "loss": 3.2172, "step": 9625 }, { "epoch": 0.7604983119780458, "grad_norm": 0.09863353309423459, "learning_rate": 0.0007792268434763752, "loss": 3.1496, "step": 9630 }, { "epoch": 0.7608931709146908, "grad_norm": 0.09712111053597938, "learning_rate": 0.0007789409117871173, "loss": 3.1711, "step": 9635 }, { "epoch": 0.7612880298513356, "grad_norm": 0.09987056082281089, "learning_rate": 0.0007786548475950182, "loss": 2.949, "step": 9640 }, { "epoch": 0.7616828887879805, "grad_norm": 0.09356775743111018, "learning_rate": 0.0007783686510359645, "loss": 3.1127, "step": 9645 }, { "epoch": 0.7620777477246253, "grad_norm": 0.09689210274584964, "learning_rate": 0.0007780823222459055, "loss": 3.1672, "step": 9650 }, { "epoch": 0.7624726066612703, "grad_norm": 0.08942794602223902, "learning_rate": 0.0007777958613608534, "loss": 3.0045, "step": 9655 }, { "epoch": 0.7628674655979152, "grad_norm": 0.09692567485376315, "learning_rate": 0.0007775092685168834, "loss": 3.0168, "step": 9660 }, { "epoch": 0.76326232453456, "grad_norm": 0.09579550421099908, "learning_rate": 0.000777222543850133, "loss": 3.0925, "step": 9665 }, { "epoch": 0.7636571834712049, "grad_norm": 0.09805738582066637, "learning_rate": 0.0007769356874968026, "loss": 3.0976, "step": 9670 }, { "epoch": 0.7640520424078497, "grad_norm": 0.1068434631377516, "learning_rate": 0.0007766486995931551, "loss": 3.113, "step": 9675 }, { "epoch": 0.7644469013444947, "grad_norm": 0.09567262777584672, "learning_rate": 0.0007763615802755157, "loss": 3.156, "step": 9680 }, { "epoch": 0.7648417602811396, "grad_norm": 0.11352353796529895, "learning_rate": 0.0007760743296802721, "loss": 3.1032, "step": 9685 }, { "epoch": 0.7652366192177844, "grad_norm": 0.13160985735078662, "learning_rate": 0.0007757869479438746, "loss": 3.1828, "step": 9690 }, { "epoch": 0.7656314781544293, "grad_norm": 0.09392998817405397, "learning_rate": 0.0007754994352028355, "loss": 3.0857, "step": 9695 }, { "epoch": 0.7660263370910743, "grad_norm": 0.08389402524135181, "learning_rate": 0.0007752117915937294, "loss": 3.0044, "step": 9700 }, { "epoch": 0.7664211960277191, "grad_norm": 0.09614848364384312, "learning_rate": 0.0007749240172531933, "loss": 2.9494, "step": 9705 }, { "epoch": 0.766816054964364, "grad_norm": 0.10550196323067783, "learning_rate": 0.0007746361123179258, "loss": 3.1161, "step": 9710 }, { "epoch": 0.7672109139010088, "grad_norm": 0.10873828693127034, "learning_rate": 0.0007743480769246881, "loss": 2.9952, "step": 9715 }, { "epoch": 0.7676057728376537, "grad_norm": 0.11994446505797106, "learning_rate": 0.0007740599112103031, "loss": 3.0889, "step": 9720 }, { "epoch": 0.7680006317742987, "grad_norm": 0.09233833122044902, "learning_rate": 0.0007737716153116554, "loss": 2.8956, "step": 9725 }, { "epoch": 0.7683954907109435, "grad_norm": 0.18029036903384543, "learning_rate": 0.0007734831893656919, "loss": 3.2419, "step": 9730 }, { "epoch": 0.7687903496475884, "grad_norm": 0.12422738319258306, "learning_rate": 0.0007731946335094209, "loss": 2.9818, "step": 9735 }, { "epoch": 0.7691852085842333, "grad_norm": 0.13749781598753494, "learning_rate": 0.0007729059478799125, "loss": 3.1226, "step": 9740 }, { "epoch": 0.7695800675208782, "grad_norm": 0.10612766111484565, "learning_rate": 0.0007726171326142987, "loss": 3.1388, "step": 9745 }, { "epoch": 0.7699749264575231, "grad_norm": 0.09559133008680833, "learning_rate": 0.0007723281878497728, "loss": 3.0463, "step": 9750 }, { "epoch": 0.7703697853941679, "grad_norm": 0.11422804719873142, "learning_rate": 0.0007720391137235894, "loss": 3.1757, "step": 9755 }, { "epoch": 0.7707646443308128, "grad_norm": 0.1274069657110579, "learning_rate": 0.0007717499103730653, "loss": 3.229, "step": 9760 }, { "epoch": 0.7711595032674577, "grad_norm": 0.10037374537796842, "learning_rate": 0.0007714605779355778, "loss": 2.9962, "step": 9765 }, { "epoch": 0.7715543622041026, "grad_norm": 0.09219163427158258, "learning_rate": 0.0007711711165485662, "loss": 3.1462, "step": 9770 }, { "epoch": 0.7719492211407475, "grad_norm": 0.10377824434514583, "learning_rate": 0.0007708815263495306, "loss": 2.9711, "step": 9775 }, { "epoch": 0.7723440800773923, "grad_norm": 0.10753091074082614, "learning_rate": 0.0007705918074760327, "loss": 3.0709, "step": 9780 }, { "epoch": 0.7727389390140372, "grad_norm": 0.12070121355500672, "learning_rate": 0.0007703019600656948, "loss": 2.9659, "step": 9785 }, { "epoch": 0.7731337979506822, "grad_norm": 0.12557427888683512, "learning_rate": 0.000770011984256201, "loss": 2.993, "step": 9790 }, { "epoch": 0.773528656887327, "grad_norm": 0.1884116468341977, "learning_rate": 0.0007697218801852954, "loss": 3.2387, "step": 9795 }, { "epoch": 0.7739235158239719, "grad_norm": 0.10380522639627433, "learning_rate": 0.0007694316479907842, "loss": 3.1489, "step": 9800 }, { "epoch": 0.7743183747606168, "grad_norm": 0.10134862163227906, "learning_rate": 0.0007691412878105332, "loss": 2.9952, "step": 9805 }, { "epoch": 0.7747132336972616, "grad_norm": 0.09875246725288045, "learning_rate": 0.0007688507997824699, "loss": 3.0048, "step": 9810 }, { "epoch": 0.7751080926339066, "grad_norm": 0.0914558825662177, "learning_rate": 0.0007685601840445826, "loss": 3.0165, "step": 9815 }, { "epoch": 0.7755029515705514, "grad_norm": 0.10423428580439485, "learning_rate": 0.0007682694407349195, "loss": 3.334, "step": 9820 }, { "epoch": 0.7758978105071963, "grad_norm": 0.11859532954194742, "learning_rate": 0.00076797856999159, "loss": 2.9899, "step": 9825 }, { "epoch": 0.7762926694438412, "grad_norm": 0.10369308402276918, "learning_rate": 0.0007676875719527638, "loss": 3.1297, "step": 9830 }, { "epoch": 0.776687528380486, "grad_norm": 0.10224580216314003, "learning_rate": 0.0007673964467566711, "loss": 3.192, "step": 9835 }, { "epoch": 0.777082387317131, "grad_norm": 0.09840101327746086, "learning_rate": 0.0007671051945416026, "loss": 3.0768, "step": 9840 }, { "epoch": 0.7774772462537758, "grad_norm": 0.10028451360429932, "learning_rate": 0.0007668138154459093, "loss": 2.9965, "step": 9845 }, { "epoch": 0.7778721051904207, "grad_norm": 0.13039362858625284, "learning_rate": 0.0007665223096080024, "loss": 3.1328, "step": 9850 }, { "epoch": 0.7782669641270656, "grad_norm": 0.08258908189092666, "learning_rate": 0.0007662306771663534, "loss": 3.0317, "step": 9855 }, { "epoch": 0.7786618230637105, "grad_norm": 0.14444810143728623, "learning_rate": 0.0007659389182594939, "loss": 3.2383, "step": 9860 }, { "epoch": 0.7790566820003554, "grad_norm": 0.12700673451179664, "learning_rate": 0.0007656470330260155, "loss": 3.134, "step": 9865 }, { "epoch": 0.7794515409370003, "grad_norm": 0.09317589678176665, "learning_rate": 0.0007653550216045697, "loss": 2.9495, "step": 9870 }, { "epoch": 0.7798463998736451, "grad_norm": 0.3023882407355481, "learning_rate": 0.0007650628841338685, "loss": 3.0756, "step": 9875 }, { "epoch": 0.78024125881029, "grad_norm": 0.11223080322256469, "learning_rate": 0.000764770620752683, "loss": 2.9413, "step": 9880 }, { "epoch": 0.7806361177469349, "grad_norm": 0.09702285931848872, "learning_rate": 0.0007644782315998446, "loss": 2.8724, "step": 9885 }, { "epoch": 0.7810309766835798, "grad_norm": 0.12839858883455596, "learning_rate": 0.0007641857168142446, "loss": 3.1091, "step": 9890 }, { "epoch": 0.7814258356202247, "grad_norm": 0.16469164380620868, "learning_rate": 0.0007638930765348335, "loss": 3.2332, "step": 9895 }, { "epoch": 0.7818206945568695, "grad_norm": 0.09928973710893327, "learning_rate": 0.0007636003109006216, "loss": 3.1156, "step": 9900 }, { "epoch": 0.7822155534935145, "grad_norm": 0.18236839584583067, "learning_rate": 0.0007633074200506789, "loss": 3.1181, "step": 9905 }, { "epoch": 0.7826104124301593, "grad_norm": 0.16326237386474243, "learning_rate": 0.0007630144041241347, "loss": 3.0643, "step": 9910 }, { "epoch": 0.7830052713668042, "grad_norm": 0.1825990837333348, "learning_rate": 0.0007627212632601776, "loss": 3.0002, "step": 9915 }, { "epoch": 0.7834001303034491, "grad_norm": 0.150521337858663, "learning_rate": 0.0007624279975980559, "loss": 2.9463, "step": 9920 }, { "epoch": 0.7837949892400939, "grad_norm": 0.11766988679244669, "learning_rate": 0.0007621346072770772, "loss": 2.9539, "step": 9925 }, { "epoch": 0.7841898481767389, "grad_norm": 0.14198042380712345, "learning_rate": 0.0007618410924366077, "loss": 3.0117, "step": 9930 }, { "epoch": 0.7845847071133838, "grad_norm": 0.11762578751054066, "learning_rate": 0.0007615474532160733, "loss": 3.3452, "step": 9935 }, { "epoch": 0.7849795660500286, "grad_norm": 0.138514148336183, "learning_rate": 0.0007612536897549591, "loss": 3.0131, "step": 9940 }, { "epoch": 0.7853744249866735, "grad_norm": 0.09538511202241036, "learning_rate": 0.0007609598021928088, "loss": 3.0166, "step": 9945 }, { "epoch": 0.7857692839233184, "grad_norm": 0.15670650899470276, "learning_rate": 0.0007606657906692252, "loss": 3.2663, "step": 9950 }, { "epoch": 0.7861641428599633, "grad_norm": 0.10471111792257741, "learning_rate": 0.00076037165532387, "loss": 2.9933, "step": 9955 }, { "epoch": 0.7865590017966082, "grad_norm": 0.1654095802783691, "learning_rate": 0.0007600773962964635, "loss": 3.1819, "step": 9960 }, { "epoch": 0.786953860733253, "grad_norm": 0.11879755203371999, "learning_rate": 0.0007597830137267853, "loss": 3.1197, "step": 9965 }, { "epoch": 0.787348719669898, "grad_norm": 0.08389982482752775, "learning_rate": 0.0007594885077546733, "loss": 3.0862, "step": 9970 }, { "epoch": 0.7877435786065428, "grad_norm": 0.10504004400055882, "learning_rate": 0.0007591938785200239, "loss": 2.9979, "step": 9975 }, { "epoch": 0.7881384375431877, "grad_norm": 0.0994020294090497, "learning_rate": 0.0007588991261627924, "loss": 3.0065, "step": 9980 }, { "epoch": 0.7885332964798326, "grad_norm": 0.09517286761979828, "learning_rate": 0.0007586042508229924, "loss": 3.2386, "step": 9985 }, { "epoch": 0.7889281554164774, "grad_norm": 0.09743621518714354, "learning_rate": 0.000758309252640696, "loss": 3.0122, "step": 9990 }, { "epoch": 0.7893230143531224, "grad_norm": 0.11547866563951514, "learning_rate": 0.0007580141317560333, "loss": 3.102, "step": 9995 }, { "epoch": 0.7897178732897673, "grad_norm": 0.09051742100047694, "learning_rate": 0.0007577188883091932, "loss": 2.8751, "step": 10000 }, { "epoch": 0.7901127322264121, "grad_norm": 0.09646448282165183, "learning_rate": 0.0007574235224404224, "loss": 2.9438, "step": 10005 }, { "epoch": 0.790507591163057, "grad_norm": 0.13779111018782794, "learning_rate": 0.0007571280342900263, "loss": 3.1062, "step": 10010 }, { "epoch": 0.7909024500997018, "grad_norm": 0.13417298047640555, "learning_rate": 0.0007568324239983677, "loss": 3.0803, "step": 10015 }, { "epoch": 0.7912973090363468, "grad_norm": 0.12433500029474893, "learning_rate": 0.000756536691705868, "loss": 3.1786, "step": 10020 }, { "epoch": 0.7916921679729917, "grad_norm": 0.10555065948585773, "learning_rate": 0.0007562408375530059, "loss": 3.1192, "step": 10025 }, { "epoch": 0.7920870269096365, "grad_norm": 0.16128610183817985, "learning_rate": 0.0007559448616803188, "loss": 3.0692, "step": 10030 }, { "epoch": 0.7924818858462814, "grad_norm": 0.14422578264925165, "learning_rate": 0.0007556487642284013, "loss": 3.2886, "step": 10035 }, { "epoch": 0.7928767447829262, "grad_norm": 0.11424460357830381, "learning_rate": 0.0007553525453379062, "loss": 2.9927, "step": 10040 }, { "epoch": 0.7932716037195712, "grad_norm": 0.1259888370154627, "learning_rate": 0.0007550562051495434, "loss": 2.9007, "step": 10045 }, { "epoch": 0.7936664626562161, "grad_norm": 0.1113664638091601, "learning_rate": 0.000754759743804081, "loss": 3.0859, "step": 10050 }, { "epoch": 0.7940613215928609, "grad_norm": 0.09401709219105199, "learning_rate": 0.0007544631614423443, "loss": 3.1089, "step": 10055 }, { "epoch": 0.7944561805295058, "grad_norm": 0.09882708265267999, "learning_rate": 0.0007541664582052163, "loss": 3.0203, "step": 10060 }, { "epoch": 0.7948510394661508, "grad_norm": 0.10364403717860914, "learning_rate": 0.0007538696342336373, "loss": 2.928, "step": 10065 }, { "epoch": 0.7952458984027956, "grad_norm": 0.1111400297079254, "learning_rate": 0.0007535726896686049, "loss": 3.0876, "step": 10070 }, { "epoch": 0.7956407573394405, "grad_norm": 0.09753292739233071, "learning_rate": 0.0007532756246511741, "loss": 3.1622, "step": 10075 }, { "epoch": 0.7960356162760853, "grad_norm": 0.09370424464115501, "learning_rate": 0.0007529784393224572, "loss": 3.151, "step": 10080 }, { "epoch": 0.7964304752127302, "grad_norm": 0.11148315741502722, "learning_rate": 0.0007526811338236232, "loss": 3.0806, "step": 10085 }, { "epoch": 0.7968253341493752, "grad_norm": 0.09881836533534188, "learning_rate": 0.0007523837082958988, "loss": 2.9548, "step": 10090 }, { "epoch": 0.79722019308602, "grad_norm": 0.08993603733101067, "learning_rate": 0.0007520861628805675, "loss": 2.8904, "step": 10095 }, { "epoch": 0.7976150520226649, "grad_norm": 0.11506792050111121, "learning_rate": 0.0007517884977189693, "loss": 3.3048, "step": 10100 }, { "epoch": 0.7980099109593097, "grad_norm": 0.09307191645286472, "learning_rate": 0.0007514907129525018, "loss": 3.1508, "step": 10105 }, { "epoch": 0.7984047698959547, "grad_norm": 0.13726215523513183, "learning_rate": 0.000751192808722619, "loss": 3.2831, "step": 10110 }, { "epoch": 0.7987996288325996, "grad_norm": 0.134323446581047, "learning_rate": 0.0007508947851708315, "loss": 3.2805, "step": 10115 }, { "epoch": 0.7991944877692444, "grad_norm": 0.09549987811300416, "learning_rate": 0.0007505966424387069, "loss": 3.0811, "step": 10120 }, { "epoch": 0.7995893467058893, "grad_norm": 0.09484532954429456, "learning_rate": 0.0007502983806678697, "loss": 3.1022, "step": 10125 }, { "epoch": 0.7999842056425343, "grad_norm": 0.11873188932658599, "learning_rate": 0.00075, "loss": 3.4339, "step": 10130 }, { "epoch": 0.8003790645791791, "grad_norm": 0.1469433984720342, "learning_rate": 0.0007497015005768353, "loss": 3.2153, "step": 10135 }, { "epoch": 0.800773923515824, "grad_norm": 0.12438091430202543, "learning_rate": 0.000749402882540169, "loss": 3.0318, "step": 10140 }, { "epoch": 0.8011687824524688, "grad_norm": 0.10074672540518398, "learning_rate": 0.000749104146031851, "loss": 3.1256, "step": 10145 }, { "epoch": 0.8015636413891137, "grad_norm": 0.10641694078114271, "learning_rate": 0.0007488052911937874, "loss": 2.9701, "step": 10150 }, { "epoch": 0.8019585003257587, "grad_norm": 0.09847209194153193, "learning_rate": 0.0007485063181679406, "loss": 2.9975, "step": 10155 }, { "epoch": 0.8023533592624035, "grad_norm": 0.15575855171485717, "learning_rate": 0.0007482072270963292, "loss": 3.0044, "step": 10160 }, { "epoch": 0.8027482181990484, "grad_norm": 0.09596961896609639, "learning_rate": 0.0007479080181210279, "loss": 3.0231, "step": 10165 }, { "epoch": 0.8031430771356933, "grad_norm": 0.11321621897356816, "learning_rate": 0.0007476086913841673, "loss": 3.1248, "step": 10170 }, { "epoch": 0.8035379360723381, "grad_norm": 0.11639001063056367, "learning_rate": 0.0007473092470279336, "loss": 3.1988, "step": 10175 }, { "epoch": 0.8039327950089831, "grad_norm": 0.09671733832011238, "learning_rate": 0.0007470096851945696, "loss": 2.9126, "step": 10180 }, { "epoch": 0.8043276539456279, "grad_norm": 0.10983815668107795, "learning_rate": 0.0007467100060263732, "loss": 3.1831, "step": 10185 }, { "epoch": 0.8047225128822728, "grad_norm": 0.09687510476453881, "learning_rate": 0.0007464102096656984, "loss": 2.9745, "step": 10190 }, { "epoch": 0.8051173718189177, "grad_norm": 0.1057862367913993, "learning_rate": 0.0007461102962549549, "loss": 3.1443, "step": 10195 }, { "epoch": 0.8055122307555626, "grad_norm": 0.1477115499276721, "learning_rate": 0.0007458102659366082, "loss": 3.146, "step": 10200 }, { "epoch": 0.8059070896922075, "grad_norm": 0.10405130996700254, "learning_rate": 0.0007455101188531785, "loss": 3.1344, "step": 10205 }, { "epoch": 0.8063019486288523, "grad_norm": 0.24332698530182265, "learning_rate": 0.0007452098551472426, "loss": 3.2028, "step": 10210 }, { "epoch": 0.8066968075654972, "grad_norm": 0.12134608497153701, "learning_rate": 0.0007449094749614318, "loss": 3.146, "step": 10215 }, { "epoch": 0.8070916665021421, "grad_norm": 0.10146569674067817, "learning_rate": 0.000744608978438433, "loss": 2.9751, "step": 10220 }, { "epoch": 0.807486525438787, "grad_norm": 0.09962784473129722, "learning_rate": 0.0007443083657209884, "loss": 2.9576, "step": 10225 }, { "epoch": 0.8078813843754319, "grad_norm": 0.08749386422196492, "learning_rate": 0.0007440076369518955, "loss": 3.0787, "step": 10230 }, { "epoch": 0.8082762433120768, "grad_norm": 0.11993869165344262, "learning_rate": 0.000743706792274007, "loss": 2.956, "step": 10235 }, { "epoch": 0.8086711022487216, "grad_norm": 0.11254274091186135, "learning_rate": 0.0007434058318302304, "loss": 3.0229, "step": 10240 }, { "epoch": 0.8090659611853666, "grad_norm": 0.09882829434893323, "learning_rate": 0.000743104755763528, "loss": 2.8651, "step": 10245 }, { "epoch": 0.8094608201220114, "grad_norm": 0.09688920123037478, "learning_rate": 0.0007428035642169176, "loss": 2.9478, "step": 10250 }, { "epoch": 0.8098556790586563, "grad_norm": 0.10583697398492568, "learning_rate": 0.0007425022573334716, "loss": 2.9935, "step": 10255 }, { "epoch": 0.8102505379953012, "grad_norm": 0.08736056255785571, "learning_rate": 0.0007422008352563171, "loss": 3.0386, "step": 10260 }, { "epoch": 0.810645396931946, "grad_norm": 0.33965785703454576, "learning_rate": 0.0007418992981286357, "loss": 2.9764, "step": 10265 }, { "epoch": 0.811040255868591, "grad_norm": 0.1291878967033615, "learning_rate": 0.0007415976460936642, "loss": 2.8811, "step": 10270 }, { "epoch": 0.8114351148052358, "grad_norm": 0.11754474872110994, "learning_rate": 0.0007412958792946937, "loss": 3.0861, "step": 10275 }, { "epoch": 0.8118299737418807, "grad_norm": 0.09982892676111582, "learning_rate": 0.0007409939978750697, "loss": 3.1045, "step": 10280 }, { "epoch": 0.8122248326785256, "grad_norm": 0.10086407804501187, "learning_rate": 0.0007406920019781924, "loss": 3.055, "step": 10285 }, { "epoch": 0.8126196916151704, "grad_norm": 0.18204972228888824, "learning_rate": 0.0007403898917475162, "loss": 2.8821, "step": 10290 }, { "epoch": 0.8130145505518154, "grad_norm": 0.10377035913807763, "learning_rate": 0.0007400876673265498, "loss": 3.1362, "step": 10295 }, { "epoch": 0.8134094094884603, "grad_norm": 0.15152377563559605, "learning_rate": 0.0007397853288588561, "loss": 3.1145, "step": 10300 }, { "epoch": 0.8138042684251051, "grad_norm": 0.09938077536795005, "learning_rate": 0.0007394828764880526, "loss": 2.9761, "step": 10305 }, { "epoch": 0.81419912736175, "grad_norm": 0.08925268159400544, "learning_rate": 0.0007391803103578103, "loss": 3.1033, "step": 10310 }, { "epoch": 0.8145939862983949, "grad_norm": 0.10925059320066409, "learning_rate": 0.0007388776306118547, "loss": 3.1027, "step": 10315 }, { "epoch": 0.8149888452350398, "grad_norm": 0.25422801803153167, "learning_rate": 0.0007385748373939649, "loss": 3.2741, "step": 10320 }, { "epoch": 0.8153837041716847, "grad_norm": 0.08462043706028574, "learning_rate": 0.0007382719308479744, "loss": 2.8774, "step": 10325 }, { "epoch": 0.8157785631083295, "grad_norm": 0.08737551886208668, "learning_rate": 0.0007379689111177699, "loss": 3.0443, "step": 10330 }, { "epoch": 0.8161734220449745, "grad_norm": 0.09211450657007046, "learning_rate": 0.0007376657783472923, "loss": 2.9685, "step": 10335 }, { "epoch": 0.8165682809816193, "grad_norm": 0.1091331970707739, "learning_rate": 0.000737362532680536, "loss": 3.0596, "step": 10340 }, { "epoch": 0.8169631399182642, "grad_norm": 0.11041254432190246, "learning_rate": 0.0007370591742615494, "loss": 2.9579, "step": 10345 }, { "epoch": 0.8173579988549091, "grad_norm": 0.09182584493314327, "learning_rate": 0.000736755703234434, "loss": 3.1258, "step": 10350 }, { "epoch": 0.8177528577915539, "grad_norm": 0.16094771344609715, "learning_rate": 0.0007364521197433449, "loss": 3.2956, "step": 10355 }, { "epoch": 0.8181477167281989, "grad_norm": 0.12018525995903082, "learning_rate": 0.0007361484239324907, "loss": 3.4276, "step": 10360 }, { "epoch": 0.8185425756648438, "grad_norm": 0.09695526885832592, "learning_rate": 0.0007358446159461334, "loss": 3.137, "step": 10365 }, { "epoch": 0.8189374346014886, "grad_norm": 0.14912298885299183, "learning_rate": 0.0007355406959285883, "loss": 3.1056, "step": 10370 }, { "epoch": 0.8193322935381335, "grad_norm": 0.1110924482521644, "learning_rate": 0.0007352366640242237, "loss": 2.8483, "step": 10375 }, { "epoch": 0.8197271524747783, "grad_norm": 0.17983734096566967, "learning_rate": 0.0007349325203774613, "loss": 3.248, "step": 10380 }, { "epoch": 0.8201220114114233, "grad_norm": 0.10455406000418604, "learning_rate": 0.0007346282651327756, "loss": 3.0728, "step": 10385 }, { "epoch": 0.8205168703480682, "grad_norm": 0.20824830191894553, "learning_rate": 0.0007343238984346945, "loss": 2.988, "step": 10390 }, { "epoch": 0.820911729284713, "grad_norm": 0.10009046736510155, "learning_rate": 0.0007340194204277986, "loss": 3.1284, "step": 10395 }, { "epoch": 0.8213065882213579, "grad_norm": 0.19294787501821475, "learning_rate": 0.0007337148312567213, "loss": 3.0385, "step": 10400 }, { "epoch": 0.8217014471580028, "grad_norm": 0.09081548870846697, "learning_rate": 0.0007334101310661489, "loss": 3.0282, "step": 10405 }, { "epoch": 0.8220963060946477, "grad_norm": 0.1320583333505218, "learning_rate": 0.0007331053200008206, "loss": 3.0888, "step": 10410 }, { "epoch": 0.8224911650312926, "grad_norm": 0.11558581864680344, "learning_rate": 0.000732800398205528, "loss": 3.001, "step": 10415 }, { "epoch": 0.8228860239679374, "grad_norm": 0.10058434653383144, "learning_rate": 0.0007324953658251152, "loss": 2.9001, "step": 10420 }, { "epoch": 0.8232808829045823, "grad_norm": 0.10736690350129628, "learning_rate": 0.0007321902230044794, "loss": 3.2013, "step": 10425 }, { "epoch": 0.8236757418412273, "grad_norm": 0.12108730461461208, "learning_rate": 0.0007318849698885699, "loss": 2.9267, "step": 10430 }, { "epoch": 0.8240706007778721, "grad_norm": 0.10897263462355292, "learning_rate": 0.0007315796066223883, "loss": 3.0313, "step": 10435 }, { "epoch": 0.824465459714517, "grad_norm": 0.1122466845436903, "learning_rate": 0.0007312741333509884, "loss": 3.0216, "step": 10440 }, { "epoch": 0.8248603186511618, "grad_norm": 0.08961254349966484, "learning_rate": 0.0007309685502194768, "loss": 3.0316, "step": 10445 }, { "epoch": 0.8252551775878068, "grad_norm": 0.1082587072209283, "learning_rate": 0.0007306628573730116, "loss": 3.03, "step": 10450 }, { "epoch": 0.8256500365244517, "grad_norm": 0.1145809853064507, "learning_rate": 0.0007303570549568037, "loss": 3.0121, "step": 10455 }, { "epoch": 0.8260448954610965, "grad_norm": 0.11264458635956899, "learning_rate": 0.0007300511431161156, "loss": 2.991, "step": 10460 }, { "epoch": 0.8264397543977414, "grad_norm": 0.09661943846191172, "learning_rate": 0.0007297451219962619, "loss": 2.8134, "step": 10465 }, { "epoch": 0.8268346133343862, "grad_norm": 0.09394985294621684, "learning_rate": 0.0007294389917426092, "loss": 2.9303, "step": 10470 }, { "epoch": 0.8272294722710312, "grad_norm": 0.11588569749610225, "learning_rate": 0.0007291327525005758, "loss": 3.1055, "step": 10475 }, { "epoch": 0.8276243312076761, "grad_norm": 0.17497625168234374, "learning_rate": 0.0007288264044156319, "loss": 3.054, "step": 10480 }, { "epoch": 0.8280191901443209, "grad_norm": 0.11524375574969088, "learning_rate": 0.000728519947633299, "loss": 3.1644, "step": 10485 }, { "epoch": 0.8284140490809658, "grad_norm": 0.11508929166573258, "learning_rate": 0.000728213382299151, "loss": 3.1531, "step": 10490 }, { "epoch": 0.8288089080176108, "grad_norm": 0.09262532753812, "learning_rate": 0.0007279067085588126, "loss": 3.0171, "step": 10495 }, { "epoch": 0.8292037669542556, "grad_norm": 0.09759388365856361, "learning_rate": 0.0007275999265579605, "loss": 3.126, "step": 10500 }, { "epoch": 0.8295986258909005, "grad_norm": 0.10890023569086527, "learning_rate": 0.0007272930364423226, "loss": 2.8694, "step": 10505 }, { "epoch": 0.8299934848275453, "grad_norm": 0.10578221066385028, "learning_rate": 0.0007269860383576782, "loss": 3.0783, "step": 10510 }, { "epoch": 0.8303883437641902, "grad_norm": 0.1393240439957165, "learning_rate": 0.0007266789324498579, "loss": 2.939, "step": 10515 }, { "epoch": 0.8307832027008352, "grad_norm": 0.1082532942853813, "learning_rate": 0.0007263717188647436, "loss": 3.1283, "step": 10520 }, { "epoch": 0.83117806163748, "grad_norm": 0.08823902634652628, "learning_rate": 0.0007260643977482681, "loss": 3.0782, "step": 10525 }, { "epoch": 0.8315729205741249, "grad_norm": 0.09914509218111739, "learning_rate": 0.0007257569692464156, "loss": 3.0098, "step": 10530 }, { "epoch": 0.8319677795107697, "grad_norm": 0.123012852943961, "learning_rate": 0.0007254494335052208, "loss": 2.9696, "step": 10535 }, { "epoch": 0.8323626384474146, "grad_norm": 0.0955844522637601, "learning_rate": 0.0007251417906707703, "loss": 3.0909, "step": 10540 }, { "epoch": 0.8327574973840596, "grad_norm": 0.0901994814046455, "learning_rate": 0.0007248340408892004, "loss": 2.9443, "step": 10545 }, { "epoch": 0.8331523563207044, "grad_norm": 0.07614052292463964, "learning_rate": 0.000724526184306699, "loss": 2.859, "step": 10550 }, { "epoch": 0.8335472152573493, "grad_norm": 0.11272274783987365, "learning_rate": 0.0007242182210695045, "loss": 3.0099, "step": 10555 }, { "epoch": 0.8339420741939942, "grad_norm": 0.10372482831984828, "learning_rate": 0.0007239101513239059, "loss": 2.9077, "step": 10560 }, { "epoch": 0.8343369331306391, "grad_norm": 0.09318986609614138, "learning_rate": 0.000723601975216243, "loss": 3.0182, "step": 10565 }, { "epoch": 0.834731792067284, "grad_norm": 0.10388161394157266, "learning_rate": 0.0007232936928929058, "loss": 3.0558, "step": 10570 }, { "epoch": 0.8351266510039288, "grad_norm": 0.08768562684298063, "learning_rate": 0.000722985304500335, "loss": 2.9456, "step": 10575 }, { "epoch": 0.8355215099405737, "grad_norm": 0.08987097793790651, "learning_rate": 0.0007226768101850216, "loss": 2.954, "step": 10580 }, { "epoch": 0.8359163688772187, "grad_norm": 0.11949079837673451, "learning_rate": 0.000722368210093507, "loss": 3.0467, "step": 10585 }, { "epoch": 0.8363112278138635, "grad_norm": 0.09962217205934272, "learning_rate": 0.0007220595043723827, "loss": 3.0483, "step": 10590 }, { "epoch": 0.8367060867505084, "grad_norm": 0.11359948643472717, "learning_rate": 0.0007217506931682904, "loss": 3.0993, "step": 10595 }, { "epoch": 0.8371009456871533, "grad_norm": 0.1001974860881291, "learning_rate": 0.0007214417766279222, "loss": 2.9931, "step": 10600 }, { "epoch": 0.8374958046237981, "grad_norm": 0.1268015653864932, "learning_rate": 0.0007211327548980197, "loss": 3.1105, "step": 10605 }, { "epoch": 0.8378906635604431, "grad_norm": 0.11154116001855341, "learning_rate": 0.0007208236281253751, "loss": 3.158, "step": 10610 }, { "epoch": 0.8382855224970879, "grad_norm": 0.09545732397119122, "learning_rate": 0.0007205143964568299, "loss": 3.0043, "step": 10615 }, { "epoch": 0.8386803814337328, "grad_norm": 0.17875242940026054, "learning_rate": 0.0007202050600392758, "loss": 2.9905, "step": 10620 }, { "epoch": 0.8390752403703777, "grad_norm": 0.08609383836147065, "learning_rate": 0.000719895619019654, "loss": 2.996, "step": 10625 }, { "epoch": 0.8394700993070225, "grad_norm": 0.11417880652882446, "learning_rate": 0.0007195860735449559, "loss": 2.9069, "step": 10630 }, { "epoch": 0.8398649582436675, "grad_norm": 0.1924649405945916, "learning_rate": 0.0007192764237622218, "loss": 2.877, "step": 10635 }, { "epoch": 0.8402598171803123, "grad_norm": 0.11685166847962733, "learning_rate": 0.000718966669818542, "loss": 2.9684, "step": 10640 }, { "epoch": 0.8406546761169572, "grad_norm": 0.09373696660075206, "learning_rate": 0.0007186568118610562, "loss": 2.9539, "step": 10645 }, { "epoch": 0.8410495350536021, "grad_norm": 0.10346138378331945, "learning_rate": 0.0007183468500369534, "loss": 2.8814, "step": 10650 }, { "epoch": 0.841444393990247, "grad_norm": 0.13652143334560116, "learning_rate": 0.0007180367844934723, "loss": 2.9509, "step": 10655 }, { "epoch": 0.8418392529268919, "grad_norm": 0.11594712476819512, "learning_rate": 0.0007177266153779006, "loss": 2.8186, "step": 10660 }, { "epoch": 0.8422341118635368, "grad_norm": 0.09940740121128128, "learning_rate": 0.0007174163428375748, "loss": 3.0588, "step": 10665 }, { "epoch": 0.8426289708001816, "grad_norm": 0.10012445586612832, "learning_rate": 0.0007171059670198812, "loss": 2.8902, "step": 10670 }, { "epoch": 0.8430238297368265, "grad_norm": 0.11995824836796304, "learning_rate": 0.0007167954880722547, "loss": 3.1005, "step": 10675 }, { "epoch": 0.8434186886734714, "grad_norm": 0.09181947512452722, "learning_rate": 0.0007164849061421797, "loss": 3.1511, "step": 10680 }, { "epoch": 0.8438135476101163, "grad_norm": 0.08971865287387676, "learning_rate": 0.0007161742213771889, "loss": 2.9821, "step": 10685 }, { "epoch": 0.8442084065467612, "grad_norm": 0.10300584494323879, "learning_rate": 0.0007158634339248644, "loss": 2.9162, "step": 10690 }, { "epoch": 0.844603265483406, "grad_norm": 0.09136797595503433, "learning_rate": 0.0007155525439328366, "loss": 2.9301, "step": 10695 }, { "epoch": 0.844998124420051, "grad_norm": 0.10341722946950781, "learning_rate": 0.0007152415515487851, "loss": 3.0524, "step": 10700 }, { "epoch": 0.8453929833566958, "grad_norm": 0.23316823850489718, "learning_rate": 0.0007149304569204376, "loss": 2.911, "step": 10705 }, { "epoch": 0.8457878422933407, "grad_norm": 0.1384830988270285, "learning_rate": 0.0007146192601955707, "loss": 2.994, "step": 10710 }, { "epoch": 0.8461827012299856, "grad_norm": 0.09201772117831614, "learning_rate": 0.0007143079615220094, "loss": 3.0213, "step": 10715 }, { "epoch": 0.8465775601666304, "grad_norm": 0.11978752578645721, "learning_rate": 0.0007139965610476274, "loss": 3.1847, "step": 10720 }, { "epoch": 0.8469724191032754, "grad_norm": 0.10055280048646645, "learning_rate": 0.0007136850589203461, "loss": 3.061, "step": 10725 }, { "epoch": 0.8473672780399203, "grad_norm": 0.16247241782602875, "learning_rate": 0.0007133734552881359, "loss": 3.0604, "step": 10730 }, { "epoch": 0.8477621369765651, "grad_norm": 0.10684379490359722, "learning_rate": 0.000713061750299015, "loss": 2.9918, "step": 10735 }, { "epoch": 0.84815699591321, "grad_norm": 0.2003025356857389, "learning_rate": 0.0007127499441010502, "loss": 2.8293, "step": 10740 }, { "epoch": 0.8485518548498548, "grad_norm": 0.10243823025115514, "learning_rate": 0.0007124380368423559, "loss": 2.9872, "step": 10745 }, { "epoch": 0.8489467137864998, "grad_norm": 0.11336235034328994, "learning_rate": 0.0007121260286710944, "loss": 3.0657, "step": 10750 }, { "epoch": 0.8493415727231447, "grad_norm": 0.10321151939313435, "learning_rate": 0.0007118139197354763, "loss": 2.9622, "step": 10755 }, { "epoch": 0.8497364316597895, "grad_norm": 0.103421688180354, "learning_rate": 0.0007115017101837598, "loss": 3.3237, "step": 10760 }, { "epoch": 0.8501312905964344, "grad_norm": 0.10064886258156627, "learning_rate": 0.0007111894001642514, "loss": 3.0293, "step": 10765 }, { "epoch": 0.8505261495330793, "grad_norm": 0.09728139749681675, "learning_rate": 0.0007108769898253049, "loss": 2.9649, "step": 10770 }, { "epoch": 0.8509210084697242, "grad_norm": 0.09288684935912442, "learning_rate": 0.0007105644793153217, "loss": 2.9013, "step": 10775 }, { "epoch": 0.8513158674063691, "grad_norm": 0.10303937281804063, "learning_rate": 0.0007102518687827511, "loss": 3.0829, "step": 10780 }, { "epoch": 0.8517107263430139, "grad_norm": 0.0969796157068504, "learning_rate": 0.0007099391583760895, "loss": 3.0934, "step": 10785 }, { "epoch": 0.8521055852796589, "grad_norm": 0.107539591467403, "learning_rate": 0.0007096263482438811, "loss": 2.8803, "step": 10790 }, { "epoch": 0.8525004442163038, "grad_norm": 0.0846313167697629, "learning_rate": 0.000709313438534717, "loss": 2.9313, "step": 10795 }, { "epoch": 0.8528953031529486, "grad_norm": 0.09946735814811701, "learning_rate": 0.0007090004293972365, "loss": 3.0963, "step": 10800 }, { "epoch": 0.8532901620895935, "grad_norm": 0.07835542642166657, "learning_rate": 0.0007086873209801252, "loss": 2.9436, "step": 10805 }, { "epoch": 0.8536850210262383, "grad_norm": 0.0878680791043063, "learning_rate": 0.0007083741134321165, "loss": 2.9118, "step": 10810 }, { "epoch": 0.8540798799628833, "grad_norm": 0.10919036894302385, "learning_rate": 0.0007080608069019903, "loss": 3.0973, "step": 10815 }, { "epoch": 0.8544747388995282, "grad_norm": 0.12393632628080643, "learning_rate": 0.000707747401538574, "loss": 2.9381, "step": 10820 }, { "epoch": 0.854869597836173, "grad_norm": 0.09829258108523749, "learning_rate": 0.0007074338974907418, "loss": 3.0272, "step": 10825 }, { "epoch": 0.8552644567728179, "grad_norm": 0.09564931978057321, "learning_rate": 0.0007071202949074148, "loss": 2.8548, "step": 10830 }, { "epoch": 0.8556593157094627, "grad_norm": 0.09975595688191939, "learning_rate": 0.0007068065939375606, "loss": 2.9634, "step": 10835 }, { "epoch": 0.8560541746461077, "grad_norm": 0.10824809482477021, "learning_rate": 0.0007064927947301943, "loss": 3.1323, "step": 10840 }, { "epoch": 0.8564490335827526, "grad_norm": 0.09028762323747669, "learning_rate": 0.0007061788974343766, "loss": 2.917, "step": 10845 }, { "epoch": 0.8568438925193974, "grad_norm": 0.0926852820875375, "learning_rate": 0.0007058649021992159, "loss": 3.0961, "step": 10850 }, { "epoch": 0.8572387514560423, "grad_norm": 0.1303718164154148, "learning_rate": 0.0007055508091738661, "loss": 3.1043, "step": 10855 }, { "epoch": 0.8576336103926873, "grad_norm": 0.09940167165764092, "learning_rate": 0.0007052366185075284, "loss": 2.926, "step": 10860 }, { "epoch": 0.8580284693293321, "grad_norm": 0.12779583169927092, "learning_rate": 0.0007049223303494498, "loss": 2.9942, "step": 10865 }, { "epoch": 0.858423328265977, "grad_norm": 0.10478101476313939, "learning_rate": 0.000704607944848924, "loss": 3.0831, "step": 10870 }, { "epoch": 0.8588181872026218, "grad_norm": 0.1103514608719597, "learning_rate": 0.0007042934621552907, "loss": 2.9181, "step": 10875 }, { "epoch": 0.8592130461392667, "grad_norm": 0.09034833199300213, "learning_rate": 0.0007039788824179359, "loss": 2.9902, "step": 10880 }, { "epoch": 0.8596079050759117, "grad_norm": 0.09807605654943302, "learning_rate": 0.0007036642057862914, "loss": 2.9735, "step": 10885 }, { "epoch": 0.8600027640125565, "grad_norm": 0.14280931760675042, "learning_rate": 0.0007033494324098356, "loss": 3.3577, "step": 10890 }, { "epoch": 0.8603976229492014, "grad_norm": 0.08397451659268937, "learning_rate": 0.0007030345624380923, "loss": 2.8946, "step": 10895 }, { "epoch": 0.8607924818858462, "grad_norm": 0.10441484941597577, "learning_rate": 0.0007027195960206315, "loss": 3.1411, "step": 10900 }, { "epoch": 0.8611873408224912, "grad_norm": 0.1483794212194042, "learning_rate": 0.0007024045333070688, "loss": 3.2575, "step": 10905 }, { "epoch": 0.8615821997591361, "grad_norm": 0.10944978647469851, "learning_rate": 0.0007020893744470657, "loss": 3.0534, "step": 10910 }, { "epoch": 0.8619770586957809, "grad_norm": 0.14223526557713306, "learning_rate": 0.0007017741195903296, "loss": 3.0528, "step": 10915 }, { "epoch": 0.8623719176324258, "grad_norm": 0.09716808382758434, "learning_rate": 0.000701458768886613, "loss": 2.9414, "step": 10920 }, { "epoch": 0.8627667765690707, "grad_norm": 0.099357737784927, "learning_rate": 0.000701143322485714, "loss": 3.2095, "step": 10925 }, { "epoch": 0.8631616355057156, "grad_norm": 0.11641089560401041, "learning_rate": 0.0007008277805374766, "loss": 3.1938, "step": 10930 }, { "epoch": 0.8635564944423605, "grad_norm": 0.10162862894456347, "learning_rate": 0.0007005121431917898, "loss": 3.0597, "step": 10935 }, { "epoch": 0.8639513533790053, "grad_norm": 0.16260075998233814, "learning_rate": 0.0007001964105985879, "loss": 2.8973, "step": 10940 }, { "epoch": 0.8643462123156502, "grad_norm": 0.11781781115325106, "learning_rate": 0.000699880582907851, "loss": 2.7875, "step": 10945 }, { "epoch": 0.8647410712522952, "grad_norm": 0.0945060018909876, "learning_rate": 0.0006995646602696034, "loss": 3.0099, "step": 10950 }, { "epoch": 0.86513593018894, "grad_norm": 0.10349815418785802, "learning_rate": 0.0006992486428339153, "loss": 3.1268, "step": 10955 }, { "epoch": 0.8655307891255849, "grad_norm": 0.08697392208926742, "learning_rate": 0.0006989325307509017, "loss": 2.8743, "step": 10960 }, { "epoch": 0.8659256480622297, "grad_norm": 0.09068773572975031, "learning_rate": 0.0006986163241707226, "loss": 2.7853, "step": 10965 }, { "epoch": 0.8663205069988746, "grad_norm": 0.09016411399322544, "learning_rate": 0.0006983000232435826, "loss": 2.9608, "step": 10970 }, { "epoch": 0.8667153659355196, "grad_norm": 0.11299835455913895, "learning_rate": 0.0006979836281197316, "loss": 3.0674, "step": 10975 }, { "epoch": 0.8671102248721644, "grad_norm": 0.09995847081395774, "learning_rate": 0.0006976671389494637, "loss": 2.9305, "step": 10980 }, { "epoch": 0.8675050838088093, "grad_norm": 0.09561726760304319, "learning_rate": 0.0006973505558831182, "loss": 2.9513, "step": 10985 }, { "epoch": 0.8678999427454542, "grad_norm": 0.12038037870181081, "learning_rate": 0.0006970338790710786, "loss": 3.1681, "step": 10990 }, { "epoch": 0.868294801682099, "grad_norm": 0.08643873124573435, "learning_rate": 0.0006967171086637732, "loss": 2.7938, "step": 10995 }, { "epoch": 0.868689660618744, "grad_norm": 0.10819186705370958, "learning_rate": 0.0006964002448116746, "loss": 3.0005, "step": 11000 }, { "epoch": 0.8690845195553888, "grad_norm": 0.08962292734850817, "learning_rate": 0.0006960832876652999, "loss": 2.9762, "step": 11005 }, { "epoch": 0.8694793784920337, "grad_norm": 0.09108256694728266, "learning_rate": 0.0006957662373752105, "loss": 3.1016, "step": 11010 }, { "epoch": 0.8698742374286786, "grad_norm": 0.0919525898616823, "learning_rate": 0.0006954490940920119, "loss": 2.8689, "step": 11015 }, { "epoch": 0.8702690963653235, "grad_norm": 0.09649389904555389, "learning_rate": 0.0006951318579663538, "loss": 3.002, "step": 11020 }, { "epoch": 0.8706639553019684, "grad_norm": 0.09294938510356435, "learning_rate": 0.0006948145291489301, "loss": 2.9391, "step": 11025 }, { "epoch": 0.8710588142386133, "grad_norm": 0.08634156919478775, "learning_rate": 0.0006944971077904788, "loss": 3.0055, "step": 11030 }, { "epoch": 0.8714536731752581, "grad_norm": 0.09675816140062535, "learning_rate": 0.0006941795940417819, "loss": 2.9297, "step": 11035 }, { "epoch": 0.871848532111903, "grad_norm": 0.10287274087871609, "learning_rate": 0.000693861988053665, "loss": 3.0363, "step": 11040 }, { "epoch": 0.8722433910485479, "grad_norm": 0.09847537092895689, "learning_rate": 0.0006935442899769975, "loss": 3.1239, "step": 11045 }, { "epoch": 0.8726382499851928, "grad_norm": 0.08876189178041381, "learning_rate": 0.000693226499962693, "loss": 2.9162, "step": 11050 }, { "epoch": 0.8730331089218377, "grad_norm": 0.09698414579293482, "learning_rate": 0.0006929086181617085, "loss": 2.9451, "step": 11055 }, { "epoch": 0.8734279678584825, "grad_norm": 0.09255094314470695, "learning_rate": 0.0006925906447250443, "loss": 3.1776, "step": 11060 }, { "epoch": 0.8738228267951275, "grad_norm": 0.10123725144822777, "learning_rate": 0.0006922725798037447, "loss": 2.9916, "step": 11065 }, { "epoch": 0.8742176857317723, "grad_norm": 0.07957761639979467, "learning_rate": 0.0006919544235488973, "loss": 3.0493, "step": 11070 }, { "epoch": 0.8746125446684172, "grad_norm": 0.1882537281783048, "learning_rate": 0.000691636176111633, "loss": 3.0736, "step": 11075 }, { "epoch": 0.8750074036050621, "grad_norm": 0.08560377972691896, "learning_rate": 0.0006913178376431262, "loss": 2.9142, "step": 11080 }, { "epoch": 0.8754022625417069, "grad_norm": 0.10061580180879115, "learning_rate": 0.0006909994082945942, "loss": 3.0341, "step": 11085 }, { "epoch": 0.8757971214783519, "grad_norm": 0.09260506357004093, "learning_rate": 0.0006906808882172978, "loss": 2.9452, "step": 11090 }, { "epoch": 0.8761919804149968, "grad_norm": 0.09425419228659328, "learning_rate": 0.0006903622775625409, "loss": 3.0031, "step": 11095 }, { "epoch": 0.8765868393516416, "grad_norm": 0.11643138333142952, "learning_rate": 0.00069004357648167, "loss": 3.1079, "step": 11100 }, { "epoch": 0.8769816982882865, "grad_norm": 0.10027335337840323, "learning_rate": 0.0006897247851260752, "loss": 3.0749, "step": 11105 }, { "epoch": 0.8773765572249314, "grad_norm": 0.10013066993621972, "learning_rate": 0.0006894059036471889, "loss": 3.0614, "step": 11110 }, { "epoch": 0.8777714161615763, "grad_norm": 0.10255088624373707, "learning_rate": 0.0006890869321964868, "loss": 3.0707, "step": 11115 }, { "epoch": 0.8781662750982212, "grad_norm": 0.11461109133026354, "learning_rate": 0.0006887678709254867, "loss": 3.1437, "step": 11120 }, { "epoch": 0.878561134034866, "grad_norm": 0.17552720390291587, "learning_rate": 0.00068844871998575, "loss": 2.7593, "step": 11125 }, { "epoch": 0.878955992971511, "grad_norm": 0.11214929611005386, "learning_rate": 0.0006881294795288797, "loss": 3.1328, "step": 11130 }, { "epoch": 0.8793508519081558, "grad_norm": 0.08897605997924615, "learning_rate": 0.0006878101497065219, "loss": 2.8944, "step": 11135 }, { "epoch": 0.8797457108448007, "grad_norm": 0.137337127660949, "learning_rate": 0.0006874907306703652, "loss": 2.9826, "step": 11140 }, { "epoch": 0.8801405697814456, "grad_norm": 0.20985654112323163, "learning_rate": 0.0006871712225721403, "loss": 3.0918, "step": 11145 }, { "epoch": 0.8805354287180904, "grad_norm": 0.10992725210419525, "learning_rate": 0.0006868516255636202, "loss": 2.8807, "step": 11150 }, { "epoch": 0.8809302876547354, "grad_norm": 0.11939788753536287, "learning_rate": 0.0006865319397966203, "loss": 3.0104, "step": 11155 }, { "epoch": 0.8813251465913803, "grad_norm": 0.10025593287893984, "learning_rate": 0.0006862121654229983, "loss": 2.8901, "step": 11160 }, { "epoch": 0.8817200055280251, "grad_norm": 0.08935483537783218, "learning_rate": 0.0006858923025946536, "loss": 2.9039, "step": 11165 }, { "epoch": 0.88211486446467, "grad_norm": 0.0887721205894587, "learning_rate": 0.0006855723514635278, "loss": 2.9664, "step": 11170 }, { "epoch": 0.8825097234013148, "grad_norm": 0.0918089385848176, "learning_rate": 0.0006852523121816046, "loss": 3.0914, "step": 11175 }, { "epoch": 0.8829045823379598, "grad_norm": 0.10443576707508806, "learning_rate": 0.0006849321849009093, "loss": 2.993, "step": 11180 }, { "epoch": 0.8832994412746047, "grad_norm": 0.09958409003108284, "learning_rate": 0.0006846119697735093, "loss": 3.0084, "step": 11185 }, { "epoch": 0.8836943002112495, "grad_norm": 0.10840865479321321, "learning_rate": 0.0006842916669515135, "loss": 3.0499, "step": 11190 }, { "epoch": 0.8840891591478944, "grad_norm": 0.14232175598183608, "learning_rate": 0.0006839712765870725, "loss": 2.9941, "step": 11195 }, { "epoch": 0.8844840180845392, "grad_norm": 0.11643560891107854, "learning_rate": 0.0006836507988323784, "loss": 2.9872, "step": 11200 }, { "epoch": 0.8848788770211842, "grad_norm": 0.08681151551735349, "learning_rate": 0.0006833302338396652, "loss": 2.9147, "step": 11205 }, { "epoch": 0.8852737359578291, "grad_norm": 0.09437411237432816, "learning_rate": 0.0006830095817612078, "loss": 2.9594, "step": 11210 }, { "epoch": 0.8856685948944739, "grad_norm": 0.10090457294573343, "learning_rate": 0.0006826888427493229, "loss": 2.9153, "step": 11215 }, { "epoch": 0.8860634538311188, "grad_norm": 0.10314774854181435, "learning_rate": 0.0006823680169563681, "loss": 3.0938, "step": 11220 }, { "epoch": 0.8864583127677638, "grad_norm": 0.09383438713197201, "learning_rate": 0.0006820471045347427, "loss": 2.8706, "step": 11225 }, { "epoch": 0.8868531717044086, "grad_norm": 0.14296262316875916, "learning_rate": 0.0006817261056368868, "loss": 2.9509, "step": 11230 }, { "epoch": 0.8872480306410535, "grad_norm": 0.2063520574224679, "learning_rate": 0.0006814050204152817, "loss": 3.194, "step": 11235 }, { "epoch": 0.8876428895776983, "grad_norm": 0.0968780845883407, "learning_rate": 0.0006810838490224496, "loss": 2.8549, "step": 11240 }, { "epoch": 0.8880377485143433, "grad_norm": 0.11332366062983303, "learning_rate": 0.0006807625916109539, "loss": 2.9374, "step": 11245 }, { "epoch": 0.8884326074509882, "grad_norm": 0.10625448046810275, "learning_rate": 0.0006804412483333984, "loss": 2.9104, "step": 11250 }, { "epoch": 0.888827466387633, "grad_norm": 0.10098473560691795, "learning_rate": 0.0006801198193424281, "loss": 2.952, "step": 11255 }, { "epoch": 0.8892223253242779, "grad_norm": 0.12849762218493252, "learning_rate": 0.0006797983047907286, "loss": 2.8862, "step": 11260 }, { "epoch": 0.8896171842609227, "grad_norm": 0.09841751071796599, "learning_rate": 0.0006794767048310259, "loss": 3.0752, "step": 11265 }, { "epoch": 0.8900120431975677, "grad_norm": 0.09861665979542425, "learning_rate": 0.0006791550196160874, "loss": 2.9238, "step": 11270 }, { "epoch": 0.8904069021342126, "grad_norm": 0.09102110460597113, "learning_rate": 0.0006788332492987198, "loss": 2.9551, "step": 11275 }, { "epoch": 0.8908017610708574, "grad_norm": 0.11452495238770077, "learning_rate": 0.0006785113940317711, "loss": 2.862, "step": 11280 }, { "epoch": 0.8911966200075023, "grad_norm": 0.09482515714342644, "learning_rate": 0.0006781894539681292, "loss": 3.087, "step": 11285 }, { "epoch": 0.8915914789441473, "grad_norm": 0.09347173880847762, "learning_rate": 0.0006778674292607224, "loss": 3.1432, "step": 11290 }, { "epoch": 0.8919863378807921, "grad_norm": 0.1478392151688219, "learning_rate": 0.0006775453200625194, "loss": 2.8263, "step": 11295 }, { "epoch": 0.892381196817437, "grad_norm": 0.08624633274533762, "learning_rate": 0.000677223126526529, "loss": 3.0945, "step": 11300 }, { "epoch": 0.8927760557540818, "grad_norm": 0.09363576103550217, "learning_rate": 0.0006769008488057998, "loss": 2.8338, "step": 11305 }, { "epoch": 0.8931709146907267, "grad_norm": 0.10019682532874716, "learning_rate": 0.0006765784870534205, "loss": 3.1508, "step": 11310 }, { "epoch": 0.8935657736273717, "grad_norm": 0.09879313876219285, "learning_rate": 0.0006762560414225198, "loss": 3.0455, "step": 11315 }, { "epoch": 0.8939606325640165, "grad_norm": 0.08504815971856176, "learning_rate": 0.0006759335120662664, "loss": 2.9486, "step": 11320 }, { "epoch": 0.8943554915006614, "grad_norm": 0.13831538805533497, "learning_rate": 0.0006756108991378684, "loss": 3.185, "step": 11325 }, { "epoch": 0.8947503504373062, "grad_norm": 0.08875110119433165, "learning_rate": 0.0006752882027905735, "loss": 2.8745, "step": 11330 }, { "epoch": 0.8951452093739511, "grad_norm": 0.11396360405875303, "learning_rate": 0.0006749654231776698, "loss": 3.0099, "step": 11335 }, { "epoch": 0.8955400683105961, "grad_norm": 0.08742747012006405, "learning_rate": 0.0006746425604524842, "loss": 2.8121, "step": 11340 }, { "epoch": 0.8959349272472409, "grad_norm": 0.0919834827532405, "learning_rate": 0.0006743196147683835, "loss": 2.9464, "step": 11345 }, { "epoch": 0.8963297861838858, "grad_norm": 0.09677892348954567, "learning_rate": 0.0006739965862787733, "loss": 3.0074, "step": 11350 }, { "epoch": 0.8967246451205307, "grad_norm": 0.09008371372816061, "learning_rate": 0.0006736734751370995, "loss": 3.0573, "step": 11355 }, { "epoch": 0.8971195040571756, "grad_norm": 0.20712190920017054, "learning_rate": 0.0006733502814968465, "loss": 2.8103, "step": 11360 }, { "epoch": 0.8975143629938205, "grad_norm": 0.10432093816025542, "learning_rate": 0.0006730270055115383, "loss": 2.9698, "step": 11365 }, { "epoch": 0.8979092219304653, "grad_norm": 0.10919320829194276, "learning_rate": 0.0006727036473347373, "loss": 3.1723, "step": 11370 }, { "epoch": 0.8983040808671102, "grad_norm": 0.11934759326786158, "learning_rate": 0.0006723802071200461, "loss": 2.9248, "step": 11375 }, { "epoch": 0.8986989398037551, "grad_norm": 0.09164343717537998, "learning_rate": 0.0006720566850211054, "loss": 2.889, "step": 11380 }, { "epoch": 0.8990937987404, "grad_norm": 0.10025805315551463, "learning_rate": 0.000671733081191595, "loss": 3.1504, "step": 11385 }, { "epoch": 0.8994886576770449, "grad_norm": 0.10075077224277323, "learning_rate": 0.0006714093957852338, "loss": 2.9984, "step": 11390 }, { "epoch": 0.8998835166136897, "grad_norm": 0.09869115183522309, "learning_rate": 0.0006710856289557789, "loss": 2.8918, "step": 11395 }, { "epoch": 0.9002783755503346, "grad_norm": 0.09943383126520346, "learning_rate": 0.0006707617808570266, "loss": 2.9716, "step": 11400 }, { "epoch": 0.9006732344869796, "grad_norm": 0.11316318933091936, "learning_rate": 0.0006704378516428116, "loss": 2.9549, "step": 11405 }, { "epoch": 0.9010680934236244, "grad_norm": 0.08447603963018453, "learning_rate": 0.0006701138414670071, "loss": 2.9163, "step": 11410 }, { "epoch": 0.9014629523602693, "grad_norm": 0.0899682397569705, "learning_rate": 0.0006697897504835249, "loss": 2.9042, "step": 11415 }, { "epoch": 0.9018578112969142, "grad_norm": 0.08980382302068568, "learning_rate": 0.0006694655788463148, "loss": 3.0226, "step": 11420 }, { "epoch": 0.902252670233559, "grad_norm": 0.12528009066428344, "learning_rate": 0.0006691413267093654, "loss": 2.7934, "step": 11425 }, { "epoch": 0.902647529170204, "grad_norm": 0.09467303236165887, "learning_rate": 0.0006688169942267035, "loss": 2.9217, "step": 11430 }, { "epoch": 0.9030423881068488, "grad_norm": 0.08389585386904956, "learning_rate": 0.0006684925815523936, "loss": 3.1065, "step": 11435 }, { "epoch": 0.9034372470434937, "grad_norm": 0.10466034193303676, "learning_rate": 0.0006681680888405387, "loss": 3.0701, "step": 11440 }, { "epoch": 0.9038321059801386, "grad_norm": 0.14342251784087146, "learning_rate": 0.0006678435162452796, "loss": 3.1046, "step": 11445 }, { "epoch": 0.9042269649167834, "grad_norm": 0.09174854667794385, "learning_rate": 0.0006675188639207954, "loss": 2.961, "step": 11450 }, { "epoch": 0.9046218238534284, "grad_norm": 0.10855491704352598, "learning_rate": 0.0006671941320213027, "loss": 2.8139, "step": 11455 }, { "epoch": 0.9050166827900733, "grad_norm": 0.10583191723530022, "learning_rate": 0.0006668693207010562, "loss": 2.9387, "step": 11460 }, { "epoch": 0.9054115417267181, "grad_norm": 0.09065457272666269, "learning_rate": 0.0006665444301143477, "loss": 2.7845, "step": 11465 }, { "epoch": 0.905806400663363, "grad_norm": 0.08871771911773865, "learning_rate": 0.0006662194604155075, "loss": 2.8931, "step": 11470 }, { "epoch": 0.9062012596000079, "grad_norm": 0.11448148365772354, "learning_rate": 0.000665894411758903, "loss": 2.9957, "step": 11475 }, { "epoch": 0.9065961185366528, "grad_norm": 0.09784785546471084, "learning_rate": 0.0006655692842989393, "loss": 2.9081, "step": 11480 }, { "epoch": 0.9069909774732977, "grad_norm": 0.11407180080872711, "learning_rate": 0.0006652440781900585, "loss": 3.07, "step": 11485 }, { "epoch": 0.9073858364099425, "grad_norm": 0.08634052658442476, "learning_rate": 0.0006649187935867407, "loss": 2.8879, "step": 11490 }, { "epoch": 0.9077806953465875, "grad_norm": 0.12315723137455817, "learning_rate": 0.000664593430643503, "loss": 2.9207, "step": 11495 }, { "epoch": 0.9081755542832323, "grad_norm": 0.09332590010089442, "learning_rate": 0.0006642679895148995, "loss": 2.9348, "step": 11500 }, { "epoch": 0.9085704132198772, "grad_norm": 0.0905765751629061, "learning_rate": 0.0006639424703555216, "loss": 2.9555, "step": 11505 }, { "epoch": 0.9089652721565221, "grad_norm": 0.09098352318138292, "learning_rate": 0.000663616873319998, "loss": 2.9468, "step": 11510 }, { "epoch": 0.9093601310931669, "grad_norm": 0.09016977435179743, "learning_rate": 0.000663291198562994, "loss": 2.9081, "step": 11515 }, { "epoch": 0.9097549900298119, "grad_norm": 0.10475669576831179, "learning_rate": 0.0006629654462392122, "loss": 3.1246, "step": 11520 }, { "epoch": 0.9101498489664568, "grad_norm": 0.11709184849351029, "learning_rate": 0.0006626396165033915, "loss": 3.0546, "step": 11525 }, { "epoch": 0.9105447079031016, "grad_norm": 0.10690744632681574, "learning_rate": 0.0006623137095103085, "loss": 2.8461, "step": 11530 }, { "epoch": 0.9109395668397465, "grad_norm": 0.08551160166815636, "learning_rate": 0.0006619877254147754, "loss": 2.9512, "step": 11535 }, { "epoch": 0.9113344257763913, "grad_norm": 0.10549643013249876, "learning_rate": 0.0006616616643716419, "loss": 3.0288, "step": 11540 }, { "epoch": 0.9117292847130363, "grad_norm": 0.09139098932035553, "learning_rate": 0.0006613355265357937, "loss": 2.9656, "step": 11545 }, { "epoch": 0.9121241436496812, "grad_norm": 0.0884697137927796, "learning_rate": 0.0006610093120621531, "loss": 3.0621, "step": 11550 }, { "epoch": 0.912519002586326, "grad_norm": 0.0823782802437238, "learning_rate": 0.0006606830211056791, "loss": 2.7701, "step": 11555 }, { "epoch": 0.9129138615229709, "grad_norm": 0.0905531720223422, "learning_rate": 0.0006603566538213666, "loss": 2.8696, "step": 11560 }, { "epoch": 0.9133087204596158, "grad_norm": 0.09459829516694046, "learning_rate": 0.0006600302103642471, "loss": 3.1019, "step": 11565 }, { "epoch": 0.9137035793962607, "grad_norm": 0.22003341232948337, "learning_rate": 0.0006597036908893883, "loss": 2.7975, "step": 11570 }, { "epoch": 0.9140984383329056, "grad_norm": 0.10402902142445562, "learning_rate": 0.0006593770955518937, "loss": 2.8414, "step": 11575 }, { "epoch": 0.9144932972695504, "grad_norm": 0.1082599864774481, "learning_rate": 0.000659050424506903, "loss": 2.9232, "step": 11580 }, { "epoch": 0.9148881562061953, "grad_norm": 0.0999548587539927, "learning_rate": 0.0006587236779095921, "loss": 2.8549, "step": 11585 }, { "epoch": 0.9152830151428403, "grad_norm": 0.11826584699654592, "learning_rate": 0.0006583968559151721, "loss": 3.0677, "step": 11590 }, { "epoch": 0.9156778740794851, "grad_norm": 0.11894032623626795, "learning_rate": 0.0006580699586788907, "loss": 3.2674, "step": 11595 }, { "epoch": 0.91607273301613, "grad_norm": 0.11395204983287194, "learning_rate": 0.0006577429863560307, "loss": 2.7528, "step": 11600 }, { "epoch": 0.9164675919527748, "grad_norm": 0.09561741086918192, "learning_rate": 0.0006574159391019113, "loss": 3.1482, "step": 11605 }, { "epoch": 0.9168624508894198, "grad_norm": 0.09766610970451362, "learning_rate": 0.0006570888170718867, "loss": 2.9859, "step": 11610 }, { "epoch": 0.9172573098260647, "grad_norm": 0.09387753238650634, "learning_rate": 0.0006567616204213466, "loss": 3.1849, "step": 11615 }, { "epoch": 0.9176521687627095, "grad_norm": 0.09163115421258097, "learning_rate": 0.0006564343493057167, "loss": 3.178, "step": 11620 }, { "epoch": 0.9180470276993544, "grad_norm": 0.0987703981641251, "learning_rate": 0.0006561070038804575, "loss": 2.8921, "step": 11625 }, { "epoch": 0.9184418866359992, "grad_norm": 0.09167770603533196, "learning_rate": 0.0006557795843010652, "loss": 3.0391, "step": 11630 }, { "epoch": 0.9188367455726442, "grad_norm": 0.08929152321337232, "learning_rate": 0.0006554520907230706, "loss": 2.9557, "step": 11635 }, { "epoch": 0.9192316045092891, "grad_norm": 0.09133278572334777, "learning_rate": 0.0006551245233020404, "loss": 2.8708, "step": 11640 }, { "epoch": 0.9196264634459339, "grad_norm": 0.0937638791702323, "learning_rate": 0.0006547968821935759, "loss": 2.8643, "step": 11645 }, { "epoch": 0.9200213223825788, "grad_norm": 0.11358252111672944, "learning_rate": 0.0006544691675533139, "loss": 3.1107, "step": 11650 }, { "epoch": 0.9204161813192238, "grad_norm": 0.19104196479523858, "learning_rate": 0.0006541413795369256, "loss": 2.9157, "step": 11655 }, { "epoch": 0.9208110402558686, "grad_norm": 0.10250034987819243, "learning_rate": 0.0006538135183001171, "loss": 2.9503, "step": 11660 }, { "epoch": 0.9212058991925135, "grad_norm": 0.10595090659585765, "learning_rate": 0.0006534855839986295, "loss": 2.9407, "step": 11665 }, { "epoch": 0.9216007581291583, "grad_norm": 0.12074598859823196, "learning_rate": 0.0006531575767882386, "loss": 2.9149, "step": 11670 }, { "epoch": 0.9219956170658032, "grad_norm": 0.1552217887380877, "learning_rate": 0.0006528294968247549, "loss": 2.8603, "step": 11675 }, { "epoch": 0.9223904760024482, "grad_norm": 0.1093754216730788, "learning_rate": 0.0006525013442640232, "loss": 3.1791, "step": 11680 }, { "epoch": 0.922785334939093, "grad_norm": 0.15841192639207072, "learning_rate": 0.0006521731192619229, "loss": 2.8225, "step": 11685 }, { "epoch": 0.9231801938757379, "grad_norm": 0.10490806917843708, "learning_rate": 0.0006518448219743678, "loss": 2.8665, "step": 11690 }, { "epoch": 0.9235750528123827, "grad_norm": 0.10605388855869573, "learning_rate": 0.0006515164525573061, "loss": 3.0328, "step": 11695 }, { "epoch": 0.9239699117490277, "grad_norm": 0.1101065580308737, "learning_rate": 0.0006511880111667202, "loss": 3.0178, "step": 11700 }, { "epoch": 0.9243647706856726, "grad_norm": 0.08892202622632095, "learning_rate": 0.0006508594979586269, "loss": 3.0314, "step": 11705 }, { "epoch": 0.9247596296223174, "grad_norm": 0.09860088841106801, "learning_rate": 0.0006505309130890766, "loss": 2.889, "step": 11710 }, { "epoch": 0.9251544885589623, "grad_norm": 0.108343649220901, "learning_rate": 0.0006502022567141545, "loss": 2.8822, "step": 11715 }, { "epoch": 0.9255493474956072, "grad_norm": 0.11576936554764725, "learning_rate": 0.0006498735289899789, "loss": 2.9247, "step": 11720 }, { "epoch": 0.9259442064322521, "grad_norm": 0.10745719084258047, "learning_rate": 0.0006495447300727027, "loss": 2.7724, "step": 11725 }, { "epoch": 0.926339065368897, "grad_norm": 0.09671404977263368, "learning_rate": 0.0006492158601185123, "loss": 2.9915, "step": 11730 }, { "epoch": 0.9267339243055418, "grad_norm": 0.0825022917851804, "learning_rate": 0.0006488869192836278, "loss": 2.7709, "step": 11735 }, { "epoch": 0.9271287832421867, "grad_norm": 0.098198854136399, "learning_rate": 0.0006485579077243032, "loss": 3.0343, "step": 11740 }, { "epoch": 0.9275236421788317, "grad_norm": 0.10165868818874406, "learning_rate": 0.000648228825596826, "loss": 3.0283, "step": 11745 }, { "epoch": 0.9279185011154765, "grad_norm": 0.09234558650514067, "learning_rate": 0.0006478996730575168, "loss": 2.8319, "step": 11750 }, { "epoch": 0.9283133600521214, "grad_norm": 0.09397408360151197, "learning_rate": 0.0006475704502627304, "loss": 3.0797, "step": 11755 }, { "epoch": 0.9287082189887662, "grad_norm": 0.09315525920770555, "learning_rate": 0.0006472411573688548, "loss": 2.8597, "step": 11760 }, { "epoch": 0.9291030779254111, "grad_norm": 0.1603068839111331, "learning_rate": 0.0006469117945323106, "loss": 3.2317, "step": 11765 }, { "epoch": 0.9294979368620561, "grad_norm": 0.13050108166282517, "learning_rate": 0.0006465823619095523, "loss": 2.879, "step": 11770 }, { "epoch": 0.9298927957987009, "grad_norm": 0.08643474079332743, "learning_rate": 0.0006462528596570675, "loss": 2.904, "step": 11775 }, { "epoch": 0.9302876547353458, "grad_norm": 0.10774750035707674, "learning_rate": 0.0006459232879313765, "loss": 2.8449, "step": 11780 }, { "epoch": 0.9306825136719907, "grad_norm": 0.1097698921784926, "learning_rate": 0.0006455936468890331, "loss": 3.146, "step": 11785 }, { "epoch": 0.9310773726086355, "grad_norm": 0.11760378798994683, "learning_rate": 0.0006452639366866235, "loss": 3.0016, "step": 11790 }, { "epoch": 0.9314722315452805, "grad_norm": 0.11157974442350277, "learning_rate": 0.0006449341574807675, "loss": 2.9364, "step": 11795 }, { "epoch": 0.9318670904819253, "grad_norm": 0.09913191070744541, "learning_rate": 0.0006446043094281167, "loss": 2.8341, "step": 11800 }, { "epoch": 0.9322619494185702, "grad_norm": 0.09327426755127521, "learning_rate": 0.0006442743926853564, "loss": 2.8553, "step": 11805 }, { "epoch": 0.9326568083552151, "grad_norm": 0.12010255760661588, "learning_rate": 0.0006439444074092036, "loss": 3.0066, "step": 11810 }, { "epoch": 0.93305166729186, "grad_norm": 0.09218691568275697, "learning_rate": 0.0006436143537564085, "loss": 2.7994, "step": 11815 }, { "epoch": 0.9334465262285049, "grad_norm": 0.11096763508841101, "learning_rate": 0.0006432842318837536, "loss": 3.0065, "step": 11820 }, { "epoch": 0.9338413851651497, "grad_norm": 0.11199122342024294, "learning_rate": 0.0006429540419480535, "loss": 2.9167, "step": 11825 }, { "epoch": 0.9342362441017946, "grad_norm": 0.08753858005505089, "learning_rate": 0.0006426237841061556, "loss": 2.8616, "step": 11830 }, { "epoch": 0.9346311030384395, "grad_norm": 0.13000009049728048, "learning_rate": 0.0006422934585149396, "loss": 2.8609, "step": 11835 }, { "epoch": 0.9350259619750844, "grad_norm": 0.11941571407574318, "learning_rate": 0.0006419630653313169, "loss": 3.0457, "step": 11840 }, { "epoch": 0.9354208209117293, "grad_norm": 0.14019302102629788, "learning_rate": 0.0006416326047122314, "loss": 2.9835, "step": 11845 }, { "epoch": 0.9358156798483742, "grad_norm": 0.14068966156196508, "learning_rate": 0.0006413020768146588, "loss": 3.0969, "step": 11850 }, { "epoch": 0.936210538785019, "grad_norm": 0.10718264698619705, "learning_rate": 0.0006409714817956068, "loss": 2.9952, "step": 11855 }, { "epoch": 0.936605397721664, "grad_norm": 0.08557400355102629, "learning_rate": 0.0006406408198121152, "loss": 2.9291, "step": 11860 }, { "epoch": 0.9370002566583088, "grad_norm": 0.14555664349688877, "learning_rate": 0.0006403100910212551, "loss": 2.9037, "step": 11865 }, { "epoch": 0.9373951155949537, "grad_norm": 0.08538177059804027, "learning_rate": 0.0006399792955801301, "loss": 2.9927, "step": 11870 }, { "epoch": 0.9377899745315986, "grad_norm": 0.14629561347296016, "learning_rate": 0.0006396484336458749, "loss": 2.8136, "step": 11875 }, { "epoch": 0.9381848334682434, "grad_norm": 0.08847589733134227, "learning_rate": 0.0006393175053756559, "loss": 2.9593, "step": 11880 }, { "epoch": 0.9385796924048884, "grad_norm": 0.09218680667775285, "learning_rate": 0.0006389865109266708, "loss": 2.8263, "step": 11885 }, { "epoch": 0.9389745513415333, "grad_norm": 0.09644740052762993, "learning_rate": 0.0006386554504561495, "loss": 3.0277, "step": 11890 }, { "epoch": 0.9393694102781781, "grad_norm": 0.10391916459785563, "learning_rate": 0.0006383243241213524, "loss": 2.9089, "step": 11895 }, { "epoch": 0.939764269214823, "grad_norm": 0.08528870677490216, "learning_rate": 0.0006379931320795713, "loss": 3.1194, "step": 11900 }, { "epoch": 0.9401591281514678, "grad_norm": 0.09300834012725463, "learning_rate": 0.00063766187448813, "loss": 3.0593, "step": 11905 }, { "epoch": 0.9405539870881128, "grad_norm": 0.10586818216953678, "learning_rate": 0.0006373305515043824, "loss": 3.0485, "step": 11910 }, { "epoch": 0.9409488460247577, "grad_norm": 0.1106690453999811, "learning_rate": 0.0006369991632857141, "loss": 3.1765, "step": 11915 }, { "epoch": 0.9413437049614025, "grad_norm": 0.13051906830656632, "learning_rate": 0.0006366677099895416, "loss": 3.1298, "step": 11920 }, { "epoch": 0.9417385638980474, "grad_norm": 0.1045912957848481, "learning_rate": 0.0006363361917733121, "loss": 2.8893, "step": 11925 }, { "epoch": 0.9421334228346923, "grad_norm": 0.08649959162520746, "learning_rate": 0.000636004608794504, "loss": 2.8628, "step": 11930 }, { "epoch": 0.9425282817713372, "grad_norm": 0.08530648256344021, "learning_rate": 0.0006356729612106261, "loss": 2.8847, "step": 11935 }, { "epoch": 0.9429231407079821, "grad_norm": 0.09014245821805648, "learning_rate": 0.0006353412491792176, "loss": 2.9399, "step": 11940 }, { "epoch": 0.9433179996446269, "grad_norm": 0.09943358419731262, "learning_rate": 0.0006350094728578495, "loss": 2.9075, "step": 11945 }, { "epoch": 0.9437128585812719, "grad_norm": 0.10322176773741709, "learning_rate": 0.0006346776324041222, "loss": 2.9224, "step": 11950 }, { "epoch": 0.9441077175179168, "grad_norm": 0.1448480024226695, "learning_rate": 0.0006343457279756669, "loss": 3.3627, "step": 11955 }, { "epoch": 0.9445025764545616, "grad_norm": 0.09419521648345537, "learning_rate": 0.0006340137597301452, "loss": 2.8255, "step": 11960 }, { "epoch": 0.9448974353912065, "grad_norm": 0.08524705218261693, "learning_rate": 0.0006336817278252493, "loss": 2.9751, "step": 11965 }, { "epoch": 0.9452922943278513, "grad_norm": 0.09834365818895359, "learning_rate": 0.0006333496324187011, "loss": 3.0912, "step": 11970 }, { "epoch": 0.9456871532644963, "grad_norm": 0.09924094111678028, "learning_rate": 0.0006330174736682532, "loss": 3.0264, "step": 11975 }, { "epoch": 0.9460820122011412, "grad_norm": 0.18573324245085807, "learning_rate": 0.0006326852517316879, "loss": 2.9497, "step": 11980 }, { "epoch": 0.946476871137786, "grad_norm": 0.09329635023778443, "learning_rate": 0.0006323529667668176, "loss": 2.8362, "step": 11985 }, { "epoch": 0.9468717300744309, "grad_norm": 0.10234826327235093, "learning_rate": 0.000632020618931485, "loss": 2.9984, "step": 11990 }, { "epoch": 0.9472665890110757, "grad_norm": 0.09477725751801634, "learning_rate": 0.000631688208383562, "loss": 2.826, "step": 11995 }, { "epoch": 0.9476614479477207, "grad_norm": 0.13856858612553705, "learning_rate": 0.0006313557352809507, "loss": 2.8896, "step": 12000 }, { "epoch": 0.9480563068843656, "grad_norm": 0.10240633811370134, "learning_rate": 0.0006310231997815832, "loss": 2.828, "step": 12005 }, { "epoch": 0.9484511658210104, "grad_norm": 0.09021343286399146, "learning_rate": 0.0006306906020434205, "loss": 3.0973, "step": 12010 }, { "epoch": 0.9488460247576553, "grad_norm": 0.09057430977155972, "learning_rate": 0.0006303579422244537, "loss": 3.1061, "step": 12015 }, { "epoch": 0.9492408836943003, "grad_norm": 0.11648394712910999, "learning_rate": 0.0006300252204827033, "loss": 3.2538, "step": 12020 }, { "epoch": 0.9496357426309451, "grad_norm": 0.15256512182759896, "learning_rate": 0.0006296924369762194, "loss": 3.2773, "step": 12025 }, { "epoch": 0.95003060156759, "grad_norm": 0.09097294684492134, "learning_rate": 0.0006293595918630808, "loss": 3.0014, "step": 12030 }, { "epoch": 0.9504254605042348, "grad_norm": 0.08776406464252155, "learning_rate": 0.0006290266853013961, "loss": 2.8692, "step": 12035 }, { "epoch": 0.9508203194408797, "grad_norm": 0.08683339517945594, "learning_rate": 0.0006286937174493031, "loss": 2.8824, "step": 12040 }, { "epoch": 0.9512151783775247, "grad_norm": 0.10768441204372432, "learning_rate": 0.0006283606884649685, "loss": 3.071, "step": 12045 }, { "epoch": 0.9516100373141695, "grad_norm": 0.10459135037470647, "learning_rate": 0.0006280275985065882, "loss": 3.0779, "step": 12050 }, { "epoch": 0.9520048962508144, "grad_norm": 0.09575511006597301, "learning_rate": 0.0006276944477323869, "loss": 2.9345, "step": 12055 }, { "epoch": 0.9523997551874592, "grad_norm": 0.09302880226649424, "learning_rate": 0.0006273612363006182, "loss": 2.9291, "step": 12060 }, { "epoch": 0.9527946141241042, "grad_norm": 0.09700579475684867, "learning_rate": 0.0006270279643695646, "loss": 2.9579, "step": 12065 }, { "epoch": 0.9531894730607491, "grad_norm": 0.11958814438004235, "learning_rate": 0.0006266946320975377, "loss": 3.0678, "step": 12070 }, { "epoch": 0.9535843319973939, "grad_norm": 0.09769544707313982, "learning_rate": 0.000626361239642877, "loss": 3.0633, "step": 12075 }, { "epoch": 0.9539791909340388, "grad_norm": 0.09410677764280032, "learning_rate": 0.000626027787163951, "loss": 3.0427, "step": 12080 }, { "epoch": 0.9543740498706837, "grad_norm": 0.10097928188304056, "learning_rate": 0.0006256942748191569, "loss": 2.9958, "step": 12085 }, { "epoch": 0.9547689088073286, "grad_norm": 0.11975496132445308, "learning_rate": 0.0006253607027669199, "loss": 3.1243, "step": 12090 }, { "epoch": 0.9551637677439735, "grad_norm": 0.09851589055901613, "learning_rate": 0.0006250270711656938, "loss": 3.0383, "step": 12095 }, { "epoch": 0.9555586266806183, "grad_norm": 0.16299911879273488, "learning_rate": 0.0006246933801739607, "loss": 3.0581, "step": 12100 }, { "epoch": 0.9559534856172632, "grad_norm": 0.09905123382523125, "learning_rate": 0.0006243596299502311, "loss": 3.0558, "step": 12105 }, { "epoch": 0.9563483445539082, "grad_norm": 0.0938583643024325, "learning_rate": 0.0006240258206530433, "loss": 2.8862, "step": 12110 }, { "epoch": 0.956743203490553, "grad_norm": 0.0797894522961717, "learning_rate": 0.0006236919524409638, "loss": 2.8894, "step": 12115 }, { "epoch": 0.9571380624271979, "grad_norm": 0.0878472007178943, "learning_rate": 0.0006233580254725869, "loss": 2.9836, "step": 12120 }, { "epoch": 0.9575329213638427, "grad_norm": 0.10295857490709152, "learning_rate": 0.0006230240399065352, "loss": 2.9318, "step": 12125 }, { "epoch": 0.9579277803004876, "grad_norm": 0.10058395696016088, "learning_rate": 0.0006226899959014587, "loss": 3.1041, "step": 12130 }, { "epoch": 0.9583226392371326, "grad_norm": 0.10705897520606387, "learning_rate": 0.0006223558936160357, "loss": 2.7816, "step": 12135 }, { "epoch": 0.9587174981737774, "grad_norm": 0.11871131726066957, "learning_rate": 0.0006220217332089715, "loss": 2.9723, "step": 12140 }, { "epoch": 0.9591123571104223, "grad_norm": 0.1261322188949939, "learning_rate": 0.0006216875148389996, "loss": 2.8852, "step": 12145 }, { "epoch": 0.9595072160470672, "grad_norm": 0.17943831931010165, "learning_rate": 0.0006213532386648808, "loss": 3.0321, "step": 12150 }, { "epoch": 0.959902074983712, "grad_norm": 0.10593361960324398, "learning_rate": 0.0006210189048454032, "loss": 2.8654, "step": 12155 }, { "epoch": 0.960296933920357, "grad_norm": 0.1556950579757148, "learning_rate": 0.0006206845135393828, "loss": 3.1409, "step": 12160 }, { "epoch": 0.9606917928570018, "grad_norm": 0.09175712086376568, "learning_rate": 0.0006203500649056621, "loss": 2.963, "step": 12165 }, { "epoch": 0.9610866517936467, "grad_norm": 0.09752167023375971, "learning_rate": 0.0006200155591031115, "loss": 3.0561, "step": 12170 }, { "epoch": 0.9614815107302916, "grad_norm": 0.14827354182861646, "learning_rate": 0.0006196809962906284, "loss": 3.0798, "step": 12175 }, { "epoch": 0.9618763696669365, "grad_norm": 0.10504571669711411, "learning_rate": 0.000619346376627137, "loss": 3.0168, "step": 12180 }, { "epoch": 0.9622712286035814, "grad_norm": 0.12542098256827333, "learning_rate": 0.000619011700271589, "loss": 2.9609, "step": 12185 }, { "epoch": 0.9626660875402262, "grad_norm": 0.11290935161363864, "learning_rate": 0.0006186769673829628, "loss": 2.8849, "step": 12190 }, { "epoch": 0.9630609464768711, "grad_norm": 0.09121880037558304, "learning_rate": 0.0006183421781202632, "loss": 3.0253, "step": 12195 }, { "epoch": 0.963455805413516, "grad_norm": 0.11743757194265751, "learning_rate": 0.0006180073326425226, "loss": 3.2204, "step": 12200 }, { "epoch": 0.9638506643501609, "grad_norm": 0.08687179674422021, "learning_rate": 0.0006176724311087995, "loss": 2.9187, "step": 12205 }, { "epoch": 0.9642455232868058, "grad_norm": 0.08306958505249522, "learning_rate": 0.0006173374736781793, "loss": 2.9981, "step": 12210 }, { "epoch": 0.9646403822234507, "grad_norm": 0.10797304192367317, "learning_rate": 0.0006170024605097737, "loss": 2.7441, "step": 12215 }, { "epoch": 0.9650352411600955, "grad_norm": 0.08008928348205939, "learning_rate": 0.0006166673917627213, "loss": 2.8702, "step": 12220 }, { "epoch": 0.9654301000967405, "grad_norm": 0.11737900197130775, "learning_rate": 0.0006163322675961868, "loss": 3.0821, "step": 12225 }, { "epoch": 0.9658249590333853, "grad_norm": 0.1181797133754914, "learning_rate": 0.0006159970881693611, "loss": 2.972, "step": 12230 }, { "epoch": 0.9662198179700302, "grad_norm": 0.0941121792263289, "learning_rate": 0.0006156618536414616, "loss": 2.9214, "step": 12235 }, { "epoch": 0.9666146769066751, "grad_norm": 0.14355664915026317, "learning_rate": 0.0006153265641717319, "loss": 2.91, "step": 12240 }, { "epoch": 0.96700953584332, "grad_norm": 0.09595459940716537, "learning_rate": 0.0006149912199194417, "loss": 2.7312, "step": 12245 }, { "epoch": 0.9674043947799649, "grad_norm": 0.10058376873276582, "learning_rate": 0.0006146558210438863, "loss": 2.9425, "step": 12250 }, { "epoch": 0.9677992537166097, "grad_norm": 0.21322730772143061, "learning_rate": 0.0006143203677043874, "loss": 2.8656, "step": 12255 }, { "epoch": 0.9681941126532546, "grad_norm": 0.23026783235150167, "learning_rate": 0.0006139848600602926, "loss": 2.8419, "step": 12260 }, { "epoch": 0.9685889715898995, "grad_norm": 0.08907140012550925, "learning_rate": 0.0006136492982709749, "loss": 2.8876, "step": 12265 }, { "epoch": 0.9689838305265444, "grad_norm": 0.09868414935858602, "learning_rate": 0.0006133136824958334, "loss": 2.7772, "step": 12270 }, { "epoch": 0.9693786894631893, "grad_norm": 0.0900175787351179, "learning_rate": 0.0006129780128942928, "loss": 2.7137, "step": 12275 }, { "epoch": 0.9697735483998342, "grad_norm": 0.1069060506013433, "learning_rate": 0.0006126422896258029, "loss": 3.0589, "step": 12280 }, { "epoch": 0.970168407336479, "grad_norm": 0.09204479492601587, "learning_rate": 0.0006123065128498398, "loss": 2.8614, "step": 12285 }, { "epoch": 0.970563266273124, "grad_norm": 0.0957884617534034, "learning_rate": 0.0006119706827259044, "loss": 2.8167, "step": 12290 }, { "epoch": 0.9709581252097688, "grad_norm": 0.09613283770016308, "learning_rate": 0.0006116347994135229, "loss": 2.8832, "step": 12295 }, { "epoch": 0.9713529841464137, "grad_norm": 0.09812362326711929, "learning_rate": 0.0006112988630722475, "loss": 2.8345, "step": 12300 }, { "epoch": 0.9717478430830586, "grad_norm": 0.11395327524677484, "learning_rate": 0.0006109628738616546, "loss": 3.0844, "step": 12305 }, { "epoch": 0.9721427020197034, "grad_norm": 0.11282240667140002, "learning_rate": 0.0006106268319413464, "loss": 3.0247, "step": 12310 }, { "epoch": 0.9725375609563484, "grad_norm": 0.0948928436812237, "learning_rate": 0.0006102907374709497, "loss": 2.9973, "step": 12315 }, { "epoch": 0.9729324198929933, "grad_norm": 0.08778245333320003, "learning_rate": 0.0006099545906101169, "loss": 2.9341, "step": 12320 }, { "epoch": 0.9733272788296381, "grad_norm": 0.12880186345606506, "learning_rate": 0.0006096183915185246, "loss": 2.9286, "step": 12325 }, { "epoch": 0.973722137766283, "grad_norm": 0.10712364880755285, "learning_rate": 0.0006092821403558745, "loss": 2.9067, "step": 12330 }, { "epoch": 0.9741169967029278, "grad_norm": 0.09768399866961217, "learning_rate": 0.0006089458372818933, "loss": 3.0042, "step": 12335 }, { "epoch": 0.9745118556395728, "grad_norm": 0.1424342039725903, "learning_rate": 0.0006086094824563317, "loss": 2.9324, "step": 12340 }, { "epoch": 0.9749067145762177, "grad_norm": 0.09003653817444777, "learning_rate": 0.0006082730760389656, "loss": 2.9252, "step": 12345 }, { "epoch": 0.9753015735128625, "grad_norm": 0.1090194794907455, "learning_rate": 0.0006079366181895951, "loss": 2.7089, "step": 12350 }, { "epoch": 0.9756964324495074, "grad_norm": 0.16974354497590546, "learning_rate": 0.0006076001090680448, "loss": 2.993, "step": 12355 }, { "epoch": 0.9760912913861522, "grad_norm": 0.1148337663382658, "learning_rate": 0.0006072635488341635, "loss": 2.9145, "step": 12360 }, { "epoch": 0.9764861503227972, "grad_norm": 0.08440453471073621, "learning_rate": 0.0006069269376478247, "loss": 2.9178, "step": 12365 }, { "epoch": 0.9768810092594421, "grad_norm": 0.1220045044021508, "learning_rate": 0.0006065902756689258, "loss": 2.9452, "step": 12370 }, { "epoch": 0.9772758681960869, "grad_norm": 0.10243813893108686, "learning_rate": 0.0006062535630573883, "loss": 3.2387, "step": 12375 }, { "epoch": 0.9776707271327318, "grad_norm": 0.12028828357407205, "learning_rate": 0.000605916799973158, "loss": 3.1643, "step": 12380 }, { "epoch": 0.9780655860693768, "grad_norm": 0.09775856610763692, "learning_rate": 0.0006055799865762042, "loss": 3.0271, "step": 12385 }, { "epoch": 0.9784604450060216, "grad_norm": 0.11568157875093564, "learning_rate": 0.0006052431230265205, "loss": 3.0658, "step": 12390 }, { "epoch": 0.9788553039426665, "grad_norm": 0.08831280245609487, "learning_rate": 0.0006049062094841244, "loss": 2.8594, "step": 12395 }, { "epoch": 0.9792501628793113, "grad_norm": 0.09341189572211331, "learning_rate": 0.0006045692461090565, "loss": 3.1638, "step": 12400 }, { "epoch": 0.9796450218159563, "grad_norm": 0.08939234225880895, "learning_rate": 0.000604232233061382, "loss": 2.8918, "step": 12405 }, { "epoch": 0.9800398807526012, "grad_norm": 0.10802750703608159, "learning_rate": 0.0006038951705011894, "loss": 2.8705, "step": 12410 }, { "epoch": 0.980434739689246, "grad_norm": 0.08840814759495638, "learning_rate": 0.0006035580585885901, "loss": 2.8309, "step": 12415 }, { "epoch": 0.9808295986258909, "grad_norm": 0.11747065579250415, "learning_rate": 0.0006032208974837194, "loss": 2.9261, "step": 12420 }, { "epoch": 0.9812244575625357, "grad_norm": 0.09691014931673683, "learning_rate": 0.0006028836873467364, "loss": 3.0526, "step": 12425 }, { "epoch": 0.9816193164991807, "grad_norm": 0.08691893727445898, "learning_rate": 0.0006025464283378226, "loss": 2.9084, "step": 12430 }, { "epoch": 0.9820141754358256, "grad_norm": 0.11778878521984282, "learning_rate": 0.0006022091206171833, "loss": 2.9313, "step": 12435 }, { "epoch": 0.9824090343724704, "grad_norm": 0.09348664068717995, "learning_rate": 0.0006018717643450469, "loss": 2.8487, "step": 12440 }, { "epoch": 0.9828038933091153, "grad_norm": 0.08181748252287485, "learning_rate": 0.0006015343596816647, "loss": 3.0892, "step": 12445 }, { "epoch": 0.9831987522457603, "grad_norm": 0.11948674869978629, "learning_rate": 0.0006011969067873112, "loss": 2.9303, "step": 12450 }, { "epoch": 0.9835936111824051, "grad_norm": 0.08502136547679515, "learning_rate": 0.0006008594058222836, "loss": 2.9692, "step": 12455 }, { "epoch": 0.98398847011905, "grad_norm": 0.07899129055178061, "learning_rate": 0.0006005218569469019, "loss": 2.863, "step": 12460 }, { "epoch": 0.9843833290556948, "grad_norm": 0.11638822293759453, "learning_rate": 0.0006001842603215091, "loss": 2.9779, "step": 12465 }, { "epoch": 0.9847781879923397, "grad_norm": 0.1090046408610304, "learning_rate": 0.0005998466161064707, "loss": 3.1149, "step": 12470 }, { "epoch": 0.9851730469289847, "grad_norm": 0.1023894208800286, "learning_rate": 0.0005995089244621748, "loss": 2.9422, "step": 12475 }, { "epoch": 0.9855679058656295, "grad_norm": 0.09102326965741274, "learning_rate": 0.0005991711855490322, "loss": 2.9097, "step": 12480 }, { "epoch": 0.9859627648022744, "grad_norm": 0.1185451176178751, "learning_rate": 0.0005988333995274759, "loss": 2.9779, "step": 12485 }, { "epoch": 0.9863576237389192, "grad_norm": 0.1331196962273949, "learning_rate": 0.0005984955665579615, "loss": 2.8954, "step": 12490 }, { "epoch": 0.9867524826755641, "grad_norm": 0.12268572029852756, "learning_rate": 0.0005981576868009667, "loss": 2.923, "step": 12495 }, { "epoch": 0.9871473416122091, "grad_norm": 0.08258962618653963, "learning_rate": 0.0005978197604169918, "loss": 2.8079, "step": 12500 }, { "epoch": 0.9875422005488539, "grad_norm": 0.09873269746981021, "learning_rate": 0.0005974817875665588, "loss": 2.9922, "step": 12505 }, { "epoch": 0.9879370594854988, "grad_norm": 0.09631299792889084, "learning_rate": 0.0005971437684102119, "loss": 2.9666, "step": 12510 }, { "epoch": 0.9883319184221437, "grad_norm": 0.10623993464960722, "learning_rate": 0.0005968057031085175, "loss": 2.9448, "step": 12515 }, { "epoch": 0.9887267773587886, "grad_norm": 0.10440971881847945, "learning_rate": 0.0005964675918220637, "loss": 2.8515, "step": 12520 }, { "epoch": 0.9891216362954335, "grad_norm": 0.09761320152111524, "learning_rate": 0.0005961294347114607, "loss": 3.0342, "step": 12525 }, { "epoch": 0.9895164952320783, "grad_norm": 0.08482322778488202, "learning_rate": 0.00059579123193734, "loss": 2.8121, "step": 12530 }, { "epoch": 0.9899113541687232, "grad_norm": 0.0831284520435388, "learning_rate": 0.0005954529836603553, "loss": 2.8151, "step": 12535 }, { "epoch": 0.9903062131053681, "grad_norm": 0.12838874217372995, "learning_rate": 0.0005951146900411815, "loss": 2.7926, "step": 12540 }, { "epoch": 0.990701072042013, "grad_norm": 0.08767261246053538, "learning_rate": 0.0005947763512405155, "loss": 2.8112, "step": 12545 }, { "epoch": 0.9910959309786579, "grad_norm": 0.10882299520518837, "learning_rate": 0.0005944379674190751, "loss": 2.9457, "step": 12550 }, { "epoch": 0.9914907899153027, "grad_norm": 0.11119927314638636, "learning_rate": 0.0005940995387376001, "loss": 2.8447, "step": 12555 }, { "epoch": 0.9918856488519476, "grad_norm": 0.14432315035472926, "learning_rate": 0.0005937610653568511, "loss": 3.1169, "step": 12560 }, { "epoch": 0.9922805077885926, "grad_norm": 0.08902197793420043, "learning_rate": 0.00059342254743761, "loss": 2.8465, "step": 12565 }, { "epoch": 0.9926753667252374, "grad_norm": 0.08789624454608251, "learning_rate": 0.0005930839851406802, "loss": 2.8812, "step": 12570 }, { "epoch": 0.9930702256618823, "grad_norm": 0.10480352632800144, "learning_rate": 0.0005927453786268859, "loss": 2.731, "step": 12575 }, { "epoch": 0.9934650845985272, "grad_norm": 0.08436297871836189, "learning_rate": 0.0005924067280570724, "loss": 2.7024, "step": 12580 }, { "epoch": 0.993859943535172, "grad_norm": 0.12041753262598845, "learning_rate": 0.0005920680335921058, "loss": 2.7723, "step": 12585 }, { "epoch": 0.994254802471817, "grad_norm": 0.09665142910242337, "learning_rate": 0.0005917292953928733, "loss": 2.9247, "step": 12590 }, { "epoch": 0.9946496614084618, "grad_norm": 0.11563460277968737, "learning_rate": 0.0005913905136202823, "loss": 2.8132, "step": 12595 }, { "epoch": 0.9950445203451067, "grad_norm": 0.11642696327168324, "learning_rate": 0.000591051688435262, "loss": 2.8172, "step": 12600 }, { "epoch": 0.9954393792817516, "grad_norm": 0.08278161300027931, "learning_rate": 0.000590712819998761, "loss": 3.2126, "step": 12605 }, { "epoch": 0.9958342382183965, "grad_norm": 0.08008902326378457, "learning_rate": 0.0005903739084717492, "loss": 2.8242, "step": 12610 }, { "epoch": 0.9962290971550414, "grad_norm": 0.10770060468736252, "learning_rate": 0.0005900349540152167, "loss": 2.9781, "step": 12615 }, { "epoch": 0.9966239560916862, "grad_norm": 0.12422464518843443, "learning_rate": 0.0005896959567901741, "loss": 3.1645, "step": 12620 }, { "epoch": 0.9970188150283311, "grad_norm": 0.10975595037195406, "learning_rate": 0.0005893569169576521, "loss": 2.9031, "step": 12625 }, { "epoch": 0.997413673964976, "grad_norm": 0.1155476172995376, "learning_rate": 0.000589017834678702, "loss": 3.1306, "step": 12630 }, { "epoch": 0.9978085329016209, "grad_norm": 0.10448810942974429, "learning_rate": 0.0005886787101143948, "loss": 2.8392, "step": 12635 }, { "epoch": 0.9982033918382658, "grad_norm": 0.13229447161066069, "learning_rate": 0.0005883395434258223, "loss": 2.9521, "step": 12640 }, { "epoch": 0.9985982507749107, "grad_norm": 0.09671704205865728, "learning_rate": 0.0005880003347740957, "loss": 2.898, "step": 12645 }, { "epoch": 0.9989931097115555, "grad_norm": 0.2078618951033788, "learning_rate": 0.0005876610843203459, "loss": 2.7484, "step": 12650 }, { "epoch": 0.9993879686482005, "grad_norm": 0.14306859173821596, "learning_rate": 0.0005873217922257245, "loss": 2.8646, "step": 12655 }, { "epoch": 0.9997828275848453, "grad_norm": 0.11355567720233475, "learning_rate": 0.0005869824586514022, "loss": 3.1261, "step": 12660 }, { "epoch": 0.9999407711595033, "eval_loss": 2.9186177253723145, "eval_runtime": 128.5525, "eval_samples_per_second": 20.606, "eval_steps_per_second": 20.606, "step": 12662 }, { "epoch": 1.0001776865214902, "grad_norm": 0.13862137932359728, "learning_rate": 0.0005866430837585697, "loss": 3.2114, "step": 12665 }, { "epoch": 1.0005725454581351, "grad_norm": 0.09857970708580131, "learning_rate": 0.0005863036677084372, "loss": 2.8334, "step": 12670 }, { "epoch": 1.00096740439478, "grad_norm": 0.11628164342313443, "learning_rate": 0.0005859642106622347, "loss": 2.9167, "step": 12675 }, { "epoch": 1.0013622633314248, "grad_norm": 0.09622782910778431, "learning_rate": 0.0005856247127812113, "loss": 2.9159, "step": 12680 }, { "epoch": 1.0017571222680697, "grad_norm": 0.21828699263670648, "learning_rate": 0.0005852851742266359, "loss": 2.8662, "step": 12685 }, { "epoch": 1.0021519812047146, "grad_norm": 0.11557768456771436, "learning_rate": 0.0005849455951597962, "loss": 2.9816, "step": 12690 }, { "epoch": 1.0025468401413595, "grad_norm": 0.13916892525299843, "learning_rate": 0.0005846059757419998, "loss": 2.8623, "step": 12695 }, { "epoch": 1.0029416990780045, "grad_norm": 0.10731755397206624, "learning_rate": 0.0005842663161345727, "loss": 2.8629, "step": 12700 }, { "epoch": 1.0033365580146492, "grad_norm": 0.09242735664022686, "learning_rate": 0.0005839266164988607, "loss": 2.8951, "step": 12705 }, { "epoch": 1.003731416951294, "grad_norm": 0.08955449321503321, "learning_rate": 0.0005835868769962282, "loss": 2.8319, "step": 12710 }, { "epoch": 1.004126275887939, "grad_norm": 0.10311447438628316, "learning_rate": 0.0005832470977880589, "loss": 2.8452, "step": 12715 }, { "epoch": 1.004521134824584, "grad_norm": 0.12053097987097068, "learning_rate": 0.0005829072790357548, "loss": 2.8624, "step": 12720 }, { "epoch": 1.0049159937612289, "grad_norm": 0.10027520285302126, "learning_rate": 0.0005825674209007371, "loss": 2.8991, "step": 12725 }, { "epoch": 1.0053108526978738, "grad_norm": 0.10865124954475656, "learning_rate": 0.0005822275235444458, "loss": 3.0469, "step": 12730 }, { "epoch": 1.0057057116345185, "grad_norm": 0.09404067674979452, "learning_rate": 0.0005818875871283392, "loss": 2.6658, "step": 12735 }, { "epoch": 1.0061005705711634, "grad_norm": 0.12530033523554285, "learning_rate": 0.000581547611813894, "loss": 2.8307, "step": 12740 }, { "epoch": 1.0064954295078083, "grad_norm": 0.11078191031123659, "learning_rate": 0.0005812075977626063, "loss": 2.7773, "step": 12745 }, { "epoch": 1.0068902884444533, "grad_norm": 0.08175070032215771, "learning_rate": 0.0005808675451359895, "loss": 2.8879, "step": 12750 }, { "epoch": 1.0072851473810982, "grad_norm": 0.10022345572913173, "learning_rate": 0.000580527454095576, "loss": 2.8647, "step": 12755 }, { "epoch": 1.007680006317743, "grad_norm": 0.09388478529880874, "learning_rate": 0.0005801873248029161, "loss": 2.8965, "step": 12760 }, { "epoch": 1.0080748652543878, "grad_norm": 0.08921926011073754, "learning_rate": 0.0005798471574195787, "loss": 2.9328, "step": 12765 }, { "epoch": 1.0084697241910328, "grad_norm": 0.1384580167726131, "learning_rate": 0.0005795069521071501, "loss": 2.9848, "step": 12770 }, { "epoch": 1.0088645831276777, "grad_norm": 0.12132353428239796, "learning_rate": 0.0005791667090272354, "loss": 2.7596, "step": 12775 }, { "epoch": 1.0092594420643226, "grad_norm": 0.1194566531784281, "learning_rate": 0.0005788264283414571, "loss": 2.7518, "step": 12780 }, { "epoch": 1.0096543010009673, "grad_norm": 0.0938136275068209, "learning_rate": 0.0005784861102114555, "loss": 2.8286, "step": 12785 }, { "epoch": 1.0100491599376122, "grad_norm": 0.09588048042374758, "learning_rate": 0.0005781457547988896, "loss": 2.949, "step": 12790 }, { "epoch": 1.0104440188742572, "grad_norm": 0.11531377368901256, "learning_rate": 0.0005778053622654347, "loss": 2.9874, "step": 12795 }, { "epoch": 1.010838877810902, "grad_norm": 0.09217924176226841, "learning_rate": 0.0005774649327727849, "loss": 2.8441, "step": 12800 }, { "epoch": 1.011233736747547, "grad_norm": 0.14119729960927094, "learning_rate": 0.0005771244664826512, "loss": 3.0921, "step": 12805 }, { "epoch": 1.0116285956841917, "grad_norm": 0.09524475144834628, "learning_rate": 0.0005767839635567625, "loss": 2.849, "step": 12810 }, { "epoch": 1.0120234546208366, "grad_norm": 0.08839391159355263, "learning_rate": 0.0005764434241568646, "loss": 2.921, "step": 12815 }, { "epoch": 1.0124183135574816, "grad_norm": 0.145995459504112, "learning_rate": 0.0005761028484447213, "loss": 2.9032, "step": 12820 }, { "epoch": 1.0128131724941265, "grad_norm": 0.10289076909036836, "learning_rate": 0.000575762236582113, "loss": 2.8076, "step": 12825 }, { "epoch": 1.0132080314307714, "grad_norm": 0.09178592115927561, "learning_rate": 0.0005754215887308377, "loss": 2.823, "step": 12830 }, { "epoch": 1.0136028903674161, "grad_norm": 0.09535279354936754, "learning_rate": 0.0005750809050527102, "loss": 2.8543, "step": 12835 }, { "epoch": 1.013997749304061, "grad_norm": 0.1305308357171906, "learning_rate": 0.0005747401857095627, "loss": 3.1456, "step": 12840 }, { "epoch": 1.014392608240706, "grad_norm": 0.09500616872073489, "learning_rate": 0.0005743994308632438, "loss": 2.791, "step": 12845 }, { "epoch": 1.014787467177351, "grad_norm": 0.11285993039526233, "learning_rate": 0.0005740586406756194, "loss": 2.8983, "step": 12850 }, { "epoch": 1.0151823261139958, "grad_norm": 0.10524762313278595, "learning_rate": 0.0005737178153085722, "loss": 2.8584, "step": 12855 }, { "epoch": 1.0155771850506408, "grad_norm": 0.09111643978760676, "learning_rate": 0.0005733769549240012, "loss": 3.0291, "step": 12860 }, { "epoch": 1.0159720439872855, "grad_norm": 0.09965797332681088, "learning_rate": 0.0005730360596838226, "loss": 3.0056, "step": 12865 }, { "epoch": 1.0163669029239304, "grad_norm": 0.16439303909517697, "learning_rate": 0.0005726951297499687, "loss": 3.0071, "step": 12870 }, { "epoch": 1.0167617618605753, "grad_norm": 0.2966036630257911, "learning_rate": 0.0005723541652843885, "loss": 2.7799, "step": 12875 }, { "epoch": 1.0171566207972202, "grad_norm": 0.12637157511555627, "learning_rate": 0.0005720131664490472, "loss": 3.0403, "step": 12880 }, { "epoch": 1.0175514797338652, "grad_norm": 0.13319818593417926, "learning_rate": 0.0005716721334059265, "loss": 2.8196, "step": 12885 }, { "epoch": 1.0179463386705099, "grad_norm": 0.09135863477436496, "learning_rate": 0.0005713310663170245, "loss": 2.8996, "step": 12890 }, { "epoch": 1.0183411976071548, "grad_norm": 0.12094929965834074, "learning_rate": 0.000570989965344355, "loss": 2.9034, "step": 12895 }, { "epoch": 1.0187360565437997, "grad_norm": 0.09960790808462976, "learning_rate": 0.0005706488306499484, "loss": 2.9215, "step": 12900 }, { "epoch": 1.0191309154804447, "grad_norm": 0.09880050467351871, "learning_rate": 0.000570307662395851, "loss": 2.9469, "step": 12905 }, { "epoch": 1.0195257744170896, "grad_norm": 0.11829886795260819, "learning_rate": 0.0005699664607441248, "loss": 3.2018, "step": 12910 }, { "epoch": 1.0199206333537343, "grad_norm": 0.11445920704120303, "learning_rate": 0.0005696252258568478, "loss": 3.027, "step": 12915 }, { "epoch": 1.0203154922903792, "grad_norm": 0.09951367991515181, "learning_rate": 0.0005692839578961137, "loss": 2.8333, "step": 12920 }, { "epoch": 1.0207103512270241, "grad_norm": 0.11654356681399158, "learning_rate": 0.0005689426570240322, "loss": 3.0883, "step": 12925 }, { "epoch": 1.021105210163669, "grad_norm": 0.09697355500868857, "learning_rate": 0.0005686013234027285, "loss": 3.0738, "step": 12930 }, { "epoch": 1.021500069100314, "grad_norm": 0.10067122422471278, "learning_rate": 0.000568259957194343, "loss": 2.7089, "step": 12935 }, { "epoch": 1.0218949280369587, "grad_norm": 0.09365498159642636, "learning_rate": 0.000567918558561032, "loss": 2.8205, "step": 12940 }, { "epoch": 1.0222897869736036, "grad_norm": 0.11423408873183832, "learning_rate": 0.0005675771276649672, "loss": 2.8884, "step": 12945 }, { "epoch": 1.0226846459102485, "grad_norm": 0.09797675432802831, "learning_rate": 0.0005672356646683357, "loss": 3.0838, "step": 12950 }, { "epoch": 1.0230795048468935, "grad_norm": 0.10027031617868516, "learning_rate": 0.0005668941697333392, "loss": 2.9939, "step": 12955 }, { "epoch": 1.0234743637835384, "grad_norm": 0.08851963624290105, "learning_rate": 0.0005665526430221952, "loss": 2.8287, "step": 12960 }, { "epoch": 1.023869222720183, "grad_norm": 0.13494508701579946, "learning_rate": 0.0005662110846971361, "loss": 2.8213, "step": 12965 }, { "epoch": 1.024264081656828, "grad_norm": 0.13941349439784237, "learning_rate": 0.0005658694949204094, "loss": 2.822, "step": 12970 }, { "epoch": 1.024658940593473, "grad_norm": 0.08797514170149204, "learning_rate": 0.0005655278738542775, "loss": 2.9659, "step": 12975 }, { "epoch": 1.0250537995301179, "grad_norm": 0.09539714861963115, "learning_rate": 0.0005651862216610176, "loss": 2.8506, "step": 12980 }, { "epoch": 1.0254486584667628, "grad_norm": 0.10630908860622994, "learning_rate": 0.0005648445385029217, "loss": 2.9329, "step": 12985 }, { "epoch": 1.0258435174034077, "grad_norm": 0.09981306409646885, "learning_rate": 0.0005645028245422967, "loss": 2.9106, "step": 12990 }, { "epoch": 1.0262383763400524, "grad_norm": 0.14600302002811963, "learning_rate": 0.0005641610799414637, "loss": 2.8523, "step": 12995 }, { "epoch": 1.0266332352766974, "grad_norm": 0.11915103562363474, "learning_rate": 0.0005638193048627589, "loss": 3.0049, "step": 13000 }, { "epoch": 1.0270280942133423, "grad_norm": 0.1100037932010766, "learning_rate": 0.0005634774994685325, "loss": 3.0318, "step": 13005 }, { "epoch": 1.0274229531499872, "grad_norm": 0.11429407165698219, "learning_rate": 0.0005631356639211493, "loss": 2.9897, "step": 13010 }, { "epoch": 1.0278178120866321, "grad_norm": 0.09606864952784662, "learning_rate": 0.0005627937983829885, "loss": 2.949, "step": 13015 }, { "epoch": 1.0282126710232768, "grad_norm": 0.10030556419406274, "learning_rate": 0.0005624519030164436, "loss": 2.9195, "step": 13020 }, { "epoch": 1.0286075299599218, "grad_norm": 0.12652997670537316, "learning_rate": 0.0005621099779839218, "loss": 2.8707, "step": 13025 }, { "epoch": 1.0290023888965667, "grad_norm": 0.09661541100507831, "learning_rate": 0.000561768023447845, "loss": 2.8732, "step": 13030 }, { "epoch": 1.0293972478332116, "grad_norm": 0.09162068729033268, "learning_rate": 0.0005614260395706489, "loss": 2.7972, "step": 13035 }, { "epoch": 1.0297921067698566, "grad_norm": 0.09284338506752374, "learning_rate": 0.0005610840265147829, "loss": 2.9208, "step": 13040 }, { "epoch": 1.0301869657065013, "grad_norm": 0.10841451670462761, "learning_rate": 0.0005607419844427105, "loss": 2.9167, "step": 13045 }, { "epoch": 1.0305818246431462, "grad_norm": 0.08620351391203203, "learning_rate": 0.000560399913516909, "loss": 3.0414, "step": 13050 }, { "epoch": 1.030976683579791, "grad_norm": 0.09289886253188877, "learning_rate": 0.0005600578138998692, "loss": 2.9658, "step": 13055 }, { "epoch": 1.031371542516436, "grad_norm": 0.08440193305401829, "learning_rate": 0.0005597156857540958, "loss": 2.8759, "step": 13060 }, { "epoch": 1.031766401453081, "grad_norm": 0.09106392190765475, "learning_rate": 0.0005593735292421068, "loss": 3.0108, "step": 13065 }, { "epoch": 1.0321612603897257, "grad_norm": 0.1343169592013468, "learning_rate": 0.000559031344526434, "loss": 2.8452, "step": 13070 }, { "epoch": 1.0325561193263706, "grad_norm": 0.08611823828998838, "learning_rate": 0.000558689131769622, "loss": 2.787, "step": 13075 }, { "epoch": 1.0329509782630155, "grad_norm": 0.15894454057185053, "learning_rate": 0.0005583468911342295, "loss": 3.1027, "step": 13080 }, { "epoch": 1.0333458371996604, "grad_norm": 0.08593101472658803, "learning_rate": 0.0005580046227828278, "loss": 2.9104, "step": 13085 }, { "epoch": 1.0337406961363054, "grad_norm": 0.08663435125648258, "learning_rate": 0.0005576623268780018, "loss": 2.8483, "step": 13090 }, { "epoch": 1.0341355550729503, "grad_norm": 0.09696527127427841, "learning_rate": 0.0005573200035823492, "loss": 2.8284, "step": 13095 }, { "epoch": 1.034530414009595, "grad_norm": 0.09053125550295069, "learning_rate": 0.0005569776530584808, "loss": 2.7611, "step": 13100 }, { "epoch": 1.03492527294624, "grad_norm": 0.09691695559914462, "learning_rate": 0.0005566352754690204, "loss": 3.0378, "step": 13105 }, { "epoch": 1.0353201318828849, "grad_norm": 0.08363122536962751, "learning_rate": 0.0005562928709766046, "loss": 2.8979, "step": 13110 }, { "epoch": 1.0357149908195298, "grad_norm": 0.10011887506242304, "learning_rate": 0.0005559504397438828, "loss": 2.8023, "step": 13115 }, { "epoch": 1.0361098497561747, "grad_norm": 0.08741836502750376, "learning_rate": 0.000555607981933517, "loss": 2.8036, "step": 13120 }, { "epoch": 1.0365047086928194, "grad_norm": 0.11983081869499643, "learning_rate": 0.000555265497708182, "loss": 2.81, "step": 13125 }, { "epoch": 1.0368995676294643, "grad_norm": 0.09487146719529026, "learning_rate": 0.000554922987230565, "loss": 2.8537, "step": 13130 }, { "epoch": 1.0372944265661093, "grad_norm": 0.0878601943361183, "learning_rate": 0.0005545804506633658, "loss": 2.8758, "step": 13135 }, { "epoch": 1.0376892855027542, "grad_norm": 0.08775994596291997, "learning_rate": 0.0005542378881692965, "loss": 3.049, "step": 13140 }, { "epoch": 1.0380841444393991, "grad_norm": 0.10369838121843275, "learning_rate": 0.0005538952999110815, "loss": 2.827, "step": 13145 }, { "epoch": 1.0384790033760438, "grad_norm": 0.09220474030026042, "learning_rate": 0.0005535526860514576, "loss": 2.9451, "step": 13150 }, { "epoch": 1.0388738623126887, "grad_norm": 0.08969297454692039, "learning_rate": 0.0005532100467531735, "loss": 2.9224, "step": 13155 }, { "epoch": 1.0392687212493337, "grad_norm": 0.10181674392805015, "learning_rate": 0.0005528673821789901, "loss": 2.9, "step": 13160 }, { "epoch": 1.0396635801859786, "grad_norm": 0.08682564203203534, "learning_rate": 0.0005525246924916805, "loss": 2.7895, "step": 13165 }, { "epoch": 1.0400584391226235, "grad_norm": 0.09430406687523751, "learning_rate": 0.0005521819778540293, "loss": 2.9738, "step": 13170 }, { "epoch": 1.0404532980592682, "grad_norm": 0.08840957285709126, "learning_rate": 0.0005518392384288338, "loss": 2.974, "step": 13175 }, { "epoch": 1.0408481569959132, "grad_norm": 0.08995976940266956, "learning_rate": 0.0005514964743789017, "loss": 2.8561, "step": 13180 }, { "epoch": 1.041243015932558, "grad_norm": 0.07971793248800968, "learning_rate": 0.0005511536858670537, "loss": 3.0014, "step": 13185 }, { "epoch": 1.041637874869203, "grad_norm": 0.10901751946526646, "learning_rate": 0.0005508108730561213, "loss": 2.7354, "step": 13190 }, { "epoch": 1.042032733805848, "grad_norm": 0.0902831571777054, "learning_rate": 0.0005504680361089481, "loss": 2.8394, "step": 13195 }, { "epoch": 1.0424275927424929, "grad_norm": 0.09383407330928689, "learning_rate": 0.0005501251751883887, "loss": 2.7677, "step": 13200 }, { "epoch": 1.0428224516791376, "grad_norm": 0.10707537217507815, "learning_rate": 0.0005497822904573095, "loss": 2.7778, "step": 13205 }, { "epoch": 1.0432173106157825, "grad_norm": 0.10269422828925899, "learning_rate": 0.000549439382078588, "loss": 2.9421, "step": 13210 }, { "epoch": 1.0436121695524274, "grad_norm": 0.09028609996249459, "learning_rate": 0.0005490964502151128, "loss": 2.8074, "step": 13215 }, { "epoch": 1.0440070284890723, "grad_norm": 0.0896004737326897, "learning_rate": 0.0005487534950297839, "loss": 2.7675, "step": 13220 }, { "epoch": 1.0444018874257173, "grad_norm": 0.08822000506674911, "learning_rate": 0.0005484105166855122, "loss": 2.9114, "step": 13225 }, { "epoch": 1.044796746362362, "grad_norm": 0.10610531154366569, "learning_rate": 0.0005480675153452197, "loss": 3.0924, "step": 13230 }, { "epoch": 1.045191605299007, "grad_norm": 0.09846684054050414, "learning_rate": 0.0005477244911718392, "loss": 2.7522, "step": 13235 }, { "epoch": 1.0455864642356518, "grad_norm": 0.14552985340776217, "learning_rate": 0.0005473814443283147, "loss": 2.9897, "step": 13240 }, { "epoch": 1.0459813231722968, "grad_norm": 0.08795835573436434, "learning_rate": 0.0005470383749776005, "loss": 2.9694, "step": 13245 }, { "epoch": 1.0463761821089417, "grad_norm": 0.09436833880181868, "learning_rate": 0.0005466952832826619, "loss": 2.9751, "step": 13250 }, { "epoch": 1.0467710410455864, "grad_norm": 0.0960982462742697, "learning_rate": 0.0005463521694064748, "loss": 2.7917, "step": 13255 }, { "epoch": 1.0471658999822313, "grad_norm": 0.10203398301558113, "learning_rate": 0.0005460090335120255, "loss": 2.8221, "step": 13260 }, { "epoch": 1.0475607589188762, "grad_norm": 0.09135570126687571, "learning_rate": 0.0005456658757623108, "loss": 2.9108, "step": 13265 }, { "epoch": 1.0479556178555212, "grad_norm": 0.1177229951570096, "learning_rate": 0.0005453226963203379, "loss": 3.0258, "step": 13270 }, { "epoch": 1.048350476792166, "grad_norm": 0.10035401419100684, "learning_rate": 0.000544979495349124, "loss": 2.8809, "step": 13275 }, { "epoch": 1.0487453357288108, "grad_norm": 0.09996707478200105, "learning_rate": 0.0005446362730116973, "loss": 3.0346, "step": 13280 }, { "epoch": 1.0491401946654557, "grad_norm": 0.11087507896409807, "learning_rate": 0.0005442930294710955, "loss": 3.0093, "step": 13285 }, { "epoch": 1.0495350536021006, "grad_norm": 0.08664372579912287, "learning_rate": 0.0005439497648903666, "loss": 2.8615, "step": 13290 }, { "epoch": 1.0499299125387456, "grad_norm": 0.09896962227731189, "learning_rate": 0.0005436064794325685, "loss": 3.2459, "step": 13295 }, { "epoch": 1.0503247714753905, "grad_norm": 0.09572525785676501, "learning_rate": 0.000543263173260769, "loss": 3.0054, "step": 13300 }, { "epoch": 1.0507196304120352, "grad_norm": 0.08555618347802425, "learning_rate": 0.0005429198465380459, "loss": 2.9414, "step": 13305 }, { "epoch": 1.0511144893486801, "grad_norm": 0.08881391707186352, "learning_rate": 0.0005425764994274867, "loss": 3.0014, "step": 13310 }, { "epoch": 1.051509348285325, "grad_norm": 0.08917127780054994, "learning_rate": 0.0005422331320921882, "loss": 2.8994, "step": 13315 }, { "epoch": 1.05190420722197, "grad_norm": 0.17404801801872744, "learning_rate": 0.0005418897446952578, "loss": 2.8755, "step": 13320 }, { "epoch": 1.052299066158615, "grad_norm": 0.0945279229471605, "learning_rate": 0.0005415463373998112, "loss": 2.9761, "step": 13325 }, { "epoch": 1.0526939250952596, "grad_norm": 0.11362878604618455, "learning_rate": 0.0005412029103689744, "loss": 2.7642, "step": 13330 }, { "epoch": 1.0530887840319045, "grad_norm": 0.11605505630474995, "learning_rate": 0.0005408594637658826, "loss": 2.7977, "step": 13335 }, { "epoch": 1.0534836429685495, "grad_norm": 0.1057823796653894, "learning_rate": 0.00054051599775368, "loss": 2.9753, "step": 13340 }, { "epoch": 1.0538785019051944, "grad_norm": 0.09935998548971313, "learning_rate": 0.0005401725124955202, "loss": 2.8217, "step": 13345 }, { "epoch": 1.0542733608418393, "grad_norm": 0.08566833403229536, "learning_rate": 0.0005398290081545662, "loss": 2.9309, "step": 13350 }, { "epoch": 1.0546682197784842, "grad_norm": 0.09974780444531389, "learning_rate": 0.0005394854848939898, "loss": 2.8929, "step": 13355 }, { "epoch": 1.055063078715129, "grad_norm": 0.10599112657019492, "learning_rate": 0.0005391419428769716, "loss": 3.0275, "step": 13360 }, { "epoch": 1.0554579376517739, "grad_norm": 0.086557313479538, "learning_rate": 0.0005387983822667016, "loss": 2.8067, "step": 13365 }, { "epoch": 1.0558527965884188, "grad_norm": 0.08970044549863639, "learning_rate": 0.0005384548032263782, "loss": 2.8621, "step": 13370 }, { "epoch": 1.0562476555250637, "grad_norm": 0.08386742879207475, "learning_rate": 0.0005381112059192088, "loss": 2.7636, "step": 13375 }, { "epoch": 1.0566425144617086, "grad_norm": 0.09316474653556232, "learning_rate": 0.0005377675905084094, "loss": 2.8913, "step": 13380 }, { "epoch": 1.0570373733983534, "grad_norm": 0.12035228372497382, "learning_rate": 0.0005374239571572045, "loss": 2.9388, "step": 13385 }, { "epoch": 1.0574322323349983, "grad_norm": 0.12203570072696097, "learning_rate": 0.0005370803060288274, "loss": 3.1628, "step": 13390 }, { "epoch": 1.0578270912716432, "grad_norm": 0.07826139979506902, "learning_rate": 0.0005367366372865195, "loss": 2.8912, "step": 13395 }, { "epoch": 1.0582219502082881, "grad_norm": 0.09146593051719837, "learning_rate": 0.0005363929510935307, "loss": 2.9462, "step": 13400 }, { "epoch": 1.058616809144933, "grad_norm": 0.08686240386595033, "learning_rate": 0.0005360492476131195, "loss": 2.8966, "step": 13405 }, { "epoch": 1.0590116680815778, "grad_norm": 0.08312131157877069, "learning_rate": 0.000535705527008552, "loss": 2.8294, "step": 13410 }, { "epoch": 1.0594065270182227, "grad_norm": 0.08111547237046092, "learning_rate": 0.0005353617894431028, "loss": 2.8638, "step": 13415 }, { "epoch": 1.0598013859548676, "grad_norm": 0.08900032002998166, "learning_rate": 0.0005350180350800549, "loss": 3.1249, "step": 13420 }, { "epoch": 1.0601962448915125, "grad_norm": 0.09889683696007304, "learning_rate": 0.0005346742640826985, "loss": 2.9382, "step": 13425 }, { "epoch": 1.0605911038281575, "grad_norm": 0.10448160876359151, "learning_rate": 0.0005343304766143321, "loss": 2.8947, "step": 13430 }, { "epoch": 1.0609859627648022, "grad_norm": 0.09713908012477156, "learning_rate": 0.0005339866728382623, "loss": 2.8187, "step": 13435 }, { "epoch": 1.061380821701447, "grad_norm": 0.08990288473819814, "learning_rate": 0.0005336428529178032, "loss": 2.8915, "step": 13440 }, { "epoch": 1.061775680638092, "grad_norm": 0.11858981392370019, "learning_rate": 0.0005332990170162763, "loss": 2.7442, "step": 13445 }, { "epoch": 1.062170539574737, "grad_norm": 0.08639139393280551, "learning_rate": 0.0005329551652970109, "loss": 2.8881, "step": 13450 }, { "epoch": 1.0625653985113819, "grad_norm": 0.09552609913339699, "learning_rate": 0.0005326112979233442, "loss": 2.7904, "step": 13455 }, { "epoch": 1.0629602574480268, "grad_norm": 0.16182771564078874, "learning_rate": 0.0005322674150586201, "loss": 2.7728, "step": 13460 }, { "epoch": 1.0633551163846715, "grad_norm": 0.10080170871002325, "learning_rate": 0.0005319235168661903, "loss": 3.0576, "step": 13465 }, { "epoch": 1.0637499753213164, "grad_norm": 0.0933801609088163, "learning_rate": 0.0005315796035094139, "loss": 2.7161, "step": 13470 }, { "epoch": 1.0641448342579614, "grad_norm": 0.09350099120066799, "learning_rate": 0.000531235675151657, "loss": 2.7242, "step": 13475 }, { "epoch": 1.0645396931946063, "grad_norm": 0.11866994503722303, "learning_rate": 0.0005308917319562929, "loss": 2.8865, "step": 13480 }, { "epoch": 1.0649345521312512, "grad_norm": 0.12882066646518925, "learning_rate": 0.0005305477740867019, "loss": 3.0367, "step": 13485 }, { "epoch": 1.065329411067896, "grad_norm": 0.09983992143149514, "learning_rate": 0.0005302038017062711, "loss": 2.7072, "step": 13490 }, { "epoch": 1.0657242700045408, "grad_norm": 0.11695043144293363, "learning_rate": 0.0005298598149783946, "loss": 3.0011, "step": 13495 }, { "epoch": 1.0661191289411858, "grad_norm": 0.11584171136710955, "learning_rate": 0.0005295158140664737, "loss": 2.7501, "step": 13500 }, { "epoch": 1.0665139878778307, "grad_norm": 0.12743448296558313, "learning_rate": 0.0005291717991339158, "loss": 2.7872, "step": 13505 }, { "epoch": 1.0669088468144756, "grad_norm": 0.09098655201858694, "learning_rate": 0.0005288277703441355, "loss": 2.8229, "step": 13510 }, { "epoch": 1.0673037057511203, "grad_norm": 0.11503193955145839, "learning_rate": 0.0005284837278605538, "loss": 2.8163, "step": 13515 }, { "epoch": 1.0676985646877653, "grad_norm": 0.09004550845906062, "learning_rate": 0.000528139671846598, "loss": 2.7177, "step": 13520 }, { "epoch": 1.0680934236244102, "grad_norm": 0.08504800575445964, "learning_rate": 0.0005277956024657021, "loss": 2.7678, "step": 13525 }, { "epoch": 1.068488282561055, "grad_norm": 0.10040187099010421, "learning_rate": 0.0005274515198813064, "loss": 2.9704, "step": 13530 }, { "epoch": 1.0688831414977, "grad_norm": 0.11852916122087226, "learning_rate": 0.000527107424256857, "loss": 2.9452, "step": 13535 }, { "epoch": 1.0692780004343447, "grad_norm": 0.10562243885157094, "learning_rate": 0.0005267633157558072, "loss": 3.044, "step": 13540 }, { "epoch": 1.0696728593709897, "grad_norm": 0.09826284123497957, "learning_rate": 0.0005264191945416154, "loss": 2.8221, "step": 13545 }, { "epoch": 1.0700677183076346, "grad_norm": 0.11264816640087183, "learning_rate": 0.0005260750607777467, "loss": 2.8747, "step": 13550 }, { "epoch": 1.0704625772442795, "grad_norm": 0.09816219695332155, "learning_rate": 0.0005257309146276719, "loss": 2.9148, "step": 13555 }, { "epoch": 1.0708574361809244, "grad_norm": 0.08768808359997433, "learning_rate": 0.0005253867562548675, "loss": 2.8724, "step": 13560 }, { "epoch": 1.0712522951175694, "grad_norm": 0.07753976570797771, "learning_rate": 0.0005250425858228163, "loss": 2.8155, "step": 13565 }, { "epoch": 1.071647154054214, "grad_norm": 0.08531110878532108, "learning_rate": 0.0005246984034950064, "loss": 2.8837, "step": 13570 }, { "epoch": 1.072042012990859, "grad_norm": 0.0929678790935391, "learning_rate": 0.0005243542094349317, "loss": 2.9639, "step": 13575 }, { "epoch": 1.072436871927504, "grad_norm": 0.09381152258987466, "learning_rate": 0.0005240100038060917, "loss": 2.9192, "step": 13580 }, { "epoch": 1.0728317308641488, "grad_norm": 0.10536677802204962, "learning_rate": 0.0005236657867719914, "loss": 2.8941, "step": 13585 }, { "epoch": 1.0732265898007936, "grad_norm": 0.08391735356001273, "learning_rate": 0.000523321558496141, "loss": 2.8582, "step": 13590 }, { "epoch": 1.0736214487374385, "grad_norm": 0.08346631843751177, "learning_rate": 0.0005229773191420565, "loss": 2.9863, "step": 13595 }, { "epoch": 1.0740163076740834, "grad_norm": 0.08435668428534676, "learning_rate": 0.0005226330688732587, "loss": 3.0653, "step": 13600 }, { "epoch": 1.0744111666107283, "grad_norm": 0.10216292901774446, "learning_rate": 0.0005222888078532737, "loss": 2.8075, "step": 13605 }, { "epoch": 1.0748060255473733, "grad_norm": 0.0980330277669179, "learning_rate": 0.000521944536245633, "loss": 3.0981, "step": 13610 }, { "epoch": 1.0752008844840182, "grad_norm": 0.09817252503178361, "learning_rate": 0.0005216002542138727, "loss": 2.931, "step": 13615 }, { "epoch": 1.0755957434206629, "grad_norm": 0.10091861309022615, "learning_rate": 0.0005212559619215343, "loss": 2.9603, "step": 13620 }, { "epoch": 1.0759906023573078, "grad_norm": 0.10764391046825442, "learning_rate": 0.0005209116595321636, "loss": 2.8609, "step": 13625 }, { "epoch": 1.0763854612939527, "grad_norm": 0.09547242890643588, "learning_rate": 0.0005205673472093117, "loss": 2.7941, "step": 13630 }, { "epoch": 1.0767803202305977, "grad_norm": 0.11624304995216989, "learning_rate": 0.0005202230251165343, "loss": 2.7939, "step": 13635 }, { "epoch": 1.0771751791672426, "grad_norm": 0.0992441211913396, "learning_rate": 0.0005198786934173917, "loss": 2.8744, "step": 13640 }, { "epoch": 1.0775700381038873, "grad_norm": 0.11067738488316228, "learning_rate": 0.0005195343522754486, "loss": 3.0461, "step": 13645 }, { "epoch": 1.0779648970405322, "grad_norm": 0.10211909583572872, "learning_rate": 0.0005191900018542744, "loss": 2.9704, "step": 13650 }, { "epoch": 1.0783597559771771, "grad_norm": 0.09646708460595677, "learning_rate": 0.000518845642317443, "loss": 2.7927, "step": 13655 }, { "epoch": 1.078754614913822, "grad_norm": 0.11213953224734673, "learning_rate": 0.0005185012738285323, "loss": 2.781, "step": 13660 }, { "epoch": 1.079149473850467, "grad_norm": 0.10721190518568874, "learning_rate": 0.0005181568965511246, "loss": 3.0451, "step": 13665 }, { "epoch": 1.0795443327871117, "grad_norm": 0.09587890864951318, "learning_rate": 0.0005178125106488064, "loss": 2.8618, "step": 13670 }, { "epoch": 1.0799391917237566, "grad_norm": 0.09746619761674377, "learning_rate": 0.0005174681162851685, "loss": 2.941, "step": 13675 }, { "epoch": 1.0803340506604016, "grad_norm": 0.09657176582957755, "learning_rate": 0.0005171237136238054, "loss": 2.8765, "step": 13680 }, { "epoch": 1.0807289095970465, "grad_norm": 0.1001975641456972, "learning_rate": 0.0005167793028283156, "loss": 2.8936, "step": 13685 }, { "epoch": 1.0811237685336914, "grad_norm": 0.10829786604997006, "learning_rate": 0.0005164348840623014, "loss": 2.9947, "step": 13690 }, { "epoch": 1.0815186274703361, "grad_norm": 0.09265232367938427, "learning_rate": 0.0005160904574893693, "loss": 2.7962, "step": 13695 }, { "epoch": 1.081913486406981, "grad_norm": 0.08306637391595488, "learning_rate": 0.0005157460232731291, "loss": 3.0035, "step": 13700 }, { "epoch": 1.082308345343626, "grad_norm": 0.09194668778486981, "learning_rate": 0.0005154015815771945, "loss": 3.0335, "step": 13705 }, { "epoch": 1.082703204280271, "grad_norm": 0.08838592852121925, "learning_rate": 0.0005150571325651821, "loss": 2.8711, "step": 13710 }, { "epoch": 1.0830980632169158, "grad_norm": 0.1070412311393883, "learning_rate": 0.0005147126764007131, "loss": 3.0456, "step": 13715 }, { "epoch": 1.0834929221535607, "grad_norm": 0.10466081400880124, "learning_rate": 0.000514368213247411, "loss": 2.8577, "step": 13720 }, { "epoch": 1.0838877810902054, "grad_norm": 0.09598475991983328, "learning_rate": 0.0005140237432689033, "loss": 2.9119, "step": 13725 }, { "epoch": 1.0842826400268504, "grad_norm": 0.10491952174693758, "learning_rate": 0.0005136792666288205, "loss": 3.0094, "step": 13730 }, { "epoch": 1.0846774989634953, "grad_norm": 0.08556000759709331, "learning_rate": 0.0005133347834907964, "loss": 2.8629, "step": 13735 }, { "epoch": 1.0850723579001402, "grad_norm": 0.11217028323488328, "learning_rate": 0.0005129902940184676, "loss": 2.8288, "step": 13740 }, { "epoch": 1.0854672168367852, "grad_norm": 0.08526475828157873, "learning_rate": 0.0005126457983754742, "loss": 2.8695, "step": 13745 }, { "epoch": 1.0858620757734299, "grad_norm": 0.08866637770657347, "learning_rate": 0.0005123012967254587, "loss": 2.7604, "step": 13750 }, { "epoch": 1.0862569347100748, "grad_norm": 0.10187989983441123, "learning_rate": 0.0005119567892320666, "loss": 2.917, "step": 13755 }, { "epoch": 1.0866517936467197, "grad_norm": 0.1122274445527874, "learning_rate": 0.0005116122760589465, "loss": 2.8528, "step": 13760 }, { "epoch": 1.0870466525833646, "grad_norm": 0.14088524691833204, "learning_rate": 0.0005112677573697491, "loss": 2.7258, "step": 13765 }, { "epoch": 1.0874415115200096, "grad_norm": 0.08795758907627108, "learning_rate": 0.0005109232333281282, "loss": 2.99, "step": 13770 }, { "epoch": 1.0878363704566543, "grad_norm": 0.08219832458394317, "learning_rate": 0.0005105787040977404, "loss": 2.9247, "step": 13775 }, { "epoch": 1.0882312293932992, "grad_norm": 0.10089689268529776, "learning_rate": 0.000510234169842244, "loss": 3.0203, "step": 13780 }, { "epoch": 1.0886260883299441, "grad_norm": 0.09312455434631034, "learning_rate": 0.0005098896307253001, "loss": 3.0687, "step": 13785 }, { "epoch": 1.089020947266589, "grad_norm": 0.12114882019802063, "learning_rate": 0.0005095450869105722, "loss": 3.1445, "step": 13790 }, { "epoch": 1.089415806203234, "grad_norm": 0.0792655786341938, "learning_rate": 0.0005092005385617259, "loss": 3.1468, "step": 13795 }, { "epoch": 1.0898106651398787, "grad_norm": 0.10491595683645556, "learning_rate": 0.000508855985842429, "loss": 2.7482, "step": 13800 }, { "epoch": 1.0902055240765236, "grad_norm": 0.09000424776602943, "learning_rate": 0.0005085114289163509, "loss": 3.0793, "step": 13805 }, { "epoch": 1.0906003830131685, "grad_norm": 0.1047152936840366, "learning_rate": 0.000508166867947164, "loss": 2.7572, "step": 13810 }, { "epoch": 1.0909952419498135, "grad_norm": 0.09791310702928943, "learning_rate": 0.0005078223030985419, "loss": 2.8922, "step": 13815 }, { "epoch": 1.0913901008864584, "grad_norm": 0.09158543190302941, "learning_rate": 0.0005074777345341602, "loss": 2.8847, "step": 13820 }, { "epoch": 1.0917849598231033, "grad_norm": 0.08587238970075797, "learning_rate": 0.000507133162417696, "loss": 3.0632, "step": 13825 }, { "epoch": 1.092179818759748, "grad_norm": 0.09725999334607961, "learning_rate": 0.0005067885869128287, "loss": 2.8217, "step": 13830 }, { "epoch": 1.092574677696393, "grad_norm": 0.09410598627883393, "learning_rate": 0.0005064440081832388, "loss": 2.7539, "step": 13835 }, { "epoch": 1.0929695366330379, "grad_norm": 0.10290532948732843, "learning_rate": 0.0005060994263926086, "loss": 2.8304, "step": 13840 }, { "epoch": 1.0933643955696828, "grad_norm": 0.09163366721479611, "learning_rate": 0.0005057548417046214, "loss": 2.7953, "step": 13845 }, { "epoch": 1.0937592545063277, "grad_norm": 0.09757271055060678, "learning_rate": 0.0005054102542829628, "loss": 2.7803, "step": 13850 }, { "epoch": 1.0941541134429724, "grad_norm": 0.09304177336879109, "learning_rate": 0.0005050656642913187, "loss": 3.0574, "step": 13855 }, { "epoch": 1.0945489723796173, "grad_norm": 0.11327491221688817, "learning_rate": 0.0005047210718933767, "loss": 2.9237, "step": 13860 }, { "epoch": 1.0949438313162623, "grad_norm": 0.09517067699771342, "learning_rate": 0.0005043764772528258, "loss": 2.6919, "step": 13865 }, { "epoch": 1.0953386902529072, "grad_norm": 0.24477390482575473, "learning_rate": 0.0005040318805333554, "loss": 2.6188, "step": 13870 }, { "epoch": 1.0957335491895521, "grad_norm": 0.12713744948562766, "learning_rate": 0.0005036872818986562, "loss": 2.9161, "step": 13875 }, { "epoch": 1.0961284081261968, "grad_norm": 0.10836079867714185, "learning_rate": 0.0005033426815124201, "loss": 3.1819, "step": 13880 }, { "epoch": 1.0965232670628418, "grad_norm": 0.09837453540982329, "learning_rate": 0.0005029980795383395, "loss": 2.7704, "step": 13885 }, { "epoch": 1.0969181259994867, "grad_norm": 0.12578579771841564, "learning_rate": 0.0005026534761401076, "loss": 2.8934, "step": 13890 }, { "epoch": 1.0973129849361316, "grad_norm": 0.09779333856923234, "learning_rate": 0.0005023088714814183, "loss": 2.7596, "step": 13895 }, { "epoch": 1.0977078438727765, "grad_norm": 0.09055222395856827, "learning_rate": 0.0005019642657259662, "loss": 2.8289, "step": 13900 }, { "epoch": 1.0981027028094212, "grad_norm": 0.1591107516631044, "learning_rate": 0.0005016196590374464, "loss": 2.9882, "step": 13905 }, { "epoch": 1.0984975617460662, "grad_norm": 0.10467771030033582, "learning_rate": 0.000501275051579554, "loss": 2.7794, "step": 13910 }, { "epoch": 1.098892420682711, "grad_norm": 0.0965918246020985, "learning_rate": 0.0005009304435159852, "loss": 2.7955, "step": 13915 }, { "epoch": 1.099287279619356, "grad_norm": 0.09952101429870468, "learning_rate": 0.0005005858350104361, "loss": 2.9582, "step": 13920 }, { "epoch": 1.099682138556001, "grad_norm": 0.09704706798466262, "learning_rate": 0.0005002412262266029, "loss": 2.9021, "step": 13925 }, { "epoch": 1.1000769974926459, "grad_norm": 0.09498906620994604, "learning_rate": 0.000499896617328182, "loss": 2.7912, "step": 13930 }, { "epoch": 1.1004718564292906, "grad_norm": 0.10121835442441482, "learning_rate": 0.0004995520084788701, "loss": 2.9001, "step": 13935 }, { "epoch": 1.1008667153659355, "grad_norm": 0.09904259795249543, "learning_rate": 0.0004992073998423637, "loss": 3.0544, "step": 13940 }, { "epoch": 1.1012615743025804, "grad_norm": 0.0872030465079283, "learning_rate": 0.0004988627915823593, "loss": 3.046, "step": 13945 }, { "epoch": 1.1016564332392254, "grad_norm": 0.08946351537209216, "learning_rate": 0.0004985181838625531, "loss": 2.7368, "step": 13950 }, { "epoch": 1.10205129217587, "grad_norm": 0.08518492156168402, "learning_rate": 0.0004981735768466406, "loss": 2.9332, "step": 13955 }, { "epoch": 1.102446151112515, "grad_norm": 0.09176730579763134, "learning_rate": 0.0004978289706983179, "loss": 2.861, "step": 13960 }, { "epoch": 1.10284101004916, "grad_norm": 0.08059216596092238, "learning_rate": 0.0004974843655812802, "loss": 2.797, "step": 13965 }, { "epoch": 1.1032358689858048, "grad_norm": 0.09595589236155407, "learning_rate": 0.0004971397616592221, "loss": 3.0654, "step": 13970 }, { "epoch": 1.1036307279224498, "grad_norm": 0.11493540479272385, "learning_rate": 0.0004967951590958377, "loss": 2.9192, "step": 13975 }, { "epoch": 1.1040255868590947, "grad_norm": 0.10713096590456028, "learning_rate": 0.0004964505580548206, "loss": 2.8798, "step": 13980 }, { "epoch": 1.1044204457957394, "grad_norm": 0.09420910487360587, "learning_rate": 0.0004961059586998637, "loss": 2.8719, "step": 13985 }, { "epoch": 1.1048153047323843, "grad_norm": 0.08799612417416139, "learning_rate": 0.0004957613611946589, "loss": 2.8767, "step": 13990 }, { "epoch": 1.1052101636690292, "grad_norm": 0.09279988872090397, "learning_rate": 0.0004954167657028972, "loss": 3.0176, "step": 13995 }, { "epoch": 1.1056050226056742, "grad_norm": 0.10483157179082785, "learning_rate": 0.0004950721723882689, "loss": 3.0182, "step": 14000 }, { "epoch": 1.105999881542319, "grad_norm": 0.0814581325666057, "learning_rate": 0.000494727581414463, "loss": 2.8683, "step": 14005 }, { "epoch": 1.1063947404789638, "grad_norm": 0.07663855227285661, "learning_rate": 0.0004943829929451678, "loss": 2.8541, "step": 14010 }, { "epoch": 1.1067895994156087, "grad_norm": 0.08082940682368817, "learning_rate": 0.0004940384071440698, "loss": 2.7387, "step": 14015 }, { "epoch": 1.1071844583522537, "grad_norm": 0.09745136296902437, "learning_rate": 0.0004936938241748548, "loss": 2.876, "step": 14020 }, { "epoch": 1.1075793172888986, "grad_norm": 0.08341006960074579, "learning_rate": 0.0004933492442012067, "loss": 3.1223, "step": 14025 }, { "epoch": 1.1079741762255435, "grad_norm": 0.08478221053786751, "learning_rate": 0.0004930046673868086, "loss": 2.9493, "step": 14030 }, { "epoch": 1.1083690351621882, "grad_norm": 0.10790766273341036, "learning_rate": 0.0004926600938953418, "loss": 2.7986, "step": 14035 }, { "epoch": 1.1087638940988331, "grad_norm": 0.11299194616472139, "learning_rate": 0.0004923155238904857, "loss": 2.9695, "step": 14040 }, { "epoch": 1.109158753035478, "grad_norm": 0.1200707657986064, "learning_rate": 0.0004919709575359185, "loss": 2.7867, "step": 14045 }, { "epoch": 1.109553611972123, "grad_norm": 0.09514985395941852, "learning_rate": 0.0004916263949953165, "loss": 3.0551, "step": 14050 }, { "epoch": 1.109948470908768, "grad_norm": 0.09359036941111337, "learning_rate": 0.0004912818364323542, "loss": 2.8562, "step": 14055 }, { "epoch": 1.1103433298454126, "grad_norm": 0.10840078098062089, "learning_rate": 0.0004909372820107044, "loss": 3.0987, "step": 14060 }, { "epoch": 1.1107381887820575, "grad_norm": 0.08694780019540656, "learning_rate": 0.0004905927318940375, "loss": 3.0046, "step": 14065 }, { "epoch": 1.1111330477187025, "grad_norm": 0.09513971928508866, "learning_rate": 0.0004902481862460223, "loss": 3.0542, "step": 14070 }, { "epoch": 1.1115279066553474, "grad_norm": 0.09699458265706604, "learning_rate": 0.000489903645230325, "loss": 2.6871, "step": 14075 }, { "epoch": 1.1119227655919923, "grad_norm": 0.10880748045165776, "learning_rate": 0.0004895591090106101, "loss": 3.049, "step": 14080 }, { "epoch": 1.1123176245286372, "grad_norm": 0.17592053270469463, "learning_rate": 0.0004892145777505397, "loss": 2.778, "step": 14085 }, { "epoch": 1.112712483465282, "grad_norm": 0.09181607809751176, "learning_rate": 0.0004888700516137732, "loss": 2.8189, "step": 14090 }, { "epoch": 1.1131073424019269, "grad_norm": 0.09310196606967669, "learning_rate": 0.0004885255307639679, "loss": 2.8249, "step": 14095 }, { "epoch": 1.1135022013385718, "grad_norm": 0.1083023531443384, "learning_rate": 0.0004881810153647786, "loss": 2.7153, "step": 14100 }, { "epoch": 1.1138970602752167, "grad_norm": 0.12314460783943684, "learning_rate": 0.00048783650557985734, "loss": 3.0493, "step": 14105 }, { "epoch": 1.1142919192118617, "grad_norm": 0.0938998772274811, "learning_rate": 0.0004874920015728535, "loss": 2.7459, "step": 14110 }, { "epoch": 1.1146867781485064, "grad_norm": 0.08502304778440771, "learning_rate": 0.0004871475035074139, "loss": 2.9613, "step": 14115 }, { "epoch": 1.1150816370851513, "grad_norm": 0.09691818204521593, "learning_rate": 0.0004868030115471822, "loss": 3.0925, "step": 14120 }, { "epoch": 1.1154764960217962, "grad_norm": 0.10725024532571187, "learning_rate": 0.0004864585258557999, "loss": 2.9971, "step": 14125 }, { "epoch": 1.1158713549584411, "grad_norm": 0.08548189126541086, "learning_rate": 0.0004861140465969044, "loss": 2.8534, "step": 14130 }, { "epoch": 1.116266213895086, "grad_norm": 0.09588162464048763, "learning_rate": 0.00048576957393413084, "loss": 2.9774, "step": 14135 }, { "epoch": 1.1166610728317308, "grad_norm": 0.09075088042007964, "learning_rate": 0.0004854251080311111, "loss": 2.8279, "step": 14140 }, { "epoch": 1.1170559317683757, "grad_norm": 0.09623768601721494, "learning_rate": 0.00048508064905147374, "loss": 3.0656, "step": 14145 }, { "epoch": 1.1174507907050206, "grad_norm": 0.0866017670065485, "learning_rate": 0.000484736197158844, "loss": 2.838, "step": 14150 }, { "epoch": 1.1178456496416656, "grad_norm": 0.10354438274607271, "learning_rate": 0.00048439175251684395, "loss": 2.8208, "step": 14155 }, { "epoch": 1.1182405085783105, "grad_norm": 0.1225489427561836, "learning_rate": 0.0004840473152890921, "loss": 2.8374, "step": 14160 }, { "epoch": 1.1186353675149552, "grad_norm": 0.0813432374730622, "learning_rate": 0.0004837028856392034, "loss": 2.761, "step": 14165 }, { "epoch": 1.1190302264516, "grad_norm": 0.08671256445113767, "learning_rate": 0.0004833584637307892, "loss": 2.8873, "step": 14170 }, { "epoch": 1.119425085388245, "grad_norm": 0.08032813114483332, "learning_rate": 0.00048301404972745716, "loss": 2.7479, "step": 14175 }, { "epoch": 1.11981994432489, "grad_norm": 0.10606553325028921, "learning_rate": 0.00048266964379281124, "loss": 2.7843, "step": 14180 }, { "epoch": 1.1202148032615349, "grad_norm": 0.10570350655144796, "learning_rate": 0.00048232524609045165, "loss": 2.8589, "step": 14185 }, { "epoch": 1.1206096621981798, "grad_norm": 0.08448780641775264, "learning_rate": 0.0004819808567839745, "loss": 2.6083, "step": 14190 }, { "epoch": 1.1210045211348245, "grad_norm": 0.08374326537145865, "learning_rate": 0.0004816364760369719, "loss": 2.7546, "step": 14195 }, { "epoch": 1.1213993800714694, "grad_norm": 0.13104828500914995, "learning_rate": 0.0004812921040130323, "loss": 2.9384, "step": 14200 }, { "epoch": 1.1217942390081144, "grad_norm": 0.14689383231151307, "learning_rate": 0.0004809477408757395, "loss": 3.0341, "step": 14205 }, { "epoch": 1.1221890979447593, "grad_norm": 0.11038569250123782, "learning_rate": 0.0004806033867886733, "loss": 2.8251, "step": 14210 }, { "epoch": 1.1225839568814042, "grad_norm": 0.10056361961187964, "learning_rate": 0.0004802590419154095, "loss": 2.7771, "step": 14215 }, { "epoch": 1.122978815818049, "grad_norm": 0.09778495801895554, "learning_rate": 0.0004799147064195185, "loss": 2.911, "step": 14220 }, { "epoch": 1.1233736747546939, "grad_norm": 0.11819885739571001, "learning_rate": 0.00047957038046456753, "loss": 3.0584, "step": 14225 }, { "epoch": 1.1237685336913388, "grad_norm": 0.08485366512711233, "learning_rate": 0.00047922606421411853, "loss": 2.8738, "step": 14230 }, { "epoch": 1.1241633926279837, "grad_norm": 0.08725673698943641, "learning_rate": 0.000478881757831729, "loss": 2.9735, "step": 14235 }, { "epoch": 1.1245582515646286, "grad_norm": 0.09286783859299348, "learning_rate": 0.00047853746148095183, "loss": 2.7827, "step": 14240 }, { "epoch": 1.1249531105012733, "grad_norm": 0.10062499273561441, "learning_rate": 0.00047819317532533505, "loss": 3.0137, "step": 14245 }, { "epoch": 1.1253479694379183, "grad_norm": 0.20054474956055848, "learning_rate": 0.00047784889952842187, "loss": 2.7855, "step": 14250 }, { "epoch": 1.1257428283745632, "grad_norm": 0.09210582909203038, "learning_rate": 0.00047750463425375055, "loss": 2.9224, "step": 14255 }, { "epoch": 1.1261376873112081, "grad_norm": 0.0854098614800156, "learning_rate": 0.0004771603796648545, "loss": 2.8861, "step": 14260 }, { "epoch": 1.126532546247853, "grad_norm": 0.09528600021596954, "learning_rate": 0.0004768161359252618, "loss": 2.8195, "step": 14265 }, { "epoch": 1.1269274051844977, "grad_norm": 0.14942721428050093, "learning_rate": 0.0004764719031984955, "loss": 3.0794, "step": 14270 }, { "epoch": 1.1273222641211427, "grad_norm": 0.08656642157351295, "learning_rate": 0.00047612768164807356, "loss": 2.8347, "step": 14275 }, { "epoch": 1.1277171230577876, "grad_norm": 0.09089875297429936, "learning_rate": 0.0004757834714375086, "loss": 3.0093, "step": 14280 }, { "epoch": 1.1281119819944325, "grad_norm": 0.11206852542521067, "learning_rate": 0.00047543927273030744, "loss": 2.905, "step": 14285 }, { "epoch": 1.1285068409310774, "grad_norm": 0.08134634371416963, "learning_rate": 0.0004750950856899721, "loss": 2.7524, "step": 14290 }, { "epoch": 1.1289016998677224, "grad_norm": 0.09450137176336386, "learning_rate": 0.0004747509104799985, "loss": 2.8292, "step": 14295 }, { "epoch": 1.129296558804367, "grad_norm": 0.09426624909390505, "learning_rate": 0.00047440674726387727, "loss": 2.9014, "step": 14300 }, { "epoch": 1.129691417741012, "grad_norm": 0.08227424386811337, "learning_rate": 0.00047406259620509314, "loss": 2.7267, "step": 14305 }, { "epoch": 1.130086276677657, "grad_norm": 0.076917879848375, "learning_rate": 0.00047371845746712514, "loss": 2.7718, "step": 14310 }, { "epoch": 1.1304811356143019, "grad_norm": 0.08711466577972658, "learning_rate": 0.0004733743312134465, "loss": 3.0204, "step": 14315 }, { "epoch": 1.1308759945509466, "grad_norm": 0.09252597952796045, "learning_rate": 0.0004730302176075245, "loss": 2.9695, "step": 14320 }, { "epoch": 1.1312708534875915, "grad_norm": 0.0936488008715834, "learning_rate": 0.00047268611681282037, "loss": 2.7357, "step": 14325 }, { "epoch": 1.1316657124242364, "grad_norm": 0.10010845498089647, "learning_rate": 0.0004723420289927893, "loss": 2.9468, "step": 14330 }, { "epoch": 1.1320605713608813, "grad_norm": 0.09022391916513252, "learning_rate": 0.00047199795431088014, "loss": 2.9239, "step": 14335 }, { "epoch": 1.1324554302975263, "grad_norm": 0.08801555508632215, "learning_rate": 0.00047165389293053576, "loss": 3.0548, "step": 14340 }, { "epoch": 1.1328502892341712, "grad_norm": 0.10638818396735338, "learning_rate": 0.00047130984501519274, "loss": 2.8065, "step": 14345 }, { "epoch": 1.133245148170816, "grad_norm": 0.103984014294357, "learning_rate": 0.00047096581072828106, "loss": 2.8618, "step": 14350 }, { "epoch": 1.1336400071074608, "grad_norm": 0.11730882810398209, "learning_rate": 0.00047062179023322413, "loss": 2.8026, "step": 14355 }, { "epoch": 1.1340348660441057, "grad_norm": 0.11013687505658475, "learning_rate": 0.00047027778369343915, "loss": 3.0539, "step": 14360 }, { "epoch": 1.1344297249807507, "grad_norm": 0.1165179800368748, "learning_rate": 0.00046993379127233646, "loss": 2.8453, "step": 14365 }, { "epoch": 1.1348245839173956, "grad_norm": 0.09951171594683392, "learning_rate": 0.0004695898131333197, "loss": 2.9115, "step": 14370 }, { "epoch": 1.1352194428540403, "grad_norm": 0.09501294336023709, "learning_rate": 0.00046924584943978584, "loss": 2.8224, "step": 14375 }, { "epoch": 1.1356143017906852, "grad_norm": 0.0829088094528144, "learning_rate": 0.00046890190035512485, "loss": 2.7981, "step": 14380 }, { "epoch": 1.1360091607273302, "grad_norm": 0.09606292348781591, "learning_rate": 0.00046855796604271987, "loss": 2.8422, "step": 14385 }, { "epoch": 1.136404019663975, "grad_norm": 0.10862358940007373, "learning_rate": 0.00046821404666594715, "loss": 2.6675, "step": 14390 }, { "epoch": 1.13679887860062, "grad_norm": 0.10684450134911964, "learning_rate": 0.00046787014238817526, "loss": 2.7931, "step": 14395 }, { "epoch": 1.137193737537265, "grad_norm": 0.08537470055383398, "learning_rate": 0.0004675262533727664, "loss": 2.7994, "step": 14400 }, { "epoch": 1.1375885964739096, "grad_norm": 0.10360083453901411, "learning_rate": 0.0004671823797830749, "loss": 3.1089, "step": 14405 }, { "epoch": 1.1379834554105546, "grad_norm": 0.10979275748225167, "learning_rate": 0.00046683852178244816, "loss": 2.7738, "step": 14410 }, { "epoch": 1.1383783143471995, "grad_norm": 0.09258517801260756, "learning_rate": 0.0004664946795342258, "loss": 3.0298, "step": 14415 }, { "epoch": 1.1387731732838444, "grad_norm": 0.08755606756420857, "learning_rate": 0.00046615085320174046, "loss": 2.6306, "step": 14420 }, { "epoch": 1.1391680322204891, "grad_norm": 0.08836281600601295, "learning_rate": 0.00046580704294831677, "loss": 3.043, "step": 14425 }, { "epoch": 1.139562891157134, "grad_norm": 0.1001004824472644, "learning_rate": 0.00046546324893727194, "loss": 2.7308, "step": 14430 }, { "epoch": 1.139957750093779, "grad_norm": 0.14815623622719507, "learning_rate": 0.00046511947133191534, "loss": 2.8231, "step": 14435 }, { "epoch": 1.140352609030424, "grad_norm": 0.09078713221619276, "learning_rate": 0.0004647757102955487, "loss": 2.8337, "step": 14440 }, { "epoch": 1.1407474679670688, "grad_norm": 0.08760462888792049, "learning_rate": 0.00046443196599146565, "loss": 3.0223, "step": 14445 }, { "epoch": 1.1411423269037138, "grad_norm": 0.09491956104651918, "learning_rate": 0.00046408823858295215, "loss": 3.0641, "step": 14450 }, { "epoch": 1.1415371858403585, "grad_norm": 0.22262006943354676, "learning_rate": 0.0004637445282332859, "loss": 2.8595, "step": 14455 }, { "epoch": 1.1419320447770034, "grad_norm": 0.09133919292120289, "learning_rate": 0.0004634008351057366, "loss": 2.7748, "step": 14460 }, { "epoch": 1.1423269037136483, "grad_norm": 0.10506184130198391, "learning_rate": 0.0004630571593635659, "loss": 2.7588, "step": 14465 }, { "epoch": 1.1427217626502932, "grad_norm": 0.08132098836329481, "learning_rate": 0.000462713501170027, "loss": 2.8503, "step": 14470 }, { "epoch": 1.143116621586938, "grad_norm": 0.10502573455633976, "learning_rate": 0.0004623698606883647, "loss": 2.9553, "step": 14475 }, { "epoch": 1.1435114805235829, "grad_norm": 0.09305022541457764, "learning_rate": 0.0004620262380818159, "loss": 2.9521, "step": 14480 }, { "epoch": 1.1439063394602278, "grad_norm": 0.09939468162865352, "learning_rate": 0.0004616826335136081, "loss": 3.014, "step": 14485 }, { "epoch": 1.1443011983968727, "grad_norm": 0.10496439081071574, "learning_rate": 0.0004613390471469608, "loss": 2.8327, "step": 14490 }, { "epoch": 1.1446960573335176, "grad_norm": 0.12305640456690718, "learning_rate": 0.00046099547914508517, "loss": 2.7446, "step": 14495 }, { "epoch": 1.1450909162701626, "grad_norm": 0.11233157686668038, "learning_rate": 0.00046065192967118305, "loss": 2.9717, "step": 14500 }, { "epoch": 1.1454857752068073, "grad_norm": 0.10523981784275165, "learning_rate": 0.00046030839888844765, "loss": 2.7372, "step": 14505 }, { "epoch": 1.1458806341434522, "grad_norm": 0.08904203923940683, "learning_rate": 0.00045996488696006354, "loss": 2.6922, "step": 14510 }, { "epoch": 1.1462754930800971, "grad_norm": 0.11452790522252311, "learning_rate": 0.0004596213940492061, "loss": 2.8313, "step": 14515 }, { "epoch": 1.146670352016742, "grad_norm": 0.11112527852346588, "learning_rate": 0.0004592779203190417, "loss": 2.6791, "step": 14520 }, { "epoch": 1.147065210953387, "grad_norm": 0.09820819092678702, "learning_rate": 0.0004589344659327277, "loss": 2.8332, "step": 14525 }, { "epoch": 1.1474600698900317, "grad_norm": 0.08766652885957933, "learning_rate": 0.0004585910310534122, "loss": 2.926, "step": 14530 }, { "epoch": 1.1478549288266766, "grad_norm": 0.09366150500227428, "learning_rate": 0.00045824761584423374, "loss": 2.7999, "step": 14535 }, { "epoch": 1.1482497877633215, "grad_norm": 0.09775792178781383, "learning_rate": 0.0004579042204683224, "loss": 2.9899, "step": 14540 }, { "epoch": 1.1486446466999665, "grad_norm": 0.09198729380156256, "learning_rate": 0.0004575608450887978, "loss": 2.8766, "step": 14545 }, { "epoch": 1.1490395056366114, "grad_norm": 0.1313878876303242, "learning_rate": 0.0004572174898687707, "loss": 2.859, "step": 14550 }, { "epoch": 1.1494343645732563, "grad_norm": 0.08274826093760923, "learning_rate": 0.0004568741549713421, "loss": 2.811, "step": 14555 }, { "epoch": 1.149829223509901, "grad_norm": 0.10999418586408423, "learning_rate": 0.00045653084055960333, "loss": 2.8488, "step": 14560 }, { "epoch": 1.150224082446546, "grad_norm": 0.08889001070312921, "learning_rate": 0.00045618754679663595, "loss": 3.0574, "step": 14565 }, { "epoch": 1.1506189413831909, "grad_norm": 0.08096476754799357, "learning_rate": 0.0004558442738455119, "loss": 2.8802, "step": 14570 }, { "epoch": 1.1510138003198358, "grad_norm": 0.08735125071918044, "learning_rate": 0.0004555010218692929, "loss": 2.8958, "step": 14575 }, { "epoch": 1.1514086592564805, "grad_norm": 0.0880923955707594, "learning_rate": 0.00045515779103103113, "loss": 2.7943, "step": 14580 }, { "epoch": 1.1518035181931254, "grad_norm": 0.09775255476810306, "learning_rate": 0.0004548145814937683, "loss": 2.7128, "step": 14585 }, { "epoch": 1.1521983771297704, "grad_norm": 0.12835147014717926, "learning_rate": 0.00045447139342053635, "loss": 2.7436, "step": 14590 }, { "epoch": 1.1525932360664153, "grad_norm": 0.09850036846946111, "learning_rate": 0.00045412822697435683, "loss": 2.6644, "step": 14595 }, { "epoch": 1.1529880950030602, "grad_norm": 0.09038277629263712, "learning_rate": 0.0004537850823182411, "loss": 3.1667, "step": 14600 }, { "epoch": 1.1533829539397051, "grad_norm": 0.08880783951055009, "learning_rate": 0.0004534419596151901, "loss": 3.1715, "step": 14605 }, { "epoch": 1.1537778128763498, "grad_norm": 0.09535360745793735, "learning_rate": 0.00045309885902819437, "loss": 2.7152, "step": 14610 }, { "epoch": 1.1541726718129948, "grad_norm": 0.08407570095599783, "learning_rate": 0.00045275578072023406, "loss": 2.7645, "step": 14615 }, { "epoch": 1.1545675307496397, "grad_norm": 0.1463810023907988, "learning_rate": 0.00045241272485427856, "loss": 3.034, "step": 14620 }, { "epoch": 1.1549623896862846, "grad_norm": 0.08745601622941797, "learning_rate": 0.00045206969159328664, "loss": 2.8955, "step": 14625 }, { "epoch": 1.1553572486229295, "grad_norm": 0.08918609310991224, "learning_rate": 0.0004517266811002064, "loss": 2.7716, "step": 14630 }, { "epoch": 1.1557521075595742, "grad_norm": 0.1304158584505904, "learning_rate": 0.0004513836935379752, "loss": 2.9698, "step": 14635 }, { "epoch": 1.1561469664962192, "grad_norm": 0.09364771717119719, "learning_rate": 0.00045104072906951916, "loss": 2.6148, "step": 14640 }, { "epoch": 1.156541825432864, "grad_norm": 0.11941398871025333, "learning_rate": 0.0004506977878577538, "loss": 2.7944, "step": 14645 }, { "epoch": 1.156936684369509, "grad_norm": 0.08728934154038889, "learning_rate": 0.0004503548700655835, "loss": 2.8583, "step": 14650 }, { "epoch": 1.157331543306154, "grad_norm": 0.17109380103588012, "learning_rate": 0.0004500119758559015, "loss": 2.833, "step": 14655 }, { "epoch": 1.1577264022427989, "grad_norm": 0.12416497870676918, "learning_rate": 0.00044966910539158975, "loss": 2.7903, "step": 14660 }, { "epoch": 1.1581212611794436, "grad_norm": 0.09878507244378383, "learning_rate": 0.0004493262588355189, "loss": 2.8955, "step": 14665 }, { "epoch": 1.1585161201160885, "grad_norm": 0.12910886267045968, "learning_rate": 0.0004489834363505483, "loss": 2.7873, "step": 14670 }, { "epoch": 1.1589109790527334, "grad_norm": 0.09029984868838664, "learning_rate": 0.0004486406380995261, "loss": 2.9093, "step": 14675 }, { "epoch": 1.1593058379893784, "grad_norm": 0.08654933378407274, "learning_rate": 0.0004482978642452885, "loss": 3.0143, "step": 14680 }, { "epoch": 1.159700696926023, "grad_norm": 0.09551780711583763, "learning_rate": 0.0004479551149506605, "loss": 2.7553, "step": 14685 }, { "epoch": 1.160095555862668, "grad_norm": 0.10006165458116931, "learning_rate": 0.0004476123903784554, "loss": 2.9525, "step": 14690 }, { "epoch": 1.160490414799313, "grad_norm": 0.08667854162055562, "learning_rate": 0.0004472696906914743, "loss": 2.9339, "step": 14695 }, { "epoch": 1.1608852737359578, "grad_norm": 0.09972841093712254, "learning_rate": 0.0004469270160525071, "loss": 2.7727, "step": 14700 }, { "epoch": 1.1612801326726028, "grad_norm": 0.10046927936006907, "learning_rate": 0.0004465843666243313, "loss": 2.8386, "step": 14705 }, { "epoch": 1.1616749916092477, "grad_norm": 0.07847202546769626, "learning_rate": 0.00044624174256971275, "loss": 2.8728, "step": 14710 }, { "epoch": 1.1620698505458924, "grad_norm": 0.18356407235695119, "learning_rate": 0.00044589914405140506, "loss": 2.9155, "step": 14715 }, { "epoch": 1.1624647094825373, "grad_norm": 0.10541137234368157, "learning_rate": 0.00044555657123214985, "loss": 2.9267, "step": 14720 }, { "epoch": 1.1628595684191823, "grad_norm": 0.10621469803549544, "learning_rate": 0.0004452140242746765, "loss": 2.9434, "step": 14725 }, { "epoch": 1.1632544273558272, "grad_norm": 0.08127117045424481, "learning_rate": 0.0004448715033417018, "loss": 2.7409, "step": 14730 }, { "epoch": 1.163649286292472, "grad_norm": 0.11412854239494447, "learning_rate": 0.0004445290085959309, "loss": 3.1597, "step": 14735 }, { "epoch": 1.1640441452291168, "grad_norm": 0.11264251516245899, "learning_rate": 0.00044418654020005575, "loss": 2.7665, "step": 14740 }, { "epoch": 1.1644390041657617, "grad_norm": 0.08027092835336964, "learning_rate": 0.0004438440983167564, "loss": 2.7494, "step": 14745 }, { "epoch": 1.1648338631024067, "grad_norm": 0.13162700373595937, "learning_rate": 0.00044350168310869955, "loss": 2.7572, "step": 14750 }, { "epoch": 1.1652287220390516, "grad_norm": 0.0877425654705483, "learning_rate": 0.00044315929473853977, "loss": 2.699, "step": 14755 }, { "epoch": 1.1656235809756965, "grad_norm": 0.10754661785065389, "learning_rate": 0.00044281693336891906, "loss": 2.9317, "step": 14760 }, { "epoch": 1.1660184399123412, "grad_norm": 0.10520627018070652, "learning_rate": 0.00044247459916246605, "loss": 3.0513, "step": 14765 }, { "epoch": 1.1664132988489861, "grad_norm": 0.09825656201612731, "learning_rate": 0.0004421322922817969, "loss": 3.1121, "step": 14770 }, { "epoch": 1.166808157785631, "grad_norm": 0.08380657403652285, "learning_rate": 0.00044179001288951466, "loss": 2.8952, "step": 14775 }, { "epoch": 1.167203016722276, "grad_norm": 0.14500648312968917, "learning_rate": 0.0004414477611482091, "loss": 2.8773, "step": 14780 }, { "epoch": 1.167597875658921, "grad_norm": 0.10871204211287061, "learning_rate": 0.00044110553722045726, "loss": 2.9886, "step": 14785 }, { "epoch": 1.1679927345955656, "grad_norm": 0.09366901053129681, "learning_rate": 0.0004407633412688226, "loss": 2.7306, "step": 14790 }, { "epoch": 1.1683875935322106, "grad_norm": 0.09081895968156702, "learning_rate": 0.0004404211734558555, "loss": 2.7117, "step": 14795 }, { "epoch": 1.1687824524688555, "grad_norm": 0.11114457404792583, "learning_rate": 0.00044007903394409283, "loss": 2.7051, "step": 14800 }, { "epoch": 1.1691773114055004, "grad_norm": 0.0896069140997964, "learning_rate": 0.00043973692289605833, "loss": 2.7311, "step": 14805 }, { "epoch": 1.1695721703421453, "grad_norm": 0.11312236589291474, "learning_rate": 0.0004393948404742618, "loss": 2.903, "step": 14810 }, { "epoch": 1.1699670292787903, "grad_norm": 0.08983682209075271, "learning_rate": 0.0004390527868411997, "loss": 2.8329, "step": 14815 }, { "epoch": 1.170361888215435, "grad_norm": 0.08886869329879983, "learning_rate": 0.0004387107621593548, "loss": 2.8636, "step": 14820 }, { "epoch": 1.17075674715208, "grad_norm": 0.09414305808527838, "learning_rate": 0.000438368766591196, "loss": 3.0419, "step": 14825 }, { "epoch": 1.1711516060887248, "grad_norm": 0.08848306118830034, "learning_rate": 0.0004380268002991784, "loss": 2.8109, "step": 14830 }, { "epoch": 1.1715464650253697, "grad_norm": 0.09184862212101656, "learning_rate": 0.0004376848634457434, "loss": 2.9667, "step": 14835 }, { "epoch": 1.1719413239620144, "grad_norm": 0.09279193887620517, "learning_rate": 0.000437342956193318, "loss": 3.0258, "step": 14840 }, { "epoch": 1.1723361828986594, "grad_norm": 0.10312716515552281, "learning_rate": 0.00043700107870431554, "loss": 2.7417, "step": 14845 }, { "epoch": 1.1727310418353043, "grad_norm": 0.08752447031370011, "learning_rate": 0.00043665923114113504, "loss": 2.7211, "step": 14850 }, { "epoch": 1.1731259007719492, "grad_norm": 0.13477780564010966, "learning_rate": 0.00043631741366616136, "loss": 2.768, "step": 14855 }, { "epoch": 1.1735207597085942, "grad_norm": 0.09690850594325405, "learning_rate": 0.000435975626441765, "loss": 2.9397, "step": 14860 }, { "epoch": 1.173915618645239, "grad_norm": 0.08885306413381261, "learning_rate": 0.00043563386963030217, "loss": 2.8434, "step": 14865 }, { "epoch": 1.1743104775818838, "grad_norm": 0.1325763589951582, "learning_rate": 0.00043529214339411455, "loss": 2.7456, "step": 14870 }, { "epoch": 1.1747053365185287, "grad_norm": 0.11821634683301291, "learning_rate": 0.0004349504478955293, "loss": 2.7681, "step": 14875 }, { "epoch": 1.1751001954551736, "grad_norm": 0.10025187945678858, "learning_rate": 0.0004346087832968591, "loss": 3.0352, "step": 14880 }, { "epoch": 1.1754950543918186, "grad_norm": 0.08676527486029208, "learning_rate": 0.00043426714976040195, "loss": 2.7267, "step": 14885 }, { "epoch": 1.1758899133284635, "grad_norm": 0.10031576703561651, "learning_rate": 0.00043392554744844084, "loss": 2.9134, "step": 14890 }, { "epoch": 1.1762847722651082, "grad_norm": 0.09161770180241466, "learning_rate": 0.00043358397652324424, "loss": 2.7267, "step": 14895 }, { "epoch": 1.1766796312017531, "grad_norm": 0.10132090577023745, "learning_rate": 0.0004332424371470655, "loss": 3.0733, "step": 14900 }, { "epoch": 1.177074490138398, "grad_norm": 0.08494714370610923, "learning_rate": 0.00043290092948214306, "loss": 2.7269, "step": 14905 }, { "epoch": 1.177469349075043, "grad_norm": 0.08913344290733184, "learning_rate": 0.0004325594536907003, "loss": 2.9823, "step": 14910 }, { "epoch": 1.177864208011688, "grad_norm": 0.09672974408194861, "learning_rate": 0.00043221800993494555, "loss": 2.8506, "step": 14915 }, { "epoch": 1.1782590669483328, "grad_norm": 0.1159636599454212, "learning_rate": 0.0004318765983770717, "loss": 2.7734, "step": 14920 }, { "epoch": 1.1786539258849775, "grad_norm": 0.10959890540850467, "learning_rate": 0.00043153521917925654, "loss": 2.8628, "step": 14925 }, { "epoch": 1.1790487848216225, "grad_norm": 0.11498379292691872, "learning_rate": 0.00043119387250366227, "loss": 3.0055, "step": 14930 }, { "epoch": 1.1794436437582674, "grad_norm": 0.4544797491977506, "learning_rate": 0.00043085255851243597, "loss": 2.7664, "step": 14935 }, { "epoch": 1.1798385026949123, "grad_norm": 0.1029169941843573, "learning_rate": 0.00043051127736770895, "loss": 2.9462, "step": 14940 }, { "epoch": 1.180233361631557, "grad_norm": 0.10355750527237197, "learning_rate": 0.00043017002923159697, "loss": 2.8479, "step": 14945 }, { "epoch": 1.180628220568202, "grad_norm": 0.10062581414856642, "learning_rate": 0.0004298288142662001, "loss": 3.0466, "step": 14950 }, { "epoch": 1.1810230795048469, "grad_norm": 0.11078701592726599, "learning_rate": 0.0004294876326336027, "loss": 2.8845, "step": 14955 }, { "epoch": 1.1814179384414918, "grad_norm": 0.10813038941889992, "learning_rate": 0.0004291464844958734, "loss": 2.8032, "step": 14960 }, { "epoch": 1.1818127973781367, "grad_norm": 0.08252910726981998, "learning_rate": 0.00042880537001506463, "loss": 2.7183, "step": 14965 }, { "epoch": 1.1822076563147816, "grad_norm": 0.08705176628135403, "learning_rate": 0.0004284642893532131, "loss": 2.7584, "step": 14970 }, { "epoch": 1.1826025152514263, "grad_norm": 0.08802399079466468, "learning_rate": 0.00042812324267233923, "loss": 3.0098, "step": 14975 }, { "epoch": 1.1829973741880713, "grad_norm": 0.09113256054183225, "learning_rate": 0.00042778223013444745, "loss": 2.533, "step": 14980 }, { "epoch": 1.1833922331247162, "grad_norm": 0.12425078393402897, "learning_rate": 0.000427441251901526, "loss": 2.8967, "step": 14985 }, { "epoch": 1.1837870920613611, "grad_norm": 0.11465225607585602, "learning_rate": 0.00042710030813554673, "loss": 2.9083, "step": 14990 }, { "epoch": 1.184181950998006, "grad_norm": 0.08744017476346388, "learning_rate": 0.00042675939899846504, "loss": 2.7599, "step": 14995 }, { "epoch": 1.1845768099346508, "grad_norm": 0.14792554086080756, "learning_rate": 0.0004264185246522202, "loss": 2.8549, "step": 15000 }, { "epoch": 1.1849716688712957, "grad_norm": 0.1322380401075845, "learning_rate": 0.0004260776852587347, "loss": 3.0169, "step": 15005 }, { "epoch": 1.1853665278079406, "grad_norm": 0.12997635242013386, "learning_rate": 0.0004257368809799145, "loss": 3.0437, "step": 15010 }, { "epoch": 1.1857613867445855, "grad_norm": 0.0918500575027443, "learning_rate": 0.00042539611197764865, "loss": 2.8109, "step": 15015 }, { "epoch": 1.1861562456812305, "grad_norm": 0.08888903695091653, "learning_rate": 0.00042505537841380965, "loss": 2.8577, "step": 15020 }, { "epoch": 1.1865511046178754, "grad_norm": 0.09317180373881652, "learning_rate": 0.00042471468045025335, "loss": 3.107, "step": 15025 }, { "epoch": 1.18694596355452, "grad_norm": 0.09304635455831232, "learning_rate": 0.00042437401824881846, "loss": 2.995, "step": 15030 }, { "epoch": 1.187340822491165, "grad_norm": 0.12716930117989705, "learning_rate": 0.00042403339197132664, "loss": 2.8837, "step": 15035 }, { "epoch": 1.18773568142781, "grad_norm": 0.08543762900365676, "learning_rate": 0.0004236928017795827, "loss": 2.7521, "step": 15040 }, { "epoch": 1.1881305403644549, "grad_norm": 0.09120423931765283, "learning_rate": 0.00042335224783537407, "loss": 2.7728, "step": 15045 }, { "epoch": 1.1885253993010996, "grad_norm": 0.12370225309807471, "learning_rate": 0.0004230117303004712, "loss": 2.7481, "step": 15050 }, { "epoch": 1.1889202582377445, "grad_norm": 0.10799857682309666, "learning_rate": 0.0004226712493366271, "loss": 3.0771, "step": 15055 }, { "epoch": 1.1893151171743894, "grad_norm": 0.09542517289129132, "learning_rate": 0.00042233080510557737, "loss": 2.8172, "step": 15060 }, { "epoch": 1.1897099761110344, "grad_norm": 0.088390190993913, "learning_rate": 0.0004219903977690404, "loss": 2.6466, "step": 15065 }, { "epoch": 1.1901048350476793, "grad_norm": 0.08116740123455787, "learning_rate": 0.0004216500274887165, "loss": 2.8836, "step": 15070 }, { "epoch": 1.1904996939843242, "grad_norm": 0.10783770764994013, "learning_rate": 0.00042130969442628923, "loss": 2.7565, "step": 15075 }, { "epoch": 1.190894552920969, "grad_norm": 0.08677240203371271, "learning_rate": 0.0004209693987434239, "loss": 2.6653, "step": 15080 }, { "epoch": 1.1912894118576138, "grad_norm": 0.0859056060650663, "learning_rate": 0.0004206291406017679, "loss": 2.9011, "step": 15085 }, { "epoch": 1.1916842707942588, "grad_norm": 0.09071983476563462, "learning_rate": 0.0004202889201629513, "loss": 3.1032, "step": 15090 }, { "epoch": 1.1920791297309037, "grad_norm": 0.10108992945284022, "learning_rate": 0.000419948737588586, "loss": 2.7222, "step": 15095 }, { "epoch": 1.1924739886675486, "grad_norm": 0.09061729612270006, "learning_rate": 0.00041960859304026595, "loss": 2.89, "step": 15100 }, { "epoch": 1.1928688476041933, "grad_norm": 0.09891469299609364, "learning_rate": 0.00041926848667956686, "loss": 2.9236, "step": 15105 }, { "epoch": 1.1932637065408382, "grad_norm": 0.08660674473098429, "learning_rate": 0.0004189284186680464, "loss": 2.7389, "step": 15110 }, { "epoch": 1.1936585654774832, "grad_norm": 0.09789206347651475, "learning_rate": 0.0004185883891672444, "loss": 2.9811, "step": 15115 }, { "epoch": 1.194053424414128, "grad_norm": 0.08803954794099937, "learning_rate": 0.0004182483983386818, "loss": 2.7902, "step": 15120 }, { "epoch": 1.194448283350773, "grad_norm": 0.09848456506704945, "learning_rate": 0.0004179084463438615, "loss": 2.8381, "step": 15125 }, { "epoch": 1.1948431422874177, "grad_norm": 0.08930295419380511, "learning_rate": 0.00041756853334426794, "loss": 2.7244, "step": 15130 }, { "epoch": 1.1952380012240627, "grad_norm": 0.10236159375720152, "learning_rate": 0.00041722865950136694, "loss": 3.0553, "step": 15135 }, { "epoch": 1.1956328601607076, "grad_norm": 0.11035665483757408, "learning_rate": 0.0004168888249766058, "loss": 2.8413, "step": 15140 }, { "epoch": 1.1960277190973525, "grad_norm": 0.0943966657825033, "learning_rate": 0.0004165490299314132, "loss": 2.7675, "step": 15145 }, { "epoch": 1.1964225780339974, "grad_norm": 0.08660614231220809, "learning_rate": 0.0004162092745271989, "loss": 2.8029, "step": 15150 }, { "epoch": 1.1968174369706421, "grad_norm": 0.0972649686198504, "learning_rate": 0.00041586955892535394, "loss": 2.6518, "step": 15155 }, { "epoch": 1.197212295907287, "grad_norm": 0.09184758166709811, "learning_rate": 0.0004155298832872504, "loss": 2.9118, "step": 15160 }, { "epoch": 1.197607154843932, "grad_norm": 0.07788419348849078, "learning_rate": 0.0004151902477742414, "loss": 2.8171, "step": 15165 }, { "epoch": 1.198002013780577, "grad_norm": 0.11554582288755062, "learning_rate": 0.00041485065254766106, "loss": 2.987, "step": 15170 }, { "epoch": 1.1983968727172218, "grad_norm": 0.10795526909159249, "learning_rate": 0.0004145110977688243, "loss": 2.9048, "step": 15175 }, { "epoch": 1.1987917316538668, "grad_norm": 0.078416729768956, "learning_rate": 0.00041417158359902676, "loss": 2.7192, "step": 15180 }, { "epoch": 1.1991865905905115, "grad_norm": 0.12443201223804568, "learning_rate": 0.000413832110199545, "loss": 2.7395, "step": 15185 }, { "epoch": 1.1995814495271564, "grad_norm": 0.09090615999306122, "learning_rate": 0.0004134926777316359, "loss": 2.6514, "step": 15190 }, { "epoch": 1.1999763084638013, "grad_norm": 0.09302484075106852, "learning_rate": 0.0004131532863565371, "loss": 2.8773, "step": 15195 }, { "epoch": 1.2003711674004462, "grad_norm": 0.09344144136303388, "learning_rate": 0.00041281393623546675, "loss": 3.0966, "step": 15200 }, { "epoch": 1.200766026337091, "grad_norm": 0.11827112133381397, "learning_rate": 0.0004124746275296234, "loss": 2.7403, "step": 15205 }, { "epoch": 1.2011608852737359, "grad_norm": 0.11590768350887151, "learning_rate": 0.0004121353604001857, "loss": 2.8342, "step": 15210 }, { "epoch": 1.2015557442103808, "grad_norm": 0.10637596842331495, "learning_rate": 0.0004117961350083128, "loss": 2.7659, "step": 15215 }, { "epoch": 1.2019506031470257, "grad_norm": 0.0936373189485554, "learning_rate": 0.00041145695151514406, "loss": 3.061, "step": 15220 }, { "epoch": 1.2023454620836707, "grad_norm": 0.08925330870424429, "learning_rate": 0.0004111178100817987, "loss": 2.7027, "step": 15225 }, { "epoch": 1.2027403210203156, "grad_norm": 0.10697808711232522, "learning_rate": 0.0004107787108693761, "loss": 2.7546, "step": 15230 }, { "epoch": 1.2031351799569603, "grad_norm": 0.12333181995812718, "learning_rate": 0.00041043965403895566, "loss": 2.8982, "step": 15235 }, { "epoch": 1.2035300388936052, "grad_norm": 0.18755663758060342, "learning_rate": 0.0004101006397515965, "loss": 2.7584, "step": 15240 }, { "epoch": 1.2039248978302501, "grad_norm": 0.13662729259648207, "learning_rate": 0.0004097616681683375, "loss": 2.8816, "step": 15245 }, { "epoch": 1.204319756766895, "grad_norm": 0.08940580847358343, "learning_rate": 0.00040942273945019747, "loss": 2.8809, "step": 15250 }, { "epoch": 1.20471461570354, "grad_norm": 0.12676277502916536, "learning_rate": 0.0004090838537581747, "loss": 2.9255, "step": 15255 }, { "epoch": 1.2051094746401847, "grad_norm": 0.07874872261274762, "learning_rate": 0.000408745011253247, "loss": 3.0293, "step": 15260 }, { "epoch": 1.2055043335768296, "grad_norm": 0.09020194598326181, "learning_rate": 0.00040840621209637165, "loss": 2.8643, "step": 15265 }, { "epoch": 1.2058991925134745, "grad_norm": 0.0859691249276068, "learning_rate": 0.0004080674564484857, "loss": 2.8828, "step": 15270 }, { "epoch": 1.2062940514501195, "grad_norm": 0.08206720284753316, "learning_rate": 0.0004077287444705053, "loss": 2.7409, "step": 15275 }, { "epoch": 1.2066889103867644, "grad_norm": 0.09202411899812205, "learning_rate": 0.0004073900763233254, "loss": 2.812, "step": 15280 }, { "epoch": 1.2070837693234093, "grad_norm": 0.1040003680946623, "learning_rate": 0.00040705145216782083, "loss": 2.8536, "step": 15285 }, { "epoch": 1.207478628260054, "grad_norm": 0.110308842475304, "learning_rate": 0.00040671287216484506, "loss": 2.7035, "step": 15290 }, { "epoch": 1.207873487196699, "grad_norm": 0.0872041614682184, "learning_rate": 0.00040637433647523096, "loss": 2.7623, "step": 15295 }, { "epoch": 1.2082683461333439, "grad_norm": 0.08418318869101492, "learning_rate": 0.00040603584525979007, "loss": 2.6771, "step": 15300 }, { "epoch": 1.2086632050699888, "grad_norm": 0.12966553518660384, "learning_rate": 0.0004056973986793129, "loss": 2.8635, "step": 15305 }, { "epoch": 1.2090580640066335, "grad_norm": 0.10091515901017464, "learning_rate": 0.00040535899689456877, "loss": 2.7773, "step": 15310 }, { "epoch": 1.2094529229432784, "grad_norm": 0.10790493304883593, "learning_rate": 0.00040502064006630564, "loss": 2.8642, "step": 15315 }, { "epoch": 1.2098477818799234, "grad_norm": 0.09564544142556078, "learning_rate": 0.0004046823283552502, "loss": 2.8211, "step": 15320 }, { "epoch": 1.2102426408165683, "grad_norm": 0.08202146471170313, "learning_rate": 0.0004043440619221076, "loss": 2.6796, "step": 15325 }, { "epoch": 1.2106374997532132, "grad_norm": 0.11051223209383565, "learning_rate": 0.0004040058409275617, "loss": 2.9679, "step": 15330 }, { "epoch": 1.2110323586898581, "grad_norm": 0.1078894397381991, "learning_rate": 0.0004036676655322744, "loss": 2.8822, "step": 15335 }, { "epoch": 1.2114272176265029, "grad_norm": 0.07901746141334998, "learning_rate": 0.0004033295358968865, "loss": 2.6309, "step": 15340 }, { "epoch": 1.2118220765631478, "grad_norm": 0.10190062256284547, "learning_rate": 0.00040299145218201655, "loss": 2.7816, "step": 15345 }, { "epoch": 1.2122169354997927, "grad_norm": 0.09016289052306015, "learning_rate": 0.00040265341454826154, "loss": 2.7997, "step": 15350 }, { "epoch": 1.2126117944364376, "grad_norm": 0.08946752960829839, "learning_rate": 0.00040231542315619643, "loss": 2.7615, "step": 15355 }, { "epoch": 1.2130066533730826, "grad_norm": 0.10419965140477114, "learning_rate": 0.0004019774781663743, "loss": 2.7921, "step": 15360 }, { "epoch": 1.2134015123097273, "grad_norm": 0.0929792205552894, "learning_rate": 0.0004016395797393264, "loss": 2.7975, "step": 15365 }, { "epoch": 1.2137963712463722, "grad_norm": 0.1011124719484364, "learning_rate": 0.0004013017280355613, "loss": 3.0826, "step": 15370 }, { "epoch": 1.2141912301830171, "grad_norm": 0.08601888396365508, "learning_rate": 0.00040096392321556585, "loss": 2.9411, "step": 15375 }, { "epoch": 1.214586089119662, "grad_norm": 0.10439356000460198, "learning_rate": 0.0004006261654398045, "loss": 2.8695, "step": 15380 }, { "epoch": 1.214980948056307, "grad_norm": 0.09890908631299365, "learning_rate": 0.0004002884548687194, "loss": 2.8585, "step": 15385 }, { "epoch": 1.215375806992952, "grad_norm": 0.09461117647691132, "learning_rate": 0.0003999507916627302, "loss": 3.0854, "step": 15390 }, { "epoch": 1.2157706659295966, "grad_norm": 0.09681638383505031, "learning_rate": 0.00039961317598223394, "loss": 2.6979, "step": 15395 }, { "epoch": 1.2161655248662415, "grad_norm": 0.13167438260203712, "learning_rate": 0.00039927560798760545, "loss": 2.905, "step": 15400 }, { "epoch": 1.2165603838028864, "grad_norm": 0.14591752312430814, "learning_rate": 0.00039893808783919647, "loss": 2.7296, "step": 15405 }, { "epoch": 1.2169552427395314, "grad_norm": 0.11212545128069362, "learning_rate": 0.00039860061569733625, "loss": 2.8469, "step": 15410 }, { "epoch": 1.217350101676176, "grad_norm": 0.13230943872961462, "learning_rate": 0.00039826319172233136, "loss": 3.0319, "step": 15415 }, { "epoch": 1.217744960612821, "grad_norm": 0.10167522219417538, "learning_rate": 0.0003979258160744652, "loss": 2.9935, "step": 15420 }, { "epoch": 1.218139819549466, "grad_norm": 0.11335394247110854, "learning_rate": 0.00039758848891399847, "loss": 2.8742, "step": 15425 }, { "epoch": 1.2185346784861109, "grad_norm": 0.1045490103418497, "learning_rate": 0.0003972512104011686, "loss": 2.8252, "step": 15430 }, { "epoch": 1.2189295374227558, "grad_norm": 0.08605598822013677, "learning_rate": 0.00039691398069619, "loss": 2.9333, "step": 15435 }, { "epoch": 1.2193243963594007, "grad_norm": 0.10656602929856836, "learning_rate": 0.00039657679995925405, "loss": 2.8134, "step": 15440 }, { "epoch": 1.2197192552960454, "grad_norm": 0.09131502168371063, "learning_rate": 0.00039623966835052873, "loss": 2.8339, "step": 15445 }, { "epoch": 1.2201141142326903, "grad_norm": 0.0858181272007071, "learning_rate": 0.0003959025860301586, "loss": 2.8889, "step": 15450 }, { "epoch": 1.2205089731693353, "grad_norm": 0.09618849128812783, "learning_rate": 0.0003955655531582649, "loss": 2.6032, "step": 15455 }, { "epoch": 1.2209038321059802, "grad_norm": 0.13030326723395916, "learning_rate": 0.0003952285698949455, "loss": 2.7555, "step": 15460 }, { "epoch": 1.2212986910426251, "grad_norm": 0.09096918257179008, "learning_rate": 0.0003948916364002744, "loss": 2.9324, "step": 15465 }, { "epoch": 1.2216935499792698, "grad_norm": 0.13831776318080255, "learning_rate": 0.0003945547528343023, "loss": 2.8574, "step": 15470 }, { "epoch": 1.2220884089159147, "grad_norm": 0.10262368063338353, "learning_rate": 0.000394217919357056, "loss": 2.8038, "step": 15475 }, { "epoch": 1.2224832678525597, "grad_norm": 0.10101956187350747, "learning_rate": 0.00039388113612853857, "loss": 2.7941, "step": 15480 }, { "epoch": 1.2228781267892046, "grad_norm": 0.11125288298765663, "learning_rate": 0.000393544403308729, "loss": 3.0577, "step": 15485 }, { "epoch": 1.2232729857258495, "grad_norm": 0.0968377845482275, "learning_rate": 0.0003932077210575827, "loss": 3.0244, "step": 15490 }, { "epoch": 1.2236678446624942, "grad_norm": 0.09346798237671393, "learning_rate": 0.0003928710895350308, "loss": 2.9453, "step": 15495 }, { "epoch": 1.2240627035991392, "grad_norm": 0.08062326422780508, "learning_rate": 0.0003925345089009805, "loss": 2.8311, "step": 15500 }, { "epoch": 1.224457562535784, "grad_norm": 0.10019193410177159, "learning_rate": 0.00039219797931531455, "loss": 2.8762, "step": 15505 }, { "epoch": 1.224852421472429, "grad_norm": 0.08912732817890105, "learning_rate": 0.0003918615009378918, "loss": 3.1026, "step": 15510 }, { "epoch": 1.225247280409074, "grad_norm": 0.098887721200479, "learning_rate": 0.00039152507392854654, "loss": 2.7558, "step": 15515 }, { "epoch": 1.2256421393457186, "grad_norm": 0.1252404812952231, "learning_rate": 0.0003911886984470886, "loss": 2.6519, "step": 15520 }, { "epoch": 1.2260369982823636, "grad_norm": 0.09033793716099629, "learning_rate": 0.0003908523746533037, "loss": 3.0944, "step": 15525 }, { "epoch": 1.2264318572190085, "grad_norm": 0.11121391421450424, "learning_rate": 0.00039051610270695247, "loss": 2.8777, "step": 15530 }, { "epoch": 1.2268267161556534, "grad_norm": 0.09743242401403976, "learning_rate": 0.0003901798827677716, "loss": 2.6964, "step": 15535 }, { "epoch": 1.2272215750922983, "grad_norm": 0.08358805197649703, "learning_rate": 0.00038984371499547257, "loss": 2.8721, "step": 15540 }, { "epoch": 1.2276164340289433, "grad_norm": 0.08432362368680256, "learning_rate": 0.00038950759954974187, "loss": 2.7503, "step": 15545 }, { "epoch": 1.228011292965588, "grad_norm": 0.084442429474757, "learning_rate": 0.00038917153659024165, "loss": 2.6117, "step": 15550 }, { "epoch": 1.228406151902233, "grad_norm": 0.08781340775765387, "learning_rate": 0.0003888355262766089, "loss": 2.7135, "step": 15555 }, { "epoch": 1.2288010108388778, "grad_norm": 0.08315793500956473, "learning_rate": 0.0003884995687684558, "loss": 2.8058, "step": 15560 }, { "epoch": 1.2291958697755228, "grad_norm": 0.08159529073098971, "learning_rate": 0.00038816366422536906, "loss": 2.6332, "step": 15565 }, { "epoch": 1.2295907287121675, "grad_norm": 0.10667950018155135, "learning_rate": 0.0003878278128069107, "loss": 2.7601, "step": 15570 }, { "epoch": 1.2299855876488124, "grad_norm": 0.08438907310669408, "learning_rate": 0.0003874920146726171, "loss": 2.6769, "step": 15575 }, { "epoch": 1.2303804465854573, "grad_norm": 0.12518951071351994, "learning_rate": 0.0003871562699819995, "loss": 2.6907, "step": 15580 }, { "epoch": 1.2307753055221022, "grad_norm": 0.08105111445148236, "learning_rate": 0.0003868205788945437, "loss": 2.7772, "step": 15585 }, { "epoch": 1.2311701644587472, "grad_norm": 0.08855546084452788, "learning_rate": 0.00038648494156971015, "loss": 2.8723, "step": 15590 }, { "epoch": 1.231565023395392, "grad_norm": 0.08164940622724594, "learning_rate": 0.00038614935816693363, "loss": 2.9286, "step": 15595 }, { "epoch": 1.2319598823320368, "grad_norm": 0.07702580385124709, "learning_rate": 0.0003858138288456234, "loss": 2.7453, "step": 15600 }, { "epoch": 1.2323547412686817, "grad_norm": 0.10309115658800137, "learning_rate": 0.0003854783537651629, "loss": 2.8715, "step": 15605 }, { "epoch": 1.2327496002053266, "grad_norm": 0.08769547607385908, "learning_rate": 0.00038514293308491017, "loss": 2.6907, "step": 15610 }, { "epoch": 1.2331444591419716, "grad_norm": 0.08511561022673197, "learning_rate": 0.0003848075669641968, "loss": 3.0765, "step": 15615 }, { "epoch": 1.2335393180786165, "grad_norm": 0.08874039749773441, "learning_rate": 0.0003844722555623291, "loss": 2.691, "step": 15620 }, { "epoch": 1.2339341770152612, "grad_norm": 0.08257050769103476, "learning_rate": 0.0003841369990385869, "loss": 2.9085, "step": 15625 }, { "epoch": 1.2343290359519061, "grad_norm": 0.10755089418945557, "learning_rate": 0.00038380179755222426, "loss": 2.856, "step": 15630 }, { "epoch": 1.234723894888551, "grad_norm": 0.11223040730233108, "learning_rate": 0.00038346665126246887, "loss": 2.901, "step": 15635 }, { "epoch": 1.235118753825196, "grad_norm": 0.09986313510635085, "learning_rate": 0.0003831315603285224, "loss": 2.727, "step": 15640 }, { "epoch": 1.235513612761841, "grad_norm": 0.08236835548443078, "learning_rate": 0.00038279652490955996, "loss": 2.7547, "step": 15645 }, { "epoch": 1.2359084716984858, "grad_norm": 0.08660100924292423, "learning_rate": 0.00038246154516473075, "loss": 2.7227, "step": 15650 }, { "epoch": 1.2363033306351305, "grad_norm": 0.09693524833841394, "learning_rate": 0.00038212662125315704, "loss": 2.7592, "step": 15655 }, { "epoch": 1.2366981895717755, "grad_norm": 0.288429536948322, "learning_rate": 0.0003817917533339349, "loss": 2.6379, "step": 15660 }, { "epoch": 1.2370930485084204, "grad_norm": 0.09764366848057722, "learning_rate": 0.00038145694156613356, "loss": 2.7974, "step": 15665 }, { "epoch": 1.2374879074450653, "grad_norm": 0.09413116397976262, "learning_rate": 0.00038112218610879587, "loss": 2.8991, "step": 15670 }, { "epoch": 1.23788276638171, "grad_norm": 0.09973203147629521, "learning_rate": 0.00038078748712093755, "loss": 2.8829, "step": 15675 }, { "epoch": 1.238277625318355, "grad_norm": 0.10080117805198695, "learning_rate": 0.0003804528447615479, "loss": 2.8747, "step": 15680 }, { "epoch": 1.2386724842549999, "grad_norm": 0.09039588152502828, "learning_rate": 0.00038011825918958904, "loss": 2.7639, "step": 15685 }, { "epoch": 1.2390673431916448, "grad_norm": 0.10264482157201046, "learning_rate": 0.0003797837305639963, "loss": 2.8921, "step": 15690 }, { "epoch": 1.2394622021282897, "grad_norm": 0.08735552764373365, "learning_rate": 0.00037944925904367764, "loss": 2.7626, "step": 15695 }, { "epoch": 1.2398570610649347, "grad_norm": 0.0727606896700857, "learning_rate": 0.0003791148447875144, "loss": 2.7189, "step": 15700 }, { "epoch": 1.2402519200015794, "grad_norm": 0.10209016370295013, "learning_rate": 0.00037878048795436024, "loss": 2.735, "step": 15705 }, { "epoch": 1.2406467789382243, "grad_norm": 0.10271859539947381, "learning_rate": 0.0003784461887030418, "loss": 2.7513, "step": 15710 }, { "epoch": 1.2410416378748692, "grad_norm": 0.08047798833269697, "learning_rate": 0.00037811194719235844, "loss": 2.8947, "step": 15715 }, { "epoch": 1.2414364968115141, "grad_norm": 0.10294307018979053, "learning_rate": 0.0003777777635810817, "loss": 2.8025, "step": 15720 }, { "epoch": 1.241831355748159, "grad_norm": 0.16237758172085093, "learning_rate": 0.000377443638027956, "loss": 2.7834, "step": 15725 }, { "epoch": 1.2422262146848038, "grad_norm": 0.09090334185694222, "learning_rate": 0.00037710957069169793, "loss": 3.0148, "step": 15730 }, { "epoch": 1.2426210736214487, "grad_norm": 0.08495415026598166, "learning_rate": 0.00037677556173099677, "loss": 2.9069, "step": 15735 }, { "epoch": 1.2430159325580936, "grad_norm": 0.08189822651123868, "learning_rate": 0.0003764416113045137, "loss": 2.793, "step": 15740 }, { "epoch": 1.2434107914947385, "grad_norm": 0.0885027265906985, "learning_rate": 0.0003761077195708823, "loss": 2.9059, "step": 15745 }, { "epoch": 1.2438056504313835, "grad_norm": 0.10732985499571306, "learning_rate": 0.0003757738866887081, "loss": 2.8658, "step": 15750 }, { "epoch": 1.2442005093680284, "grad_norm": 0.08875713601540455, "learning_rate": 0.00037544011281656896, "loss": 2.8563, "step": 15755 }, { "epoch": 1.244595368304673, "grad_norm": 0.10862598738062988, "learning_rate": 0.00037510639811301456, "loss": 2.8958, "step": 15760 }, { "epoch": 1.244990227241318, "grad_norm": 0.08840814540419331, "learning_rate": 0.00037477274273656637, "loss": 2.7262, "step": 15765 }, { "epoch": 1.245385086177963, "grad_norm": 0.09531024873845437, "learning_rate": 0.00037443914684571767, "loss": 2.8163, "step": 15770 }, { "epoch": 1.2457799451146079, "grad_norm": 0.16547602554940044, "learning_rate": 0.0003741056105989337, "loss": 3.0475, "step": 15775 }, { "epoch": 1.2461748040512526, "grad_norm": 0.09456496112134143, "learning_rate": 0.0003737721341546512, "loss": 2.8871, "step": 15780 }, { "epoch": 1.2465696629878975, "grad_norm": 0.09844349957856813, "learning_rate": 0.00037343871767127855, "loss": 2.8699, "step": 15785 }, { "epoch": 1.2469645219245424, "grad_norm": 0.09272002253256882, "learning_rate": 0.00037310536130719563, "loss": 2.7756, "step": 15790 }, { "epoch": 1.2473593808611874, "grad_norm": 0.09073314400592368, "learning_rate": 0.0003727720652207537, "loss": 2.7998, "step": 15795 }, { "epoch": 1.2477542397978323, "grad_norm": 0.08584020924574372, "learning_rate": 0.00037243882957027565, "loss": 2.9189, "step": 15800 }, { "epoch": 1.2481490987344772, "grad_norm": 0.08740742489593178, "learning_rate": 0.00037210565451405555, "loss": 2.8881, "step": 15805 }, { "epoch": 1.248543957671122, "grad_norm": 0.09918230232475642, "learning_rate": 0.00037177254021035825, "loss": 2.8974, "step": 15810 }, { "epoch": 1.2489388166077668, "grad_norm": 0.09216154507137445, "learning_rate": 0.00037143948681742024, "loss": 2.7046, "step": 15815 }, { "epoch": 1.2493336755444118, "grad_norm": 0.08034100744526915, "learning_rate": 0.00037110649449344896, "loss": 2.9312, "step": 15820 }, { "epoch": 1.2497285344810567, "grad_norm": 0.09295790575438553, "learning_rate": 0.0003707735633966227, "loss": 2.8141, "step": 15825 }, { "epoch": 1.2501233934177014, "grad_norm": 0.08281385022132634, "learning_rate": 0.00037044069368509107, "loss": 2.8127, "step": 15830 }, { "epoch": 1.2505182523543463, "grad_norm": 0.1017810083532629, "learning_rate": 0.00037010788551697404, "loss": 2.7636, "step": 15835 }, { "epoch": 1.2509131112909913, "grad_norm": 0.08651496680430432, "learning_rate": 0.0003697751390503626, "loss": 2.826, "step": 15840 }, { "epoch": 1.2513079702276362, "grad_norm": 0.08632278729380555, "learning_rate": 0.00036944245444331836, "loss": 2.7777, "step": 15845 }, { "epoch": 1.251702829164281, "grad_norm": 0.12254930501286554, "learning_rate": 0.0003691098318538735, "loss": 2.721, "step": 15850 }, { "epoch": 1.252097688100926, "grad_norm": 0.07977973712638471, "learning_rate": 0.0003687772714400308, "loss": 2.7858, "step": 15855 }, { "epoch": 1.252492547037571, "grad_norm": 0.09145410803486556, "learning_rate": 0.00036844477335976354, "loss": 2.8652, "step": 15860 }, { "epoch": 1.2528874059742157, "grad_norm": 0.0822673272337069, "learning_rate": 0.0003681123377710154, "loss": 2.6668, "step": 15865 }, { "epoch": 1.2532822649108606, "grad_norm": 0.10511463604946808, "learning_rate": 0.00036777996483170004, "loss": 2.97, "step": 15870 }, { "epoch": 1.2536771238475055, "grad_norm": 0.10941739507372573, "learning_rate": 0.0003674476546997021, "loss": 2.7636, "step": 15875 }, { "epoch": 1.2540719827841504, "grad_norm": 0.09492069318326593, "learning_rate": 0.00036711540753287566, "loss": 2.9375, "step": 15880 }, { "epoch": 1.2544668417207951, "grad_norm": 0.09522476194739923, "learning_rate": 0.0003667832234890452, "loss": 3.1016, "step": 15885 }, { "epoch": 1.25486170065744, "grad_norm": 0.11431749852624856, "learning_rate": 0.0003664511027260052, "loss": 2.8999, "step": 15890 }, { "epoch": 1.255256559594085, "grad_norm": 0.08593855980757092, "learning_rate": 0.00036611904540152014, "loss": 2.9103, "step": 15895 }, { "epoch": 1.25565141853073, "grad_norm": 0.10703124064070407, "learning_rate": 0.00036578705167332405, "loss": 2.7182, "step": 15900 }, { "epoch": 1.2560462774673748, "grad_norm": 0.0999381314220832, "learning_rate": 0.0003654551216991211, "loss": 3.0116, "step": 15905 }, { "epoch": 1.2564411364040198, "grad_norm": 0.08694248957758015, "learning_rate": 0.0003651232556365851, "loss": 2.9492, "step": 15910 }, { "epoch": 1.2568359953406645, "grad_norm": 0.09963686643375014, "learning_rate": 0.0003647914536433595, "loss": 2.9314, "step": 15915 }, { "epoch": 1.2572308542773094, "grad_norm": 0.10806462793224401, "learning_rate": 0.0003644597158770571, "loss": 2.7903, "step": 15920 }, { "epoch": 1.2576257132139543, "grad_norm": 0.08598211353606897, "learning_rate": 0.00036412804249526053, "loss": 2.9005, "step": 15925 }, { "epoch": 1.2580205721505993, "grad_norm": 0.08248407433173914, "learning_rate": 0.0003637964336555216, "loss": 2.7793, "step": 15930 }, { "epoch": 1.258415431087244, "grad_norm": 0.09619138557926421, "learning_rate": 0.0003634648895153615, "loss": 2.6483, "step": 15935 }, { "epoch": 1.258810290023889, "grad_norm": 0.08565889412070633, "learning_rate": 0.0003631334102322708, "loss": 2.6355, "step": 15940 }, { "epoch": 1.2592051489605338, "grad_norm": 0.1146612508953446, "learning_rate": 0.00036280199596370907, "loss": 2.9349, "step": 15945 }, { "epoch": 1.2596000078971787, "grad_norm": 0.11011836020501078, "learning_rate": 0.0003624706468671052, "loss": 2.742, "step": 15950 }, { "epoch": 1.2599948668338237, "grad_norm": 0.08421646497940753, "learning_rate": 0.0003621393630998571, "loss": 2.6561, "step": 15955 }, { "epoch": 1.2603897257704686, "grad_norm": 0.2769656886079538, "learning_rate": 0.0003618081448193314, "loss": 2.8931, "step": 15960 }, { "epoch": 1.2607845847071135, "grad_norm": 0.1184983655029801, "learning_rate": 0.00036147699218286403, "loss": 2.6665, "step": 15965 }, { "epoch": 1.2611794436437582, "grad_norm": 0.07988895813544357, "learning_rate": 0.0003611459053477592, "loss": 2.823, "step": 15970 }, { "epoch": 1.2615743025804032, "grad_norm": 0.09844380145947114, "learning_rate": 0.0003608148844712904, "loss": 2.7218, "step": 15975 }, { "epoch": 1.261969161517048, "grad_norm": 0.08290523130370978, "learning_rate": 0.0003604839297106996, "loss": 2.849, "step": 15980 }, { "epoch": 1.2623640204536928, "grad_norm": 0.10004884589978354, "learning_rate": 0.0003601530412231971, "loss": 2.7555, "step": 15985 }, { "epoch": 1.2627588793903377, "grad_norm": 0.07673048436528286, "learning_rate": 0.000359822219165962, "loss": 2.8431, "step": 15990 }, { "epoch": 1.2631537383269826, "grad_norm": 0.0886943889010512, "learning_rate": 0.0003594914636961419, "loss": 2.9283, "step": 15995 }, { "epoch": 1.2635485972636276, "grad_norm": 0.08779147286092684, "learning_rate": 0.00035916077497085254, "loss": 2.8271, "step": 16000 }, { "epoch": 1.2639434562002725, "grad_norm": 0.07772061090209013, "learning_rate": 0.00035883015314717813, "loss": 2.7674, "step": 16005 }, { "epoch": 1.2643383151369174, "grad_norm": 0.07564448241980315, "learning_rate": 0.00035849959838217104, "loss": 2.7194, "step": 16010 }, { "epoch": 1.2647331740735623, "grad_norm": 0.12542948963252737, "learning_rate": 0.00035816911083285166, "loss": 2.817, "step": 16015 }, { "epoch": 1.265128033010207, "grad_norm": 0.08580183339082235, "learning_rate": 0.0003578386906562088, "loss": 2.7518, "step": 16020 }, { "epoch": 1.265522891946852, "grad_norm": 0.09442850807434024, "learning_rate": 0.0003575083380091989, "loss": 2.7589, "step": 16025 }, { "epoch": 1.265917750883497, "grad_norm": 0.08896919833000135, "learning_rate": 0.00035717805304874654, "loss": 2.9422, "step": 16030 }, { "epoch": 1.2663126098201418, "grad_norm": 0.10518567485530249, "learning_rate": 0.00035684783593174394, "loss": 2.7812, "step": 16035 }, { "epoch": 1.2667074687567865, "grad_norm": 0.08639584960229398, "learning_rate": 0.0003565176868150514, "loss": 2.8305, "step": 16040 }, { "epoch": 1.2671023276934315, "grad_norm": 0.0965032502361092, "learning_rate": 0.0003561876058554966, "loss": 2.7806, "step": 16045 }, { "epoch": 1.2674971866300764, "grad_norm": 0.08473453955216471, "learning_rate": 0.000355857593209875, "loss": 2.8437, "step": 16050 }, { "epoch": 1.2678920455667213, "grad_norm": 0.0778897119348873, "learning_rate": 0.0003555276490349497, "loss": 2.7398, "step": 16055 }, { "epoch": 1.2682869045033662, "grad_norm": 0.1001458935832334, "learning_rate": 0.0003551977734874511, "loss": 2.7482, "step": 16060 }, { "epoch": 1.2686817634400112, "grad_norm": 0.10236231513629884, "learning_rate": 0.00035486796672407696, "loss": 2.8061, "step": 16065 }, { "epoch": 1.2690766223766559, "grad_norm": 0.08393387702776288, "learning_rate": 0.0003545382289014929, "loss": 2.709, "step": 16070 }, { "epoch": 1.2694714813133008, "grad_norm": 0.11710913421736624, "learning_rate": 0.00035420856017633084, "loss": 2.9216, "step": 16075 }, { "epoch": 1.2698663402499457, "grad_norm": 0.08334577798831724, "learning_rate": 0.00035387896070519065, "loss": 2.8388, "step": 16080 }, { "epoch": 1.2702611991865906, "grad_norm": 0.10322671069359682, "learning_rate": 0.0003535494306446391, "loss": 3.053, "step": 16085 }, { "epoch": 1.2706560581232353, "grad_norm": 0.10352919087952932, "learning_rate": 0.00035321997015120963, "loss": 2.7693, "step": 16090 }, { "epoch": 1.2710509170598803, "grad_norm": 0.09429451661404488, "learning_rate": 0.00035289057938140344, "loss": 2.781, "step": 16095 }, { "epoch": 1.2714457759965252, "grad_norm": 0.11250144946200534, "learning_rate": 0.00035256125849168795, "loss": 2.8241, "step": 16100 }, { "epoch": 1.2718406349331701, "grad_norm": 0.08410821615999892, "learning_rate": 0.0003522320076384974, "loss": 2.6941, "step": 16105 }, { "epoch": 1.272235493869815, "grad_norm": 0.09342308384245929, "learning_rate": 0.00035190282697823304, "loss": 2.901, "step": 16110 }, { "epoch": 1.27263035280646, "grad_norm": 0.09036839917877168, "learning_rate": 0.0003515737166672627, "loss": 2.7662, "step": 16115 }, { "epoch": 1.273025211743105, "grad_norm": 0.0895037282715694, "learning_rate": 0.00035124467686192064, "loss": 2.7651, "step": 16120 }, { "epoch": 1.2734200706797496, "grad_norm": 0.09190349942408858, "learning_rate": 0.0003509157077185078, "loss": 2.8522, "step": 16125 }, { "epoch": 1.2738149296163945, "grad_norm": 0.08777559109221646, "learning_rate": 0.00035058680939329144, "loss": 2.8174, "step": 16130 }, { "epoch": 1.2742097885530395, "grad_norm": 0.09567569127988562, "learning_rate": 0.0003502579820425052, "loss": 2.8812, "step": 16135 }, { "epoch": 1.2746046474896844, "grad_norm": 0.10920162016651613, "learning_rate": 0.0003499292258223492, "loss": 2.9368, "step": 16140 }, { "epoch": 1.274999506426329, "grad_norm": 0.10735351769642512, "learning_rate": 0.0003496005408889895, "loss": 3.1374, "step": 16145 }, { "epoch": 1.275394365362974, "grad_norm": 0.10709197014390108, "learning_rate": 0.0003492719273985584, "loss": 2.8495, "step": 16150 }, { "epoch": 1.275789224299619, "grad_norm": 0.09009947136935953, "learning_rate": 0.0003489433855071543, "loss": 2.7386, "step": 16155 }, { "epoch": 1.2761840832362639, "grad_norm": 0.10428470912800991, "learning_rate": 0.00034861491537084175, "loss": 2.9899, "step": 16160 }, { "epoch": 1.2765789421729088, "grad_norm": 0.10517579270742478, "learning_rate": 0.00034828651714565053, "loss": 2.7525, "step": 16165 }, { "epoch": 1.2769738011095537, "grad_norm": 0.12427210758111965, "learning_rate": 0.0003479581909875771, "loss": 2.7704, "step": 16170 }, { "epoch": 1.2773686600461984, "grad_norm": 0.09776490991797428, "learning_rate": 0.0003476299370525833, "loss": 2.9247, "step": 16175 }, { "epoch": 1.2777635189828433, "grad_norm": 0.13376059739648802, "learning_rate": 0.0003473017554965966, "loss": 2.8925, "step": 16180 }, { "epoch": 1.2781583779194883, "grad_norm": 0.0978299300749577, "learning_rate": 0.0003469736464755103, "loss": 2.8207, "step": 16185 }, { "epoch": 1.2785532368561332, "grad_norm": 0.08118239183023983, "learning_rate": 0.00034664561014518316, "loss": 3.0206, "step": 16190 }, { "epoch": 1.278948095792778, "grad_norm": 0.11311802718245043, "learning_rate": 0.00034631764666143925, "loss": 2.8354, "step": 16195 }, { "epoch": 1.2793429547294228, "grad_norm": 0.08964424571647432, "learning_rate": 0.0003459897561800683, "loss": 2.6874, "step": 16200 }, { "epoch": 1.2797378136660678, "grad_norm": 0.09251689687666477, "learning_rate": 0.00034566193885682517, "loss": 2.9735, "step": 16205 }, { "epoch": 1.2801326726027127, "grad_norm": 0.08833563647351872, "learning_rate": 0.00034533419484742995, "loss": 2.5636, "step": 16210 }, { "epoch": 1.2805275315393576, "grad_norm": 0.08778977855751023, "learning_rate": 0.0003450065243075682, "loss": 2.7615, "step": 16215 }, { "epoch": 1.2809223904760025, "grad_norm": 0.0935484921601917, "learning_rate": 0.0003446789273928902, "loss": 2.903, "step": 16220 }, { "epoch": 1.2813172494126475, "grad_norm": 0.10207437043727298, "learning_rate": 0.0003443514042590115, "loss": 2.815, "step": 16225 }, { "epoch": 1.2817121083492922, "grad_norm": 0.1049281648886145, "learning_rate": 0.0003440239550615124, "loss": 2.9522, "step": 16230 }, { "epoch": 1.282106967285937, "grad_norm": 0.0897571204461447, "learning_rate": 0.0003436965799559384, "loss": 3.0806, "step": 16235 }, { "epoch": 1.282501826222582, "grad_norm": 0.08134880334215797, "learning_rate": 0.00034336927909779945, "loss": 2.7178, "step": 16240 }, { "epoch": 1.282896685159227, "grad_norm": 0.08019957807372886, "learning_rate": 0.0003430420526425706, "loss": 2.7963, "step": 16245 }, { "epoch": 1.2832915440958717, "grad_norm": 0.08399836104811034, "learning_rate": 0.000342714900745691, "loss": 2.6897, "step": 16250 }, { "epoch": 1.2836864030325166, "grad_norm": 0.0897410003289323, "learning_rate": 0.00034238782356256503, "loss": 2.9305, "step": 16255 }, { "epoch": 1.2840812619691615, "grad_norm": 0.09375930185934966, "learning_rate": 0.00034206082124856106, "loss": 2.6631, "step": 16260 }, { "epoch": 1.2844761209058064, "grad_norm": 0.08732367425279366, "learning_rate": 0.00034173389395901233, "loss": 2.7926, "step": 16265 }, { "epoch": 1.2848709798424514, "grad_norm": 0.07983283445790015, "learning_rate": 0.0003414070418492161, "loss": 2.804, "step": 16270 }, { "epoch": 1.2852658387790963, "grad_norm": 0.08137133385624631, "learning_rate": 0.00034108026507443414, "loss": 2.6436, "step": 16275 }, { "epoch": 1.285660697715741, "grad_norm": 0.0890925732108901, "learning_rate": 0.0003407535637898921, "loss": 2.7, "step": 16280 }, { "epoch": 1.286055556652386, "grad_norm": 0.08868613029409816, "learning_rate": 0.00034042693815078016, "loss": 2.7763, "step": 16285 }, { "epoch": 1.2864504155890308, "grad_norm": 0.10303703139917475, "learning_rate": 0.00034010038831225266, "loss": 2.9719, "step": 16290 }, { "epoch": 1.2868452745256758, "grad_norm": 0.09256413345538433, "learning_rate": 0.0003397739144294273, "loss": 2.9464, "step": 16295 }, { "epoch": 1.2872401334623205, "grad_norm": 0.0931942928694779, "learning_rate": 0.00033944751665738635, "loss": 2.7702, "step": 16300 }, { "epoch": 1.2876349923989654, "grad_norm": 0.09417849062701508, "learning_rate": 0.00033912119515117555, "loss": 2.6456, "step": 16305 }, { "epoch": 1.2880298513356103, "grad_norm": 0.0819747250658422, "learning_rate": 0.0003387949500658045, "loss": 2.8252, "step": 16310 }, { "epoch": 1.2884247102722552, "grad_norm": 0.1011862878503725, "learning_rate": 0.0003384687815562466, "loss": 2.8043, "step": 16315 }, { "epoch": 1.2888195692089002, "grad_norm": 0.0895193708995304, "learning_rate": 0.00033814268977743877, "loss": 2.7653, "step": 16320 }, { "epoch": 1.289214428145545, "grad_norm": 0.09404483585018475, "learning_rate": 0.0003378166748842816, "loss": 2.9729, "step": 16325 }, { "epoch": 1.28960928708219, "grad_norm": 0.08818794569186825, "learning_rate": 0.0003374907370316388, "loss": 2.9754, "step": 16330 }, { "epoch": 1.2900041460188347, "grad_norm": 0.08476001074327524, "learning_rate": 0.0003371648763743382, "loss": 2.8976, "step": 16335 }, { "epoch": 1.2903990049554797, "grad_norm": 0.14171535322137713, "learning_rate": 0.0003368390930671702, "loss": 2.7727, "step": 16340 }, { "epoch": 1.2907938638921246, "grad_norm": 0.1466562219714567, "learning_rate": 0.00033651338726488865, "loss": 2.7167, "step": 16345 }, { "epoch": 1.2911887228287693, "grad_norm": 0.0976826336439171, "learning_rate": 0.000336187759122211, "loss": 2.8047, "step": 16350 }, { "epoch": 1.2915835817654142, "grad_norm": 0.09652003193768748, "learning_rate": 0.00033586220879381734, "loss": 2.7261, "step": 16355 }, { "epoch": 1.2919784407020591, "grad_norm": 0.08386191680372863, "learning_rate": 0.000335536736434351, "loss": 2.8782, "step": 16360 }, { "epoch": 1.292373299638704, "grad_norm": 0.13474640959657366, "learning_rate": 0.00033521134219841834, "loss": 3.0397, "step": 16365 }, { "epoch": 1.292768158575349, "grad_norm": 0.08978621281873132, "learning_rate": 0.0003348860262405883, "loss": 2.8658, "step": 16370 }, { "epoch": 1.293163017511994, "grad_norm": 0.08015888262802714, "learning_rate": 0.0003345607887153932, "loss": 2.6613, "step": 16375 }, { "epoch": 1.2935578764486388, "grad_norm": 0.08355313605274733, "learning_rate": 0.0003342356297773274, "loss": 2.7298, "step": 16380 }, { "epoch": 1.2939527353852835, "grad_norm": 0.09855110894699412, "learning_rate": 0.0003339105495808485, "loss": 2.6827, "step": 16385 }, { "epoch": 1.2943475943219285, "grad_norm": 0.10156063269214163, "learning_rate": 0.00033358554828037635, "loss": 2.8071, "step": 16390 }, { "epoch": 1.2947424532585734, "grad_norm": 0.09792142236032124, "learning_rate": 0.0003332606260302936, "loss": 2.6, "step": 16395 }, { "epoch": 1.2951373121952183, "grad_norm": 0.08330849812066325, "learning_rate": 0.0003329357829849451, "loss": 2.7519, "step": 16400 }, { "epoch": 1.295532171131863, "grad_norm": 0.0932816638683545, "learning_rate": 0.000332611019298638, "loss": 2.8452, "step": 16405 }, { "epoch": 1.295927030068508, "grad_norm": 0.09819707143468787, "learning_rate": 0.0003322863351256423, "loss": 3.0114, "step": 16410 }, { "epoch": 1.2963218890051529, "grad_norm": 0.0792405499880977, "learning_rate": 0.00033196173062018965, "loss": 2.9147, "step": 16415 }, { "epoch": 1.2967167479417978, "grad_norm": 0.09029150061345392, "learning_rate": 0.00033163720593647406, "loss": 2.6463, "step": 16420 }, { "epoch": 1.2971116068784427, "grad_norm": 0.09036463171349281, "learning_rate": 0.00033131276122865184, "loss": 2.6758, "step": 16425 }, { "epoch": 1.2975064658150877, "grad_norm": 0.08349613348291807, "learning_rate": 0.0003309883966508407, "loss": 2.9038, "step": 16430 }, { "epoch": 1.2979013247517324, "grad_norm": 0.08285708252863301, "learning_rate": 0.00033066411235712094, "loss": 2.7624, "step": 16435 }, { "epoch": 1.2982961836883773, "grad_norm": 0.08110775627156026, "learning_rate": 0.0003303399085015345, "loss": 2.7623, "step": 16440 }, { "epoch": 1.2986910426250222, "grad_norm": 0.0977422587947357, "learning_rate": 0.00033001578523808497, "loss": 2.8792, "step": 16445 }, { "epoch": 1.2990859015616671, "grad_norm": 0.0872527880149941, "learning_rate": 0.0003296917427207378, "loss": 2.8044, "step": 16450 }, { "epoch": 1.2994807604983118, "grad_norm": 0.08504564900881217, "learning_rate": 0.00032936778110342, "loss": 2.6715, "step": 16455 }, { "epoch": 1.2998756194349568, "grad_norm": 0.07958438357858963, "learning_rate": 0.0003290439005400203, "loss": 2.873, "step": 16460 }, { "epoch": 1.3002704783716017, "grad_norm": 0.08884074726909301, "learning_rate": 0.0003287201011843888, "loss": 2.7927, "step": 16465 }, { "epoch": 1.3006653373082466, "grad_norm": 0.08971645243119193, "learning_rate": 0.00032839638319033705, "loss": 2.7265, "step": 16470 }, { "epoch": 1.3010601962448916, "grad_norm": 0.08038678045694957, "learning_rate": 0.0003280727467116379, "loss": 2.87, "step": 16475 }, { "epoch": 1.3014550551815365, "grad_norm": 0.08382523734085524, "learning_rate": 0.0003277491919020255, "loss": 2.9422, "step": 16480 }, { "epoch": 1.3018499141181814, "grad_norm": 0.10989664787566454, "learning_rate": 0.00032742571891519534, "loss": 2.8523, "step": 16485 }, { "epoch": 1.302244773054826, "grad_norm": 0.09537497019390398, "learning_rate": 0.00032710232790480385, "loss": 2.6678, "step": 16490 }, { "epoch": 1.302639631991471, "grad_norm": 0.09163299067360693, "learning_rate": 0.0003267790190244688, "loss": 2.8702, "step": 16495 }, { "epoch": 1.303034490928116, "grad_norm": 0.1252672477507151, "learning_rate": 0.00032645579242776855, "loss": 2.8079, "step": 16500 }, { "epoch": 1.3034293498647609, "grad_norm": 0.12172064836589436, "learning_rate": 0.0003261326482682427, "loss": 2.9011, "step": 16505 }, { "epoch": 1.3038242088014056, "grad_norm": 0.10075349622794703, "learning_rate": 0.00032580958669939164, "loss": 2.8255, "step": 16510 }, { "epoch": 1.3042190677380505, "grad_norm": 0.09333652226950402, "learning_rate": 0.0003254866078746762, "loss": 2.7711, "step": 16515 }, { "epoch": 1.3046139266746954, "grad_norm": 0.09408855741131135, "learning_rate": 0.00032516371194751836, "loss": 3.0657, "step": 16520 }, { "epoch": 1.3050087856113404, "grad_norm": 0.09724325170868917, "learning_rate": 0.0003248408990713004, "loss": 2.8002, "step": 16525 }, { "epoch": 1.3054036445479853, "grad_norm": 0.0835650889309781, "learning_rate": 0.00032451816939936534, "loss": 2.7586, "step": 16530 }, { "epoch": 1.3057985034846302, "grad_norm": 0.08465750284796025, "learning_rate": 0.00032419552308501665, "loss": 2.7858, "step": 16535 }, { "epoch": 1.306193362421275, "grad_norm": 0.09230013238469237, "learning_rate": 0.0003238729602815181, "loss": 2.6411, "step": 16540 }, { "epoch": 1.3065882213579199, "grad_norm": 0.0932207436023961, "learning_rate": 0.00032355048114209386, "loss": 2.972, "step": 16545 }, { "epoch": 1.3069830802945648, "grad_norm": 0.08959603263019587, "learning_rate": 0.00032322808581992826, "loss": 2.8991, "step": 16550 }, { "epoch": 1.3073779392312097, "grad_norm": 0.08650755420838614, "learning_rate": 0.00032290577446816615, "loss": 2.7367, "step": 16555 }, { "epoch": 1.3077727981678544, "grad_norm": 0.08059826040976602, "learning_rate": 0.0003225835472399121, "loss": 2.8489, "step": 16560 }, { "epoch": 1.3081676571044993, "grad_norm": 0.07735368221743515, "learning_rate": 0.0003222614042882309, "loss": 2.8221, "step": 16565 }, { "epoch": 1.3085625160411443, "grad_norm": 0.10378852965532492, "learning_rate": 0.0003219393457661472, "loss": 2.8501, "step": 16570 }, { "epoch": 1.3089573749777892, "grad_norm": 0.08994779811862193, "learning_rate": 0.00032161737182664576, "loss": 2.7477, "step": 16575 }, { "epoch": 1.3093522339144341, "grad_norm": 0.08401471405115113, "learning_rate": 0.0003212954826226708, "loss": 2.7986, "step": 16580 }, { "epoch": 1.309747092851079, "grad_norm": 0.10180652459119265, "learning_rate": 0.0003209736783071268, "loss": 2.8931, "step": 16585 }, { "epoch": 1.310141951787724, "grad_norm": 0.08481443320200893, "learning_rate": 0.00032065195903287734, "loss": 2.8984, "step": 16590 }, { "epoch": 1.3105368107243687, "grad_norm": 0.10655191416862392, "learning_rate": 0.00032033032495274615, "loss": 2.6816, "step": 16595 }, { "epoch": 1.3109316696610136, "grad_norm": 0.09134491850990747, "learning_rate": 0.00032000877621951617, "loss": 2.8267, "step": 16600 }, { "epoch": 1.3113265285976585, "grad_norm": 0.09069539482718669, "learning_rate": 0.0003196873129859297, "loss": 2.6872, "step": 16605 }, { "epoch": 1.3117213875343035, "grad_norm": 0.09094723600330334, "learning_rate": 0.0003193659354046886, "loss": 2.9651, "step": 16610 }, { "epoch": 1.3121162464709482, "grad_norm": 0.10688016747202216, "learning_rate": 0.0003190446436284541, "loss": 2.9301, "step": 16615 }, { "epoch": 1.312511105407593, "grad_norm": 0.10381201024227027, "learning_rate": 0.0003187234378098467, "loss": 2.8634, "step": 16620 }, { "epoch": 1.312905964344238, "grad_norm": 0.10256998250144524, "learning_rate": 0.0003184023181014457, "loss": 3.0089, "step": 16625 }, { "epoch": 1.313300823280883, "grad_norm": 0.09660008152340152, "learning_rate": 0.00031808128465579005, "loss": 2.5941, "step": 16630 }, { "epoch": 1.3136956822175279, "grad_norm": 0.08786570056093168, "learning_rate": 0.00031776033762537736, "loss": 2.7691, "step": 16635 }, { "epoch": 1.3140905411541728, "grad_norm": 0.07801946908036193, "learning_rate": 0.0003174394771626642, "loss": 2.9335, "step": 16640 }, { "epoch": 1.3144854000908175, "grad_norm": 0.08491341135096955, "learning_rate": 0.0003171187034200661, "loss": 2.9635, "step": 16645 }, { "epoch": 1.3148802590274624, "grad_norm": 0.14434726966020658, "learning_rate": 0.0003167980165499574, "loss": 2.815, "step": 16650 }, { "epoch": 1.3152751179641073, "grad_norm": 0.08573100205136372, "learning_rate": 0.0003164774167046711, "loss": 2.862, "step": 16655 }, { "epoch": 1.3156699769007523, "grad_norm": 0.08000356506647954, "learning_rate": 0.00031615690403649904, "loss": 2.6797, "step": 16660 }, { "epoch": 1.316064835837397, "grad_norm": 0.08709099191953996, "learning_rate": 0.0003158364786976914, "loss": 3.0672, "step": 16665 }, { "epoch": 1.316459694774042, "grad_norm": 0.09464606379374514, "learning_rate": 0.000315516140840457, "loss": 2.7472, "step": 16670 }, { "epoch": 1.3168545537106868, "grad_norm": 0.12280668122893566, "learning_rate": 0.00031519589061696326, "loss": 2.7717, "step": 16675 }, { "epoch": 1.3172494126473318, "grad_norm": 0.08322490719568199, "learning_rate": 0.0003148757281793357, "loss": 2.9539, "step": 16680 }, { "epoch": 1.3176442715839767, "grad_norm": 0.08181546313504348, "learning_rate": 0.0003145556536796583, "loss": 3.0992, "step": 16685 }, { "epoch": 1.3180391305206216, "grad_norm": 0.08864471529029265, "learning_rate": 0.0003142356672699734, "loss": 2.6022, "step": 16690 }, { "epoch": 1.3184339894572663, "grad_norm": 0.07793705612626302, "learning_rate": 0.00031391576910228095, "loss": 2.901, "step": 16695 }, { "epoch": 1.3188288483939112, "grad_norm": 0.08415268662389253, "learning_rate": 0.00031359595932853936, "loss": 2.7623, "step": 16700 }, { "epoch": 1.3192237073305562, "grad_norm": 0.0811539773755003, "learning_rate": 0.0003132762381006654, "loss": 2.8804, "step": 16705 }, { "epoch": 1.319618566267201, "grad_norm": 0.08171555206438301, "learning_rate": 0.0003129566055705332, "loss": 2.899, "step": 16710 }, { "epoch": 1.3200134252038458, "grad_norm": 0.08568816797738212, "learning_rate": 0.0003126370618899751, "loss": 2.7557, "step": 16715 }, { "epoch": 1.3204082841404907, "grad_norm": 0.14026714250373598, "learning_rate": 0.00031231760721078095, "loss": 2.8782, "step": 16720 }, { "epoch": 1.3208031430771356, "grad_norm": 0.10454356907181828, "learning_rate": 0.0003119982416846986, "loss": 2.9273, "step": 16725 }, { "epoch": 1.3211980020137806, "grad_norm": 0.08070138520678967, "learning_rate": 0.00031167896546343345, "loss": 2.6923, "step": 16730 }, { "epoch": 1.3215928609504255, "grad_norm": 0.08227981446196656, "learning_rate": 0.00031135977869864846, "loss": 2.9052, "step": 16735 }, { "epoch": 1.3219877198870704, "grad_norm": 0.09323916141285434, "learning_rate": 0.0003110406815419641, "loss": 2.9305, "step": 16740 }, { "epoch": 1.3223825788237153, "grad_norm": 0.08367817712125443, "learning_rate": 0.0003107216741449581, "loss": 2.7362, "step": 16745 }, { "epoch": 1.32277743776036, "grad_norm": 0.1512517198769021, "learning_rate": 0.00031040275665916603, "loss": 2.8614, "step": 16750 }, { "epoch": 1.323172296697005, "grad_norm": 0.10261834876385095, "learning_rate": 0.0003100839292360803, "loss": 2.615, "step": 16755 }, { "epoch": 1.32356715563365, "grad_norm": 0.1041852763820754, "learning_rate": 0.0003097651920271507, "loss": 2.8214, "step": 16760 }, { "epoch": 1.3239620145702948, "grad_norm": 0.08746489136786212, "learning_rate": 0.00030944654518378414, "loss": 2.8625, "step": 16765 }, { "epoch": 1.3243568735069395, "grad_norm": 0.09047006805794526, "learning_rate": 0.00030912798885734466, "loss": 2.6345, "step": 16770 }, { "epoch": 1.3247517324435845, "grad_norm": 0.13623910543863707, "learning_rate": 0.0003088095231991532, "loss": 2.6878, "step": 16775 }, { "epoch": 1.3251465913802294, "grad_norm": 0.11283817567551477, "learning_rate": 0.00030849114836048794, "loss": 2.8131, "step": 16780 }, { "epoch": 1.3255414503168743, "grad_norm": 0.11590229504393515, "learning_rate": 0.00030817286449258326, "loss": 2.8321, "step": 16785 }, { "epoch": 1.3259363092535192, "grad_norm": 0.09822913881757202, "learning_rate": 0.00030785467174663096, "loss": 2.7029, "step": 16790 }, { "epoch": 1.3263311681901642, "grad_norm": 0.10590644750020002, "learning_rate": 0.00030753657027377933, "loss": 2.8765, "step": 16795 }, { "epoch": 1.3267260271268089, "grad_norm": 0.08526952542182825, "learning_rate": 0.00030721856022513326, "loss": 2.7874, "step": 16800 }, { "epoch": 1.3271208860634538, "grad_norm": 0.09374659195071795, "learning_rate": 0.00030690064175175436, "loss": 2.7468, "step": 16805 }, { "epoch": 1.3275157450000987, "grad_norm": 0.09600087513951223, "learning_rate": 0.00030658281500466056, "loss": 2.6922, "step": 16810 }, { "epoch": 1.3279106039367436, "grad_norm": 0.09753723663071097, "learning_rate": 0.00030626508013482625, "loss": 2.8556, "step": 16815 }, { "epoch": 1.3283054628733884, "grad_norm": 0.09821788217563707, "learning_rate": 0.0003059474372931822, "loss": 2.7509, "step": 16820 }, { "epoch": 1.3287003218100333, "grad_norm": 0.07796675388238322, "learning_rate": 0.00030562988663061574, "loss": 2.8496, "step": 16825 }, { "epoch": 1.3290951807466782, "grad_norm": 0.09823704370826356, "learning_rate": 0.0003053124282979699, "loss": 2.7984, "step": 16830 }, { "epoch": 1.3294900396833231, "grad_norm": 0.10144895159904323, "learning_rate": 0.00030499506244604423, "loss": 2.7265, "step": 16835 }, { "epoch": 1.329884898619968, "grad_norm": 0.10254384924843947, "learning_rate": 0.0003046777892255942, "loss": 2.9436, "step": 16840 }, { "epoch": 1.330279757556613, "grad_norm": 0.09458646946098904, "learning_rate": 0.0003043606087873313, "loss": 2.8326, "step": 16845 }, { "epoch": 1.330674616493258, "grad_norm": 0.08354659340882564, "learning_rate": 0.00030404352128192305, "loss": 2.9115, "step": 16850 }, { "epoch": 1.3310694754299026, "grad_norm": 0.08411095107878522, "learning_rate": 0.0003037265268599925, "loss": 2.7055, "step": 16855 }, { "epoch": 1.3314643343665475, "grad_norm": 0.0835631631019328, "learning_rate": 0.0003034096256721189, "loss": 2.884, "step": 16860 }, { "epoch": 1.3318591933031925, "grad_norm": 0.09669848664206299, "learning_rate": 0.00030309281786883725, "loss": 2.9295, "step": 16865 }, { "epoch": 1.3322540522398374, "grad_norm": 0.10845561991767413, "learning_rate": 0.00030277610360063744, "loss": 2.8303, "step": 16870 }, { "epoch": 1.332648911176482, "grad_norm": 0.09329818912130927, "learning_rate": 0.0003024594830179658, "loss": 2.6828, "step": 16875 }, { "epoch": 1.333043770113127, "grad_norm": 0.0943278285434544, "learning_rate": 0.0003021429562712237, "loss": 2.8232, "step": 16880 }, { "epoch": 1.333438629049772, "grad_norm": 0.09122208448082457, "learning_rate": 0.0003018265235107681, "loss": 2.6527, "step": 16885 }, { "epoch": 1.3338334879864169, "grad_norm": 0.11635920282000217, "learning_rate": 0.0003015101848869111, "loss": 2.76, "step": 16890 }, { "epoch": 1.3342283469230618, "grad_norm": 0.09617349466971002, "learning_rate": 0.00030119394054992055, "loss": 2.6907, "step": 16895 }, { "epoch": 1.3346232058597067, "grad_norm": 0.08360728924533982, "learning_rate": 0.0003008777906500191, "loss": 2.7161, "step": 16900 }, { "epoch": 1.3350180647963514, "grad_norm": 0.11214574138587534, "learning_rate": 0.0003005617353373845, "loss": 2.9504, "step": 16905 }, { "epoch": 1.3354129237329964, "grad_norm": 0.155171662065237, "learning_rate": 0.00030024577476214987, "loss": 2.8343, "step": 16910 }, { "epoch": 1.3358077826696413, "grad_norm": 0.11251733291407948, "learning_rate": 0.0002999299090744031, "loss": 2.8015, "step": 16915 }, { "epoch": 1.3362026416062862, "grad_norm": 0.0861850376168779, "learning_rate": 0.00029961413842418707, "loss": 2.6568, "step": 16920 }, { "epoch": 1.336597500542931, "grad_norm": 0.0923090417464619, "learning_rate": 0.00029929846296149963, "loss": 2.8173, "step": 16925 }, { "epoch": 1.3369923594795758, "grad_norm": 0.08321370280078108, "learning_rate": 0.0002989828828362932, "loss": 2.671, "step": 16930 }, { "epoch": 1.3373872184162208, "grad_norm": 0.12065380862352465, "learning_rate": 0.0002986673981984751, "loss": 2.9209, "step": 16935 }, { "epoch": 1.3377820773528657, "grad_norm": 0.09022670464932075, "learning_rate": 0.0002983520091979071, "loss": 2.7012, "step": 16940 }, { "epoch": 1.3381769362895106, "grad_norm": 0.09541704551136916, "learning_rate": 0.0002980367159844058, "loss": 2.7853, "step": 16945 }, { "epoch": 1.3385717952261555, "grad_norm": 0.09901337763701046, "learning_rate": 0.0002977215187077421, "loss": 2.8426, "step": 16950 }, { "epoch": 1.3389666541628005, "grad_norm": 0.09778728378269458, "learning_rate": 0.00029740641751764156, "loss": 3.0101, "step": 16955 }, { "epoch": 1.3393615130994452, "grad_norm": 0.10983745676312678, "learning_rate": 0.00029709141256378357, "loss": 2.8131, "step": 16960 }, { "epoch": 1.33975637203609, "grad_norm": 0.36220860051052617, "learning_rate": 0.0002967765039958022, "loss": 2.8281, "step": 16965 }, { "epoch": 1.340151230972735, "grad_norm": 0.1012392560510747, "learning_rate": 0.00029646169196328586, "loss": 2.7988, "step": 16970 }, { "epoch": 1.34054608990938, "grad_norm": 0.1688155136033892, "learning_rate": 0.00029614697661577697, "loss": 2.9848, "step": 16975 }, { "epoch": 1.3409409488460247, "grad_norm": 0.13322680693010394, "learning_rate": 0.0002958323581027719, "loss": 2.7534, "step": 16980 }, { "epoch": 1.3413358077826696, "grad_norm": 0.09671966079457214, "learning_rate": 0.0002955178365737211, "loss": 2.9605, "step": 16985 }, { "epoch": 1.3417306667193145, "grad_norm": 0.13766754521731925, "learning_rate": 0.00029520341217802886, "loss": 2.847, "step": 16990 }, { "epoch": 1.3421255256559594, "grad_norm": 0.13699578367780693, "learning_rate": 0.00029488908506505364, "loss": 2.7361, "step": 16995 }, { "epoch": 1.3425203845926044, "grad_norm": 0.09935757438325493, "learning_rate": 0.0002945748553841073, "loss": 2.7227, "step": 17000 }, { "epoch": 1.3429152435292493, "grad_norm": 0.09463032154319001, "learning_rate": 0.0002942607232844557, "loss": 2.7918, "step": 17005 }, { "epoch": 1.343310102465894, "grad_norm": 0.1164664633850083, "learning_rate": 0.0002939466889153181, "loss": 2.6591, "step": 17010 }, { "epoch": 1.343704961402539, "grad_norm": 0.0908592816814429, "learning_rate": 0.0002936327524258677, "loss": 2.9404, "step": 17015 }, { "epoch": 1.3440998203391838, "grad_norm": 0.09253121559823534, "learning_rate": 0.0002933189139652308, "loss": 2.8411, "step": 17020 }, { "epoch": 1.3444946792758288, "grad_norm": 0.08540100013850767, "learning_rate": 0.00029300517368248744, "loss": 2.6774, "step": 17025 }, { "epoch": 1.3448895382124735, "grad_norm": 0.09038513808465044, "learning_rate": 0.0002926915317266708, "loss": 2.7083, "step": 17030 }, { "epoch": 1.3452843971491184, "grad_norm": 0.07890493725985288, "learning_rate": 0.0002923779882467675, "loss": 2.6076, "step": 17035 }, { "epoch": 1.3456792560857633, "grad_norm": 0.08931299892712123, "learning_rate": 0.0002920645433917173, "loss": 2.7568, "step": 17040 }, { "epoch": 1.3460741150224083, "grad_norm": 0.11106607850871564, "learning_rate": 0.0002917511973104134, "loss": 2.8132, "step": 17045 }, { "epoch": 1.3464689739590532, "grad_norm": 0.08583122516009276, "learning_rate": 0.00029143795015170126, "loss": 2.7756, "step": 17050 }, { "epoch": 1.346863832895698, "grad_norm": 0.10205105150004923, "learning_rate": 0.00029112480206438054, "loss": 2.6832, "step": 17055 }, { "epoch": 1.3472586918323428, "grad_norm": 0.08538478081308229, "learning_rate": 0.0002908117531972026, "loss": 2.7581, "step": 17060 }, { "epoch": 1.3476535507689877, "grad_norm": 0.08899133894617543, "learning_rate": 0.0002904988036988727, "loss": 2.7023, "step": 17065 }, { "epoch": 1.3480484097056327, "grad_norm": 0.09458240875658473, "learning_rate": 0.0002901859537180485, "loss": 2.9496, "step": 17070 }, { "epoch": 1.3484432686422776, "grad_norm": 0.08421265728234213, "learning_rate": 0.00028987320340334013, "loss": 2.6505, "step": 17075 }, { "epoch": 1.3488381275789223, "grad_norm": 0.08260467448692288, "learning_rate": 0.0002895605529033108, "loss": 2.7024, "step": 17080 }, { "epoch": 1.3492329865155672, "grad_norm": 0.08735107714236158, "learning_rate": 0.00028924800236647597, "loss": 2.743, "step": 17085 }, { "epoch": 1.3496278454522121, "grad_norm": 0.10668428221967402, "learning_rate": 0.00028893555194130406, "loss": 2.9619, "step": 17090 }, { "epoch": 1.350022704388857, "grad_norm": 0.08336706654400473, "learning_rate": 0.00028862320177621526, "loss": 2.7449, "step": 17095 }, { "epoch": 1.350417563325502, "grad_norm": 0.12053879512205828, "learning_rate": 0.0002883109520195828, "loss": 2.7315, "step": 17100 }, { "epoch": 1.350812422262147, "grad_norm": 0.08809380236812502, "learning_rate": 0.0002879988028197317, "loss": 2.6644, "step": 17105 }, { "epoch": 1.3512072811987919, "grad_norm": 0.09177382365678531, "learning_rate": 0.0002876867543249394, "loss": 2.765, "step": 17110 }, { "epoch": 1.3516021401354366, "grad_norm": 0.10654220579475558, "learning_rate": 0.000287374806683436, "loss": 2.7134, "step": 17115 }, { "epoch": 1.3519969990720815, "grad_norm": 0.09031637340711181, "learning_rate": 0.0002870629600434028, "loss": 2.7322, "step": 17120 }, { "epoch": 1.3523918580087264, "grad_norm": 0.09097614177337045, "learning_rate": 0.0002867512145529738, "loss": 2.7425, "step": 17125 }, { "epoch": 1.3527867169453713, "grad_norm": 0.09161740763968455, "learning_rate": 0.0002864395703602346, "loss": 2.922, "step": 17130 }, { "epoch": 1.353181575882016, "grad_norm": 0.13305442695435057, "learning_rate": 0.00028612802761322266, "loss": 2.81, "step": 17135 }, { "epoch": 1.353576434818661, "grad_norm": 0.11377010390337208, "learning_rate": 0.0002858165864599277, "loss": 2.8431, "step": 17140 }, { "epoch": 1.353971293755306, "grad_norm": 0.11449739447951078, "learning_rate": 0.00028550524704829043, "loss": 2.6803, "step": 17145 }, { "epoch": 1.3543661526919508, "grad_norm": 0.11438673735946263, "learning_rate": 0.00028519400952620416, "loss": 2.7847, "step": 17150 }, { "epoch": 1.3547610116285957, "grad_norm": 0.16513055183256936, "learning_rate": 0.0002848828740415129, "loss": 2.7019, "step": 17155 }, { "epoch": 1.3551558705652407, "grad_norm": 0.09095077562679608, "learning_rate": 0.0002845718407420129, "loss": 2.7875, "step": 17160 }, { "epoch": 1.3555507295018854, "grad_norm": 0.08625647121602527, "learning_rate": 0.00028426090977545126, "loss": 2.8751, "step": 17165 }, { "epoch": 1.3559455884385303, "grad_norm": 0.09087898617720426, "learning_rate": 0.00028395008128952713, "loss": 2.7625, "step": 17170 }, { "epoch": 1.3563404473751752, "grad_norm": 0.09877466341141043, "learning_rate": 0.0002836393554318902, "loss": 2.8265, "step": 17175 }, { "epoch": 1.3567353063118202, "grad_norm": 0.0954129444821213, "learning_rate": 0.0002833287323501422, "loss": 2.9589, "step": 17180 }, { "epoch": 1.3571301652484649, "grad_norm": 0.0858729917476015, "learning_rate": 0.0002830182121918356, "loss": 2.7895, "step": 17185 }, { "epoch": 1.3575250241851098, "grad_norm": 0.0849687355506857, "learning_rate": 0.00028270779510447384, "loss": 2.6845, "step": 17190 }, { "epoch": 1.3579198831217547, "grad_norm": 0.1077444925314342, "learning_rate": 0.000282397481235512, "loss": 2.7012, "step": 17195 }, { "epoch": 1.3583147420583996, "grad_norm": 0.08422097046565995, "learning_rate": 0.00028208727073235525, "loss": 2.732, "step": 17200 }, { "epoch": 1.3587096009950446, "grad_norm": 0.08921363280579037, "learning_rate": 0.00028177716374236064, "loss": 2.74, "step": 17205 }, { "epoch": 1.3591044599316895, "grad_norm": 0.09137620683665727, "learning_rate": 0.0002814671604128351, "loss": 2.7919, "step": 17210 }, { "epoch": 1.3594993188683344, "grad_norm": 0.08435658197375148, "learning_rate": 0.0002811572608910372, "loss": 2.7539, "step": 17215 }, { "epoch": 1.3598941778049791, "grad_norm": 0.10293057321000278, "learning_rate": 0.0002808474653241756, "loss": 2.707, "step": 17220 }, { "epoch": 1.360289036741624, "grad_norm": 0.0997256013643858, "learning_rate": 0.00028053777385940947, "loss": 2.8388, "step": 17225 }, { "epoch": 1.360683895678269, "grad_norm": 0.08839295314028289, "learning_rate": 0.0002802281866438494, "loss": 2.5814, "step": 17230 }, { "epoch": 1.361078754614914, "grad_norm": 0.1143145964209611, "learning_rate": 0.00027991870382455526, "loss": 2.8325, "step": 17235 }, { "epoch": 1.3614736135515586, "grad_norm": 0.0875274007812914, "learning_rate": 0.0002796093255485385, "loss": 2.7975, "step": 17240 }, { "epoch": 1.3618684724882035, "grad_norm": 0.08258506615582097, "learning_rate": 0.0002793000519627598, "loss": 2.8936, "step": 17245 }, { "epoch": 1.3622633314248485, "grad_norm": 0.09750054946336748, "learning_rate": 0.00027899088321413124, "loss": 2.8465, "step": 17250 }, { "epoch": 1.3626581903614934, "grad_norm": 0.09115520603282246, "learning_rate": 0.00027868181944951397, "loss": 2.851, "step": 17255 }, { "epoch": 1.3630530492981383, "grad_norm": 0.12695800020485679, "learning_rate": 0.0002783728608157202, "loss": 2.8624, "step": 17260 }, { "epoch": 1.3634479082347832, "grad_norm": 0.10298614887798306, "learning_rate": 0.00027806400745951175, "loss": 2.7809, "step": 17265 }, { "epoch": 1.363842767171428, "grad_norm": 0.0915254762836989, "learning_rate": 0.0002777552595276004, "loss": 2.765, "step": 17270 }, { "epoch": 1.3642376261080729, "grad_norm": 0.0881665836762046, "learning_rate": 0.00027744661716664823, "loss": 2.7851, "step": 17275 }, { "epoch": 1.3646324850447178, "grad_norm": 0.0970323531776423, "learning_rate": 0.0002771380805232665, "loss": 2.7728, "step": 17280 }, { "epoch": 1.3650273439813627, "grad_norm": 0.09153613170850179, "learning_rate": 0.000276829649744017, "loss": 2.8526, "step": 17285 }, { "epoch": 1.3654222029180074, "grad_norm": 0.09405695474481379, "learning_rate": 0.00027652132497541057, "loss": 2.9286, "step": 17290 }, { "epoch": 1.3658170618546523, "grad_norm": 0.09800135224655977, "learning_rate": 0.00027621310636390833, "loss": 2.9201, "step": 17295 }, { "epoch": 1.3662119207912973, "grad_norm": 0.07400885770569357, "learning_rate": 0.0002759049940559203, "loss": 2.9063, "step": 17300 }, { "epoch": 1.3666067797279422, "grad_norm": 0.09197979174518887, "learning_rate": 0.00027559698819780654, "loss": 2.9254, "step": 17305 }, { "epoch": 1.3670016386645871, "grad_norm": 0.08126262140207073, "learning_rate": 0.0002752890889358768, "loss": 2.7823, "step": 17310 }, { "epoch": 1.367396497601232, "grad_norm": 0.10091651622004671, "learning_rate": 0.00027498129641638887, "loss": 2.9729, "step": 17315 }, { "epoch": 1.367791356537877, "grad_norm": 0.11202083390394578, "learning_rate": 0.00027467361078555133, "loss": 2.7897, "step": 17320 }, { "epoch": 1.3681862154745217, "grad_norm": 0.09172836620054468, "learning_rate": 0.0002743660321895211, "loss": 2.8006, "step": 17325 }, { "epoch": 1.3685810744111666, "grad_norm": 0.11586434658747359, "learning_rate": 0.0002740585607744045, "loss": 2.7544, "step": 17330 }, { "epoch": 1.3689759333478115, "grad_norm": 0.08993200188072976, "learning_rate": 0.0002737511966862573, "loss": 2.7534, "step": 17335 }, { "epoch": 1.3693707922844562, "grad_norm": 0.08048383889330629, "learning_rate": 0.0002734439400710836, "loss": 2.7568, "step": 17340 }, { "epoch": 1.3697656512211012, "grad_norm": 0.09679438503510558, "learning_rate": 0.00027313679107483714, "loss": 2.6782, "step": 17345 }, { "epoch": 1.370160510157746, "grad_norm": 0.07587059136873388, "learning_rate": 0.0002728297498434197, "loss": 2.7051, "step": 17350 }, { "epoch": 1.370555369094391, "grad_norm": 0.07887948604249867, "learning_rate": 0.00027252281652268294, "loss": 2.7043, "step": 17355 }, { "epoch": 1.370950228031036, "grad_norm": 0.08360885545569997, "learning_rate": 0.0002722159912584262, "loss": 2.7239, "step": 17360 }, { "epoch": 1.3713450869676809, "grad_norm": 0.10894584602342486, "learning_rate": 0.0002719092741963984, "loss": 2.6974, "step": 17365 }, { "epoch": 1.3717399459043258, "grad_norm": 0.09507631385759772, "learning_rate": 0.0002716026654822963, "loss": 2.8398, "step": 17370 }, { "epoch": 1.3721348048409705, "grad_norm": 0.1193641322236442, "learning_rate": 0.0002712961652617656, "loss": 2.7639, "step": 17375 }, { "epoch": 1.3725296637776154, "grad_norm": 0.09089636909313155, "learning_rate": 0.00027098977368040063, "loss": 2.5482, "step": 17380 }, { "epoch": 1.3729245227142604, "grad_norm": 0.09024736403617808, "learning_rate": 0.00027068349088374356, "loss": 2.7089, "step": 17385 }, { "epoch": 1.3733193816509053, "grad_norm": 0.0793807091380404, "learning_rate": 0.0002703773170172855, "loss": 2.9953, "step": 17390 }, { "epoch": 1.37371424058755, "grad_norm": 0.0892556065042681, "learning_rate": 0.00027007125222646545, "loss": 2.9344, "step": 17395 }, { "epoch": 1.374109099524195, "grad_norm": 0.0808112667405697, "learning_rate": 0.00026976529665667036, "loss": 2.6423, "step": 17400 }, { "epoch": 1.3745039584608398, "grad_norm": 0.09954745031683802, "learning_rate": 0.0002694594504532361, "loss": 2.865, "step": 17405 }, { "epoch": 1.3748988173974848, "grad_norm": 0.09352793228090592, "learning_rate": 0.0002691537137614456, "loss": 2.9302, "step": 17410 }, { "epoch": 1.3752936763341297, "grad_norm": 0.09599225335087602, "learning_rate": 0.0002688480867265307, "loss": 2.7868, "step": 17415 }, { "epoch": 1.3756885352707746, "grad_norm": 0.08397072910938778, "learning_rate": 0.0002685425694936704, "loss": 2.6765, "step": 17420 }, { "epoch": 1.3760833942074193, "grad_norm": 0.0870235132461321, "learning_rate": 0.0002682371622079921, "loss": 2.6901, "step": 17425 }, { "epoch": 1.3764782531440642, "grad_norm": 0.08079380746659444, "learning_rate": 0.0002679318650145704, "loss": 2.5975, "step": 17430 }, { "epoch": 1.3768731120807092, "grad_norm": 0.08603400519841477, "learning_rate": 0.00026762667805842834, "loss": 2.6389, "step": 17435 }, { "epoch": 1.377267971017354, "grad_norm": 0.08659292290524735, "learning_rate": 0.00026732160148453584, "loss": 2.8772, "step": 17440 }, { "epoch": 1.3776628299539988, "grad_norm": 0.10319211627508325, "learning_rate": 0.00026701663543781106, "loss": 2.7267, "step": 17445 }, { "epoch": 1.3780576888906437, "grad_norm": 0.1025427025848482, "learning_rate": 0.0002667117800631189, "loss": 2.7788, "step": 17450 }, { "epoch": 1.3784525478272887, "grad_norm": 0.09843665400423943, "learning_rate": 0.00026640703550527247, "loss": 2.734, "step": 17455 }, { "epoch": 1.3788474067639336, "grad_norm": 0.10430494746674468, "learning_rate": 0.0002661024019090319, "loss": 2.9365, "step": 17460 }, { "epoch": 1.3792422657005785, "grad_norm": 0.09110293575444066, "learning_rate": 0.00026579787941910434, "loss": 2.6964, "step": 17465 }, { "epoch": 1.3796371246372234, "grad_norm": 0.07967874563096052, "learning_rate": 0.00026549346818014477, "loss": 2.8159, "step": 17470 }, { "epoch": 1.3800319835738684, "grad_norm": 0.08327258260986937, "learning_rate": 0.00026518916833675457, "loss": 2.6847, "step": 17475 }, { "epoch": 1.380426842510513, "grad_norm": 0.07875568725163344, "learning_rate": 0.00026488498003348303, "loss": 2.749, "step": 17480 }, { "epoch": 1.380821701447158, "grad_norm": 0.09348954509387258, "learning_rate": 0.00026458090341482585, "loss": 2.6076, "step": 17485 }, { "epoch": 1.381216560383803, "grad_norm": 0.12007373278965837, "learning_rate": 0.00026427693862522564, "loss": 2.9001, "step": 17490 }, { "epoch": 1.3816114193204478, "grad_norm": 0.08312279275249747, "learning_rate": 0.0002639730858090725, "loss": 2.8011, "step": 17495 }, { "epoch": 1.3820062782570925, "grad_norm": 0.09301100250980479, "learning_rate": 0.0002636693451107025, "loss": 2.9954, "step": 17500 }, { "epoch": 1.3824011371937375, "grad_norm": 0.08787384579546352, "learning_rate": 0.00026336571667439946, "loss": 2.9088, "step": 17505 }, { "epoch": 1.3827959961303824, "grad_norm": 0.08897928524039699, "learning_rate": 0.00026306220064439286, "loss": 2.8489, "step": 17510 }, { "epoch": 1.3831908550670273, "grad_norm": 0.11846400003981679, "learning_rate": 0.00026275879716485955, "loss": 2.6964, "step": 17515 }, { "epoch": 1.3835857140036723, "grad_norm": 0.07842039004195052, "learning_rate": 0.00026245550637992235, "loss": 2.6671, "step": 17520 }, { "epoch": 1.3839805729403172, "grad_norm": 0.08155301706634033, "learning_rate": 0.0002621523284336509, "loss": 2.7835, "step": 17525 }, { "epoch": 1.3843754318769619, "grad_norm": 0.10963842539769834, "learning_rate": 0.00026184926347006134, "loss": 3.039, "step": 17530 }, { "epoch": 1.3847702908136068, "grad_norm": 0.08131565144754695, "learning_rate": 0.0002615463116331156, "loss": 2.8047, "step": 17535 }, { "epoch": 1.3851651497502517, "grad_norm": 0.08104372704520717, "learning_rate": 0.00026124347306672266, "loss": 2.7378, "step": 17540 }, { "epoch": 1.3855600086868967, "grad_norm": 0.09853881605038721, "learning_rate": 0.00026094074791473674, "loss": 2.7185, "step": 17545 }, { "epoch": 1.3859548676235414, "grad_norm": 0.0860421779419733, "learning_rate": 0.0002606381363209592, "loss": 2.8535, "step": 17550 }, { "epoch": 1.3863497265601863, "grad_norm": 0.09554674715713374, "learning_rate": 0.00026033563842913643, "loss": 2.8463, "step": 17555 }, { "epoch": 1.3867445854968312, "grad_norm": 0.07975829976456246, "learning_rate": 0.00026003325438296186, "loss": 2.9095, "step": 17560 }, { "epoch": 1.3871394444334761, "grad_norm": 0.08442860712998605, "learning_rate": 0.00025973098432607395, "loss": 2.9159, "step": 17565 }, { "epoch": 1.387534303370121, "grad_norm": 0.0822399914413966, "learning_rate": 0.0002594288284020574, "loss": 2.8762, "step": 17570 }, { "epoch": 1.387929162306766, "grad_norm": 0.08777593303658053, "learning_rate": 0.0002591267867544432, "loss": 2.8566, "step": 17575 }, { "epoch": 1.388324021243411, "grad_norm": 0.08850320884288994, "learning_rate": 0.0002588248595267068, "loss": 2.8938, "step": 17580 }, { "epoch": 1.3887188801800556, "grad_norm": 0.08374021690983276, "learning_rate": 0.00025852304686227044, "loss": 2.9103, "step": 17585 }, { "epoch": 1.3891137391167006, "grad_norm": 0.08850618305701156, "learning_rate": 0.0002582213489045013, "loss": 2.6031, "step": 17590 }, { "epoch": 1.3895085980533455, "grad_norm": 0.08491881541910729, "learning_rate": 0.0002579197657967124, "loss": 2.627, "step": 17595 }, { "epoch": 1.3899034569899904, "grad_norm": 0.09021247275527902, "learning_rate": 0.00025761829768216227, "loss": 2.9546, "step": 17600 }, { "epoch": 1.390298315926635, "grad_norm": 0.08486335813996138, "learning_rate": 0.00025731694470405435, "loss": 2.7344, "step": 17605 }, { "epoch": 1.39069317486328, "grad_norm": 0.08510713069858218, "learning_rate": 0.0002570157070055381, "loss": 2.9083, "step": 17610 }, { "epoch": 1.391088033799925, "grad_norm": 0.07766411175768291, "learning_rate": 0.00025671458472970726, "loss": 2.7162, "step": 17615 }, { "epoch": 1.3914828927365699, "grad_norm": 0.10171836150555565, "learning_rate": 0.00025641357801960184, "loss": 2.8129, "step": 17620 }, { "epoch": 1.3918777516732148, "grad_norm": 0.09431874004356526, "learning_rate": 0.0002561126870182059, "loss": 2.8867, "step": 17625 }, { "epoch": 1.3922726106098597, "grad_norm": 0.08110911095103247, "learning_rate": 0.00025581191186844956, "loss": 2.8007, "step": 17630 }, { "epoch": 1.3926674695465044, "grad_norm": 0.09205290288222272, "learning_rate": 0.00025551125271320685, "loss": 2.732, "step": 17635 }, { "epoch": 1.3930623284831494, "grad_norm": 0.07970683522615621, "learning_rate": 0.0002552107096952977, "loss": 2.6511, "step": 17640 }, { "epoch": 1.3934571874197943, "grad_norm": 0.10184549101717155, "learning_rate": 0.00025491028295748596, "loss": 2.7408, "step": 17645 }, { "epoch": 1.3938520463564392, "grad_norm": 0.09466385253785012, "learning_rate": 0.0002546099726424811, "loss": 2.9228, "step": 17650 }, { "epoch": 1.394246905293084, "grad_norm": 0.10630459953687547, "learning_rate": 0.0002543097788929368, "loss": 2.7372, "step": 17655 }, { "epoch": 1.3946417642297289, "grad_norm": 0.08937862215187399, "learning_rate": 0.00025400970185145133, "loss": 2.8829, "step": 17660 }, { "epoch": 1.3950366231663738, "grad_norm": 0.08731816192908716, "learning_rate": 0.0002537097416605678, "loss": 2.9102, "step": 17665 }, { "epoch": 1.3954314821030187, "grad_norm": 0.12276249594660554, "learning_rate": 0.0002534098984627732, "loss": 2.8042, "step": 17670 }, { "epoch": 1.3958263410396636, "grad_norm": 0.10055467118681803, "learning_rate": 0.0002531101724004998, "loss": 2.6738, "step": 17675 }, { "epoch": 1.3962211999763086, "grad_norm": 0.09208493416440579, "learning_rate": 0.000252810563616124, "loss": 2.7472, "step": 17680 }, { "epoch": 1.3966160589129535, "grad_norm": 0.08577790308969264, "learning_rate": 0.0002525110722519658, "loss": 2.6628, "step": 17685 }, { "epoch": 1.3970109178495982, "grad_norm": 0.08329960322106993, "learning_rate": 0.0002522116984502905, "loss": 2.7303, "step": 17690 }, { "epoch": 1.3974057767862431, "grad_norm": 0.08002013593316841, "learning_rate": 0.0002519124423533065, "loss": 2.8218, "step": 17695 }, { "epoch": 1.397800635722888, "grad_norm": 0.08959945439185138, "learning_rate": 0.00025161330410316716, "loss": 2.8187, "step": 17700 }, { "epoch": 1.3981954946595327, "grad_norm": 0.16138871928165038, "learning_rate": 0.0002513142838419692, "loss": 2.9111, "step": 17705 }, { "epoch": 1.3985903535961777, "grad_norm": 0.09573128549276287, "learning_rate": 0.0002510153817117539, "loss": 2.6517, "step": 17710 }, { "epoch": 1.3989852125328226, "grad_norm": 0.1094870376585219, "learning_rate": 0.0002507165978545056, "loss": 2.8447, "step": 17715 }, { "epoch": 1.3993800714694675, "grad_norm": 0.09093916669234828, "learning_rate": 0.00025041793241215336, "loss": 2.6429, "step": 17720 }, { "epoch": 1.3997749304061124, "grad_norm": 0.08054669727648676, "learning_rate": 0.00025011938552656963, "loss": 2.6863, "step": 17725 }, { "epoch": 1.4001697893427574, "grad_norm": 0.08443307440886376, "learning_rate": 0.0002498209573395703, "loss": 2.7498, "step": 17730 }, { "epoch": 1.4005646482794023, "grad_norm": 0.08706165587857292, "learning_rate": 0.00024952264799291525, "loss": 2.7768, "step": 17735 }, { "epoch": 1.400959507216047, "grad_norm": 0.07753448252390921, "learning_rate": 0.00024922445762830747, "loss": 2.6485, "step": 17740 }, { "epoch": 1.401354366152692, "grad_norm": 0.15337004583571134, "learning_rate": 0.00024892638638739404, "loss": 2.7304, "step": 17745 }, { "epoch": 1.4017492250893369, "grad_norm": 0.09496253714171131, "learning_rate": 0.000248628434411765, "loss": 2.676, "step": 17750 }, { "epoch": 1.4021440840259818, "grad_norm": 0.08279200656584415, "learning_rate": 0.00024833060184295357, "loss": 2.7455, "step": 17755 }, { "epoch": 1.4025389429626265, "grad_norm": 0.06981339229507244, "learning_rate": 0.00024803288882243696, "loss": 2.7099, "step": 17760 }, { "epoch": 1.4029338018992714, "grad_norm": 0.07609830708711934, "learning_rate": 0.0002477352954916347, "loss": 2.6971, "step": 17765 }, { "epoch": 1.4033286608359163, "grad_norm": 0.12402422086180559, "learning_rate": 0.00024743782199191045, "loss": 2.9655, "step": 17770 }, { "epoch": 1.4037235197725613, "grad_norm": 0.10571484049796889, "learning_rate": 0.0002471404684645699, "loss": 2.6442, "step": 17775 }, { "epoch": 1.4041183787092062, "grad_norm": 0.08039568302406266, "learning_rate": 0.0002468432350508627, "loss": 2.7487, "step": 17780 }, { "epoch": 1.4045132376458511, "grad_norm": 0.0850423153365748, "learning_rate": 0.00024654612189198066, "loss": 2.7027, "step": 17785 }, { "epoch": 1.4049080965824958, "grad_norm": 0.0813368175051498, "learning_rate": 0.00024624912912905904, "loss": 2.7579, "step": 17790 }, { "epoch": 1.4053029555191408, "grad_norm": 0.08375544530095685, "learning_rate": 0.00024595225690317584, "loss": 2.7225, "step": 17795 }, { "epoch": 1.4056978144557857, "grad_norm": 0.08580691862632699, "learning_rate": 0.00024565550535535127, "loss": 2.617, "step": 17800 }, { "epoch": 1.4060926733924306, "grad_norm": 0.09281843688947065, "learning_rate": 0.00024535887462654897, "loss": 2.6985, "step": 17805 }, { "epoch": 1.4064875323290753, "grad_norm": 0.08718378486308785, "learning_rate": 0.0002450623648576745, "loss": 2.8808, "step": 17810 }, { "epoch": 1.4068823912657202, "grad_norm": 0.09018356528499318, "learning_rate": 0.0002447659761895766, "loss": 2.7379, "step": 17815 }, { "epoch": 1.4072772502023652, "grad_norm": 0.09836330760312383, "learning_rate": 0.0002444697087630458, "loss": 2.9284, "step": 17820 }, { "epoch": 1.40767210913901, "grad_norm": 0.08157948588625716, "learning_rate": 0.0002441735627188158, "loss": 2.714, "step": 17825 }, { "epoch": 1.408066968075655, "grad_norm": 0.0859555846778601, "learning_rate": 0.00024387753819756186, "loss": 3.0221, "step": 17830 }, { "epoch": 1.4084618270123, "grad_norm": 0.08814905229724104, "learning_rate": 0.00024358163533990206, "loss": 2.8696, "step": 17835 }, { "epoch": 1.4088566859489449, "grad_norm": 0.10099722356582773, "learning_rate": 0.00024328585428639693, "loss": 2.7084, "step": 17840 }, { "epoch": 1.4092515448855896, "grad_norm": 0.08429723852378873, "learning_rate": 0.000242990195177548, "loss": 2.93, "step": 17845 }, { "epoch": 1.4096464038222345, "grad_norm": 0.07591035611950192, "learning_rate": 0.0002426946581538001, "loss": 2.7954, "step": 17850 }, { "epoch": 1.4100412627588794, "grad_norm": 0.08641715434332566, "learning_rate": 0.0002423992433555393, "loss": 2.6665, "step": 17855 }, { "epoch": 1.4104361216955243, "grad_norm": 0.08763510985001881, "learning_rate": 0.00024210395092309417, "loss": 2.7911, "step": 17860 }, { "epoch": 1.410830980632169, "grad_norm": 0.07696902438621943, "learning_rate": 0.00024180878099673453, "loss": 2.8862, "step": 17865 }, { "epoch": 1.411225839568814, "grad_norm": 0.09939928036204813, "learning_rate": 0.00024151373371667257, "loss": 2.7288, "step": 17870 }, { "epoch": 1.411620698505459, "grad_norm": 0.08088188837813057, "learning_rate": 0.00024121880922306217, "loss": 2.7683, "step": 17875 }, { "epoch": 1.4120155574421038, "grad_norm": 0.09595363918168591, "learning_rate": 0.00024092400765599826, "loss": 2.6959, "step": 17880 }, { "epoch": 1.4124104163787488, "grad_norm": 0.09180170975660669, "learning_rate": 0.00024062932915551826, "loss": 2.9137, "step": 17885 }, { "epoch": 1.4128052753153937, "grad_norm": 0.10207068957026093, "learning_rate": 0.00024033477386160035, "loss": 2.906, "step": 17890 }, { "epoch": 1.4132001342520384, "grad_norm": 0.100502779628428, "learning_rate": 0.0002400403419141649, "loss": 2.6631, "step": 17895 }, { "epoch": 1.4135949931886833, "grad_norm": 0.0841243294408685, "learning_rate": 0.00023974603345307293, "loss": 2.7644, "step": 17900 }, { "epoch": 1.4139898521253282, "grad_norm": 0.0832749815778646, "learning_rate": 0.00023945184861812757, "loss": 2.9526, "step": 17905 }, { "epoch": 1.4143847110619732, "grad_norm": 0.09342877792789815, "learning_rate": 0.00023915778754907247, "loss": 2.757, "step": 17910 }, { "epoch": 1.4147795699986179, "grad_norm": 0.08828449742346388, "learning_rate": 0.000238863850385593, "loss": 2.9974, "step": 17915 }, { "epoch": 1.4151744289352628, "grad_norm": 0.07879533172713955, "learning_rate": 0.00023857003726731585, "loss": 2.8408, "step": 17920 }, { "epoch": 1.4155692878719077, "grad_norm": 0.1286888891444107, "learning_rate": 0.0002382763483338082, "loss": 2.714, "step": 17925 }, { "epoch": 1.4159641468085526, "grad_norm": 0.08878957151608269, "learning_rate": 0.00023798278372457848, "loss": 2.8283, "step": 17930 }, { "epoch": 1.4163590057451976, "grad_norm": 0.0892428476441591, "learning_rate": 0.00023768934357907602, "loss": 2.7387, "step": 17935 }, { "epoch": 1.4167538646818425, "grad_norm": 0.09552622565840029, "learning_rate": 0.00023739602803669126, "loss": 2.6471, "step": 17940 }, { "epoch": 1.4171487236184874, "grad_norm": 0.11017515031281334, "learning_rate": 0.0002371028372367554, "loss": 3.1095, "step": 17945 }, { "epoch": 1.4175435825551321, "grad_norm": 0.08685044881034401, "learning_rate": 0.00023680977131853998, "loss": 2.9181, "step": 17950 }, { "epoch": 1.417938441491777, "grad_norm": 0.09558947103262389, "learning_rate": 0.0002365168304212579, "loss": 3.041, "step": 17955 }, { "epoch": 1.418333300428422, "grad_norm": 0.10033493860589299, "learning_rate": 0.0002362240146840618, "loss": 2.921, "step": 17960 }, { "epoch": 1.418728159365067, "grad_norm": 0.08531814907884772, "learning_rate": 0.0002359313242460459, "loss": 2.9721, "step": 17965 }, { "epoch": 1.4191230183017116, "grad_norm": 0.1067081288419868, "learning_rate": 0.0002356387592462439, "loss": 2.8965, "step": 17970 }, { "epoch": 1.4195178772383565, "grad_norm": 0.0940069882658937, "learning_rate": 0.0002353463198236307, "loss": 3.0781, "step": 17975 }, { "epoch": 1.4199127361750015, "grad_norm": 0.09351570542933818, "learning_rate": 0.00023505400611712097, "loss": 2.757, "step": 17980 }, { "epoch": 1.4203075951116464, "grad_norm": 0.0896483334003401, "learning_rate": 0.00023476181826556996, "loss": 3.0037, "step": 17985 }, { "epoch": 1.4207024540482913, "grad_norm": 0.09020514055518011, "learning_rate": 0.0002344697564077734, "loss": 2.7986, "step": 17990 }, { "epoch": 1.4210973129849362, "grad_norm": 0.09558899381869639, "learning_rate": 0.00023417782068246634, "loss": 2.7039, "step": 17995 }, { "epoch": 1.421492171921581, "grad_norm": 0.09558315550403197, "learning_rate": 0.00023388601122832498, "loss": 2.7764, "step": 18000 }, { "epoch": 1.4218870308582259, "grad_norm": 0.09538390031865625, "learning_rate": 0.0002335943281839645, "loss": 2.9735, "step": 18005 }, { "epoch": 1.4222818897948708, "grad_norm": 0.08619283795318881, "learning_rate": 0.0002333027716879409, "loss": 2.7034, "step": 18010 }, { "epoch": 1.4226767487315157, "grad_norm": 0.08709628443133959, "learning_rate": 0.00023301134187874955, "loss": 2.6656, "step": 18015 }, { "epoch": 1.4230716076681604, "grad_norm": 0.08299581712906347, "learning_rate": 0.00023272003889482563, "loss": 2.6581, "step": 18020 }, { "epoch": 1.4234664666048054, "grad_norm": 0.08866590262617943, "learning_rate": 0.00023242886287454446, "loss": 2.9142, "step": 18025 }, { "epoch": 1.4238613255414503, "grad_norm": 0.08052881369611367, "learning_rate": 0.00023213781395622058, "loss": 2.8421, "step": 18030 }, { "epoch": 1.4242561844780952, "grad_norm": 0.08940373916596518, "learning_rate": 0.00023184689227810867, "loss": 2.7414, "step": 18035 }, { "epoch": 1.4246510434147401, "grad_norm": 0.10228798195884883, "learning_rate": 0.00023155609797840244, "loss": 2.7387, "step": 18040 }, { "epoch": 1.425045902351385, "grad_norm": 0.09021968298745446, "learning_rate": 0.00023126543119523558, "loss": 2.8833, "step": 18045 }, { "epoch": 1.42544076128803, "grad_norm": 0.10561806949010616, "learning_rate": 0.00023097489206668064, "loss": 2.6521, "step": 18050 }, { "epoch": 1.4258356202246747, "grad_norm": 0.09158061789036548, "learning_rate": 0.0002306844807307501, "loss": 2.6384, "step": 18055 }, { "epoch": 1.4262304791613196, "grad_norm": 0.08481338864936896, "learning_rate": 0.00023039419732539564, "loss": 2.7206, "step": 18060 }, { "epoch": 1.4266253380979645, "grad_norm": 0.08343185609397541, "learning_rate": 0.0002301040419885076, "loss": 2.9476, "step": 18065 }, { "epoch": 1.4270201970346093, "grad_norm": 0.7509779113663442, "learning_rate": 0.00022981401485791636, "loss": 2.9329, "step": 18070 }, { "epoch": 1.4274150559712542, "grad_norm": 0.1313062370478794, "learning_rate": 0.00022952411607139056, "loss": 2.846, "step": 18075 }, { "epoch": 1.427809914907899, "grad_norm": 0.08749508767115738, "learning_rate": 0.00022923434576663866, "loss": 2.6867, "step": 18080 }, { "epoch": 1.428204773844544, "grad_norm": 0.09171952322788801, "learning_rate": 0.00022894470408130725, "loss": 2.9582, "step": 18085 }, { "epoch": 1.428599632781189, "grad_norm": 0.2465733278516523, "learning_rate": 0.00022865519115298266, "loss": 2.6913, "step": 18090 }, { "epoch": 1.4289944917178339, "grad_norm": 0.09695044446680233, "learning_rate": 0.0002283658071191893, "loss": 2.8197, "step": 18095 }, { "epoch": 1.4293893506544788, "grad_norm": 0.08847060198365192, "learning_rate": 0.00022807655211739116, "loss": 2.8952, "step": 18100 }, { "epoch": 1.4297842095911235, "grad_norm": 0.09071383591595936, "learning_rate": 0.0002277874262849902, "loss": 2.7139, "step": 18105 }, { "epoch": 1.4301790685277684, "grad_norm": 0.08224829555320974, "learning_rate": 0.00022749842975932716, "loss": 2.6975, "step": 18110 }, { "epoch": 1.4305739274644134, "grad_norm": 0.10429762519207297, "learning_rate": 0.0002272095626776819, "loss": 2.6882, "step": 18115 }, { "epoch": 1.4309687864010583, "grad_norm": 0.07829714510239148, "learning_rate": 0.00022692082517727203, "loss": 2.7205, "step": 18120 }, { "epoch": 1.431363645337703, "grad_norm": 0.09795445483402239, "learning_rate": 0.00022663221739525435, "loss": 2.6758, "step": 18125 }, { "epoch": 1.431758504274348, "grad_norm": 0.13804596372934594, "learning_rate": 0.00022634373946872329, "loss": 2.6228, "step": 18130 }, { "epoch": 1.4321533632109928, "grad_norm": 0.10033519532530225, "learning_rate": 0.00022605539153471217, "loss": 2.9876, "step": 18135 }, { "epoch": 1.4325482221476378, "grad_norm": 0.08678305326306814, "learning_rate": 0.0002257671737301925, "loss": 2.7745, "step": 18140 }, { "epoch": 1.4329430810842827, "grad_norm": 0.08048598960081876, "learning_rate": 0.00022547908619207353, "loss": 2.697, "step": 18145 }, { "epoch": 1.4333379400209276, "grad_norm": 0.12028120955585901, "learning_rate": 0.00022519112905720334, "loss": 2.9096, "step": 18150 }, { "epoch": 1.4337327989575723, "grad_norm": 0.12195716353852096, "learning_rate": 0.00022490330246236718, "loss": 2.9758, "step": 18155 }, { "epoch": 1.4341276578942173, "grad_norm": 0.07798271911969473, "learning_rate": 0.0002246156065442893, "loss": 2.6622, "step": 18160 }, { "epoch": 1.4345225168308622, "grad_norm": 0.08653918532478642, "learning_rate": 0.00022432804143963087, "loss": 2.7668, "step": 18165 }, { "epoch": 1.434917375767507, "grad_norm": 0.08308488470346018, "learning_rate": 0.00022404060728499183, "loss": 2.6683, "step": 18170 }, { "epoch": 1.4353122347041518, "grad_norm": 0.11481126011229932, "learning_rate": 0.00022375330421690908, "loss": 2.7725, "step": 18175 }, { "epoch": 1.4357070936407967, "grad_norm": 0.08418822277970096, "learning_rate": 0.00022346613237185788, "loss": 2.6543, "step": 18180 }, { "epoch": 1.4361019525774417, "grad_norm": 0.08454112364912678, "learning_rate": 0.00022317909188625113, "loss": 2.7436, "step": 18185 }, { "epoch": 1.4364968115140866, "grad_norm": 0.07966703661787773, "learning_rate": 0.0002228921828964389, "loss": 2.8352, "step": 18190 }, { "epoch": 1.4368916704507315, "grad_norm": 0.081962738127862, "learning_rate": 0.00022260540553870913, "loss": 2.6182, "step": 18195 }, { "epoch": 1.4372865293873764, "grad_norm": 0.08170981374574073, "learning_rate": 0.00022231875994928684, "loss": 2.7912, "step": 18200 }, { "epoch": 1.4376813883240214, "grad_norm": 0.08618239014545331, "learning_rate": 0.00022203224626433511, "loss": 2.6883, "step": 18205 }, { "epoch": 1.438076247260666, "grad_norm": 0.0789225975720156, "learning_rate": 0.000221745864619954, "loss": 2.7029, "step": 18210 }, { "epoch": 1.438471106197311, "grad_norm": 0.08341902385099302, "learning_rate": 0.0002214596151521806, "loss": 2.6247, "step": 18215 }, { "epoch": 1.438865965133956, "grad_norm": 0.10302862627498623, "learning_rate": 0.00022117349799698983, "loss": 2.5043, "step": 18220 }, { "epoch": 1.4392608240706009, "grad_norm": 0.08335273676966906, "learning_rate": 0.000220887513290293, "loss": 2.804, "step": 18225 }, { "epoch": 1.4396556830072456, "grad_norm": 0.07671696273225173, "learning_rate": 0.00022060166116793928, "loss": 2.8755, "step": 18230 }, { "epoch": 1.4400505419438905, "grad_norm": 0.09641372704336901, "learning_rate": 0.0002203159417657141, "loss": 3.0703, "step": 18235 }, { "epoch": 1.4404454008805354, "grad_norm": 0.08207590194726092, "learning_rate": 0.00022003035521934067, "loss": 2.7433, "step": 18240 }, { "epoch": 1.4408402598171803, "grad_norm": 0.11982906213262523, "learning_rate": 0.0002197449016644782, "loss": 2.6973, "step": 18245 }, { "epoch": 1.4412351187538253, "grad_norm": 0.0932967450819402, "learning_rate": 0.00021945958123672338, "loss": 2.7895, "step": 18250 }, { "epoch": 1.4416299776904702, "grad_norm": 0.0836729962797552, "learning_rate": 0.0002191743940716096, "loss": 2.6381, "step": 18255 }, { "epoch": 1.442024836627115, "grad_norm": 0.09884245214819053, "learning_rate": 0.00021888934030460657, "loss": 2.9795, "step": 18260 }, { "epoch": 1.4424196955637598, "grad_norm": 0.08638895399255878, "learning_rate": 0.00021860442007112107, "loss": 2.9525, "step": 18265 }, { "epoch": 1.4428145545004047, "grad_norm": 0.09514333398274964, "learning_rate": 0.00021831963350649604, "loss": 2.7951, "step": 18270 }, { "epoch": 1.4432094134370497, "grad_norm": 0.08120414672375044, "learning_rate": 0.00021803498074601135, "loss": 2.869, "step": 18275 }, { "epoch": 1.4436042723736944, "grad_norm": 0.11176457176325846, "learning_rate": 0.00021775046192488295, "loss": 2.8618, "step": 18280 }, { "epoch": 1.4439991313103393, "grad_norm": 0.109114570229091, "learning_rate": 0.00021746607717826316, "loss": 2.6831, "step": 18285 }, { "epoch": 1.4443939902469842, "grad_norm": 0.099831803500688, "learning_rate": 0.00021718182664124116, "loss": 2.7849, "step": 18290 }, { "epoch": 1.4447888491836292, "grad_norm": 0.09010706202195962, "learning_rate": 0.0002168977104488415, "loss": 2.6688, "step": 18295 }, { "epoch": 1.445183708120274, "grad_norm": 0.09977954830844678, "learning_rate": 0.0002166137287360258, "loss": 2.5941, "step": 18300 }, { "epoch": 1.445578567056919, "grad_norm": 0.08843098651854818, "learning_rate": 0.00021632988163769107, "loss": 2.8221, "step": 18305 }, { "epoch": 1.445973425993564, "grad_norm": 0.07805697892275541, "learning_rate": 0.00021604616928867098, "loss": 2.7058, "step": 18310 }, { "epoch": 1.4463682849302086, "grad_norm": 0.08327883616375158, "learning_rate": 0.00021576259182373464, "loss": 2.7326, "step": 18315 }, { "epoch": 1.4467631438668536, "grad_norm": 0.09326352314733417, "learning_rate": 0.0002154791493775876, "loss": 2.6886, "step": 18320 }, { "epoch": 1.4471580028034985, "grad_norm": 0.1080935626997785, "learning_rate": 0.00021519584208487075, "loss": 2.8111, "step": 18325 }, { "epoch": 1.4475528617401434, "grad_norm": 0.09202714972508991, "learning_rate": 0.00021491267008016124, "loss": 2.7078, "step": 18330 }, { "epoch": 1.4479477206767881, "grad_norm": 0.09090076428907791, "learning_rate": 0.0002146296334979719, "loss": 2.9855, "step": 18335 }, { "epoch": 1.448342579613433, "grad_norm": 0.08397472266819433, "learning_rate": 0.00021434673247275072, "loss": 2.6623, "step": 18340 }, { "epoch": 1.448737438550078, "grad_norm": 0.08757312706954885, "learning_rate": 0.0002140639671388821, "loss": 2.8226, "step": 18345 }, { "epoch": 1.449132297486723, "grad_norm": 0.08844337498193791, "learning_rate": 0.00021378133763068515, "loss": 2.7782, "step": 18350 }, { "epoch": 1.4495271564233678, "grad_norm": 0.07905306607611662, "learning_rate": 0.00021349884408241533, "loss": 2.6583, "step": 18355 }, { "epoch": 1.4499220153600128, "grad_norm": 0.08361398567647524, "learning_rate": 0.00021321648662826255, "loss": 2.8168, "step": 18360 }, { "epoch": 1.4503168742966575, "grad_norm": 0.08142429781598369, "learning_rate": 0.00021293426540235305, "loss": 2.7168, "step": 18365 }, { "epoch": 1.4507117332333024, "grad_norm": 0.09518094724602517, "learning_rate": 0.00021265218053874774, "loss": 2.7491, "step": 18370 }, { "epoch": 1.4511065921699473, "grad_norm": 0.08667207120782587, "learning_rate": 0.00021237023217144258, "loss": 2.7233, "step": 18375 }, { "epoch": 1.4515014511065922, "grad_norm": 0.09568924202577787, "learning_rate": 0.00021208842043436954, "loss": 3.0417, "step": 18380 }, { "epoch": 1.451896310043237, "grad_norm": 0.10189818466522647, "learning_rate": 0.00021180674546139466, "loss": 2.7962, "step": 18385 }, { "epoch": 1.4522911689798819, "grad_norm": 0.10252828118512938, "learning_rate": 0.00021152520738631998, "loss": 2.7316, "step": 18390 }, { "epoch": 1.4526860279165268, "grad_norm": 0.08746556224597986, "learning_rate": 0.00021124380634288166, "loss": 2.7492, "step": 18395 }, { "epoch": 1.4530808868531717, "grad_norm": 0.0928139823162737, "learning_rate": 0.00021096254246475132, "loss": 2.6573, "step": 18400 }, { "epoch": 1.4534757457898166, "grad_norm": 0.10262908098509106, "learning_rate": 0.00021068141588553545, "loss": 2.5843, "step": 18405 }, { "epoch": 1.4538706047264616, "grad_norm": 0.08613513472018322, "learning_rate": 0.0002104004267387748, "loss": 2.8067, "step": 18410 }, { "epoch": 1.4542654636631063, "grad_norm": 0.09927378731391114, "learning_rate": 0.00021011957515794545, "loss": 3.0743, "step": 18415 }, { "epoch": 1.4546603225997512, "grad_norm": 0.08814610057363462, "learning_rate": 0.00020983886127645758, "loss": 2.7916, "step": 18420 }, { "epoch": 1.4550551815363961, "grad_norm": 0.09244765395083226, "learning_rate": 0.00020955828522765648, "loss": 2.771, "step": 18425 }, { "epoch": 1.455450040473041, "grad_norm": 0.09698197234321518, "learning_rate": 0.00020927784714482145, "loss": 2.9195, "step": 18430 }, { "epoch": 1.4558448994096858, "grad_norm": 0.08450190413450376, "learning_rate": 0.00020899754716116687, "loss": 2.6166, "step": 18435 }, { "epoch": 1.4562397583463307, "grad_norm": 0.09660055183034334, "learning_rate": 0.00020871738540984076, "loss": 2.7799, "step": 18440 }, { "epoch": 1.4566346172829756, "grad_norm": 0.10893825607754479, "learning_rate": 0.0002084373620239261, "loss": 2.597, "step": 18445 }, { "epoch": 1.4570294762196205, "grad_norm": 0.10270173051292975, "learning_rate": 0.00020815747713644013, "loss": 2.7339, "step": 18450 }, { "epoch": 1.4574243351562655, "grad_norm": 0.07549540916158942, "learning_rate": 0.0002078777308803339, "loss": 2.6212, "step": 18455 }, { "epoch": 1.4578191940929104, "grad_norm": 0.11456575503306117, "learning_rate": 0.00020759812338849276, "loss": 2.6191, "step": 18460 }, { "epoch": 1.4582140530295553, "grad_norm": 0.12682903795928593, "learning_rate": 0.00020731865479373606, "loss": 2.6645, "step": 18465 }, { "epoch": 1.4586089119662, "grad_norm": 0.09254248813169921, "learning_rate": 0.00020703932522881746, "loss": 2.8465, "step": 18470 }, { "epoch": 1.459003770902845, "grad_norm": 0.08228746094865311, "learning_rate": 0.00020676013482642463, "loss": 2.6975, "step": 18475 }, { "epoch": 1.4593986298394899, "grad_norm": 0.09484297916034423, "learning_rate": 0.00020648108371917857, "loss": 2.7726, "step": 18480 }, { "epoch": 1.4597934887761348, "grad_norm": 0.13053433741452336, "learning_rate": 0.00020620217203963482, "loss": 2.8807, "step": 18485 }, { "epoch": 1.4601883477127795, "grad_norm": 0.2338498670929768, "learning_rate": 0.00020592339992028192, "loss": 2.5826, "step": 18490 }, { "epoch": 1.4605832066494244, "grad_norm": 0.09098495676221381, "learning_rate": 0.0002056447674935429, "loss": 2.6822, "step": 18495 }, { "epoch": 1.4609780655860694, "grad_norm": 0.11880832290110581, "learning_rate": 0.0002053662748917738, "loss": 3.1907, "step": 18500 }, { "epoch": 1.4613729245227143, "grad_norm": 0.11622021207880914, "learning_rate": 0.00020508792224726476, "loss": 2.8027, "step": 18505 }, { "epoch": 1.4617677834593592, "grad_norm": 0.08630694087810814, "learning_rate": 0.00020480970969223884, "loss": 2.7294, "step": 18510 }, { "epoch": 1.4621626423960041, "grad_norm": 0.13368680446316727, "learning_rate": 0.0002045316373588531, "loss": 2.7865, "step": 18515 }, { "epoch": 1.4625575013326488, "grad_norm": 0.07523308456330385, "learning_rate": 0.00020425370537919803, "loss": 2.6989, "step": 18520 }, { "epoch": 1.4629523602692938, "grad_norm": 0.08286074666763528, "learning_rate": 0.00020397591388529674, "loss": 2.739, "step": 18525 }, { "epoch": 1.4633472192059387, "grad_norm": 0.07611678136492762, "learning_rate": 0.00020369826300910642, "loss": 2.6458, "step": 18530 }, { "epoch": 1.4637420781425836, "grad_norm": 0.08200577802947734, "learning_rate": 0.00020342075288251688, "loss": 2.8296, "step": 18535 }, { "epoch": 1.4641369370792283, "grad_norm": 0.09035886568531673, "learning_rate": 0.00020314338363735163, "loss": 2.6544, "step": 18540 }, { "epoch": 1.4645317960158732, "grad_norm": 0.09409630542001457, "learning_rate": 0.00020286615540536667, "loss": 2.7185, "step": 18545 }, { "epoch": 1.4649266549525182, "grad_norm": 0.08085911965348075, "learning_rate": 0.00020258906831825118, "loss": 2.7834, "step": 18550 }, { "epoch": 1.465321513889163, "grad_norm": 0.09870010095831692, "learning_rate": 0.0002023121225076277, "loss": 2.7603, "step": 18555 }, { "epoch": 1.465716372825808, "grad_norm": 0.09419559770118907, "learning_rate": 0.00020203531810505105, "loss": 2.7787, "step": 18560 }, { "epoch": 1.466111231762453, "grad_norm": 0.0812025136575757, "learning_rate": 0.00020175865524200954, "loss": 2.8777, "step": 18565 }, { "epoch": 1.4665060906990979, "grad_norm": 0.14720945365421795, "learning_rate": 0.00020148213404992348, "loss": 2.6477, "step": 18570 }, { "epoch": 1.4669009496357426, "grad_norm": 0.07992440493391413, "learning_rate": 0.00020120575466014675, "loss": 2.8777, "step": 18575 }, { "epoch": 1.4672958085723875, "grad_norm": 0.07673657222567544, "learning_rate": 0.0002009295172039649, "loss": 2.685, "step": 18580 }, { "epoch": 1.4676906675090324, "grad_norm": 0.08903166044983803, "learning_rate": 0.0002006534218125971, "loss": 2.8729, "step": 18585 }, { "epoch": 1.4680855264456774, "grad_norm": 0.13549773852936176, "learning_rate": 0.00020037746861719402, "loss": 2.7832, "step": 18590 }, { "epoch": 1.468480385382322, "grad_norm": 0.10457943797479992, "learning_rate": 0.00020010165774883953, "loss": 2.8477, "step": 18595 }, { "epoch": 1.468875244318967, "grad_norm": 0.11546734899243488, "learning_rate": 0.00019982598933854985, "loss": 3.0047, "step": 18600 }, { "epoch": 1.469270103255612, "grad_norm": 0.13823478415692614, "learning_rate": 0.0001995504635172728, "loss": 3.1505, "step": 18605 }, { "epoch": 1.4696649621922568, "grad_norm": 0.08413958291896477, "learning_rate": 0.00019927508041588942, "loss": 2.6931, "step": 18610 }, { "epoch": 1.4700598211289018, "grad_norm": 0.0749332163666133, "learning_rate": 0.00019899984016521227, "loss": 2.5361, "step": 18615 }, { "epoch": 1.4704546800655467, "grad_norm": 0.08974157405402297, "learning_rate": 0.00019872474289598646, "loss": 2.893, "step": 18620 }, { "epoch": 1.4708495390021914, "grad_norm": 0.07992634919548383, "learning_rate": 0.0001984497887388888, "loss": 2.7348, "step": 18625 }, { "epoch": 1.4712443979388363, "grad_norm": 0.07838335036707507, "learning_rate": 0.0001981749778245287, "loss": 2.7023, "step": 18630 }, { "epoch": 1.4716392568754812, "grad_norm": 0.08528435189064412, "learning_rate": 0.00019790031028344684, "loss": 2.8632, "step": 18635 }, { "epoch": 1.4720341158121262, "grad_norm": 0.08564869533659908, "learning_rate": 0.0001976257862461161, "loss": 2.6675, "step": 18640 }, { "epoch": 1.4724289747487709, "grad_norm": 0.08762040542774825, "learning_rate": 0.00019735140584294154, "loss": 2.7718, "step": 18645 }, { "epoch": 1.4728238336854158, "grad_norm": 0.1335410807986151, "learning_rate": 0.00019707716920425923, "loss": 2.7155, "step": 18650 }, { "epoch": 1.4732186926220607, "grad_norm": 0.0832071754821339, "learning_rate": 0.0001968030764603378, "loss": 2.7938, "step": 18655 }, { "epoch": 1.4736135515587057, "grad_norm": 0.09079234356667745, "learning_rate": 0.00019652912774137678, "loss": 2.6877, "step": 18660 }, { "epoch": 1.4740084104953506, "grad_norm": 0.08816375080811821, "learning_rate": 0.00019625532317750767, "loss": 2.8141, "step": 18665 }, { "epoch": 1.4744032694319955, "grad_norm": 0.07438250955786713, "learning_rate": 0.00019598166289879377, "loss": 2.6858, "step": 18670 }, { "epoch": 1.4747981283686404, "grad_norm": 0.10046855126313099, "learning_rate": 0.00019570814703522905, "loss": 2.8334, "step": 18675 }, { "epoch": 1.4751929873052851, "grad_norm": 0.08476691986452206, "learning_rate": 0.00019543477571673968, "loss": 2.7634, "step": 18680 }, { "epoch": 1.47558784624193, "grad_norm": 0.08819164125172188, "learning_rate": 0.00019516154907318257, "loss": 2.6396, "step": 18685 }, { "epoch": 1.475982705178575, "grad_norm": 0.12680832488695254, "learning_rate": 0.00019488846723434645, "loss": 2.8961, "step": 18690 }, { "epoch": 1.47637756411522, "grad_norm": 0.09732365262101418, "learning_rate": 0.00019461553032995061, "loss": 2.7263, "step": 18695 }, { "epoch": 1.4767724230518646, "grad_norm": 0.09065483675522942, "learning_rate": 0.00019434273848964624, "loss": 2.7878, "step": 18700 }, { "epoch": 1.4771672819885096, "grad_norm": 0.08721577117014037, "learning_rate": 0.00019407009184301487, "loss": 2.8899, "step": 18705 }, { "epoch": 1.4775621409251545, "grad_norm": 0.09392874555245624, "learning_rate": 0.0001937975905195697, "loss": 2.754, "step": 18710 }, { "epoch": 1.4779569998617994, "grad_norm": 0.10799823419301731, "learning_rate": 0.00019352523464875472, "loss": 2.9359, "step": 18715 }, { "epoch": 1.4783518587984443, "grad_norm": 0.10539515901770079, "learning_rate": 0.00019325302435994435, "loss": 2.7231, "step": 18720 }, { "epoch": 1.4787467177350893, "grad_norm": 0.09135932252307005, "learning_rate": 0.00019298095978244478, "loss": 2.7085, "step": 18725 }, { "epoch": 1.479141576671734, "grad_norm": 0.108954844175885, "learning_rate": 0.00019270904104549185, "loss": 2.7728, "step": 18730 }, { "epoch": 1.4795364356083789, "grad_norm": 0.07523779273346134, "learning_rate": 0.00019243726827825318, "loss": 2.7854, "step": 18735 }, { "epoch": 1.4799312945450238, "grad_norm": 0.07785739093108666, "learning_rate": 0.00019216564160982625, "loss": 2.8158, "step": 18740 }, { "epoch": 1.4803261534816687, "grad_norm": 0.07409769549175113, "learning_rate": 0.0001918941611692396, "loss": 2.7639, "step": 18745 }, { "epoch": 1.4807210124183134, "grad_norm": 0.08215766889589482, "learning_rate": 0.00019162282708545242, "loss": 2.7254, "step": 18750 }, { "epoch": 1.4811158713549584, "grad_norm": 0.08771996780912752, "learning_rate": 0.00019135163948735378, "loss": 2.7276, "step": 18755 }, { "epoch": 1.4815107302916033, "grad_norm": 0.07984375617178872, "learning_rate": 0.0001910805985037639, "loss": 2.9286, "step": 18760 }, { "epoch": 1.4819055892282482, "grad_norm": 0.08603236444295992, "learning_rate": 0.00019080970426343263, "loss": 2.8546, "step": 18765 }, { "epoch": 1.4823004481648931, "grad_norm": 0.10880908630473596, "learning_rate": 0.0001905389568950408, "loss": 2.6121, "step": 18770 }, { "epoch": 1.482695307101538, "grad_norm": 0.08050563719934266, "learning_rate": 0.00019026835652719882, "loss": 2.7159, "step": 18775 }, { "epoch": 1.4830901660381828, "grad_norm": 0.08954633016635541, "learning_rate": 0.00018999790328844796, "loss": 2.7135, "step": 18780 }, { "epoch": 1.4834850249748277, "grad_norm": 0.09443200034433649, "learning_rate": 0.0001897275973072588, "loss": 2.6593, "step": 18785 }, { "epoch": 1.4838798839114726, "grad_norm": 0.14595823851023307, "learning_rate": 0.00018945743871203268, "loss": 2.9606, "step": 18790 }, { "epoch": 1.4842747428481176, "grad_norm": 0.11092052622195589, "learning_rate": 0.00018918742763110086, "loss": 2.8258, "step": 18795 }, { "epoch": 1.4846696017847623, "grad_norm": 0.08067905083338059, "learning_rate": 0.00018891756419272382, "loss": 2.5858, "step": 18800 }, { "epoch": 1.4850644607214072, "grad_norm": 0.111495631251125, "learning_rate": 0.00018864784852509292, "loss": 2.6817, "step": 18805 }, { "epoch": 1.4854593196580521, "grad_norm": 0.09090796648678542, "learning_rate": 0.0001883782807563285, "loss": 2.8427, "step": 18810 }, { "epoch": 1.485854178594697, "grad_norm": 0.09085312900882068, "learning_rate": 0.00018810886101448089, "loss": 2.7206, "step": 18815 }, { "epoch": 1.486249037531342, "grad_norm": 0.08694206617268613, "learning_rate": 0.0001878395894275305, "loss": 2.6889, "step": 18820 }, { "epoch": 1.486643896467987, "grad_norm": 0.08927340722469605, "learning_rate": 0.00018757046612338663, "loss": 2.7102, "step": 18825 }, { "epoch": 1.4870387554046318, "grad_norm": 0.1363316146043505, "learning_rate": 0.00018730149122988897, "loss": 2.8053, "step": 18830 }, { "epoch": 1.4874336143412765, "grad_norm": 0.08210732829918956, "learning_rate": 0.0001870326648748059, "loss": 2.619, "step": 18835 }, { "epoch": 1.4878284732779214, "grad_norm": 0.21688685222145043, "learning_rate": 0.00018676398718583603, "loss": 2.5793, "step": 18840 }, { "epoch": 1.4882233322145664, "grad_norm": 0.08124826169594171, "learning_rate": 0.00018649545829060654, "loss": 2.712, "step": 18845 }, { "epoch": 1.4886181911512113, "grad_norm": 0.0955623014960184, "learning_rate": 0.00018622707831667472, "loss": 2.5956, "step": 18850 }, { "epoch": 1.489013050087856, "grad_norm": 0.08216169127673727, "learning_rate": 0.0001859588473915264, "loss": 2.7774, "step": 18855 }, { "epoch": 1.489407909024501, "grad_norm": 0.07944433492203085, "learning_rate": 0.00018569076564257714, "loss": 2.8822, "step": 18860 }, { "epoch": 1.4898027679611459, "grad_norm": 0.1260427788320825, "learning_rate": 0.00018542283319717158, "loss": 2.8261, "step": 18865 }, { "epoch": 1.4901976268977908, "grad_norm": 0.10211120945136591, "learning_rate": 0.000185155050182583, "loss": 2.7325, "step": 18870 }, { "epoch": 1.4905924858344357, "grad_norm": 0.08023210244796584, "learning_rate": 0.00018488741672601433, "loss": 2.8497, "step": 18875 }, { "epoch": 1.4909873447710806, "grad_norm": 0.1070279952351659, "learning_rate": 0.00018461993295459679, "loss": 3.0472, "step": 18880 }, { "epoch": 1.4913822037077253, "grad_norm": 0.08094039689327294, "learning_rate": 0.00018435259899539114, "loss": 2.6965, "step": 18885 }, { "epoch": 1.4917770626443703, "grad_norm": 0.09307923426941918, "learning_rate": 0.00018408541497538645, "loss": 2.6749, "step": 18890 }, { "epoch": 1.4921719215810152, "grad_norm": 0.08484572097204027, "learning_rate": 0.00018381838102150095, "loss": 2.8145, "step": 18895 }, { "epoch": 1.4925667805176601, "grad_norm": 0.08048398313850201, "learning_rate": 0.0001835514972605814, "loss": 2.8408, "step": 18900 }, { "epoch": 1.4929616394543048, "grad_norm": 0.07782889369872492, "learning_rate": 0.00018328476381940307, "loss": 2.7248, "step": 18905 }, { "epoch": 1.4933564983909497, "grad_norm": 0.08313590428006101, "learning_rate": 0.0001830181808246702, "loss": 2.7549, "step": 18910 }, { "epoch": 1.4937513573275947, "grad_norm": 0.08740657933427692, "learning_rate": 0.00018275174840301516, "loss": 2.5668, "step": 18915 }, { "epoch": 1.4941462162642396, "grad_norm": 0.0801525154085414, "learning_rate": 0.00018248546668099924, "loss": 2.6937, "step": 18920 }, { "epoch": 1.4945410752008845, "grad_norm": 0.09882739868004749, "learning_rate": 0.00018221933578511164, "loss": 2.7207, "step": 18925 }, { "epoch": 1.4949359341375295, "grad_norm": 0.08019733055166622, "learning_rate": 0.00018195335584177025, "loss": 2.7711, "step": 18930 }, { "epoch": 1.4953307930741744, "grad_norm": 0.10045295278180053, "learning_rate": 0.00018168752697732144, "loss": 2.7543, "step": 18935 }, { "epoch": 1.495725652010819, "grad_norm": 0.08865789537635312, "learning_rate": 0.00018142184931803913, "loss": 2.7104, "step": 18940 }, { "epoch": 1.496120510947464, "grad_norm": 0.093145144333253, "learning_rate": 0.00018115632299012623, "loss": 2.7367, "step": 18945 }, { "epoch": 1.496515369884109, "grad_norm": 0.08277446707841006, "learning_rate": 0.00018089094811971301, "loss": 2.9434, "step": 18950 }, { "epoch": 1.4969102288207539, "grad_norm": 0.09481801011159587, "learning_rate": 0.0001806257248328585, "loss": 2.9026, "step": 18955 }, { "epoch": 1.4973050877573986, "grad_norm": 0.08408851475780055, "learning_rate": 0.000180360653255549, "loss": 2.6559, "step": 18960 }, { "epoch": 1.4976999466940435, "grad_norm": 0.07877868888944269, "learning_rate": 0.00018009573351369956, "loss": 2.6064, "step": 18965 }, { "epoch": 1.4980948056306884, "grad_norm": 0.0927209955393855, "learning_rate": 0.0001798309657331522, "loss": 2.9473, "step": 18970 }, { "epoch": 1.4984896645673333, "grad_norm": 0.08553995582290043, "learning_rate": 0.0001795663500396777, "loss": 2.9317, "step": 18975 }, { "epoch": 1.4988845235039783, "grad_norm": 0.07980468463330433, "learning_rate": 0.00017930188655897362, "loss": 2.7916, "step": 18980 }, { "epoch": 1.4992793824406232, "grad_norm": 0.09804342765011984, "learning_rate": 0.00017903757541666603, "loss": 2.6618, "step": 18985 }, { "epoch": 1.499674241377268, "grad_norm": 0.10248003743626945, "learning_rate": 0.0001787734167383086, "loss": 2.9158, "step": 18990 }, { "epoch": 1.5000691003139128, "grad_norm": 0.09025078127397908, "learning_rate": 0.00017850941064938164, "loss": 2.8993, "step": 18995 }, { "epoch": 1.5004639592505578, "grad_norm": 0.0950855702269422, "learning_rate": 0.00017824555727529413, "loss": 2.7345, "step": 19000 }, { "epoch": 1.5008588181872025, "grad_norm": 0.08702163589044246, "learning_rate": 0.00017798185674138163, "loss": 2.7152, "step": 19005 }, { "epoch": 1.5012536771238474, "grad_norm": 0.08418395402904148, "learning_rate": 0.00017771830917290765, "loss": 2.7615, "step": 19010 }, { "epoch": 1.5016485360604923, "grad_norm": 0.08287050437006566, "learning_rate": 0.00017745491469506314, "loss": 2.6816, "step": 19015 }, { "epoch": 1.5020433949971372, "grad_norm": 0.07287145635634534, "learning_rate": 0.00017719167343296566, "loss": 2.6814, "step": 19020 }, { "epoch": 1.5024382539337822, "grad_norm": 0.07933338300049474, "learning_rate": 0.0001769285855116607, "loss": 2.7369, "step": 19025 }, { "epoch": 1.502833112870427, "grad_norm": 0.09039537502931957, "learning_rate": 0.0001766656510561202, "loss": 2.8887, "step": 19030 }, { "epoch": 1.503227971807072, "grad_norm": 0.10745843093869162, "learning_rate": 0.00017640287019124408, "loss": 2.9787, "step": 19035 }, { "epoch": 1.503622830743717, "grad_norm": 0.09478650387608148, "learning_rate": 0.00017614024304185837, "loss": 2.8149, "step": 19040 }, { "epoch": 1.5040176896803616, "grad_norm": 0.12163150095447772, "learning_rate": 0.00017587776973271692, "loss": 2.638, "step": 19045 }, { "epoch": 1.5044125486170066, "grad_norm": 0.08216470346687903, "learning_rate": 0.00017561545038849975, "loss": 2.7638, "step": 19050 }, { "epoch": 1.5048074075536515, "grad_norm": 0.07475391009687808, "learning_rate": 0.00017535328513381426, "loss": 2.7474, "step": 19055 }, { "epoch": 1.5052022664902962, "grad_norm": 0.08886884539823812, "learning_rate": 0.0001750912740931947, "loss": 2.721, "step": 19060 }, { "epoch": 1.5055971254269411, "grad_norm": 0.08460555896279191, "learning_rate": 0.0001748294173911015, "loss": 2.6387, "step": 19065 }, { "epoch": 1.505991984363586, "grad_norm": 0.13003286230781624, "learning_rate": 0.00017456771515192245, "loss": 2.7838, "step": 19070 }, { "epoch": 1.506386843300231, "grad_norm": 0.08695669518831745, "learning_rate": 0.00017430616749997157, "loss": 2.6148, "step": 19075 }, { "epoch": 1.506781702236876, "grad_norm": 0.08696579820450653, "learning_rate": 0.00017404477455948932, "loss": 2.8418, "step": 19080 }, { "epoch": 1.5071765611735208, "grad_norm": 0.07998914058456952, "learning_rate": 0.00017378353645464319, "loss": 2.8757, "step": 19085 }, { "epoch": 1.5075714201101658, "grad_norm": 0.07715702059527478, "learning_rate": 0.00017352245330952654, "loss": 2.6594, "step": 19090 }, { "epoch": 1.5079662790468107, "grad_norm": 0.09783890948890688, "learning_rate": 0.00017326152524815968, "loss": 2.7067, "step": 19095 }, { "epoch": 1.5083611379834554, "grad_norm": 0.08509535293935651, "learning_rate": 0.00017300075239448865, "loss": 2.684, "step": 19100 }, { "epoch": 1.5087559969201003, "grad_norm": 0.0985334660732604, "learning_rate": 0.00017274013487238637, "loss": 2.9904, "step": 19105 }, { "epoch": 1.509150855856745, "grad_norm": 0.08625964981313364, "learning_rate": 0.00017247967280565146, "loss": 2.7473, "step": 19110 }, { "epoch": 1.50954571479339, "grad_norm": 0.08309063700815261, "learning_rate": 0.00017221936631800917, "loss": 2.7033, "step": 19115 }, { "epoch": 1.5099405737300349, "grad_norm": 0.07490887054118614, "learning_rate": 0.00017195921553311022, "loss": 2.7936, "step": 19120 }, { "epoch": 1.5103354326666798, "grad_norm": 0.07993434064040698, "learning_rate": 0.00017169922057453196, "loss": 2.8246, "step": 19125 }, { "epoch": 1.5107302916033247, "grad_norm": 0.08490689933939548, "learning_rate": 0.00017143938156577776, "loss": 2.6299, "step": 19130 }, { "epoch": 1.5111251505399697, "grad_norm": 0.10458277377405956, "learning_rate": 0.00017117969863027617, "loss": 2.6533, "step": 19135 }, { "epoch": 1.5115200094766146, "grad_norm": 0.07727289240289154, "learning_rate": 0.00017092017189138247, "loss": 2.6865, "step": 19140 }, { "epoch": 1.5119148684132595, "grad_norm": 0.0882223019703118, "learning_rate": 0.00017066080147237707, "loss": 2.6882, "step": 19145 }, { "epoch": 1.5123097273499042, "grad_norm": 0.0843285370242462, "learning_rate": 0.00017040158749646666, "loss": 2.7911, "step": 19150 }, { "epoch": 1.5127045862865491, "grad_norm": 0.08285687821538525, "learning_rate": 0.0001701425300867831, "loss": 2.8698, "step": 19155 }, { "epoch": 1.513099445223194, "grad_norm": 0.07665881641880445, "learning_rate": 0.00016988362936638451, "loss": 2.6829, "step": 19160 }, { "epoch": 1.5134943041598388, "grad_norm": 0.08531684234593276, "learning_rate": 0.00016962488545825395, "loss": 2.7134, "step": 19165 }, { "epoch": 1.5138891630964837, "grad_norm": 0.08794597587208763, "learning_rate": 0.00016936629848530017, "loss": 2.588, "step": 19170 }, { "epoch": 1.5142840220331286, "grad_norm": 0.0803608512693623, "learning_rate": 0.00016910786857035775, "loss": 2.6856, "step": 19175 }, { "epoch": 1.5146788809697735, "grad_norm": 0.09343964015524858, "learning_rate": 0.00016884959583618608, "loss": 2.739, "step": 19180 }, { "epoch": 1.5150737399064185, "grad_norm": 0.10195800103270129, "learning_rate": 0.00016859148040547045, "loss": 2.6461, "step": 19185 }, { "epoch": 1.5154685988430634, "grad_norm": 0.1185526527585038, "learning_rate": 0.0001683335224008209, "loss": 2.6637, "step": 19190 }, { "epoch": 1.5158634577797083, "grad_norm": 0.09667915378428692, "learning_rate": 0.00016807572194477323, "loss": 2.5453, "step": 19195 }, { "epoch": 1.5162583167163532, "grad_norm": 0.11327561093160682, "learning_rate": 0.0001678180791597878, "loss": 2.7146, "step": 19200 }, { "epoch": 1.516653175652998, "grad_norm": 0.13431547267397337, "learning_rate": 0.00016756059416825055, "loss": 2.7865, "step": 19205 }, { "epoch": 1.5170480345896429, "grad_norm": 0.1427250161100938, "learning_rate": 0.00016730326709247245, "loss": 2.769, "step": 19210 }, { "epoch": 1.5174428935262876, "grad_norm": 0.07958555137977803, "learning_rate": 0.000167046098054689, "loss": 2.6778, "step": 19215 }, { "epoch": 1.5178377524629325, "grad_norm": 0.07565523731448781, "learning_rate": 0.0001667890871770613, "loss": 2.7924, "step": 19220 }, { "epoch": 1.5182326113995774, "grad_norm": 0.0884623239180449, "learning_rate": 0.0001665322345816746, "loss": 2.6779, "step": 19225 }, { "epoch": 1.5186274703362224, "grad_norm": 0.07989639363492654, "learning_rate": 0.00016627554039053965, "loss": 2.8411, "step": 19230 }, { "epoch": 1.5190223292728673, "grad_norm": 0.07130357310827254, "learning_rate": 0.00016601900472559128, "loss": 2.6859, "step": 19235 }, { "epoch": 1.5194171882095122, "grad_norm": 0.10647388942394218, "learning_rate": 0.00016576262770868978, "loss": 2.9261, "step": 19240 }, { "epoch": 1.5198120471461571, "grad_norm": 0.07548118188014341, "learning_rate": 0.0001655064094616192, "loss": 2.6199, "step": 19245 }, { "epoch": 1.520206906082802, "grad_norm": 0.08125053966796401, "learning_rate": 0.0001652503501060889, "loss": 2.8178, "step": 19250 }, { "epoch": 1.5206017650194468, "grad_norm": 0.07979187059216329, "learning_rate": 0.00016499444976373278, "loss": 2.7692, "step": 19255 }, { "epoch": 1.5209966239560917, "grad_norm": 0.10260698096560418, "learning_rate": 0.00016473870855610834, "loss": 2.8162, "step": 19260 }, { "epoch": 1.5213914828927366, "grad_norm": 0.09603465907573051, "learning_rate": 0.00016448312660469845, "loss": 2.6947, "step": 19265 }, { "epoch": 1.5217863418293813, "grad_norm": 0.08465369513308012, "learning_rate": 0.00016422770403090969, "loss": 2.675, "step": 19270 }, { "epoch": 1.5221812007660263, "grad_norm": 0.08202830203291274, "learning_rate": 0.00016397244095607333, "loss": 2.778, "step": 19275 }, { "epoch": 1.5225760597026712, "grad_norm": 0.11717850828389892, "learning_rate": 0.00016371733750144497, "loss": 2.8637, "step": 19280 }, { "epoch": 1.522970918639316, "grad_norm": 0.09430025421470653, "learning_rate": 0.00016346239378820372, "loss": 2.7704, "step": 19285 }, { "epoch": 1.523365777575961, "grad_norm": 0.08213673685494614, "learning_rate": 0.00016320760993745358, "loss": 2.692, "step": 19290 }, { "epoch": 1.523760636512606, "grad_norm": 0.09176080856552349, "learning_rate": 0.00016295298607022207, "loss": 2.8581, "step": 19295 }, { "epoch": 1.5241554954492509, "grad_norm": 0.08249146793499787, "learning_rate": 0.00016269852230746114, "loss": 2.6761, "step": 19300 }, { "epoch": 1.5245503543858956, "grad_norm": 0.11441201972557227, "learning_rate": 0.00016244421877004616, "loss": 2.8223, "step": 19305 }, { "epoch": 1.5249452133225405, "grad_norm": 0.07900343564711612, "learning_rate": 0.000162190075578777, "loss": 2.8015, "step": 19310 }, { "epoch": 1.5253400722591854, "grad_norm": 0.07934631301579512, "learning_rate": 0.00016193609285437683, "loss": 2.7851, "step": 19315 }, { "epoch": 1.5257349311958301, "grad_norm": 0.07788971532040051, "learning_rate": 0.00016168227071749293, "loss": 2.9776, "step": 19320 }, { "epoch": 1.526129790132475, "grad_norm": 0.08370382137328194, "learning_rate": 0.0001614286092886963, "loss": 2.6987, "step": 19325 }, { "epoch": 1.52652464906912, "grad_norm": 0.08633017344645863, "learning_rate": 0.00016117510868848118, "loss": 2.7661, "step": 19330 }, { "epoch": 1.526919508005765, "grad_norm": 0.08494307338718322, "learning_rate": 0.00016092176903726612, "loss": 2.5843, "step": 19335 }, { "epoch": 1.5273143669424099, "grad_norm": 0.07300434621574341, "learning_rate": 0.00016066859045539263, "loss": 2.7457, "step": 19340 }, { "epoch": 1.5277092258790548, "grad_norm": 0.11382967588521696, "learning_rate": 0.0001604155730631257, "loss": 2.9503, "step": 19345 }, { "epoch": 1.5281040848156997, "grad_norm": 0.0774927915244876, "learning_rate": 0.00016016271698065428, "loss": 2.754, "step": 19350 }, { "epoch": 1.5284989437523446, "grad_norm": 0.07703573795128855, "learning_rate": 0.0001599100223280901, "loss": 2.6778, "step": 19355 }, { "epoch": 1.5288938026889893, "grad_norm": 0.08400487852264926, "learning_rate": 0.00015965748922546874, "loss": 2.8893, "step": 19360 }, { "epoch": 1.5292886616256343, "grad_norm": 0.1308741086068532, "learning_rate": 0.00015940511779274848, "loss": 2.7939, "step": 19365 }, { "epoch": 1.529683520562279, "grad_norm": 0.08436320858075819, "learning_rate": 0.00015915290814981132, "loss": 2.6659, "step": 19370 }, { "epoch": 1.530078379498924, "grad_norm": 0.09844042437789595, "learning_rate": 0.00015890086041646202, "loss": 2.792, "step": 19375 }, { "epoch": 1.5304732384355688, "grad_norm": 0.10911889588070879, "learning_rate": 0.00015864897471242883, "loss": 2.6933, "step": 19380 }, { "epoch": 1.5308680973722137, "grad_norm": 0.08352078579411625, "learning_rate": 0.00015839725115736247, "loss": 2.7453, "step": 19385 }, { "epoch": 1.5312629563088587, "grad_norm": 0.10023745271201641, "learning_rate": 0.0001581456898708371, "loss": 2.7872, "step": 19390 }, { "epoch": 1.5316578152455036, "grad_norm": 0.07829333674683733, "learning_rate": 0.00015789429097234992, "loss": 2.9756, "step": 19395 }, { "epoch": 1.5320526741821485, "grad_norm": 0.0926281902365949, "learning_rate": 0.00015764305458132027, "loss": 2.7519, "step": 19400 }, { "epoch": 1.5324475331187934, "grad_norm": 0.0949828036486746, "learning_rate": 0.00015739198081709123, "loss": 2.7535, "step": 19405 }, { "epoch": 1.5328423920554382, "grad_norm": 0.07887905864082831, "learning_rate": 0.00015714106979892767, "loss": 2.7057, "step": 19410 }, { "epoch": 1.533237250992083, "grad_norm": 0.10725929524446151, "learning_rate": 0.00015689032164601809, "loss": 2.7331, "step": 19415 }, { "epoch": 1.533632109928728, "grad_norm": 0.10918382805193275, "learning_rate": 0.00015663973647747276, "loss": 2.9007, "step": 19420 }, { "epoch": 1.5340269688653727, "grad_norm": 0.10001140950041748, "learning_rate": 0.00015638931441232535, "loss": 2.9349, "step": 19425 }, { "epoch": 1.5344218278020176, "grad_norm": 0.12048513759423611, "learning_rate": 0.00015613905556953144, "loss": 2.7349, "step": 19430 }, { "epoch": 1.5348166867386626, "grad_norm": 0.118049657049475, "learning_rate": 0.0001558889600679691, "loss": 2.7589, "step": 19435 }, { "epoch": 1.5352115456753075, "grad_norm": 0.08008554920194155, "learning_rate": 0.00015563902802643936, "loss": 2.8891, "step": 19440 }, { "epoch": 1.5356064046119524, "grad_norm": 0.08176301194228476, "learning_rate": 0.00015538925956366489, "loss": 2.7523, "step": 19445 }, { "epoch": 1.5360012635485973, "grad_norm": 0.07461570364691288, "learning_rate": 0.0001551396547982913, "loss": 2.8324, "step": 19450 }, { "epoch": 1.5363961224852423, "grad_norm": 0.08679800690962522, "learning_rate": 0.00015489021384888584, "loss": 2.7263, "step": 19455 }, { "epoch": 1.5367909814218872, "grad_norm": 0.08465626565917439, "learning_rate": 0.0001546409368339386, "loss": 2.8557, "step": 19460 }, { "epoch": 1.537185840358532, "grad_norm": 0.08911899928376797, "learning_rate": 0.000154391823871861, "loss": 2.7534, "step": 19465 }, { "epoch": 1.5375806992951768, "grad_norm": 0.09097619412037311, "learning_rate": 0.0001541428750809873, "loss": 2.6894, "step": 19470 }, { "epoch": 1.5379755582318215, "grad_norm": 0.09710607696297904, "learning_rate": 0.00015389409057957348, "loss": 2.9026, "step": 19475 }, { "epoch": 1.5383704171684665, "grad_norm": 0.07711532418130228, "learning_rate": 0.00015364547048579718, "loss": 2.7475, "step": 19480 }, { "epoch": 1.5387652761051114, "grad_norm": 0.11016631442188117, "learning_rate": 0.0001533970149177586, "loss": 2.8565, "step": 19485 }, { "epoch": 1.5391601350417563, "grad_norm": 0.10366120893184869, "learning_rate": 0.0001531487239934789, "loss": 2.9295, "step": 19490 }, { "epoch": 1.5395549939784012, "grad_norm": 0.0874500561093145, "learning_rate": 0.000152900597830902, "loss": 2.7501, "step": 19495 }, { "epoch": 1.5399498529150462, "grad_norm": 0.08600747136460565, "learning_rate": 0.00015265263654789269, "loss": 2.6774, "step": 19500 }, { "epoch": 1.540344711851691, "grad_norm": 0.098457425450106, "learning_rate": 0.00015240484026223815, "loss": 2.6849, "step": 19505 }, { "epoch": 1.540739570788336, "grad_norm": 0.0923235942517422, "learning_rate": 0.00015215720909164665, "loss": 2.6755, "step": 19510 }, { "epoch": 1.5411344297249807, "grad_norm": 0.08204361827290844, "learning_rate": 0.00015190974315374827, "loss": 2.6832, "step": 19515 }, { "epoch": 1.5415292886616256, "grad_norm": 0.09292414557775314, "learning_rate": 0.0001516624425660949, "loss": 2.7626, "step": 19520 }, { "epoch": 1.5419241475982706, "grad_norm": 0.07975844057789826, "learning_rate": 0.00015141530744615907, "loss": 2.8338, "step": 19525 }, { "epoch": 1.5423190065349153, "grad_norm": 0.09054493703183547, "learning_rate": 0.00015116833791133544, "loss": 2.6765, "step": 19530 }, { "epoch": 1.5427138654715602, "grad_norm": 0.07887058790382993, "learning_rate": 0.00015092153407893967, "loss": 2.8479, "step": 19535 }, { "epoch": 1.5431087244082051, "grad_norm": 0.09250794255641193, "learning_rate": 0.0001506748960662088, "loss": 2.6094, "step": 19540 }, { "epoch": 1.54350358334485, "grad_norm": 0.08196460358386833, "learning_rate": 0.00015042842399030137, "loss": 2.7025, "step": 19545 }, { "epoch": 1.543898442281495, "grad_norm": 0.08410829075977064, "learning_rate": 0.00015018211796829651, "loss": 2.6424, "step": 19550 }, { "epoch": 1.54429330121814, "grad_norm": 0.08255725978835556, "learning_rate": 0.00014993597811719506, "loss": 2.8082, "step": 19555 }, { "epoch": 1.5446881601547848, "grad_norm": 0.0821512721063876, "learning_rate": 0.0001496900045539184, "loss": 2.7091, "step": 19560 }, { "epoch": 1.5450830190914295, "grad_norm": 0.10838866839581565, "learning_rate": 0.00014944419739530945, "loss": 2.6922, "step": 19565 }, { "epoch": 1.5454778780280745, "grad_norm": 0.09375927039362485, "learning_rate": 0.00014919855675813153, "loss": 2.7444, "step": 19570 }, { "epoch": 1.5458727369647194, "grad_norm": 0.10756196467290809, "learning_rate": 0.00014895308275906954, "loss": 2.904, "step": 19575 }, { "epoch": 1.546267595901364, "grad_norm": 0.08805182368949506, "learning_rate": 0.00014870777551472837, "loss": 2.8838, "step": 19580 }, { "epoch": 1.546662454838009, "grad_norm": 0.08729438766057544, "learning_rate": 0.00014846263514163438, "loss": 2.7276, "step": 19585 }, { "epoch": 1.547057313774654, "grad_norm": 0.10460752079604481, "learning_rate": 0.00014821766175623458, "loss": 2.713, "step": 19590 }, { "epoch": 1.5474521727112989, "grad_norm": 0.08588831304074239, "learning_rate": 0.00014797285547489626, "loss": 2.9558, "step": 19595 }, { "epoch": 1.5478470316479438, "grad_norm": 0.09411511003684521, "learning_rate": 0.0001477282164139078, "loss": 2.7466, "step": 19600 }, { "epoch": 1.5482418905845887, "grad_norm": 0.0786819710315055, "learning_rate": 0.00014748374468947795, "loss": 2.6609, "step": 19605 }, { "epoch": 1.5486367495212336, "grad_norm": 0.08394378251327587, "learning_rate": 0.00014723944041773558, "loss": 2.6848, "step": 19610 }, { "epoch": 1.5490316084578786, "grad_norm": 0.09113337369011544, "learning_rate": 0.00014699530371473085, "loss": 2.8059, "step": 19615 }, { "epoch": 1.5494264673945233, "grad_norm": 0.08062094076688099, "learning_rate": 0.00014675133469643352, "loss": 2.6635, "step": 19620 }, { "epoch": 1.5498213263311682, "grad_norm": 0.08318351491493227, "learning_rate": 0.00014650753347873434, "loss": 2.7509, "step": 19625 }, { "epoch": 1.550216185267813, "grad_norm": 0.0899737285576982, "learning_rate": 0.00014626390017744378, "loss": 2.8398, "step": 19630 }, { "epoch": 1.5506110442044578, "grad_norm": 0.08348845899533452, "learning_rate": 0.00014602043490829315, "loss": 2.7404, "step": 19635 }, { "epoch": 1.5510059031411028, "grad_norm": 0.08886474352439112, "learning_rate": 0.00014577713778693313, "loss": 2.7418, "step": 19640 }, { "epoch": 1.5514007620777477, "grad_norm": 0.0784499134027431, "learning_rate": 0.00014553400892893553, "loss": 2.7346, "step": 19645 }, { "epoch": 1.5517956210143926, "grad_norm": 0.08700450572709756, "learning_rate": 0.00014529104844979125, "loss": 3.147, "step": 19650 }, { "epoch": 1.5521904799510375, "grad_norm": 0.08675626612227949, "learning_rate": 0.00014504825646491205, "loss": 2.7001, "step": 19655 }, { "epoch": 1.5525853388876825, "grad_norm": 0.12619856910378996, "learning_rate": 0.00014480563308962897, "loss": 3.0097, "step": 19660 }, { "epoch": 1.5529801978243274, "grad_norm": 0.08919661068646662, "learning_rate": 0.0001445631784391933, "loss": 2.8652, "step": 19665 }, { "epoch": 1.553375056760972, "grad_norm": 0.10077726949597143, "learning_rate": 0.0001443208926287764, "loss": 2.7027, "step": 19670 }, { "epoch": 1.553769915697617, "grad_norm": 0.08878410980199816, "learning_rate": 0.00014407877577346878, "loss": 2.6518, "step": 19675 }, { "epoch": 1.554164774634262, "grad_norm": 0.08526075022034735, "learning_rate": 0.00014383682798828145, "loss": 2.8261, "step": 19680 }, { "epoch": 1.5545596335709067, "grad_norm": 0.10218382674407683, "learning_rate": 0.0001435950493881444, "loss": 2.8486, "step": 19685 }, { "epoch": 1.5549544925075516, "grad_norm": 0.09050215013366891, "learning_rate": 0.00014335344008790786, "loss": 2.7924, "step": 19690 }, { "epoch": 1.5553493514441965, "grad_norm": 0.0900787708196885, "learning_rate": 0.00014311200020234122, "loss": 2.6064, "step": 19695 }, { "epoch": 1.5557442103808414, "grad_norm": 0.08924776199264503, "learning_rate": 0.00014287072984613342, "loss": 2.6645, "step": 19700 }, { "epoch": 1.5561390693174864, "grad_norm": 0.07335830006397721, "learning_rate": 0.00014262962913389328, "loss": 2.7843, "step": 19705 }, { "epoch": 1.5565339282541313, "grad_norm": 0.08531955746431079, "learning_rate": 0.00014238869818014837, "loss": 2.5836, "step": 19710 }, { "epoch": 1.5569287871907762, "grad_norm": 0.09040573987153545, "learning_rate": 0.00014214793709934644, "loss": 2.6717, "step": 19715 }, { "epoch": 1.5573236461274211, "grad_norm": 0.09743766306362568, "learning_rate": 0.0001419073460058537, "loss": 3.0071, "step": 19720 }, { "epoch": 1.5577185050640658, "grad_norm": 0.11752723204416894, "learning_rate": 0.00014166692501395634, "loss": 2.7957, "step": 19725 }, { "epoch": 1.5581133640007108, "grad_norm": 0.08115584409066334, "learning_rate": 0.00014142667423785914, "loss": 2.6711, "step": 19730 }, { "epoch": 1.5585082229373555, "grad_norm": 0.08370516784211728, "learning_rate": 0.0001411865937916864, "loss": 2.7758, "step": 19735 }, { "epoch": 1.5589030818740004, "grad_norm": 0.0846777876898213, "learning_rate": 0.00014094668378948166, "loss": 2.9953, "step": 19740 }, { "epoch": 1.5592979408106453, "grad_norm": 0.2973848168004624, "learning_rate": 0.00014070694434520692, "loss": 2.9962, "step": 19745 }, { "epoch": 1.5596927997472902, "grad_norm": 0.09790881742066816, "learning_rate": 0.0001404673755727437, "loss": 2.6928, "step": 19750 }, { "epoch": 1.5600876586839352, "grad_norm": 0.11025245761736645, "learning_rate": 0.00014022797758589208, "loss": 2.6915, "step": 19755 }, { "epoch": 1.56048251762058, "grad_norm": 0.15695820650847886, "learning_rate": 0.0001399887504983714, "loss": 2.771, "step": 19760 }, { "epoch": 1.560877376557225, "grad_norm": 0.1049642286651753, "learning_rate": 0.0001397496944238193, "loss": 2.9924, "step": 19765 }, { "epoch": 1.56127223549387, "grad_norm": 0.09147178101479776, "learning_rate": 0.00013951080947579276, "loss": 2.8225, "step": 19770 }, { "epoch": 1.5616670944305147, "grad_norm": 0.08515781619591022, "learning_rate": 0.00013927209576776694, "loss": 2.8078, "step": 19775 }, { "epoch": 1.5620619533671596, "grad_norm": 0.07723920408975482, "learning_rate": 0.000139033553413136, "loss": 2.7011, "step": 19780 }, { "epoch": 1.5624568123038045, "grad_norm": 0.1309885666229004, "learning_rate": 0.00013879518252521295, "loss": 2.7501, "step": 19785 }, { "epoch": 1.5628516712404492, "grad_norm": 0.08456272559427894, "learning_rate": 0.00013855698321722844, "loss": 2.6015, "step": 19790 }, { "epoch": 1.5632465301770941, "grad_norm": 0.08182442184744947, "learning_rate": 0.0001383189556023326, "loss": 2.8174, "step": 19795 }, { "epoch": 1.563641389113739, "grad_norm": 0.11869968764416876, "learning_rate": 0.00013808109979359322, "loss": 2.6981, "step": 19800 }, { "epoch": 1.564036248050384, "grad_norm": 0.08886901617307233, "learning_rate": 0.00013784341590399713, "loss": 2.7032, "step": 19805 }, { "epoch": 1.564431106987029, "grad_norm": 0.07996741503643658, "learning_rate": 0.00013760590404644928, "loss": 2.7414, "step": 19810 }, { "epoch": 1.5648259659236738, "grad_norm": 0.08865384756576371, "learning_rate": 0.00013736856433377255, "loss": 2.7589, "step": 19815 }, { "epoch": 1.5652208248603188, "grad_norm": 0.09587917186552287, "learning_rate": 0.00013713139687870863, "loss": 2.7767, "step": 19820 }, { "epoch": 1.5656156837969637, "grad_norm": 0.08187340719288742, "learning_rate": 0.00013689440179391678, "loss": 2.8248, "step": 19825 }, { "epoch": 1.5660105427336084, "grad_norm": 0.09272811309255516, "learning_rate": 0.00013665757919197497, "loss": 2.9079, "step": 19830 }, { "epoch": 1.5664054016702533, "grad_norm": 0.07350214650732306, "learning_rate": 0.00013642092918537868, "loss": 2.84, "step": 19835 }, { "epoch": 1.566800260606898, "grad_norm": 0.08244366249289414, "learning_rate": 0.00013618445188654193, "loss": 2.7448, "step": 19840 }, { "epoch": 1.567195119543543, "grad_norm": 0.08771598969023409, "learning_rate": 0.00013594814740779628, "loss": 2.6509, "step": 19845 }, { "epoch": 1.5675899784801879, "grad_norm": 0.1333906307656089, "learning_rate": 0.00013571201586139132, "loss": 2.7073, "step": 19850 }, { "epoch": 1.5679848374168328, "grad_norm": 0.07777204120567627, "learning_rate": 0.00013547605735949485, "loss": 2.7954, "step": 19855 }, { "epoch": 1.5683796963534777, "grad_norm": 0.08036712747212574, "learning_rate": 0.00013524027201419176, "loss": 2.7393, "step": 19860 }, { "epoch": 1.5687745552901227, "grad_norm": 0.07457535234004922, "learning_rate": 0.0001350046599374854, "loss": 2.7837, "step": 19865 }, { "epoch": 1.5691694142267676, "grad_norm": 0.07668986510609264, "learning_rate": 0.00013476922124129636, "loss": 2.9058, "step": 19870 }, { "epoch": 1.5695642731634125, "grad_norm": 0.07807223735778301, "learning_rate": 0.000134533956037463, "loss": 2.7271, "step": 19875 }, { "epoch": 1.5699591321000572, "grad_norm": 0.08738448078530996, "learning_rate": 0.00013429886443774115, "loss": 2.674, "step": 19880 }, { "epoch": 1.5703539910367021, "grad_norm": 0.08147715197953394, "learning_rate": 0.00013406394655380443, "loss": 2.7838, "step": 19885 }, { "epoch": 1.570748849973347, "grad_norm": 0.0801661760988669, "learning_rate": 0.00013382920249724396, "loss": 2.8315, "step": 19890 }, { "epoch": 1.5711437089099918, "grad_norm": 0.09163587452753615, "learning_rate": 0.00013359463237956783, "loss": 2.7741, "step": 19895 }, { "epoch": 1.5715385678466367, "grad_norm": 0.07590322622243083, "learning_rate": 0.00013336023631220208, "loss": 2.5892, "step": 19900 }, { "epoch": 1.5719334267832816, "grad_norm": 0.0791020662216678, "learning_rate": 0.00013312601440648958, "loss": 2.8736, "step": 19905 }, { "epoch": 1.5723282857199266, "grad_norm": 0.08569471349659151, "learning_rate": 0.00013289196677369097, "loss": 2.6506, "step": 19910 }, { "epoch": 1.5727231446565715, "grad_norm": 0.08170588287665266, "learning_rate": 0.00013265809352498354, "loss": 2.8546, "step": 19915 }, { "epoch": 1.5731180035932164, "grad_norm": 0.08864071408036785, "learning_rate": 0.00013242439477146228, "loss": 2.7251, "step": 19920 }, { "epoch": 1.5735128625298613, "grad_norm": 0.07754184415599968, "learning_rate": 0.0001321908706241388, "loss": 2.7212, "step": 19925 }, { "epoch": 1.573907721466506, "grad_norm": 0.07199274603908482, "learning_rate": 0.0001319575211939422, "loss": 2.779, "step": 19930 }, { "epoch": 1.574302580403151, "grad_norm": 0.12185178951480691, "learning_rate": 0.00013172434659171852, "loss": 2.9814, "step": 19935 }, { "epoch": 1.574697439339796, "grad_norm": 0.08221443817133789, "learning_rate": 0.00013149134692823028, "loss": 2.7921, "step": 19940 }, { "epoch": 1.5750922982764406, "grad_norm": 0.1475779294674758, "learning_rate": 0.00013125852231415765, "loss": 2.7129, "step": 19945 }, { "epoch": 1.5754871572130855, "grad_norm": 0.12139587516642629, "learning_rate": 0.0001310258728600968, "loss": 3.0718, "step": 19950 }, { "epoch": 1.5758820161497304, "grad_norm": 0.09731629227023582, "learning_rate": 0.00013079339867656166, "loss": 2.7508, "step": 19955 }, { "epoch": 1.5762768750863754, "grad_norm": 0.0906989935614889, "learning_rate": 0.00013056109987398206, "loss": 2.9243, "step": 19960 }, { "epoch": 1.5766717340230203, "grad_norm": 0.15290943949415056, "learning_rate": 0.0001303289765627048, "loss": 2.6054, "step": 19965 }, { "epoch": 1.5770665929596652, "grad_norm": 0.09121365034133112, "learning_rate": 0.00013009702885299367, "loss": 2.6997, "step": 19970 }, { "epoch": 1.5774614518963102, "grad_norm": 0.09046905047983621, "learning_rate": 0.00012986525685502843, "loss": 2.8491, "step": 19975 }, { "epoch": 1.577856310832955, "grad_norm": 0.08546277144684333, "learning_rate": 0.0001296336606789059, "loss": 2.8154, "step": 19980 }, { "epoch": 1.5782511697695998, "grad_norm": 0.0829512660235777, "learning_rate": 0.000129402240434639, "loss": 2.7421, "step": 19985 }, { "epoch": 1.5786460287062447, "grad_norm": 0.0841221485766275, "learning_rate": 0.0001291709962321575, "loss": 2.8271, "step": 19990 }, { "epoch": 1.5790408876428894, "grad_norm": 0.08358762699472135, "learning_rate": 0.00012893992818130694, "loss": 2.7541, "step": 19995 }, { "epoch": 1.5794357465795343, "grad_norm": 0.08755758165426548, "learning_rate": 0.00012870903639184978, "loss": 2.8601, "step": 20000 }, { "epoch": 1.5798306055161793, "grad_norm": 0.0781161987796131, "learning_rate": 0.00012847832097346456, "loss": 2.5733, "step": 20005 }, { "epoch": 1.5802254644528242, "grad_norm": 0.08148363349634935, "learning_rate": 0.00012824778203574582, "loss": 2.7218, "step": 20010 }, { "epoch": 1.5806203233894691, "grad_norm": 0.10516698940720479, "learning_rate": 0.00012801741968820462, "loss": 2.6782, "step": 20015 }, { "epoch": 1.581015182326114, "grad_norm": 0.07827136857555606, "learning_rate": 0.00012778723404026772, "loss": 2.7127, "step": 20020 }, { "epoch": 1.581410041262759, "grad_norm": 0.0791574509138594, "learning_rate": 0.00012755722520127843, "loss": 2.6411, "step": 20025 }, { "epoch": 1.581804900199404, "grad_norm": 0.08020085353666337, "learning_rate": 0.00012732739328049554, "loss": 2.8434, "step": 20030 }, { "epoch": 1.5821997591360486, "grad_norm": 0.09965802518866358, "learning_rate": 0.00012709773838709442, "loss": 2.6251, "step": 20035 }, { "epoch": 1.5825946180726935, "grad_norm": 0.09586206112632223, "learning_rate": 0.00012686826063016565, "loss": 3.0705, "step": 20040 }, { "epoch": 1.5829894770093385, "grad_norm": 0.08008160927060526, "learning_rate": 0.00012663896011871624, "loss": 2.7206, "step": 20045 }, { "epoch": 1.5833843359459832, "grad_norm": 0.08147479658590055, "learning_rate": 0.000126409836961669, "loss": 2.9036, "step": 20050 }, { "epoch": 1.583779194882628, "grad_norm": 0.11195026573819312, "learning_rate": 0.00012618089126786175, "loss": 2.7528, "step": 20055 }, { "epoch": 1.584174053819273, "grad_norm": 0.08430187657829098, "learning_rate": 0.00012595212314604897, "loss": 2.748, "step": 20060 }, { "epoch": 1.584568912755918, "grad_norm": 0.08073789540383626, "learning_rate": 0.0001257235327049001, "loss": 2.6042, "step": 20065 }, { "epoch": 1.5849637716925629, "grad_norm": 0.08153490684680252, "learning_rate": 0.00012549512005300068, "loss": 2.8505, "step": 20070 }, { "epoch": 1.5853586306292078, "grad_norm": 0.07985862238767025, "learning_rate": 0.00012526688529885133, "loss": 2.5687, "step": 20075 }, { "epoch": 1.5857534895658527, "grad_norm": 0.07984548012282217, "learning_rate": 0.00012503882855086846, "loss": 2.7741, "step": 20080 }, { "epoch": 1.5861483485024976, "grad_norm": 0.08179483532130112, "learning_rate": 0.00012481094991738406, "loss": 2.7857, "step": 20085 }, { "epoch": 1.5865432074391423, "grad_norm": 0.08148733827828163, "learning_rate": 0.00012458324950664502, "loss": 2.7517, "step": 20090 }, { "epoch": 1.5869380663757873, "grad_norm": 0.07795122403421063, "learning_rate": 0.0001243557274268141, "loss": 2.9047, "step": 20095 }, { "epoch": 1.587332925312432, "grad_norm": 0.07635391102611132, "learning_rate": 0.00012412838378596887, "loss": 2.6993, "step": 20100 }, { "epoch": 1.587727784249077, "grad_norm": 0.07786363745876207, "learning_rate": 0.00012390121869210259, "loss": 2.5952, "step": 20105 }, { "epoch": 1.5881226431857218, "grad_norm": 0.15765153631644396, "learning_rate": 0.0001236742322531233, "loss": 2.7801, "step": 20110 }, { "epoch": 1.5885175021223668, "grad_norm": 0.08847796787718237, "learning_rate": 0.00012344742457685455, "loss": 2.9064, "step": 20115 }, { "epoch": 1.5889123610590117, "grad_norm": 0.07916555453217562, "learning_rate": 0.00012322079577103463, "loss": 2.71, "step": 20120 }, { "epoch": 1.5893072199956566, "grad_norm": 0.07926390228219655, "learning_rate": 0.00012299434594331704, "loss": 2.6933, "step": 20125 }, { "epoch": 1.5897020789323015, "grad_norm": 0.08575130784654991, "learning_rate": 0.00012276807520127044, "loss": 2.7246, "step": 20130 }, { "epoch": 1.5900969378689465, "grad_norm": 0.28977336697089634, "learning_rate": 0.00012254198365237813, "loss": 2.8814, "step": 20135 }, { "epoch": 1.5904917968055912, "grad_norm": 0.09636659605479635, "learning_rate": 0.00012231607140403828, "loss": 2.779, "step": 20140 }, { "epoch": 1.590886655742236, "grad_norm": 0.08297235863700014, "learning_rate": 0.00012209033856356395, "loss": 2.7807, "step": 20145 }, { "epoch": 1.591281514678881, "grad_norm": 0.08048701851474853, "learning_rate": 0.00012186478523818312, "loss": 2.6041, "step": 20150 }, { "epoch": 1.5916763736155257, "grad_norm": 0.11156834799606181, "learning_rate": 0.00012163941153503849, "loss": 2.7242, "step": 20155 }, { "epoch": 1.5920712325521706, "grad_norm": 0.09115625208524425, "learning_rate": 0.00012141421756118715, "loss": 2.8291, "step": 20160 }, { "epoch": 1.5924660914888156, "grad_norm": 0.08178227361540658, "learning_rate": 0.00012118920342360124, "loss": 2.8164, "step": 20165 }, { "epoch": 1.5928609504254605, "grad_norm": 0.0773418306854549, "learning_rate": 0.00012096436922916698, "loss": 2.9533, "step": 20170 }, { "epoch": 1.5932558093621054, "grad_norm": 0.08502491508031446, "learning_rate": 0.00012073971508468568, "loss": 2.7393, "step": 20175 }, { "epoch": 1.5936506682987504, "grad_norm": 0.08538682238794411, "learning_rate": 0.00012051524109687245, "loss": 2.716, "step": 20180 }, { "epoch": 1.5940455272353953, "grad_norm": 0.11779382599697855, "learning_rate": 0.00012029094737235751, "loss": 2.7033, "step": 20185 }, { "epoch": 1.5944403861720402, "grad_norm": 0.08561962075796635, "learning_rate": 0.00012006683401768487, "loss": 2.8119, "step": 20190 }, { "epoch": 1.594835245108685, "grad_norm": 0.0913765925681904, "learning_rate": 0.00011984290113931328, "loss": 2.75, "step": 20195 }, { "epoch": 1.5952301040453298, "grad_norm": 0.09620248750885944, "learning_rate": 0.00011961914884361569, "loss": 2.7976, "step": 20200 }, { "epoch": 1.5956249629819745, "grad_norm": 0.08030147161605104, "learning_rate": 0.00011939557723687888, "loss": 2.781, "step": 20205 }, { "epoch": 1.5960198219186195, "grad_norm": 0.08528335018249171, "learning_rate": 0.00011917218642530448, "loss": 2.8999, "step": 20210 }, { "epoch": 1.5964146808552644, "grad_norm": 0.11066535485220101, "learning_rate": 0.00011894897651500747, "loss": 3.0162, "step": 20215 }, { "epoch": 1.5968095397919093, "grad_norm": 0.07916778266494727, "learning_rate": 0.00011872594761201766, "loss": 2.8491, "step": 20220 }, { "epoch": 1.5972043987285542, "grad_norm": 0.08125849595092097, "learning_rate": 0.00011850309982227825, "loss": 2.8186, "step": 20225 }, { "epoch": 1.5975992576651992, "grad_norm": 0.08226840360693198, "learning_rate": 0.00011828043325164661, "loss": 2.6221, "step": 20230 }, { "epoch": 1.597994116601844, "grad_norm": 0.08114821946264933, "learning_rate": 0.00011805794800589436, "loss": 2.598, "step": 20235 }, { "epoch": 1.598388975538489, "grad_norm": 0.08343469005954728, "learning_rate": 0.00011783564419070637, "loss": 2.6115, "step": 20240 }, { "epoch": 1.5987838344751337, "grad_norm": 0.08094916885841277, "learning_rate": 0.00011761352191168195, "loss": 2.7701, "step": 20245 }, { "epoch": 1.5991786934117787, "grad_norm": 0.08546412055116918, "learning_rate": 0.00011739158127433364, "loss": 2.7231, "step": 20250 }, { "epoch": 1.5995735523484236, "grad_norm": 0.08098421356173822, "learning_rate": 0.00011716982238408819, "loss": 2.7853, "step": 20255 }, { "epoch": 1.5999684112850683, "grad_norm": 0.08677400709041576, "learning_rate": 0.0001169482453462855, "loss": 2.8652, "step": 20260 }, { "epoch": 1.6003632702217132, "grad_norm": 0.09719101173040133, "learning_rate": 0.00011672685026617952, "loss": 2.8083, "step": 20265 }, { "epoch": 1.6007581291583581, "grad_norm": 0.0834365751528855, "learning_rate": 0.00011650563724893775, "loss": 2.7216, "step": 20270 }, { "epoch": 1.601152988095003, "grad_norm": 0.07858927616983528, "learning_rate": 0.00011628460639964072, "loss": 2.6357, "step": 20275 }, { "epoch": 1.601547847031648, "grad_norm": 0.07794654760083781, "learning_rate": 0.0001160637578232831, "loss": 2.6128, "step": 20280 }, { "epoch": 1.601942705968293, "grad_norm": 0.07964464061780996, "learning_rate": 0.0001158430916247723, "loss": 2.7635, "step": 20285 }, { "epoch": 1.6023375649049378, "grad_norm": 0.08693561134413463, "learning_rate": 0.0001156226079089298, "loss": 2.7958, "step": 20290 }, { "epoch": 1.6027324238415825, "grad_norm": 0.085545274988175, "learning_rate": 0.00011540230678048969, "loss": 2.7964, "step": 20295 }, { "epoch": 1.6031272827782275, "grad_norm": 0.08554757793726595, "learning_rate": 0.00011518218834409994, "loss": 3.0934, "step": 20300 }, { "epoch": 1.6035221417148724, "grad_norm": 0.11037813398455319, "learning_rate": 0.00011496225270432126, "loss": 2.6378, "step": 20305 }, { "epoch": 1.603917000651517, "grad_norm": 0.09279340389048929, "learning_rate": 0.00011474249996562803, "loss": 2.7429, "step": 20310 }, { "epoch": 1.604311859588162, "grad_norm": 0.08652031157626322, "learning_rate": 0.00011452293023240718, "loss": 2.6831, "step": 20315 }, { "epoch": 1.604706718524807, "grad_norm": 0.0752277611348294, "learning_rate": 0.00011430354360895895, "loss": 2.8046, "step": 20320 }, { "epoch": 1.6051015774614519, "grad_norm": 0.08562184330377592, "learning_rate": 0.00011408434019949692, "loss": 2.9219, "step": 20325 }, { "epoch": 1.6054964363980968, "grad_norm": 0.09223989658068589, "learning_rate": 0.00011386532010814704, "loss": 2.7566, "step": 20330 }, { "epoch": 1.6058912953347417, "grad_norm": 0.08483563282417829, "learning_rate": 0.00011364648343894879, "loss": 2.8567, "step": 20335 }, { "epoch": 1.6062861542713867, "grad_norm": 0.08223407745517249, "learning_rate": 0.0001134278302958539, "loss": 2.8206, "step": 20340 }, { "epoch": 1.6066810132080316, "grad_norm": 0.09431193257831727, "learning_rate": 0.00011320936078272742, "loss": 2.7958, "step": 20345 }, { "epoch": 1.6070758721446763, "grad_norm": 0.08678102686201598, "learning_rate": 0.00011299107500334715, "loss": 2.6806, "step": 20350 }, { "epoch": 1.6074707310813212, "grad_norm": 0.0790772769177951, "learning_rate": 0.00011277297306140317, "loss": 2.7253, "step": 20355 }, { "epoch": 1.607865590017966, "grad_norm": 0.09165030500695168, "learning_rate": 0.00011255505506049873, "loss": 2.7093, "step": 20360 }, { "epoch": 1.6082604489546108, "grad_norm": 0.08018712842386673, "learning_rate": 0.00011233732110414935, "loss": 2.7566, "step": 20365 }, { "epoch": 1.6086553078912558, "grad_norm": 0.07577494551728158, "learning_rate": 0.00011211977129578343, "loss": 2.7186, "step": 20370 }, { "epoch": 1.6090501668279007, "grad_norm": 0.12847364514946158, "learning_rate": 0.0001119024057387415, "loss": 2.8357, "step": 20375 }, { "epoch": 1.6094450257645456, "grad_norm": 0.08259803251043085, "learning_rate": 0.0001116852245362771, "loss": 2.6789, "step": 20380 }, { "epoch": 1.6098398847011905, "grad_norm": 0.07546407501505734, "learning_rate": 0.00011146822779155563, "loss": 2.6458, "step": 20385 }, { "epoch": 1.6102347436378355, "grad_norm": 0.0761221329331604, "learning_rate": 0.00011125141560765529, "loss": 2.9823, "step": 20390 }, { "epoch": 1.6106296025744804, "grad_norm": 0.08268706636560805, "learning_rate": 0.00011103478808756651, "loss": 2.7211, "step": 20395 }, { "epoch": 1.611024461511125, "grad_norm": 0.08339506573069654, "learning_rate": 0.00011081834533419189, "loss": 2.9343, "step": 20400 }, { "epoch": 1.61141932044777, "grad_norm": 0.0760049642528142, "learning_rate": 0.00011060208745034628, "loss": 2.7521, "step": 20405 }, { "epoch": 1.611814179384415, "grad_norm": 0.07946032186488125, "learning_rate": 0.00011038601453875663, "loss": 2.7763, "step": 20410 }, { "epoch": 1.6122090383210597, "grad_norm": 0.10446473157286053, "learning_rate": 0.00011017012670206233, "loss": 2.7344, "step": 20415 }, { "epoch": 1.6126038972577046, "grad_norm": 0.20967962674682836, "learning_rate": 0.0001099544240428148, "loss": 2.513, "step": 20420 }, { "epoch": 1.6129987561943495, "grad_norm": 0.09031675480107364, "learning_rate": 0.00010973890666347703, "loss": 2.8828, "step": 20425 }, { "epoch": 1.6133936151309944, "grad_norm": 0.08884898223338528, "learning_rate": 0.0001095235746664247, "loss": 2.7645, "step": 20430 }, { "epoch": 1.6137884740676394, "grad_norm": 0.07496581688795183, "learning_rate": 0.0001093084281539447, "loss": 2.6391, "step": 20435 }, { "epoch": 1.6141833330042843, "grad_norm": 0.08783018106457569, "learning_rate": 0.00010909346722823655, "loss": 2.7027, "step": 20440 }, { "epoch": 1.6145781919409292, "grad_norm": 0.1069596242882821, "learning_rate": 0.00010887869199141093, "loss": 2.9742, "step": 20445 }, { "epoch": 1.6149730508775741, "grad_norm": 0.07875575126732692, "learning_rate": 0.00010866410254549091, "loss": 2.8046, "step": 20450 }, { "epoch": 1.6153679098142188, "grad_norm": 0.1064848524469561, "learning_rate": 0.0001084496989924107, "loss": 2.8295, "step": 20455 }, { "epoch": 1.6157627687508638, "grad_norm": 0.08518384656983341, "learning_rate": 0.00010823548143401674, "loss": 2.726, "step": 20460 }, { "epoch": 1.6161576276875085, "grad_norm": 0.08672492938060002, "learning_rate": 0.00010802144997206697, "loss": 2.858, "step": 20465 }, { "epoch": 1.6165524866241534, "grad_norm": 0.09184441435716537, "learning_rate": 0.0001078076047082307, "loss": 2.8229, "step": 20470 }, { "epoch": 1.6169473455607983, "grad_norm": 0.0808679836076009, "learning_rate": 0.00010759394574408909, "loss": 2.8888, "step": 20475 }, { "epoch": 1.6173422044974433, "grad_norm": 0.09400118158922832, "learning_rate": 0.00010738047318113453, "loss": 2.8463, "step": 20480 }, { "epoch": 1.6177370634340882, "grad_norm": 0.09558037927019399, "learning_rate": 0.00010716718712077117, "loss": 2.7513, "step": 20485 }, { "epoch": 1.6181319223707331, "grad_norm": 0.08360030780914086, "learning_rate": 0.00010695408766431436, "loss": 2.7452, "step": 20490 }, { "epoch": 1.618526781307378, "grad_norm": 0.07912778230123953, "learning_rate": 0.00010674117491299068, "loss": 2.5192, "step": 20495 }, { "epoch": 1.618921640244023, "grad_norm": 0.10108276071842927, "learning_rate": 0.0001065284489679385, "loss": 2.7184, "step": 20500 }, { "epoch": 1.6193164991806677, "grad_norm": 0.08605336055277532, "learning_rate": 0.00010631590993020679, "loss": 2.8485, "step": 20505 }, { "epoch": 1.6197113581173126, "grad_norm": 0.07833966156018309, "learning_rate": 0.00010610355790075649, "loss": 2.7834, "step": 20510 }, { "epoch": 1.6201062170539575, "grad_norm": 0.08007941623023654, "learning_rate": 0.00010589139298045897, "loss": 2.7197, "step": 20515 }, { "epoch": 1.6205010759906022, "grad_norm": 0.08316494093917756, "learning_rate": 0.00010567941527009734, "loss": 2.8615, "step": 20520 }, { "epoch": 1.6208959349272472, "grad_norm": 0.08843413018807364, "learning_rate": 0.00010546762487036521, "loss": 2.8651, "step": 20525 }, { "epoch": 1.621290793863892, "grad_norm": 0.07745416624205673, "learning_rate": 0.00010525602188186777, "loss": 2.87, "step": 20530 }, { "epoch": 1.621685652800537, "grad_norm": 0.07689546090124605, "learning_rate": 0.00010504460640512076, "loss": 2.6658, "step": 20535 }, { "epoch": 1.622080511737182, "grad_norm": 0.09721352157742623, "learning_rate": 0.00010483337854055097, "loss": 3.0046, "step": 20540 }, { "epoch": 1.6224753706738269, "grad_norm": 0.10626975251438596, "learning_rate": 0.00010462233838849638, "loss": 2.824, "step": 20545 }, { "epoch": 1.6228702296104718, "grad_norm": 0.08585203410379455, "learning_rate": 0.00010441148604920525, "loss": 2.8321, "step": 20550 }, { "epoch": 1.6232650885471167, "grad_norm": 0.09970667157484843, "learning_rate": 0.00010420082162283712, "loss": 2.7703, "step": 20555 }, { "epoch": 1.6236599474837614, "grad_norm": 0.08438184125924851, "learning_rate": 0.00010399034520946182, "loss": 2.6574, "step": 20560 }, { "epoch": 1.6240548064204063, "grad_norm": 0.09710593257971467, "learning_rate": 0.00010378005690906039, "loss": 2.8618, "step": 20565 }, { "epoch": 1.624449665357051, "grad_norm": 0.0835440924510844, "learning_rate": 0.00010356995682152392, "loss": 2.5988, "step": 20570 }, { "epoch": 1.624844524293696, "grad_norm": 0.15322933053318172, "learning_rate": 0.00010336004504665464, "loss": 2.8294, "step": 20575 }, { "epoch": 1.625239383230341, "grad_norm": 0.08835084345926023, "learning_rate": 0.00010315032168416505, "loss": 2.8103, "step": 20580 }, { "epoch": 1.6256342421669858, "grad_norm": 0.08393536334702882, "learning_rate": 0.00010294078683367797, "loss": 2.9382, "step": 20585 }, { "epoch": 1.6260291011036307, "grad_norm": 0.08542512502811034, "learning_rate": 0.00010273144059472722, "loss": 2.7371, "step": 20590 }, { "epoch": 1.6264239600402757, "grad_norm": 0.08911850102374598, "learning_rate": 0.00010252228306675637, "loss": 2.6554, "step": 20595 }, { "epoch": 1.6268188189769206, "grad_norm": 0.08750172220556389, "learning_rate": 0.00010231331434912, "loss": 2.735, "step": 20600 }, { "epoch": 1.6272136779135655, "grad_norm": 0.08349763910843977, "learning_rate": 0.00010210453454108243, "loss": 2.8051, "step": 20605 }, { "epoch": 1.6276085368502102, "grad_norm": 0.0886020798646283, "learning_rate": 0.00010189594374181854, "loss": 2.7728, "step": 20610 }, { "epoch": 1.6280033957868552, "grad_norm": 0.10526894967978138, "learning_rate": 0.00010168754205041352, "loss": 2.911, "step": 20615 }, { "epoch": 1.6283982547235, "grad_norm": 0.09758433568587827, "learning_rate": 0.00010147932956586242, "loss": 2.9586, "step": 20620 }, { "epoch": 1.6287931136601448, "grad_norm": 0.08333272076579078, "learning_rate": 0.00010127130638707072, "loss": 2.9402, "step": 20625 }, { "epoch": 1.6291879725967897, "grad_norm": 0.08661047104029548, "learning_rate": 0.00010106347261285364, "loss": 2.8394, "step": 20630 }, { "epoch": 1.6295828315334346, "grad_norm": 0.09497706008189939, "learning_rate": 0.00010085582834193691, "loss": 2.8733, "step": 20635 }, { "epoch": 1.6299776904700796, "grad_norm": 0.08208581206971774, "learning_rate": 0.00010064837367295564, "loss": 2.7141, "step": 20640 }, { "epoch": 1.6303725494067245, "grad_norm": 0.08337909581695328, "learning_rate": 0.0001004411087044555, "loss": 2.739, "step": 20645 }, { "epoch": 1.6307674083433694, "grad_norm": 0.08337964017523614, "learning_rate": 0.00010023403353489146, "loss": 2.9609, "step": 20650 }, { "epoch": 1.6311622672800143, "grad_norm": 0.08465235032053235, "learning_rate": 0.0001000271482626287, "loss": 2.7042, "step": 20655 }, { "epoch": 1.631557126216659, "grad_norm": 0.09371520112535368, "learning_rate": 9.982045298594228e-05, "loss": 2.8099, "step": 20660 }, { "epoch": 1.631951985153304, "grad_norm": 0.09403170498119263, "learning_rate": 9.961394780301658e-05, "loss": 2.6636, "step": 20665 }, { "epoch": 1.632346844089949, "grad_norm": 0.08281165169025465, "learning_rate": 9.940763281194631e-05, "loss": 2.9326, "step": 20670 }, { "epoch": 1.6327417030265936, "grad_norm": 0.1316285010854943, "learning_rate": 9.920150811073497e-05, "loss": 2.7973, "step": 20675 }, { "epoch": 1.6331365619632385, "grad_norm": 0.09186006708767089, "learning_rate": 9.899557379729635e-05, "loss": 2.7214, "step": 20680 }, { "epoch": 1.6335314208998835, "grad_norm": 0.07678538177133672, "learning_rate": 9.878982996945385e-05, "loss": 2.7565, "step": 20685 }, { "epoch": 1.6339262798365284, "grad_norm": 0.07644032118315322, "learning_rate": 9.858427672493975e-05, "loss": 2.8972, "step": 20690 }, { "epoch": 1.6343211387731733, "grad_norm": 0.10488742919268033, "learning_rate": 9.837891416139655e-05, "loss": 2.8229, "step": 20695 }, { "epoch": 1.6347159977098182, "grad_norm": 0.08696035334555799, "learning_rate": 9.817374237637555e-05, "loss": 2.6085, "step": 20700 }, { "epoch": 1.6351108566464632, "grad_norm": 0.08515252658467622, "learning_rate": 9.796876146733791e-05, "loss": 2.6747, "step": 20705 }, { "epoch": 1.635505715583108, "grad_norm": 0.12147182911673403, "learning_rate": 9.776397153165368e-05, "loss": 2.8797, "step": 20710 }, { "epoch": 1.6359005745197528, "grad_norm": 0.07965140257773791, "learning_rate": 9.755937266660265e-05, "loss": 2.7774, "step": 20715 }, { "epoch": 1.6362954334563977, "grad_norm": 0.0939351653502117, "learning_rate": 9.73549649693734e-05, "loss": 2.6882, "step": 20720 }, { "epoch": 1.6366902923930424, "grad_norm": 0.07317041782129216, "learning_rate": 9.715074853706401e-05, "loss": 2.8721, "step": 20725 }, { "epoch": 1.6370851513296873, "grad_norm": 0.07247244548269133, "learning_rate": 9.694672346668171e-05, "loss": 2.6626, "step": 20730 }, { "epoch": 1.6374800102663323, "grad_norm": 0.10440547977649599, "learning_rate": 9.674288985514256e-05, "loss": 2.6977, "step": 20735 }, { "epoch": 1.6378748692029772, "grad_norm": 0.08171747416048004, "learning_rate": 9.653924779927203e-05, "loss": 2.9219, "step": 20740 }, { "epoch": 1.6382697281396221, "grad_norm": 0.0911594155052128, "learning_rate": 9.633579739580418e-05, "loss": 2.872, "step": 20745 }, { "epoch": 1.638664587076267, "grad_norm": 0.0814137074956473, "learning_rate": 9.613253874138245e-05, "loss": 2.6686, "step": 20750 }, { "epoch": 1.639059446012912, "grad_norm": 0.07854067199259218, "learning_rate": 9.592947193255903e-05, "loss": 2.7793, "step": 20755 }, { "epoch": 1.639454304949557, "grad_norm": 0.08879275650828503, "learning_rate": 9.572659706579467e-05, "loss": 2.6696, "step": 20760 }, { "epoch": 1.6398491638862016, "grad_norm": 0.08670019307188845, "learning_rate": 9.552391423745954e-05, "loss": 2.7598, "step": 20765 }, { "epoch": 1.6402440228228465, "grad_norm": 0.08718398637899494, "learning_rate": 9.532142354383205e-05, "loss": 2.6649, "step": 20770 }, { "epoch": 1.6406388817594915, "grad_norm": 0.07861228679133056, "learning_rate": 9.511912508109982e-05, "loss": 2.7399, "step": 20775 }, { "epoch": 1.6410337406961362, "grad_norm": 0.08808756286499395, "learning_rate": 9.491701894535859e-05, "loss": 2.8885, "step": 20780 }, { "epoch": 1.641428599632781, "grad_norm": 0.0812409640056546, "learning_rate": 9.47151052326134e-05, "loss": 2.7614, "step": 20785 }, { "epoch": 1.641823458569426, "grad_norm": 0.09302530467700933, "learning_rate": 9.451338403877719e-05, "loss": 2.7552, "step": 20790 }, { "epoch": 1.642218317506071, "grad_norm": 0.08063861135774615, "learning_rate": 9.431185545967213e-05, "loss": 2.7651, "step": 20795 }, { "epoch": 1.6426131764427159, "grad_norm": 0.07715445626050561, "learning_rate": 9.411051959102829e-05, "loss": 2.7481, "step": 20800 }, { "epoch": 1.6430080353793608, "grad_norm": 0.07731800727944861, "learning_rate": 9.390937652848458e-05, "loss": 2.6485, "step": 20805 }, { "epoch": 1.6434028943160057, "grad_norm": 0.0817514698257231, "learning_rate": 9.370842636758836e-05, "loss": 2.5852, "step": 20810 }, { "epoch": 1.6437977532526507, "grad_norm": 0.19928633553750205, "learning_rate": 9.350766920379494e-05, "loss": 2.584, "step": 20815 }, { "epoch": 1.6441926121892954, "grad_norm": 0.08463865509448695, "learning_rate": 9.330710513246848e-05, "loss": 2.8867, "step": 20820 }, { "epoch": 1.6445874711259403, "grad_norm": 0.1304476276222514, "learning_rate": 9.310673424888095e-05, "loss": 2.7899, "step": 20825 }, { "epoch": 1.644982330062585, "grad_norm": 0.07836164868434148, "learning_rate": 9.290655664821296e-05, "loss": 2.9018, "step": 20830 }, { "epoch": 1.64537718899923, "grad_norm": 0.15306946470902433, "learning_rate": 9.270657242555286e-05, "loss": 2.8212, "step": 20835 }, { "epoch": 1.6457720479358748, "grad_norm": 0.08899063956790158, "learning_rate": 9.250678167589771e-05, "loss": 2.7926, "step": 20840 }, { "epoch": 1.6461669068725198, "grad_norm": 0.08269609214670957, "learning_rate": 9.230718449415216e-05, "loss": 2.9762, "step": 20845 }, { "epoch": 1.6465617658091647, "grad_norm": 0.08190267158392693, "learning_rate": 9.210778097512896e-05, "loss": 2.7401, "step": 20850 }, { "epoch": 1.6469566247458096, "grad_norm": 0.09484273406960123, "learning_rate": 9.190857121354934e-05, "loss": 2.6141, "step": 20855 }, { "epoch": 1.6473514836824545, "grad_norm": 0.11042022660204935, "learning_rate": 9.170955530404184e-05, "loss": 2.691, "step": 20860 }, { "epoch": 1.6477463426190995, "grad_norm": 0.09445404064952179, "learning_rate": 9.151073334114346e-05, "loss": 2.7264, "step": 20865 }, { "epoch": 1.6481412015557442, "grad_norm": 0.12926038031801704, "learning_rate": 9.131210541929863e-05, "loss": 2.6686, "step": 20870 }, { "epoch": 1.648536060492389, "grad_norm": 0.13917358639979605, "learning_rate": 9.111367163286e-05, "loss": 2.8691, "step": 20875 }, { "epoch": 1.648930919429034, "grad_norm": 0.07416009004470406, "learning_rate": 9.091543207608782e-05, "loss": 2.7083, "step": 20880 }, { "epoch": 1.6493257783656787, "grad_norm": 0.08364914818927309, "learning_rate": 9.071738684314995e-05, "loss": 2.5884, "step": 20885 }, { "epoch": 1.6497206373023237, "grad_norm": 0.08606139345773534, "learning_rate": 9.051953602812224e-05, "loss": 2.9632, "step": 20890 }, { "epoch": 1.6501154962389686, "grad_norm": 0.08787138988153094, "learning_rate": 9.032187972498785e-05, "loss": 2.5562, "step": 20895 }, { "epoch": 1.6505103551756135, "grad_norm": 0.08234534802017109, "learning_rate": 9.012441802763782e-05, "loss": 2.7773, "step": 20900 }, { "epoch": 1.6509052141122584, "grad_norm": 0.07927221418612469, "learning_rate": 8.992715102987053e-05, "loss": 2.773, "step": 20905 }, { "epoch": 1.6513000730489034, "grad_norm": 0.08117925486646141, "learning_rate": 8.973007882539219e-05, "loss": 2.7082, "step": 20910 }, { "epoch": 1.6516949319855483, "grad_norm": 0.08478713257373068, "learning_rate": 8.953320150781597e-05, "loss": 2.7064, "step": 20915 }, { "epoch": 1.652089790922193, "grad_norm": 0.08190080543439156, "learning_rate": 8.933651917066294e-05, "loss": 2.8069, "step": 20920 }, { "epoch": 1.652484649858838, "grad_norm": 0.0915539594220303, "learning_rate": 8.914003190736154e-05, "loss": 2.7617, "step": 20925 }, { "epoch": 1.6528795087954828, "grad_norm": 0.08754622512171639, "learning_rate": 8.894373981124703e-05, "loss": 2.6947, "step": 20930 }, { "epoch": 1.6532743677321275, "grad_norm": 0.07758619418440132, "learning_rate": 8.874764297556276e-05, "loss": 2.842, "step": 20935 }, { "epoch": 1.6536692266687725, "grad_norm": 0.17105285211102522, "learning_rate": 8.855174149345835e-05, "loss": 2.905, "step": 20940 }, { "epoch": 1.6540640856054174, "grad_norm": 0.1339141783976258, "learning_rate": 8.835603545799137e-05, "loss": 2.714, "step": 20945 }, { "epoch": 1.6544589445420623, "grad_norm": 0.07739650920967428, "learning_rate": 8.816052496212657e-05, "loss": 2.6765, "step": 20950 }, { "epoch": 1.6548538034787073, "grad_norm": 0.08882136353781454, "learning_rate": 8.79652100987352e-05, "loss": 2.7766, "step": 20955 }, { "epoch": 1.6552486624153522, "grad_norm": 0.07809951383050837, "learning_rate": 8.777009096059635e-05, "loss": 2.6803, "step": 20960 }, { "epoch": 1.655643521351997, "grad_norm": 0.11504062068995224, "learning_rate": 8.757516764039535e-05, "loss": 2.8324, "step": 20965 }, { "epoch": 1.656038380288642, "grad_norm": 0.08107419055543054, "learning_rate": 8.738044023072523e-05, "loss": 2.7698, "step": 20970 }, { "epoch": 1.6564332392252867, "grad_norm": 0.07840028911999236, "learning_rate": 8.718590882408534e-05, "loss": 2.7307, "step": 20975 }, { "epoch": 1.6568280981619317, "grad_norm": 0.08182627228287032, "learning_rate": 8.699157351288251e-05, "loss": 2.7783, "step": 20980 }, { "epoch": 1.6572229570985766, "grad_norm": 0.11587148183436437, "learning_rate": 8.679743438942989e-05, "loss": 2.5413, "step": 20985 }, { "epoch": 1.6576178160352213, "grad_norm": 0.16213514416001895, "learning_rate": 8.660349154594788e-05, "loss": 2.6671, "step": 20990 }, { "epoch": 1.6580126749718662, "grad_norm": 0.08252116172272375, "learning_rate": 8.640974507456317e-05, "loss": 2.6429, "step": 20995 }, { "epoch": 1.6584075339085111, "grad_norm": 0.07582006304370413, "learning_rate": 8.621619506730965e-05, "loss": 2.8582, "step": 21000 }, { "epoch": 1.658802392845156, "grad_norm": 0.09180398435682798, "learning_rate": 8.602284161612773e-05, "loss": 2.9069, "step": 21005 }, { "epoch": 1.659197251781801, "grad_norm": 0.08975897631195302, "learning_rate": 8.582968481286418e-05, "loss": 2.6327, "step": 21010 }, { "epoch": 1.659592110718446, "grad_norm": 0.07875128515734121, "learning_rate": 8.563672474927281e-05, "loss": 2.7562, "step": 21015 }, { "epoch": 1.6599869696550908, "grad_norm": 0.07968736753281062, "learning_rate": 8.544396151701361e-05, "loss": 2.575, "step": 21020 }, { "epoch": 1.6603818285917356, "grad_norm": 0.08471942460766675, "learning_rate": 8.525139520765308e-05, "loss": 2.8626, "step": 21025 }, { "epoch": 1.6607766875283805, "grad_norm": 0.07542389164900468, "learning_rate": 8.505902591266451e-05, "loss": 2.8141, "step": 21030 }, { "epoch": 1.6611715464650254, "grad_norm": 0.08195162153050901, "learning_rate": 8.486685372342717e-05, "loss": 2.6402, "step": 21035 }, { "epoch": 1.66156640540167, "grad_norm": 0.08046562327173915, "learning_rate": 8.467487873122714e-05, "loss": 2.6657, "step": 21040 }, { "epoch": 1.661961264338315, "grad_norm": 0.10391130676700279, "learning_rate": 8.448310102725642e-05, "loss": 2.5948, "step": 21045 }, { "epoch": 1.66235612327496, "grad_norm": 0.0943506033148167, "learning_rate": 8.42915207026137e-05, "loss": 2.5998, "step": 21050 }, { "epoch": 1.6627509822116049, "grad_norm": 0.07916187916238684, "learning_rate": 8.410013784830345e-05, "loss": 2.7035, "step": 21055 }, { "epoch": 1.6631458411482498, "grad_norm": 0.09125607991652147, "learning_rate": 8.390895255523678e-05, "loss": 2.7814, "step": 21060 }, { "epoch": 1.6635407000848947, "grad_norm": 0.08279127722138431, "learning_rate": 8.371796491423061e-05, "loss": 2.6667, "step": 21065 }, { "epoch": 1.6639355590215397, "grad_norm": 0.07758747449142733, "learning_rate": 8.352717501600809e-05, "loss": 2.8733, "step": 21070 }, { "epoch": 1.6643304179581846, "grad_norm": 0.0839985690707755, "learning_rate": 8.333658295119872e-05, "loss": 2.6583, "step": 21075 }, { "epoch": 1.6647252768948293, "grad_norm": 0.07251640379640492, "learning_rate": 8.314618881033747e-05, "loss": 2.7859, "step": 21080 }, { "epoch": 1.6651201358314742, "grad_norm": 0.07550283175389164, "learning_rate": 8.295599268386578e-05, "loss": 2.6707, "step": 21085 }, { "epoch": 1.665514994768119, "grad_norm": 0.0819567766668806, "learning_rate": 8.27659946621307e-05, "loss": 2.8797, "step": 21090 }, { "epoch": 1.6659098537047639, "grad_norm": 0.07648181882153904, "learning_rate": 8.257619483538547e-05, "loss": 2.6729, "step": 21095 }, { "epoch": 1.6663047126414088, "grad_norm": 0.11294713266171094, "learning_rate": 8.238659329378883e-05, "loss": 2.8026, "step": 21100 }, { "epoch": 1.6666995715780537, "grad_norm": 0.08076495897491354, "learning_rate": 8.219719012740579e-05, "loss": 2.7338, "step": 21105 }, { "epoch": 1.6670944305146986, "grad_norm": 0.0767001844178586, "learning_rate": 8.200798542620669e-05, "loss": 2.7415, "step": 21110 }, { "epoch": 1.6674892894513436, "grad_norm": 0.0750369704113165, "learning_rate": 8.18189792800677e-05, "loss": 2.7734, "step": 21115 }, { "epoch": 1.6678841483879885, "grad_norm": 0.07977809984239853, "learning_rate": 8.163017177877091e-05, "loss": 2.8472, "step": 21120 }, { "epoch": 1.6682790073246334, "grad_norm": 0.08039977662172866, "learning_rate": 8.144156301200373e-05, "loss": 2.6941, "step": 21125 }, { "epoch": 1.6686738662612781, "grad_norm": 0.09049189305219858, "learning_rate": 8.125315306935954e-05, "loss": 2.8388, "step": 21130 }, { "epoch": 1.669068725197923, "grad_norm": 0.10657944630329333, "learning_rate": 8.106494204033676e-05, "loss": 2.5879, "step": 21135 }, { "epoch": 1.669463584134568, "grad_norm": 0.0838230374597399, "learning_rate": 8.087693001433977e-05, "loss": 2.6436, "step": 21140 }, { "epoch": 1.6698584430712127, "grad_norm": 0.0741099174743945, "learning_rate": 8.068911708067839e-05, "loss": 2.7433, "step": 21145 }, { "epoch": 1.6702533020078576, "grad_norm": 0.13519512262553138, "learning_rate": 8.050150332856749e-05, "loss": 2.711, "step": 21150 }, { "epoch": 1.6706481609445025, "grad_norm": 0.08737074793091065, "learning_rate": 8.031408884712782e-05, "loss": 2.741, "step": 21155 }, { "epoch": 1.6710430198811475, "grad_norm": 0.08178216016731746, "learning_rate": 8.012687372538497e-05, "loss": 2.6961, "step": 21160 }, { "epoch": 1.6714378788177924, "grad_norm": 0.11490058493790593, "learning_rate": 7.993985805227038e-05, "loss": 2.8258, "step": 21165 }, { "epoch": 1.6718327377544373, "grad_norm": 0.08980556477800315, "learning_rate": 7.975304191662019e-05, "loss": 2.6093, "step": 21170 }, { "epoch": 1.6722275966910822, "grad_norm": 0.12002904884181814, "learning_rate": 7.956642540717623e-05, "loss": 2.5713, "step": 21175 }, { "epoch": 1.6726224556277272, "grad_norm": 0.08411296434465551, "learning_rate": 7.938000861258504e-05, "loss": 2.7203, "step": 21180 }, { "epoch": 1.6730173145643719, "grad_norm": 0.08174353362455704, "learning_rate": 7.91937916213989e-05, "loss": 2.6717, "step": 21185 }, { "epoch": 1.6734121735010168, "grad_norm": 0.08037348726920449, "learning_rate": 7.900777452207447e-05, "loss": 2.8263, "step": 21190 }, { "epoch": 1.6738070324376615, "grad_norm": 0.08116307748887654, "learning_rate": 7.882195740297398e-05, "loss": 2.6951, "step": 21195 }, { "epoch": 1.6742018913743064, "grad_norm": 0.08623177092974871, "learning_rate": 7.863634035236472e-05, "loss": 2.7221, "step": 21200 }, { "epoch": 1.6745967503109513, "grad_norm": 0.07987929773560248, "learning_rate": 7.845092345841825e-05, "loss": 2.7172, "step": 21205 }, { "epoch": 1.6749916092475963, "grad_norm": 0.11384319316730573, "learning_rate": 7.826570680921192e-05, "loss": 2.9468, "step": 21210 }, { "epoch": 1.6753864681842412, "grad_norm": 0.08650893629840536, "learning_rate": 7.80806904927272e-05, "loss": 2.7439, "step": 21215 }, { "epoch": 1.6757813271208861, "grad_norm": 0.07390933856681543, "learning_rate": 7.789587459685105e-05, "loss": 2.8247, "step": 21220 }, { "epoch": 1.676176186057531, "grad_norm": 0.10773999587824692, "learning_rate": 7.771125920937494e-05, "loss": 3.0806, "step": 21225 }, { "epoch": 1.676571044994176, "grad_norm": 0.08542129746085914, "learning_rate": 7.752684441799484e-05, "loss": 2.7183, "step": 21230 }, { "epoch": 1.6769659039308207, "grad_norm": 0.09457439847786024, "learning_rate": 7.734263031031197e-05, "loss": 2.7324, "step": 21235 }, { "epoch": 1.6773607628674656, "grad_norm": 0.13611333170482415, "learning_rate": 7.715861697383169e-05, "loss": 2.9129, "step": 21240 }, { "epoch": 1.6777556218041105, "grad_norm": 0.07614806750448859, "learning_rate": 7.69748044959645e-05, "loss": 2.7259, "step": 21245 }, { "epoch": 1.6781504807407552, "grad_norm": 0.09304856581829793, "learning_rate": 7.679119296402488e-05, "loss": 2.9112, "step": 21250 }, { "epoch": 1.6785453396774002, "grad_norm": 0.10187785972849604, "learning_rate": 7.660778246523253e-05, "loss": 2.8516, "step": 21255 }, { "epoch": 1.678940198614045, "grad_norm": 0.09231799627443822, "learning_rate": 7.642457308671107e-05, "loss": 2.9516, "step": 21260 }, { "epoch": 1.67933505755069, "grad_norm": 0.07770641438742835, "learning_rate": 7.624156491548889e-05, "loss": 2.7986, "step": 21265 }, { "epoch": 1.679729916487335, "grad_norm": 0.07713609515708018, "learning_rate": 7.605875803849899e-05, "loss": 2.7495, "step": 21270 }, { "epoch": 1.6801247754239799, "grad_norm": 0.10046140593162954, "learning_rate": 7.587615254257813e-05, "loss": 2.6625, "step": 21275 }, { "epoch": 1.6805196343606248, "grad_norm": 0.08170281926084767, "learning_rate": 7.569374851446808e-05, "loss": 2.8073, "step": 21280 }, { "epoch": 1.6809144932972695, "grad_norm": 0.07977604176143999, "learning_rate": 7.551154604081456e-05, "loss": 2.8891, "step": 21285 }, { "epoch": 1.6813093522339144, "grad_norm": 0.07962839688729006, "learning_rate": 7.532954520816743e-05, "loss": 2.6489, "step": 21290 }, { "epoch": 1.6817042111705593, "grad_norm": 0.07515987308682345, "learning_rate": 7.514774610298115e-05, "loss": 2.7741, "step": 21295 }, { "epoch": 1.682099070107204, "grad_norm": 0.09209277246198233, "learning_rate": 7.496614881161395e-05, "loss": 2.7897, "step": 21300 }, { "epoch": 1.682493929043849, "grad_norm": 0.08371210868212478, "learning_rate": 7.478475342032859e-05, "loss": 2.687, "step": 21305 }, { "epoch": 1.682888787980494, "grad_norm": 0.08399200124259967, "learning_rate": 7.460356001529156e-05, "loss": 2.7255, "step": 21310 }, { "epoch": 1.6832836469171388, "grad_norm": 0.08414527667268391, "learning_rate": 7.442256868257369e-05, "loss": 2.7431, "step": 21315 }, { "epoch": 1.6836785058537838, "grad_norm": 0.10166641101613424, "learning_rate": 7.42417795081496e-05, "loss": 2.7311, "step": 21320 }, { "epoch": 1.6840733647904287, "grad_norm": 0.08251331369215811, "learning_rate": 7.406119257789812e-05, "loss": 2.8396, "step": 21325 }, { "epoch": 1.6844682237270736, "grad_norm": 0.14131576088947653, "learning_rate": 7.38808079776016e-05, "loss": 2.647, "step": 21330 }, { "epoch": 1.6848630826637185, "grad_norm": 0.08966464018582876, "learning_rate": 7.370062579294673e-05, "loss": 2.6706, "step": 21335 }, { "epoch": 1.6852579416003632, "grad_norm": 0.07749543127258854, "learning_rate": 7.352064610952397e-05, "loss": 2.8107, "step": 21340 }, { "epoch": 1.6856528005370082, "grad_norm": 0.077338619718885, "learning_rate": 7.334086901282727e-05, "loss": 2.6667, "step": 21345 }, { "epoch": 1.6860476594736529, "grad_norm": 0.08178436509722624, "learning_rate": 7.316129458825476e-05, "loss": 2.8413, "step": 21350 }, { "epoch": 1.6864425184102978, "grad_norm": 0.0893485005128174, "learning_rate": 7.298192292110784e-05, "loss": 2.8901, "step": 21355 }, { "epoch": 1.6868373773469427, "grad_norm": 0.08776173334910352, "learning_rate": 7.28027540965921e-05, "loss": 2.6508, "step": 21360 }, { "epoch": 1.6872322362835876, "grad_norm": 0.10750494970075637, "learning_rate": 7.26237881998163e-05, "loss": 2.6472, "step": 21365 }, { "epoch": 1.6876270952202326, "grad_norm": 0.09540202572133001, "learning_rate": 7.244502531579327e-05, "loss": 2.6985, "step": 21370 }, { "epoch": 1.6880219541568775, "grad_norm": 0.09329789363152598, "learning_rate": 7.226646552943911e-05, "loss": 3.0192, "step": 21375 }, { "epoch": 1.6884168130935224, "grad_norm": 0.08874056303440737, "learning_rate": 7.208810892557326e-05, "loss": 2.8904, "step": 21380 }, { "epoch": 1.6888116720301674, "grad_norm": 0.07521684273119898, "learning_rate": 7.190995558891916e-05, "loss": 2.7702, "step": 21385 }, { "epoch": 1.689206530966812, "grad_norm": 0.09923299567477904, "learning_rate": 7.173200560410325e-05, "loss": 2.8427, "step": 21390 }, { "epoch": 1.689601389903457, "grad_norm": 0.07571219067890757, "learning_rate": 7.155425905565571e-05, "loss": 2.7467, "step": 21395 }, { "epoch": 1.689996248840102, "grad_norm": 0.0827147670254489, "learning_rate": 7.137671602800971e-05, "loss": 2.8819, "step": 21400 }, { "epoch": 1.6903911077767466, "grad_norm": 0.11381306737094055, "learning_rate": 7.119937660550219e-05, "loss": 2.7938, "step": 21405 }, { "epoch": 1.6907859667133915, "grad_norm": 0.07284963255903015, "learning_rate": 7.102224087237285e-05, "loss": 2.7451, "step": 21410 }, { "epoch": 1.6911808256500365, "grad_norm": 0.07855786658500634, "learning_rate": 7.084530891276508e-05, "loss": 2.5998, "step": 21415 }, { "epoch": 1.6915756845866814, "grad_norm": 0.08749266375344077, "learning_rate": 7.066858081072541e-05, "loss": 2.7096, "step": 21420 }, { "epoch": 1.6919705435233263, "grad_norm": 0.09195539243905144, "learning_rate": 7.049205665020314e-05, "loss": 2.7853, "step": 21425 }, { "epoch": 1.6923654024599712, "grad_norm": 0.07939660490375025, "learning_rate": 7.031573651505136e-05, "loss": 2.7185, "step": 21430 }, { "epoch": 1.6927602613966162, "grad_norm": 0.07588802163179907, "learning_rate": 7.01396204890255e-05, "loss": 2.7572, "step": 21435 }, { "epoch": 1.693155120333261, "grad_norm": 0.08236564955292201, "learning_rate": 6.996370865578467e-05, "loss": 2.7505, "step": 21440 }, { "epoch": 1.6935499792699058, "grad_norm": 0.08679372164342543, "learning_rate": 6.978800109889055e-05, "loss": 2.8271, "step": 21445 }, { "epoch": 1.6939448382065507, "grad_norm": 0.09665551328713078, "learning_rate": 6.961249790180813e-05, "loss": 2.8274, "step": 21450 }, { "epoch": 1.6943396971431954, "grad_norm": 0.07867206019241055, "learning_rate": 6.943719914790487e-05, "loss": 2.7546, "step": 21455 }, { "epoch": 1.6947345560798404, "grad_norm": 0.11407540384945096, "learning_rate": 6.92621049204516e-05, "loss": 2.8067, "step": 21460 }, { "epoch": 1.6951294150164853, "grad_norm": 0.09739211998700155, "learning_rate": 6.90872153026219e-05, "loss": 2.7314, "step": 21465 }, { "epoch": 1.6955242739531302, "grad_norm": 0.07928772154983149, "learning_rate": 6.891253037749173e-05, "loss": 2.7514, "step": 21470 }, { "epoch": 1.6959191328897751, "grad_norm": 0.07523454790802561, "learning_rate": 6.873805022804036e-05, "loss": 2.6557, "step": 21475 }, { "epoch": 1.69631399182642, "grad_norm": 0.07985932630160904, "learning_rate": 6.856377493714938e-05, "loss": 2.6124, "step": 21480 }, { "epoch": 1.696708850763065, "grad_norm": 0.0809275241444805, "learning_rate": 6.838970458760335e-05, "loss": 2.7832, "step": 21485 }, { "epoch": 1.69710370969971, "grad_norm": 0.08418894952537949, "learning_rate": 6.821583926208947e-05, "loss": 2.8937, "step": 21490 }, { "epoch": 1.6974985686363546, "grad_norm": 0.08941419675355167, "learning_rate": 6.804217904319726e-05, "loss": 2.605, "step": 21495 }, { "epoch": 1.6978934275729995, "grad_norm": 0.07543412105125663, "learning_rate": 6.786872401341915e-05, "loss": 2.6583, "step": 21500 }, { "epoch": 1.6982882865096445, "grad_norm": 0.0771518311068326, "learning_rate": 6.769547425514982e-05, "loss": 2.7185, "step": 21505 }, { "epoch": 1.6986831454462892, "grad_norm": 0.07922360634840765, "learning_rate": 6.752242985068674e-05, "loss": 2.7817, "step": 21510 }, { "epoch": 1.699078004382934, "grad_norm": 0.0895770376247278, "learning_rate": 6.734959088222947e-05, "loss": 2.8758, "step": 21515 }, { "epoch": 1.699472863319579, "grad_norm": 0.0841176472006834, "learning_rate": 6.717695743188041e-05, "loss": 2.7779, "step": 21520 }, { "epoch": 1.699867722256224, "grad_norm": 0.07099109880793615, "learning_rate": 6.700452958164388e-05, "loss": 2.827, "step": 21525 }, { "epoch": 1.7002625811928689, "grad_norm": 0.08308215481476487, "learning_rate": 6.683230741342683e-05, "loss": 2.6554, "step": 21530 }, { "epoch": 1.7006574401295138, "grad_norm": 0.07144862702859185, "learning_rate": 6.666029100903865e-05, "loss": 2.6396, "step": 21535 }, { "epoch": 1.7010522990661587, "grad_norm": 0.08415860400969606, "learning_rate": 6.64884804501904e-05, "loss": 2.6929, "step": 21540 }, { "epoch": 1.7014471580028037, "grad_norm": 0.07646545787553705, "learning_rate": 6.63168758184961e-05, "loss": 2.5716, "step": 21545 }, { "epoch": 1.7018420169394484, "grad_norm": 0.09108409005252198, "learning_rate": 6.614547719547137e-05, "loss": 2.6532, "step": 21550 }, { "epoch": 1.7022368758760933, "grad_norm": 0.07610356653577682, "learning_rate": 6.597428466253413e-05, "loss": 2.7233, "step": 21555 }, { "epoch": 1.702631734812738, "grad_norm": 0.09208013690814615, "learning_rate": 6.580329830100467e-05, "loss": 2.7149, "step": 21560 }, { "epoch": 1.703026593749383, "grad_norm": 0.0831747225162357, "learning_rate": 6.563251819210497e-05, "loss": 2.7515, "step": 21565 }, { "epoch": 1.7034214526860278, "grad_norm": 0.08823411647827019, "learning_rate": 6.54619444169593e-05, "loss": 2.6707, "step": 21570 }, { "epoch": 1.7038163116226728, "grad_norm": 0.12025567530776243, "learning_rate": 6.529157705659372e-05, "loss": 2.726, "step": 21575 }, { "epoch": 1.7042111705593177, "grad_norm": 0.10369590195271183, "learning_rate": 6.512141619193645e-05, "loss": 2.7364, "step": 21580 }, { "epoch": 1.7046060294959626, "grad_norm": 0.07926496289029335, "learning_rate": 6.495146190381729e-05, "loss": 2.8644, "step": 21585 }, { "epoch": 1.7050008884326076, "grad_norm": 0.08013071775714871, "learning_rate": 6.478171427296848e-05, "loss": 2.7555, "step": 21590 }, { "epoch": 1.7053957473692525, "grad_norm": 0.0765311996227954, "learning_rate": 6.461217338002335e-05, "loss": 2.7593, "step": 21595 }, { "epoch": 1.7057906063058972, "grad_norm": 0.07839981570929268, "learning_rate": 6.444283930551764e-05, "loss": 2.5931, "step": 21600 }, { "epoch": 1.706185465242542, "grad_norm": 0.09280247672524371, "learning_rate": 6.427371212988864e-05, "loss": 2.5843, "step": 21605 }, { "epoch": 1.706580324179187, "grad_norm": 0.07306281274707815, "learning_rate": 6.410479193347524e-05, "loss": 2.678, "step": 21610 }, { "epoch": 1.7069751831158317, "grad_norm": 0.08419027948996481, "learning_rate": 6.39360787965182e-05, "loss": 2.8103, "step": 21615 }, { "epoch": 1.7073700420524767, "grad_norm": 0.08214577139335263, "learning_rate": 6.376757279915974e-05, "loss": 2.8545, "step": 21620 }, { "epoch": 1.7077649009891216, "grad_norm": 0.08451116603154372, "learning_rate": 6.359927402144388e-05, "loss": 2.7809, "step": 21625 }, { "epoch": 1.7081597599257665, "grad_norm": 0.08358098990728013, "learning_rate": 6.343118254331598e-05, "loss": 2.7491, "step": 21630 }, { "epoch": 1.7085546188624114, "grad_norm": 0.10887687931001486, "learning_rate": 6.32632984446232e-05, "loss": 2.6136, "step": 21635 }, { "epoch": 1.7089494777990564, "grad_norm": 0.08133536346790436, "learning_rate": 6.309562180511402e-05, "loss": 2.8494, "step": 21640 }, { "epoch": 1.7093443367357013, "grad_norm": 0.10758757599638853, "learning_rate": 6.29281527044382e-05, "loss": 3.1159, "step": 21645 }, { "epoch": 1.709739195672346, "grad_norm": 0.10067348486252613, "learning_rate": 6.276089122214734e-05, "loss": 2.9006, "step": 21650 }, { "epoch": 1.710134054608991, "grad_norm": 0.0829014664032563, "learning_rate": 6.259383743769398e-05, "loss": 2.8086, "step": 21655 }, { "epoch": 1.7105289135456359, "grad_norm": 0.08018360068932, "learning_rate": 6.242699143043245e-05, "loss": 2.842, "step": 21660 }, { "epoch": 1.7109237724822806, "grad_norm": 0.08708425163640597, "learning_rate": 6.226035327961787e-05, "loss": 2.6564, "step": 21665 }, { "epoch": 1.7113186314189255, "grad_norm": 0.0766605755626106, "learning_rate": 6.20939230644072e-05, "loss": 2.7704, "step": 21670 }, { "epoch": 1.7117134903555704, "grad_norm": 0.08444243517457692, "learning_rate": 6.192770086385802e-05, "loss": 2.6427, "step": 21675 }, { "epoch": 1.7121083492922153, "grad_norm": 0.08863589332885902, "learning_rate": 6.17616867569295e-05, "loss": 2.656, "step": 21680 }, { "epoch": 1.7125032082288603, "grad_norm": 0.07849959531718152, "learning_rate": 6.159588082248202e-05, "loss": 2.7184, "step": 21685 }, { "epoch": 1.7128980671655052, "grad_norm": 0.07584924344102761, "learning_rate": 6.143028313927662e-05, "loss": 2.5401, "step": 21690 }, { "epoch": 1.7132929261021501, "grad_norm": 0.07473410247412389, "learning_rate": 6.126489378597599e-05, "loss": 2.8178, "step": 21695 }, { "epoch": 1.713687785038795, "grad_norm": 0.0782391823905646, "learning_rate": 6.109971284114335e-05, "loss": 2.6857, "step": 21700 }, { "epoch": 1.7140826439754397, "grad_norm": 0.0755988445446461, "learning_rate": 6.093474038324332e-05, "loss": 2.6025, "step": 21705 }, { "epoch": 1.7144775029120847, "grad_norm": 0.08103674402765007, "learning_rate": 6.076997649064114e-05, "loss": 2.8451, "step": 21710 }, { "epoch": 1.7148723618487294, "grad_norm": 0.0841856578536551, "learning_rate": 6.060542124160334e-05, "loss": 2.7708, "step": 21715 }, { "epoch": 1.7152672207853743, "grad_norm": 0.10272936147364874, "learning_rate": 6.0441074714297e-05, "loss": 2.7304, "step": 21720 }, { "epoch": 1.7156620797220192, "grad_norm": 0.08039019673938821, "learning_rate": 6.027693698679021e-05, "loss": 2.6051, "step": 21725 }, { "epoch": 1.7160569386586642, "grad_norm": 0.08029932148421087, "learning_rate": 6.011300813705211e-05, "loss": 2.8519, "step": 21730 }, { "epoch": 1.716451797595309, "grad_norm": 0.09934753658610654, "learning_rate": 5.994928824295198e-05, "loss": 2.8575, "step": 21735 }, { "epoch": 1.716846656531954, "grad_norm": 0.086659783123612, "learning_rate": 5.9785777382260545e-05, "loss": 2.6629, "step": 21740 }, { "epoch": 1.717241515468599, "grad_norm": 0.07654748745076927, "learning_rate": 5.962247563264866e-05, "loss": 2.7644, "step": 21745 }, { "epoch": 1.7176363744052439, "grad_norm": 0.10658445741115928, "learning_rate": 5.945938307168836e-05, "loss": 2.633, "step": 21750 }, { "epoch": 1.7180312333418886, "grad_norm": 0.07516684959383815, "learning_rate": 5.929649977685197e-05, "loss": 2.7734, "step": 21755 }, { "epoch": 1.7184260922785335, "grad_norm": 0.08335996054036648, "learning_rate": 5.9133825825512464e-05, "loss": 2.8007, "step": 21760 }, { "epoch": 1.7188209512151784, "grad_norm": 0.07765597194892522, "learning_rate": 5.897136129494357e-05, "loss": 2.5772, "step": 21765 }, { "epoch": 1.7192158101518231, "grad_norm": 0.084462131092697, "learning_rate": 5.880910626231917e-05, "loss": 2.5199, "step": 21770 }, { "epoch": 1.719610669088468, "grad_norm": 0.09000989081603074, "learning_rate": 5.864706080471405e-05, "loss": 2.6145, "step": 21775 }, { "epoch": 1.720005528025113, "grad_norm": 0.0853339661551098, "learning_rate": 5.848522499910303e-05, "loss": 2.7174, "step": 21780 }, { "epoch": 1.720400386961758, "grad_norm": 0.07731592282441481, "learning_rate": 5.832359892236172e-05, "loss": 2.5809, "step": 21785 }, { "epoch": 1.7207952458984028, "grad_norm": 0.08810524295412739, "learning_rate": 5.816218265126577e-05, "loss": 2.8106, "step": 21790 }, { "epoch": 1.7211901048350478, "grad_norm": 0.08510863549865581, "learning_rate": 5.80009762624914e-05, "loss": 2.9358, "step": 21795 }, { "epoch": 1.7215849637716927, "grad_norm": 0.08394821770410835, "learning_rate": 5.7839979832615184e-05, "loss": 2.9415, "step": 21800 }, { "epoch": 1.7219798227083376, "grad_norm": 0.08346352690781046, "learning_rate": 5.7679193438113564e-05, "loss": 2.7929, "step": 21805 }, { "epoch": 1.7223746816449823, "grad_norm": 0.11883358924547525, "learning_rate": 5.751861715536383e-05, "loss": 2.7895, "step": 21810 }, { "epoch": 1.7227695405816272, "grad_norm": 0.08715980347225814, "learning_rate": 5.735825106064285e-05, "loss": 3.0197, "step": 21815 }, { "epoch": 1.723164399518272, "grad_norm": 0.0733921973093567, "learning_rate": 5.7198095230127925e-05, "loss": 2.6247, "step": 21820 }, { "epoch": 1.7235592584549169, "grad_norm": 0.07804680430710269, "learning_rate": 5.703814973989668e-05, "loss": 2.6507, "step": 21825 }, { "epoch": 1.7239541173915618, "grad_norm": 0.08173067343588088, "learning_rate": 5.6878414665926304e-05, "loss": 2.7364, "step": 21830 }, { "epoch": 1.7243489763282067, "grad_norm": 0.08346993598630151, "learning_rate": 5.671889008409464e-05, "loss": 2.686, "step": 21835 }, { "epoch": 1.7247438352648516, "grad_norm": 0.07291130868666527, "learning_rate": 5.655957607017898e-05, "loss": 2.5644, "step": 21840 }, { "epoch": 1.7251386942014966, "grad_norm": 0.18007025333221335, "learning_rate": 5.6400472699857097e-05, "loss": 2.8486, "step": 21845 }, { "epoch": 1.7255335531381415, "grad_norm": 0.07988737242072307, "learning_rate": 5.624158004870622e-05, "loss": 2.7459, "step": 21850 }, { "epoch": 1.7259284120747864, "grad_norm": 0.0821784945252213, "learning_rate": 5.6082898192203965e-05, "loss": 2.8101, "step": 21855 }, { "epoch": 1.7263232710114311, "grad_norm": 0.07867869252156354, "learning_rate": 5.592442720572738e-05, "loss": 2.7127, "step": 21860 }, { "epoch": 1.726718129948076, "grad_norm": 0.08563793888253529, "learning_rate": 5.5766167164553685e-05, "loss": 2.674, "step": 21865 }, { "epoch": 1.727112988884721, "grad_norm": 0.06897929682840136, "learning_rate": 5.560811814385958e-05, "loss": 2.6261, "step": 21870 }, { "epoch": 1.7275078478213657, "grad_norm": 0.1199599996341646, "learning_rate": 5.5450280218721836e-05, "loss": 2.7158, "step": 21875 }, { "epoch": 1.7279027067580106, "grad_norm": 0.08506158955495922, "learning_rate": 5.529265346411688e-05, "loss": 2.78, "step": 21880 }, { "epoch": 1.7282975656946555, "grad_norm": 0.08533813401821, "learning_rate": 5.513523795492054e-05, "loss": 2.6096, "step": 21885 }, { "epoch": 1.7286924246313005, "grad_norm": 0.11307492763388186, "learning_rate": 5.497803376590871e-05, "loss": 2.6381, "step": 21890 }, { "epoch": 1.7290872835679454, "grad_norm": 0.0785429051683741, "learning_rate": 5.4821040971756555e-05, "loss": 2.7623, "step": 21895 }, { "epoch": 1.7294821425045903, "grad_norm": 0.08281399586732756, "learning_rate": 5.4664259647039136e-05, "loss": 2.725, "step": 21900 }, { "epoch": 1.7298770014412352, "grad_norm": 0.08614721923440306, "learning_rate": 5.450768986623089e-05, "loss": 2.7024, "step": 21905 }, { "epoch": 1.7302718603778802, "grad_norm": 0.08209479272597063, "learning_rate": 5.435133170370554e-05, "loss": 2.6797, "step": 21910 }, { "epoch": 1.7306667193145249, "grad_norm": 0.07321241156270338, "learning_rate": 5.419518523373684e-05, "loss": 2.545, "step": 21915 }, { "epoch": 1.7310615782511698, "grad_norm": 0.10383980745645156, "learning_rate": 5.4039250530497405e-05, "loss": 2.6844, "step": 21920 }, { "epoch": 1.7314564371878145, "grad_norm": 0.12132712555548382, "learning_rate": 5.3883527668059704e-05, "loss": 2.848, "step": 21925 }, { "epoch": 1.7318512961244594, "grad_norm": 0.08202845179817221, "learning_rate": 5.372801672039529e-05, "loss": 2.8778, "step": 21930 }, { "epoch": 1.7322461550611044, "grad_norm": 0.07603640137284719, "learning_rate": 5.3572717761375286e-05, "loss": 2.7187, "step": 21935 }, { "epoch": 1.7326410139977493, "grad_norm": 0.08224036745512589, "learning_rate": 5.341763086476981e-05, "loss": 2.5931, "step": 21940 }, { "epoch": 1.7330358729343942, "grad_norm": 0.0768272099416216, "learning_rate": 5.326275610424852e-05, "loss": 2.7935, "step": 21945 }, { "epoch": 1.7334307318710391, "grad_norm": 0.08826775245437045, "learning_rate": 5.310809355338031e-05, "loss": 2.9388, "step": 21950 }, { "epoch": 1.733825590807684, "grad_norm": 0.08576154815195414, "learning_rate": 5.2953643285632905e-05, "loss": 2.9639, "step": 21955 }, { "epoch": 1.734220449744329, "grad_norm": 0.07613413824321456, "learning_rate": 5.279940537437378e-05, "loss": 2.7833, "step": 21960 }, { "epoch": 1.7346153086809737, "grad_norm": 0.08278203885540614, "learning_rate": 5.264537989286888e-05, "loss": 2.8487, "step": 21965 }, { "epoch": 1.7350101676176186, "grad_norm": 0.09738531079175974, "learning_rate": 5.2491566914283864e-05, "loss": 2.6532, "step": 21970 }, { "epoch": 1.7354050265542635, "grad_norm": 0.09088362613928636, "learning_rate": 5.233796651168293e-05, "loss": 2.791, "step": 21975 }, { "epoch": 1.7357998854909082, "grad_norm": 0.08595268883059635, "learning_rate": 5.2184578758029674e-05, "loss": 2.7074, "step": 21980 }, { "epoch": 1.7361947444275532, "grad_norm": 0.07275388433862236, "learning_rate": 5.203140372618636e-05, "loss": 2.7084, "step": 21985 }, { "epoch": 1.736589603364198, "grad_norm": 0.17081198027157476, "learning_rate": 5.187844148891452e-05, "loss": 2.6365, "step": 21990 }, { "epoch": 1.736984462300843, "grad_norm": 0.07963593179257301, "learning_rate": 5.1725692118874626e-05, "loss": 2.6932, "step": 21995 }, { "epoch": 1.737379321237488, "grad_norm": 0.0824649158703922, "learning_rate": 5.157315568862542e-05, "loss": 2.7249, "step": 22000 }, { "epoch": 1.7377741801741329, "grad_norm": 0.08479018403753058, "learning_rate": 5.142083227062527e-05, "loss": 2.8131, "step": 22005 }, { "epoch": 1.7381690391107778, "grad_norm": 0.09789870721870997, "learning_rate": 5.126872193723081e-05, "loss": 2.7357, "step": 22010 }, { "epoch": 1.7385638980474225, "grad_norm": 0.08000949371066439, "learning_rate": 5.1116824760697844e-05, "loss": 2.723, "step": 22015 }, { "epoch": 1.7389587569840674, "grad_norm": 0.08960959043131576, "learning_rate": 5.096514081318077e-05, "loss": 2.8809, "step": 22020 }, { "epoch": 1.7393536159207124, "grad_norm": 0.09483205889431012, "learning_rate": 5.0813670166732495e-05, "loss": 2.7731, "step": 22025 }, { "epoch": 1.739748474857357, "grad_norm": 0.07542277099896501, "learning_rate": 5.066241289330492e-05, "loss": 2.7786, "step": 22030 }, { "epoch": 1.740143333794002, "grad_norm": 0.0933296889146144, "learning_rate": 5.0511369064748405e-05, "loss": 2.8153, "step": 22035 }, { "epoch": 1.740538192730647, "grad_norm": 0.10387255275555461, "learning_rate": 5.036053875281205e-05, "loss": 2.8494, "step": 22040 }, { "epoch": 1.7409330516672918, "grad_norm": 0.07318277680212491, "learning_rate": 5.020992202914326e-05, "loss": 2.7367, "step": 22045 }, { "epoch": 1.7413279106039368, "grad_norm": 0.08341927297795343, "learning_rate": 5.005951896528843e-05, "loss": 2.5988, "step": 22050 }, { "epoch": 1.7417227695405817, "grad_norm": 0.07555822443344647, "learning_rate": 4.990932963269196e-05, "loss": 2.6139, "step": 22055 }, { "epoch": 1.7421176284772266, "grad_norm": 0.08270021996847504, "learning_rate": 4.975935410269705e-05, "loss": 2.7768, "step": 22060 }, { "epoch": 1.7425124874138715, "grad_norm": 0.07477361566643947, "learning_rate": 4.960959244654534e-05, "loss": 2.8087, "step": 22065 }, { "epoch": 1.7429073463505163, "grad_norm": 0.07951488217042987, "learning_rate": 4.946004473537663e-05, "loss": 2.6014, "step": 22070 }, { "epoch": 1.7433022052871612, "grad_norm": 0.09351722785532014, "learning_rate": 4.931071104022944e-05, "loss": 2.7268, "step": 22075 }, { "epoch": 1.7436970642238059, "grad_norm": 0.07618218366656992, "learning_rate": 4.916159143204035e-05, "loss": 2.8244, "step": 22080 }, { "epoch": 1.7440919231604508, "grad_norm": 0.08807682722929248, "learning_rate": 4.901268598164432e-05, "loss": 2.6316, "step": 22085 }, { "epoch": 1.7444867820970957, "grad_norm": 0.11391155506513059, "learning_rate": 4.886399475977449e-05, "loss": 2.6382, "step": 22090 }, { "epoch": 1.7448816410337407, "grad_norm": 0.07939077939313698, "learning_rate": 4.8715517837062405e-05, "loss": 2.743, "step": 22095 }, { "epoch": 1.7452764999703856, "grad_norm": 0.07718333764305384, "learning_rate": 4.8567255284037884e-05, "loss": 2.6511, "step": 22100 }, { "epoch": 1.7456713589070305, "grad_norm": 0.07756513396922109, "learning_rate": 4.841920717112863e-05, "loss": 2.5152, "step": 22105 }, { "epoch": 1.7460662178436754, "grad_norm": 0.0817404063968491, "learning_rate": 4.827137356866074e-05, "loss": 2.9169, "step": 22110 }, { "epoch": 1.7464610767803204, "grad_norm": 0.0742335014184499, "learning_rate": 4.8123754546858134e-05, "loss": 2.6536, "step": 22115 }, { "epoch": 1.746855935716965, "grad_norm": 0.07679269168347207, "learning_rate": 4.7976350175843234e-05, "loss": 2.6467, "step": 22120 }, { "epoch": 1.74725079465361, "grad_norm": 0.08546568368002361, "learning_rate": 4.782916052563596e-05, "loss": 2.8158, "step": 22125 }, { "epoch": 1.747645653590255, "grad_norm": 0.08484346864632107, "learning_rate": 4.7682185666154734e-05, "loss": 2.6039, "step": 22130 }, { "epoch": 1.7480405125268996, "grad_norm": 0.08223298950326476, "learning_rate": 4.7535425667215524e-05, "loss": 2.6899, "step": 22135 }, { "epoch": 1.7484353714635446, "grad_norm": 0.07844300079823498, "learning_rate": 4.738888059853264e-05, "loss": 2.6019, "step": 22140 }, { "epoch": 1.7488302304001895, "grad_norm": 0.10005044860252783, "learning_rate": 4.724255052971799e-05, "loss": 2.7585, "step": 22145 }, { "epoch": 1.7492250893368344, "grad_norm": 0.07637376176864874, "learning_rate": 4.709643553028148e-05, "loss": 2.7504, "step": 22150 }, { "epoch": 1.7496199482734793, "grad_norm": 0.10761886937379324, "learning_rate": 4.695053566963087e-05, "loss": 2.5782, "step": 22155 }, { "epoch": 1.7500148072101243, "grad_norm": 0.08789478998778939, "learning_rate": 4.680485101707149e-05, "loss": 2.7005, "step": 22160 }, { "epoch": 1.7504096661467692, "grad_norm": 0.07823082985633437, "learning_rate": 4.6659381641806944e-05, "loss": 2.7112, "step": 22165 }, { "epoch": 1.750804525083414, "grad_norm": 0.07509120735124264, "learning_rate": 4.651412761293811e-05, "loss": 2.7299, "step": 22170 }, { "epoch": 1.7511993840200588, "grad_norm": 0.07849903218091585, "learning_rate": 4.636908899946357e-05, "loss": 2.6418, "step": 22175 }, { "epoch": 1.7515942429567037, "grad_norm": 0.08741829505074002, "learning_rate": 4.6224265870280005e-05, "loss": 2.7863, "step": 22180 }, { "epoch": 1.7519891018933484, "grad_norm": 0.07928831656077237, "learning_rate": 4.607965829418126e-05, "loss": 2.6485, "step": 22185 }, { "epoch": 1.7523839608299934, "grad_norm": 0.09236937237195776, "learning_rate": 4.593526633985917e-05, "loss": 2.6387, "step": 22190 }, { "epoch": 1.7527788197666383, "grad_norm": 0.07446373640250173, "learning_rate": 4.579109007590282e-05, "loss": 2.7112, "step": 22195 }, { "epoch": 1.7531736787032832, "grad_norm": 0.08095570339645386, "learning_rate": 4.56471295707992e-05, "loss": 2.8347, "step": 22200 }, { "epoch": 1.7535685376399281, "grad_norm": 0.08800827337775595, "learning_rate": 4.5503384892932265e-05, "loss": 2.8164, "step": 22205 }, { "epoch": 1.753963396576573, "grad_norm": 0.07386139601878952, "learning_rate": 4.5359856110584074e-05, "loss": 2.5717, "step": 22210 }, { "epoch": 1.754358255513218, "grad_norm": 0.07482107589592811, "learning_rate": 4.5216543291933755e-05, "loss": 2.772, "step": 22215 }, { "epoch": 1.754753114449863, "grad_norm": 0.13243589392871105, "learning_rate": 4.507344650505785e-05, "loss": 2.9036, "step": 22220 }, { "epoch": 1.7551479733865076, "grad_norm": 0.07615489424604173, "learning_rate": 4.493056581793053e-05, "loss": 2.5836, "step": 22225 }, { "epoch": 1.7555428323231526, "grad_norm": 0.08084385102854634, "learning_rate": 4.478790129842297e-05, "loss": 2.6977, "step": 22230 }, { "epoch": 1.7559376912597975, "grad_norm": 0.0763164345497012, "learning_rate": 4.4645453014304004e-05, "loss": 2.8177, "step": 22235 }, { "epoch": 1.7563325501964422, "grad_norm": 0.07723054533066681, "learning_rate": 4.450322103323939e-05, "loss": 2.7561, "step": 22240 }, { "epoch": 1.7567274091330871, "grad_norm": 0.08396654980831893, "learning_rate": 4.436120542279254e-05, "loss": 2.7577, "step": 22245 }, { "epoch": 1.757122268069732, "grad_norm": 0.0794082401017156, "learning_rate": 4.4219406250423646e-05, "loss": 2.7026, "step": 22250 }, { "epoch": 1.757517127006377, "grad_norm": 0.09979468099484587, "learning_rate": 4.4077823583490494e-05, "loss": 2.6894, "step": 22255 }, { "epoch": 1.757911985943022, "grad_norm": 0.07859657445304949, "learning_rate": 4.393645748924796e-05, "loss": 2.6344, "step": 22260 }, { "epoch": 1.7583068448796668, "grad_norm": 0.08403386993354192, "learning_rate": 4.3795308034847605e-05, "loss": 2.8113, "step": 22265 }, { "epoch": 1.7587017038163117, "grad_norm": 0.07578609078307977, "learning_rate": 4.365437528733868e-05, "loss": 2.7659, "step": 22270 }, { "epoch": 1.7590965627529567, "grad_norm": 0.0783411944747611, "learning_rate": 4.351365931366702e-05, "loss": 2.8414, "step": 22275 }, { "epoch": 1.7594914216896014, "grad_norm": 0.07777211685005994, "learning_rate": 4.337316018067583e-05, "loss": 2.7186, "step": 22280 }, { "epoch": 1.7598862806262463, "grad_norm": 0.1097340936694256, "learning_rate": 4.323287795510522e-05, "loss": 2.7343, "step": 22285 }, { "epoch": 1.760281139562891, "grad_norm": 0.11818686365025241, "learning_rate": 4.309281270359205e-05, "loss": 2.7212, "step": 22290 }, { "epoch": 1.760675998499536, "grad_norm": 0.080532306951301, "learning_rate": 4.295296449267044e-05, "loss": 2.7259, "step": 22295 }, { "epoch": 1.7610708574361809, "grad_norm": 0.09100715166863617, "learning_rate": 4.281333338877114e-05, "loss": 2.7161, "step": 22300 }, { "epoch": 1.7614657163728258, "grad_norm": 0.07820579811896487, "learning_rate": 4.2673919458221965e-05, "loss": 2.5702, "step": 22305 }, { "epoch": 1.7618605753094707, "grad_norm": 0.09286150085165461, "learning_rate": 4.253472276724735e-05, "loss": 2.6987, "step": 22310 }, { "epoch": 1.7622554342461156, "grad_norm": 0.08520602341234619, "learning_rate": 4.2395743381968886e-05, "loss": 2.6404, "step": 22315 }, { "epoch": 1.7626502931827606, "grad_norm": 0.13284849761536208, "learning_rate": 4.225698136840456e-05, "loss": 2.7915, "step": 22320 }, { "epoch": 1.7630451521194055, "grad_norm": 0.08606623715398451, "learning_rate": 4.211843679246935e-05, "loss": 2.726, "step": 22325 }, { "epoch": 1.7634400110560502, "grad_norm": 0.125881861356559, "learning_rate": 4.198010971997474e-05, "loss": 2.9316, "step": 22330 }, { "epoch": 1.7638348699926951, "grad_norm": 0.07819726175058887, "learning_rate": 4.184200021662909e-05, "loss": 2.776, "step": 22335 }, { "epoch": 1.76422972892934, "grad_norm": 0.10901977621481161, "learning_rate": 4.1704108348037505e-05, "loss": 2.6005, "step": 22340 }, { "epoch": 1.7646245878659848, "grad_norm": 0.08128162670120338, "learning_rate": 4.156643417970135e-05, "loss": 2.8355, "step": 22345 }, { "epoch": 1.7650194468026297, "grad_norm": 0.08657239371821931, "learning_rate": 4.142897777701882e-05, "loss": 2.7672, "step": 22350 }, { "epoch": 1.7654143057392746, "grad_norm": 0.09804216008957699, "learning_rate": 4.1291739205284564e-05, "loss": 2.7455, "step": 22355 }, { "epoch": 1.7658091646759195, "grad_norm": 0.08020864534098339, "learning_rate": 4.115471852968983e-05, "loss": 2.6257, "step": 22360 }, { "epoch": 1.7662040236125645, "grad_norm": 0.07494980864729392, "learning_rate": 4.101791581532244e-05, "loss": 2.6079, "step": 22365 }, { "epoch": 1.7665988825492094, "grad_norm": 0.07885795716163325, "learning_rate": 4.088133112716647e-05, "loss": 2.7934, "step": 22370 }, { "epoch": 1.7669937414858543, "grad_norm": 0.09809297743909634, "learning_rate": 4.0744964530102626e-05, "loss": 2.6202, "step": 22375 }, { "epoch": 1.767388600422499, "grad_norm": 0.07559699864554015, "learning_rate": 4.060881608890782e-05, "loss": 2.828, "step": 22380 }, { "epoch": 1.767783459359144, "grad_norm": 0.07733212333873007, "learning_rate": 4.047288586825559e-05, "loss": 2.8187, "step": 22385 }, { "epoch": 1.7681783182957889, "grad_norm": 0.07896399607910778, "learning_rate": 4.0337173932715524e-05, "loss": 2.6459, "step": 22390 }, { "epoch": 1.7685731772324336, "grad_norm": 0.08361117780106507, "learning_rate": 4.020168034675376e-05, "loss": 2.7856, "step": 22395 }, { "epoch": 1.7689680361690785, "grad_norm": 0.07498820978684165, "learning_rate": 4.006640517473259e-05, "loss": 2.6399, "step": 22400 }, { "epoch": 1.7693628951057234, "grad_norm": 0.09772615295179879, "learning_rate": 3.993134848091051e-05, "loss": 2.9046, "step": 22405 }, { "epoch": 1.7697577540423683, "grad_norm": 0.07654413787343656, "learning_rate": 3.979651032944254e-05, "loss": 2.6853, "step": 22410 }, { "epoch": 1.7701526129790133, "grad_norm": 0.07999035208829308, "learning_rate": 3.966189078437937e-05, "loss": 2.8493, "step": 22415 }, { "epoch": 1.7705474719156582, "grad_norm": 0.0813696645091715, "learning_rate": 3.9527489909668425e-05, "loss": 2.7264, "step": 22420 }, { "epoch": 1.7709423308523031, "grad_norm": 0.07445314796881972, "learning_rate": 3.939330776915267e-05, "loss": 2.6765, "step": 22425 }, { "epoch": 1.771337189788948, "grad_norm": 0.0857716501924096, "learning_rate": 3.92593444265717e-05, "loss": 2.8448, "step": 22430 }, { "epoch": 1.7717320487255928, "grad_norm": 0.07505247980989617, "learning_rate": 3.9125599945560864e-05, "loss": 2.6089, "step": 22435 }, { "epoch": 1.7721269076622377, "grad_norm": 0.07471157434336632, "learning_rate": 3.8992074389651525e-05, "loss": 2.835, "step": 22440 }, { "epoch": 1.7725217665988824, "grad_norm": 0.10323700642226848, "learning_rate": 3.8858767822271326e-05, "loss": 2.8401, "step": 22445 }, { "epoch": 1.7729166255355273, "grad_norm": 0.0830425940009324, "learning_rate": 3.872568030674351e-05, "loss": 2.6793, "step": 22450 }, { "epoch": 1.7733114844721722, "grad_norm": 0.10848797565148668, "learning_rate": 3.859281190628766e-05, "loss": 2.7278, "step": 22455 }, { "epoch": 1.7737063434088172, "grad_norm": 0.08047634667533357, "learning_rate": 3.846016268401892e-05, "loss": 2.6226, "step": 22460 }, { "epoch": 1.774101202345462, "grad_norm": 0.08928158807528512, "learning_rate": 3.832773270294865e-05, "loss": 2.6497, "step": 22465 }, { "epoch": 1.774496061282107, "grad_norm": 0.07901644699632485, "learning_rate": 3.8195522025983695e-05, "loss": 2.658, "step": 22470 }, { "epoch": 1.774890920218752, "grad_norm": 0.07528700271318697, "learning_rate": 3.8063530715927054e-05, "loss": 2.6259, "step": 22475 }, { "epoch": 1.7752857791553969, "grad_norm": 0.08117332067941599, "learning_rate": 3.7931758835477446e-05, "loss": 2.7786, "step": 22480 }, { "epoch": 1.7756806380920416, "grad_norm": 0.0786131660189213, "learning_rate": 3.780020644722915e-05, "loss": 2.6251, "step": 22485 }, { "epoch": 1.7760754970286865, "grad_norm": 0.09047870174547477, "learning_rate": 3.766887361367249e-05, "loss": 2.8361, "step": 22490 }, { "epoch": 1.7764703559653314, "grad_norm": 0.10532663417685102, "learning_rate": 3.75377603971932e-05, "loss": 2.7264, "step": 22495 }, { "epoch": 1.7768652149019761, "grad_norm": 0.08152313451186788, "learning_rate": 3.740686686007294e-05, "loss": 2.6376, "step": 22500 }, { "epoch": 1.777260073838621, "grad_norm": 0.0804555065961688, "learning_rate": 3.7276193064488875e-05, "loss": 2.6377, "step": 22505 }, { "epoch": 1.777654932775266, "grad_norm": 0.081058884816355, "learning_rate": 3.714573907251384e-05, "loss": 2.6583, "step": 22510 }, { "epoch": 1.778049791711911, "grad_norm": 0.08262992273827396, "learning_rate": 3.701550494611622e-05, "loss": 2.6844, "step": 22515 }, { "epoch": 1.7784446506485558, "grad_norm": 0.09409009638180352, "learning_rate": 3.688549074716002e-05, "loss": 2.8188, "step": 22520 }, { "epoch": 1.7788395095852008, "grad_norm": 0.07792005713268163, "learning_rate": 3.675569653740479e-05, "loss": 2.6805, "step": 22525 }, { "epoch": 1.7792343685218457, "grad_norm": 0.08369424095703396, "learning_rate": 3.6626122378505324e-05, "loss": 2.6279, "step": 22530 }, { "epoch": 1.7796292274584906, "grad_norm": 0.09295232829065816, "learning_rate": 3.649676833201226e-05, "loss": 2.9603, "step": 22535 }, { "epoch": 1.7800240863951353, "grad_norm": 0.0904728589387445, "learning_rate": 3.636763445937147e-05, "loss": 2.7429, "step": 22540 }, { "epoch": 1.7804189453317802, "grad_norm": 0.07959608587953963, "learning_rate": 3.6238720821924306e-05, "loss": 2.7323, "step": 22545 }, { "epoch": 1.780813804268425, "grad_norm": 0.09662854972716765, "learning_rate": 3.611002748090741e-05, "loss": 2.7783, "step": 22550 }, { "epoch": 1.7812086632050699, "grad_norm": 0.0994294654601497, "learning_rate": 3.5981554497452884e-05, "loss": 2.6425, "step": 22555 }, { "epoch": 1.7816035221417148, "grad_norm": 0.11541753351149205, "learning_rate": 3.585330193258818e-05, "loss": 2.7801, "step": 22560 }, { "epoch": 1.7819983810783597, "grad_norm": 0.07541063309490623, "learning_rate": 3.572526984723584e-05, "loss": 2.646, "step": 22565 }, { "epoch": 1.7823932400150047, "grad_norm": 0.08325720277805919, "learning_rate": 3.5597458302214e-05, "loss": 2.6715, "step": 22570 }, { "epoch": 1.7827880989516496, "grad_norm": 0.07494721963459279, "learning_rate": 3.546986735823565e-05, "loss": 2.7799, "step": 22575 }, { "epoch": 1.7831829578882945, "grad_norm": 0.08640433599246035, "learning_rate": 3.534249707590942e-05, "loss": 2.7607, "step": 22580 }, { "epoch": 1.7835778168249394, "grad_norm": 0.12513962221895084, "learning_rate": 3.5215347515738626e-05, "loss": 2.7605, "step": 22585 }, { "epoch": 1.7839726757615841, "grad_norm": 0.07755297281200298, "learning_rate": 3.5088418738122186e-05, "loss": 2.676, "step": 22590 }, { "epoch": 1.784367534698229, "grad_norm": 0.0960713455715144, "learning_rate": 3.496171080335392e-05, "loss": 2.7611, "step": 22595 }, { "epoch": 1.784762393634874, "grad_norm": 0.07793749740689955, "learning_rate": 3.483522377162268e-05, "loss": 2.8242, "step": 22600 }, { "epoch": 1.7851572525715187, "grad_norm": 0.08938137707327365, "learning_rate": 3.470895770301269e-05, "loss": 2.7285, "step": 22605 }, { "epoch": 1.7855521115081636, "grad_norm": 0.07387120710880536, "learning_rate": 3.4582912657502905e-05, "loss": 2.7423, "step": 22610 }, { "epoch": 1.7859469704448085, "grad_norm": 0.07272195344678048, "learning_rate": 3.4457088694967385e-05, "loss": 2.6797, "step": 22615 }, { "epoch": 1.7863418293814535, "grad_norm": 0.09796211124958973, "learning_rate": 3.4331485875175096e-05, "loss": 2.6707, "step": 22620 }, { "epoch": 1.7867366883180984, "grad_norm": 0.08510716115356808, "learning_rate": 3.420610425779008e-05, "loss": 2.8058, "step": 22625 }, { "epoch": 1.7871315472547433, "grad_norm": 0.0763290219323336, "learning_rate": 3.408094390237138e-05, "loss": 2.6247, "step": 22630 }, { "epoch": 1.7875264061913883, "grad_norm": 0.08551210417217396, "learning_rate": 3.395600486837269e-05, "loss": 2.5951, "step": 22635 }, { "epoch": 1.787921265128033, "grad_norm": 0.07638113038851006, "learning_rate": 3.3831287215142784e-05, "loss": 2.7201, "step": 22640 }, { "epoch": 1.7883161240646779, "grad_norm": 0.0747037537533777, "learning_rate": 3.370679100192503e-05, "loss": 2.7581, "step": 22645 }, { "epoch": 1.7887109830013228, "grad_norm": 0.07466448064141006, "learning_rate": 3.3582516287858024e-05, "loss": 2.8095, "step": 22650 }, { "epoch": 1.7891058419379675, "grad_norm": 0.092418011783163, "learning_rate": 3.3458463131974595e-05, "loss": 2.708, "step": 22655 }, { "epoch": 1.7895007008746124, "grad_norm": 0.07872819652316362, "learning_rate": 3.333463159320288e-05, "loss": 2.8564, "step": 22660 }, { "epoch": 1.7898955598112574, "grad_norm": 0.07789875035096941, "learning_rate": 3.3211021730365274e-05, "loss": 2.6414, "step": 22665 }, { "epoch": 1.7902904187479023, "grad_norm": 0.07200168147729227, "learning_rate": 3.3087633602179204e-05, "loss": 2.6381, "step": 22670 }, { "epoch": 1.7906852776845472, "grad_norm": 0.07368571413199339, "learning_rate": 3.296446726725666e-05, "loss": 2.71, "step": 22675 }, { "epoch": 1.7910801366211921, "grad_norm": 0.08097522135445719, "learning_rate": 3.284152278410418e-05, "loss": 2.9868, "step": 22680 }, { "epoch": 1.791474995557837, "grad_norm": 0.13216872452942574, "learning_rate": 3.271880021112306e-05, "loss": 2.7867, "step": 22685 }, { "epoch": 1.791869854494482, "grad_norm": 0.0790099842372133, "learning_rate": 3.259629960660904e-05, "loss": 2.6071, "step": 22690 }, { "epoch": 1.7922647134311267, "grad_norm": 0.08294917883905675, "learning_rate": 3.2474021028752634e-05, "loss": 2.5981, "step": 22695 }, { "epoch": 1.7926595723677716, "grad_norm": 0.08809424192619474, "learning_rate": 3.235196453563866e-05, "loss": 2.8223, "step": 22700 }, { "epoch": 1.7930544313044166, "grad_norm": 0.07831564356576377, "learning_rate": 3.223013018524651e-05, "loss": 2.9068, "step": 22705 }, { "epoch": 1.7934492902410613, "grad_norm": 0.08333020856844138, "learning_rate": 3.2108518035450174e-05, "loss": 2.8503, "step": 22710 }, { "epoch": 1.7938441491777062, "grad_norm": 0.08684129693740973, "learning_rate": 3.198712814401788e-05, "loss": 2.6034, "step": 22715 }, { "epoch": 1.794239008114351, "grad_norm": 0.09567261400367967, "learning_rate": 3.186596056861252e-05, "loss": 2.8767, "step": 22720 }, { "epoch": 1.794633867050996, "grad_norm": 0.10568178820045623, "learning_rate": 3.1745015366791195e-05, "loss": 2.8194, "step": 22725 }, { "epoch": 1.795028725987641, "grad_norm": 0.11617656219000248, "learning_rate": 3.162429259600546e-05, "loss": 2.6479, "step": 22730 }, { "epoch": 1.7954235849242859, "grad_norm": 0.10301126119353704, "learning_rate": 3.150379231360112e-05, "loss": 2.8787, "step": 22735 }, { "epoch": 1.7958184438609308, "grad_norm": 0.0867374204384128, "learning_rate": 3.138351457681854e-05, "loss": 2.5396, "step": 22740 }, { "epoch": 1.7962133027975755, "grad_norm": 0.07178884904125535, "learning_rate": 3.1263459442791966e-05, "loss": 2.7318, "step": 22745 }, { "epoch": 1.7966081617342204, "grad_norm": 0.08485109592966862, "learning_rate": 3.114362696855022e-05, "loss": 2.9296, "step": 22750 }, { "epoch": 1.7970030206708654, "grad_norm": 0.07964039093620115, "learning_rate": 3.1024017211016385e-05, "loss": 2.9105, "step": 22755 }, { "epoch": 1.79739787960751, "grad_norm": 0.07656134078969713, "learning_rate": 3.09046302270074e-05, "loss": 2.7043, "step": 22760 }, { "epoch": 1.797792738544155, "grad_norm": 0.14592960717657424, "learning_rate": 3.078546607323485e-05, "loss": 2.6921, "step": 22765 }, { "epoch": 1.7981875974808, "grad_norm": 0.10796050261548473, "learning_rate": 3.066652480630405e-05, "loss": 3.0428, "step": 22770 }, { "epoch": 1.7985824564174449, "grad_norm": 0.09837369613810071, "learning_rate": 3.0547806482714714e-05, "loss": 2.8047, "step": 22775 }, { "epoch": 1.7989773153540898, "grad_norm": 0.0770285949380112, "learning_rate": 3.0429311158860494e-05, "loss": 2.7855, "step": 22780 }, { "epoch": 1.7993721742907347, "grad_norm": 0.07867199344802671, "learning_rate": 3.0311038891029318e-05, "loss": 2.9789, "step": 22785 }, { "epoch": 1.7997670332273796, "grad_norm": 0.0747619440098263, "learning_rate": 3.019298973540291e-05, "loss": 2.7947, "step": 22790 }, { "epoch": 1.8001618921640246, "grad_norm": 0.08050574339969394, "learning_rate": 3.007516374805702e-05, "loss": 2.7611, "step": 22795 }, { "epoch": 1.8005567511006693, "grad_norm": 0.08597964520111055, "learning_rate": 2.995756098496172e-05, "loss": 2.7822, "step": 22800 }, { "epoch": 1.8009516100373142, "grad_norm": 0.08054510287771234, "learning_rate": 2.984018150198059e-05, "loss": 2.8743, "step": 22805 }, { "epoch": 1.801346468973959, "grad_norm": 0.0729763317648471, "learning_rate": 2.9723025354871536e-05, "loss": 2.6995, "step": 22810 }, { "epoch": 1.8017413279106038, "grad_norm": 0.09974442756877727, "learning_rate": 2.9606092599286096e-05, "loss": 2.7048, "step": 22815 }, { "epoch": 1.8021361868472487, "grad_norm": 0.079757676072871, "learning_rate": 2.9489383290769832e-05, "loss": 2.7524, "step": 22820 }, { "epoch": 1.8025310457838937, "grad_norm": 0.09053890677909433, "learning_rate": 2.937289748476224e-05, "loss": 2.8313, "step": 22825 }, { "epoch": 1.8029259047205386, "grad_norm": 0.07186025659110126, "learning_rate": 2.9256635236596386e-05, "loss": 2.5948, "step": 22830 }, { "epoch": 1.8033207636571835, "grad_norm": 0.08189880054417574, "learning_rate": 2.9140596601499427e-05, "loss": 2.875, "step": 22835 }, { "epoch": 1.8037156225938284, "grad_norm": 0.13696722617470802, "learning_rate": 2.9024781634592102e-05, "loss": 2.6845, "step": 22840 }, { "epoch": 1.8041104815304734, "grad_norm": 0.10913452995383378, "learning_rate": 2.890919039088902e-05, "loss": 2.81, "step": 22845 }, { "epoch": 1.804505340467118, "grad_norm": 0.07542395215348577, "learning_rate": 2.8793822925298418e-05, "loss": 2.619, "step": 22850 }, { "epoch": 1.804900199403763, "grad_norm": 0.07845663959986492, "learning_rate": 2.8678679292622358e-05, "loss": 2.7271, "step": 22855 }, { "epoch": 1.805295058340408, "grad_norm": 0.07517030634826638, "learning_rate": 2.8563759547556412e-05, "loss": 2.767, "step": 22860 }, { "epoch": 1.8056899172770526, "grad_norm": 0.07536212598261446, "learning_rate": 2.8449063744689917e-05, "loss": 2.7324, "step": 22865 }, { "epoch": 1.8060847762136976, "grad_norm": 0.07288014103751723, "learning_rate": 2.8334591938505906e-05, "loss": 2.7302, "step": 22870 }, { "epoch": 1.8064796351503425, "grad_norm": 0.09070478948523901, "learning_rate": 2.8220344183380765e-05, "loss": 2.8128, "step": 22875 }, { "epoch": 1.8068744940869874, "grad_norm": 0.07956015692749492, "learning_rate": 2.8106320533584807e-05, "loss": 2.8971, "step": 22880 }, { "epoch": 1.8072693530236323, "grad_norm": 0.10022902069298663, "learning_rate": 2.7992521043281426e-05, "loss": 2.7662, "step": 22885 }, { "epoch": 1.8076642119602773, "grad_norm": 0.08036372098197798, "learning_rate": 2.7878945766527885e-05, "loss": 2.5933, "step": 22890 }, { "epoch": 1.8080590708969222, "grad_norm": 0.13240626880889694, "learning_rate": 2.776559475727497e-05, "loss": 2.7221, "step": 22895 }, { "epoch": 1.8084539298335671, "grad_norm": 0.0784814732085907, "learning_rate": 2.765246806936661e-05, "loss": 2.7076, "step": 22900 }, { "epoch": 1.8088487887702118, "grad_norm": 0.07657405375344646, "learning_rate": 2.7539565756540542e-05, "loss": 2.5967, "step": 22905 }, { "epoch": 1.8092436477068568, "grad_norm": 0.08891018346396039, "learning_rate": 2.7426887872427643e-05, "loss": 2.6618, "step": 22910 }, { "epoch": 1.8096385066435015, "grad_norm": 0.07798742595566198, "learning_rate": 2.7314434470552373e-05, "loss": 2.6414, "step": 22915 }, { "epoch": 1.8100333655801464, "grad_norm": 0.09558979484221697, "learning_rate": 2.7202205604332453e-05, "loss": 2.735, "step": 22920 }, { "epoch": 1.8104282245167913, "grad_norm": 0.08006178830280267, "learning_rate": 2.709020132707901e-05, "loss": 2.8065, "step": 22925 }, { "epoch": 1.8108230834534362, "grad_norm": 0.08268471770555633, "learning_rate": 2.6978421691996326e-05, "loss": 2.839, "step": 22930 }, { "epoch": 1.8112179423900812, "grad_norm": 0.07436047158412383, "learning_rate": 2.686686675218214e-05, "loss": 2.8052, "step": 22935 }, { "epoch": 1.811612801326726, "grad_norm": 0.07391216578722627, "learning_rate": 2.675553656062757e-05, "loss": 2.8294, "step": 22940 }, { "epoch": 1.812007660263371, "grad_norm": 0.07848773514392017, "learning_rate": 2.6644431170216587e-05, "loss": 2.6539, "step": 22945 }, { "epoch": 1.812402519200016, "grad_norm": 0.09472591193979926, "learning_rate": 2.653355063372681e-05, "loss": 2.6234, "step": 22950 }, { "epoch": 1.8127973781366606, "grad_norm": 0.0782908618702102, "learning_rate": 2.642289500382866e-05, "loss": 2.6528, "step": 22955 }, { "epoch": 1.8131922370733056, "grad_norm": 0.12808012825498102, "learning_rate": 2.6312464333086095e-05, "loss": 2.795, "step": 22960 }, { "epoch": 1.8135870960099505, "grad_norm": 0.07254117085282138, "learning_rate": 2.620225867395587e-05, "loss": 2.5818, "step": 22965 }, { "epoch": 1.8139819549465952, "grad_norm": 0.08154633422742112, "learning_rate": 2.6092278078788e-05, "loss": 2.7178, "step": 22970 }, { "epoch": 1.8143768138832401, "grad_norm": 0.08158100268549602, "learning_rate": 2.5982522599825698e-05, "loss": 2.6931, "step": 22975 }, { "epoch": 1.814771672819885, "grad_norm": 0.08177740494452156, "learning_rate": 2.5872992289205033e-05, "loss": 2.7901, "step": 22980 }, { "epoch": 1.81516653175653, "grad_norm": 0.09561167659178578, "learning_rate": 2.5763687198955334e-05, "loss": 2.9458, "step": 22985 }, { "epoch": 1.815561390693175, "grad_norm": 0.08191642996418186, "learning_rate": 2.565460738099873e-05, "loss": 2.6908, "step": 22990 }, { "epoch": 1.8159562496298198, "grad_norm": 0.17722038758514944, "learning_rate": 2.5545752887150607e-05, "loss": 2.6695, "step": 22995 }, { "epoch": 1.8163511085664648, "grad_norm": 0.08132176402089691, "learning_rate": 2.543712376911894e-05, "loss": 2.7962, "step": 23000 }, { "epoch": 1.8167459675031095, "grad_norm": 0.22188279210100975, "learning_rate": 2.5328720078505007e-05, "loss": 2.8692, "step": 23005 }, { "epoch": 1.8171408264397544, "grad_norm": 0.0888512973409133, "learning_rate": 2.5220541866802783e-05, "loss": 2.8992, "step": 23010 }, { "epoch": 1.8175356853763993, "grad_norm": 0.10484665550508152, "learning_rate": 2.5112589185399216e-05, "loss": 2.7836, "step": 23015 }, { "epoch": 1.817930544313044, "grad_norm": 0.08120133141689972, "learning_rate": 2.5004862085574232e-05, "loss": 2.7142, "step": 23020 }, { "epoch": 1.818325403249689, "grad_norm": 0.07364555031228176, "learning_rate": 2.489736061850034e-05, "loss": 2.8572, "step": 23025 }, { "epoch": 1.8187202621863339, "grad_norm": 0.09527590862553341, "learning_rate": 2.4790084835243144e-05, "loss": 2.841, "step": 23030 }, { "epoch": 1.8191151211229788, "grad_norm": 0.09108176434308826, "learning_rate": 2.4683034786760705e-05, "loss": 2.5529, "step": 23035 }, { "epoch": 1.8195099800596237, "grad_norm": 0.07282668460287195, "learning_rate": 2.4576210523904353e-05, "loss": 2.7624, "step": 23040 }, { "epoch": 1.8199048389962686, "grad_norm": 0.10150623593066403, "learning_rate": 2.4469612097417605e-05, "loss": 2.8398, "step": 23045 }, { "epoch": 1.8202996979329136, "grad_norm": 0.0876634837487242, "learning_rate": 2.4363239557937124e-05, "loss": 2.9213, "step": 23050 }, { "epoch": 1.8206945568695585, "grad_norm": 0.0734909748140297, "learning_rate": 2.4257092955992155e-05, "loss": 2.7441, "step": 23055 }, { "epoch": 1.8210894158062032, "grad_norm": 0.08303122676470383, "learning_rate": 2.415117234200437e-05, "loss": 2.8376, "step": 23060 }, { "epoch": 1.8214842747428481, "grad_norm": 0.10037993091563696, "learning_rate": 2.4045477766288572e-05, "loss": 2.6468, "step": 23065 }, { "epoch": 1.8218791336794928, "grad_norm": 0.07958298665478482, "learning_rate": 2.3940009279051656e-05, "loss": 2.7726, "step": 23070 }, { "epoch": 1.8222739926161378, "grad_norm": 0.07560977104466039, "learning_rate": 2.383476693039366e-05, "loss": 2.6979, "step": 23075 }, { "epoch": 1.8226688515527827, "grad_norm": 0.07877447062387817, "learning_rate": 2.37297507703067e-05, "loss": 2.6846, "step": 23080 }, { "epoch": 1.8230637104894276, "grad_norm": 0.07393726315493629, "learning_rate": 2.362496084867577e-05, "loss": 2.628, "step": 23085 }, { "epoch": 1.8234585694260725, "grad_norm": 0.07388499874102816, "learning_rate": 2.3520397215278378e-05, "loss": 2.8189, "step": 23090 }, { "epoch": 1.8238534283627175, "grad_norm": 0.0813597080561212, "learning_rate": 2.3416059919784294e-05, "loss": 2.7495, "step": 23095 }, { "epoch": 1.8242482872993624, "grad_norm": 0.12376550562705109, "learning_rate": 2.331194901175615e-05, "loss": 2.8329, "step": 23100 }, { "epoch": 1.8246431462360073, "grad_norm": 0.07848717499026563, "learning_rate": 2.320806454064861e-05, "loss": 2.6913, "step": 23105 }, { "epoch": 1.825038005172652, "grad_norm": 0.07755582590595016, "learning_rate": 2.3104406555809198e-05, "loss": 2.6143, "step": 23110 }, { "epoch": 1.825432864109297, "grad_norm": 0.0866304391054061, "learning_rate": 2.3000975106477473e-05, "loss": 2.9533, "step": 23115 }, { "epoch": 1.8258277230459419, "grad_norm": 0.0815516076860422, "learning_rate": 2.2897770241785742e-05, "loss": 2.6334, "step": 23120 }, { "epoch": 1.8262225819825866, "grad_norm": 0.08746145461363647, "learning_rate": 2.2794792010758347e-05, "loss": 2.6279, "step": 23125 }, { "epoch": 1.8266174409192315, "grad_norm": 0.08864195193639748, "learning_rate": 2.2692040462312214e-05, "loss": 2.6161, "step": 23130 }, { "epoch": 1.8270122998558764, "grad_norm": 0.08128220951126278, "learning_rate": 2.2589515645256518e-05, "loss": 2.695, "step": 23135 }, { "epoch": 1.8274071587925214, "grad_norm": 0.07667887498101482, "learning_rate": 2.2487217608292642e-05, "loss": 2.6715, "step": 23140 }, { "epoch": 1.8278020177291663, "grad_norm": 0.10240202595472411, "learning_rate": 2.2385146400014544e-05, "loss": 2.7511, "step": 23145 }, { "epoch": 1.8281968766658112, "grad_norm": 0.07387819446507997, "learning_rate": 2.228330206890783e-05, "loss": 2.8074, "step": 23150 }, { "epoch": 1.8285917356024561, "grad_norm": 0.07668948448702763, "learning_rate": 2.2181684663350964e-05, "loss": 2.8869, "step": 23155 }, { "epoch": 1.828986594539101, "grad_norm": 0.07670283665360772, "learning_rate": 2.208029423161434e-05, "loss": 2.5925, "step": 23160 }, { "epoch": 1.8293814534757458, "grad_norm": 0.08116044662637084, "learning_rate": 2.1979130821860483e-05, "loss": 2.7394, "step": 23165 }, { "epoch": 1.8297763124123907, "grad_norm": 0.0807961037398451, "learning_rate": 2.1878194482144286e-05, "loss": 2.6714, "step": 23170 }, { "epoch": 1.8301711713490354, "grad_norm": 0.07230415423604669, "learning_rate": 2.177748526041251e-05, "loss": 2.6906, "step": 23175 }, { "epoch": 1.8305660302856803, "grad_norm": 0.0773779274632464, "learning_rate": 2.1677003204504275e-05, "loss": 2.668, "step": 23180 }, { "epoch": 1.8309608892223252, "grad_norm": 0.08072026533111, "learning_rate": 2.157674836215062e-05, "loss": 2.8312, "step": 23185 }, { "epoch": 1.8313557481589702, "grad_norm": 0.091302621515766, "learning_rate": 2.147672078097479e-05, "loss": 2.9191, "step": 23190 }, { "epoch": 1.831750607095615, "grad_norm": 0.0927324410446544, "learning_rate": 2.1376920508491883e-05, "loss": 2.7247, "step": 23195 }, { "epoch": 1.83214546603226, "grad_norm": 0.0738137008692713, "learning_rate": 2.1277347592109253e-05, "loss": 2.7238, "step": 23200 }, { "epoch": 1.832540324968905, "grad_norm": 0.07556488296514502, "learning_rate": 2.117800207912607e-05, "loss": 3.0257, "step": 23205 }, { "epoch": 1.8329351839055499, "grad_norm": 0.07991845457096743, "learning_rate": 2.107888401673358e-05, "loss": 2.8972, "step": 23210 }, { "epoch": 1.8333300428421946, "grad_norm": 0.07442846620851902, "learning_rate": 2.0979993452015066e-05, "loss": 2.6839, "step": 23215 }, { "epoch": 1.8337249017788395, "grad_norm": 0.0784000660894693, "learning_rate": 2.0881330431945456e-05, "loss": 2.7866, "step": 23220 }, { "epoch": 1.8341197607154844, "grad_norm": 0.07249844173009677, "learning_rate": 2.0782895003391923e-05, "loss": 2.6592, "step": 23225 }, { "epoch": 1.8345146196521291, "grad_norm": 0.07515995483964756, "learning_rate": 2.0684687213113394e-05, "loss": 2.6415, "step": 23230 }, { "epoch": 1.834909478588774, "grad_norm": 0.08167380283002362, "learning_rate": 2.058670710776045e-05, "loss": 2.7921, "step": 23235 }, { "epoch": 1.835304337525419, "grad_norm": 0.07557028275774239, "learning_rate": 2.048895473387591e-05, "loss": 2.6858, "step": 23240 }, { "epoch": 1.835699196462064, "grad_norm": 0.08050702488092348, "learning_rate": 2.039143013789413e-05, "loss": 2.7701, "step": 23245 }, { "epoch": 1.8360940553987088, "grad_norm": 0.08317665621587995, "learning_rate": 2.0294133366141443e-05, "loss": 2.8211, "step": 23250 }, { "epoch": 1.8364889143353538, "grad_norm": 0.07764534978414404, "learning_rate": 2.019706446483577e-05, "loss": 2.6604, "step": 23255 }, { "epoch": 1.8368837732719987, "grad_norm": 0.12141477491204118, "learning_rate": 2.0100223480087e-05, "loss": 2.7168, "step": 23260 }, { "epoch": 1.8372786322086436, "grad_norm": 0.0749947986852935, "learning_rate": 2.0003610457896504e-05, "loss": 2.7877, "step": 23265 }, { "epoch": 1.8376734911452883, "grad_norm": 0.07704568340817679, "learning_rate": 1.9907225444157684e-05, "loss": 2.7227, "step": 23270 }, { "epoch": 1.8380683500819333, "grad_norm": 0.07294285346639687, "learning_rate": 1.981106848465536e-05, "loss": 2.9327, "step": 23275 }, { "epoch": 1.838463209018578, "grad_norm": 0.07679362152359649, "learning_rate": 1.9715139625066104e-05, "loss": 2.5923, "step": 23280 }, { "epoch": 1.8388580679552229, "grad_norm": 0.07980268057709582, "learning_rate": 1.9619438910958298e-05, "loss": 2.5395, "step": 23285 }, { "epoch": 1.8392529268918678, "grad_norm": 0.0760279621149764, "learning_rate": 1.9523966387791638e-05, "loss": 2.7765, "step": 23290 }, { "epoch": 1.8396477858285127, "grad_norm": 0.0785807056465866, "learning_rate": 1.9428722100917785e-05, "loss": 2.8491, "step": 23295 }, { "epoch": 1.8400426447651577, "grad_norm": 0.09246774792766153, "learning_rate": 1.933370609557955e-05, "loss": 2.6742, "step": 23300 }, { "epoch": 1.8404375037018026, "grad_norm": 0.07677508239171163, "learning_rate": 1.9238918416911777e-05, "loss": 2.727, "step": 23305 }, { "epoch": 1.8408323626384475, "grad_norm": 0.07928933998812679, "learning_rate": 1.9144359109940445e-05, "loss": 2.7116, "step": 23310 }, { "epoch": 1.8412272215750924, "grad_norm": 0.07899239109634447, "learning_rate": 1.905002821958335e-05, "loss": 2.7364, "step": 23315 }, { "epoch": 1.8416220805117371, "grad_norm": 0.11922851726152776, "learning_rate": 1.895592579064964e-05, "loss": 2.8317, "step": 23320 }, { "epoch": 1.842016939448382, "grad_norm": 0.09269267399142109, "learning_rate": 1.8862051867839847e-05, "loss": 2.8146, "step": 23325 }, { "epoch": 1.842411798385027, "grad_norm": 0.07190820158542756, "learning_rate": 1.8768406495746237e-05, "loss": 2.6308, "step": 23330 }, { "epoch": 1.8428066573216717, "grad_norm": 0.07361074889245711, "learning_rate": 1.867498971885223e-05, "loss": 2.6404, "step": 23335 }, { "epoch": 1.8432015162583166, "grad_norm": 0.07621895989939403, "learning_rate": 1.858180158153283e-05, "loss": 2.7008, "step": 23340 }, { "epoch": 1.8435963751949616, "grad_norm": 0.08360562576460899, "learning_rate": 1.848884212805435e-05, "loss": 2.6634, "step": 23345 }, { "epoch": 1.8439912341316065, "grad_norm": 0.07460213656628809, "learning_rate": 1.8396111402574467e-05, "loss": 2.7094, "step": 23350 }, { "epoch": 1.8443860930682514, "grad_norm": 0.07853426346065033, "learning_rate": 1.8303609449142388e-05, "loss": 2.7451, "step": 23355 }, { "epoch": 1.8447809520048963, "grad_norm": 0.07530157211457628, "learning_rate": 1.82113363116983e-05, "loss": 2.7465, "step": 23360 }, { "epoch": 1.8451758109415413, "grad_norm": 0.1044978730278927, "learning_rate": 1.8119292034074085e-05, "loss": 2.8681, "step": 23365 }, { "epoch": 1.845570669878186, "grad_norm": 0.10119315302110217, "learning_rate": 1.802747665999255e-05, "loss": 2.5639, "step": 23370 }, { "epoch": 1.845965528814831, "grad_norm": 0.07379147062290632, "learning_rate": 1.7935890233068076e-05, "loss": 2.603, "step": 23375 }, { "epoch": 1.8463603877514758, "grad_norm": 0.08321508264574629, "learning_rate": 1.784453279680609e-05, "loss": 2.6392, "step": 23380 }, { "epoch": 1.8467552466881205, "grad_norm": 0.07356111680613742, "learning_rate": 1.7753404394603322e-05, "loss": 2.8071, "step": 23385 }, { "epoch": 1.8471501056247654, "grad_norm": 0.07984367925761839, "learning_rate": 1.7662505069747704e-05, "loss": 2.7852, "step": 23390 }, { "epoch": 1.8475449645614104, "grad_norm": 0.17595489249910276, "learning_rate": 1.7571834865418244e-05, "loss": 2.8666, "step": 23395 }, { "epoch": 1.8479398234980553, "grad_norm": 0.10095989021218144, "learning_rate": 1.7481393824685442e-05, "loss": 2.583, "step": 23400 }, { "epoch": 1.8483346824347002, "grad_norm": 0.08131041436720463, "learning_rate": 1.7391181990510484e-05, "loss": 2.8562, "step": 23405 }, { "epoch": 1.8487295413713452, "grad_norm": 0.07814934985088211, "learning_rate": 1.7301199405746038e-05, "loss": 2.7147, "step": 23410 }, { "epoch": 1.84912440030799, "grad_norm": 0.08316491097069363, "learning_rate": 1.721144611313563e-05, "loss": 2.5706, "step": 23415 }, { "epoch": 1.849519259244635, "grad_norm": 0.08253933993193943, "learning_rate": 1.712192215531405e-05, "loss": 2.661, "step": 23420 }, { "epoch": 1.8499141181812797, "grad_norm": 0.07353024195897566, "learning_rate": 1.7032627574807e-05, "loss": 2.9007, "step": 23425 }, { "epoch": 1.8503089771179246, "grad_norm": 0.07552964181584003, "learning_rate": 1.6943562414031332e-05, "loss": 2.7052, "step": 23430 }, { "epoch": 1.8507038360545693, "grad_norm": 0.11028594480996055, "learning_rate": 1.6854726715294978e-05, "loss": 2.5406, "step": 23435 }, { "epoch": 1.8510986949912143, "grad_norm": 0.07564903757200028, "learning_rate": 1.676612052079668e-05, "loss": 2.8436, "step": 23440 }, { "epoch": 1.8514935539278592, "grad_norm": 0.07679513640558046, "learning_rate": 1.6677743872626274e-05, "loss": 2.5808, "step": 23445 }, { "epoch": 1.8518884128645041, "grad_norm": 0.07193013000903502, "learning_rate": 1.658959681276451e-05, "loss": 2.7961, "step": 23450 }, { "epoch": 1.852283271801149, "grad_norm": 0.09590673977276701, "learning_rate": 1.6501679383083167e-05, "loss": 2.7212, "step": 23455 }, { "epoch": 1.852678130737794, "grad_norm": 0.08110944865364542, "learning_rate": 1.6413991625344792e-05, "loss": 2.833, "step": 23460 }, { "epoch": 1.853072989674439, "grad_norm": 0.07856930792644469, "learning_rate": 1.6326533581203062e-05, "loss": 2.623, "step": 23465 }, { "epoch": 1.8534678486110838, "grad_norm": 0.11463200585671655, "learning_rate": 1.62393052922023e-05, "loss": 2.8934, "step": 23470 }, { "epoch": 1.8538627075477285, "grad_norm": 0.07601339192770636, "learning_rate": 1.6152306799777803e-05, "loss": 2.8021, "step": 23475 }, { "epoch": 1.8542575664843735, "grad_norm": 0.08228092306982852, "learning_rate": 1.6065538145255798e-05, "loss": 2.7428, "step": 23480 }, { "epoch": 1.8546524254210184, "grad_norm": 0.11828125569489141, "learning_rate": 1.5978999369853085e-05, "loss": 2.7799, "step": 23485 }, { "epoch": 1.855047284357663, "grad_norm": 0.07332806685566964, "learning_rate": 1.589269051467751e-05, "loss": 2.9804, "step": 23490 }, { "epoch": 1.855442143294308, "grad_norm": 0.0750713210726475, "learning_rate": 1.5806611620727606e-05, "loss": 2.7659, "step": 23495 }, { "epoch": 1.855837002230953, "grad_norm": 0.07332222007327921, "learning_rate": 1.5720762728892556e-05, "loss": 2.8279, "step": 23500 }, { "epoch": 1.8562318611675979, "grad_norm": 0.08041227066581431, "learning_rate": 1.5635143879952574e-05, "loss": 2.9377, "step": 23505 }, { "epoch": 1.8566267201042428, "grad_norm": 0.10040374515068211, "learning_rate": 1.554975511457829e-05, "loss": 2.8165, "step": 23510 }, { "epoch": 1.8570215790408877, "grad_norm": 0.08110229332418104, "learning_rate": 1.546459647333126e-05, "loss": 2.9268, "step": 23515 }, { "epoch": 1.8574164379775326, "grad_norm": 0.07271526683014963, "learning_rate": 1.537966799666357e-05, "loss": 2.6769, "step": 23520 }, { "epoch": 1.8578112969141776, "grad_norm": 0.08749707775513275, "learning_rate": 1.5294969724918173e-05, "loss": 2.7071, "step": 23525 }, { "epoch": 1.8582061558508223, "grad_norm": 0.11357210899533036, "learning_rate": 1.5210501698328382e-05, "loss": 2.9438, "step": 23530 }, { "epoch": 1.8586010147874672, "grad_norm": 0.0784735056658921, "learning_rate": 1.5126263957018383e-05, "loss": 2.6557, "step": 23535 }, { "epoch": 1.858995873724112, "grad_norm": 0.10067351693259835, "learning_rate": 1.5042256541002886e-05, "loss": 2.6401, "step": 23540 }, { "epoch": 1.8593907326607568, "grad_norm": 0.07664026786230335, "learning_rate": 1.4958479490187138e-05, "loss": 2.6288, "step": 23545 }, { "epoch": 1.8597855915974018, "grad_norm": 0.09140510357326553, "learning_rate": 1.487493284436714e-05, "loss": 2.6571, "step": 23550 }, { "epoch": 1.8601804505340467, "grad_norm": 0.09449669144211584, "learning_rate": 1.47916166432292e-05, "loss": 2.5916, "step": 23555 }, { "epoch": 1.8605753094706916, "grad_norm": 0.08272975445895857, "learning_rate": 1.4708530926350327e-05, "loss": 2.7267, "step": 23560 }, { "epoch": 1.8609701684073365, "grad_norm": 0.07775529984644167, "learning_rate": 1.4625675733197951e-05, "loss": 2.6274, "step": 23565 }, { "epoch": 1.8613650273439815, "grad_norm": 0.09776968675199992, "learning_rate": 1.4543051103130145e-05, "loss": 2.9264, "step": 23570 }, { "epoch": 1.8617598862806264, "grad_norm": 0.07189165062571524, "learning_rate": 1.4460657075395178e-05, "loss": 2.7959, "step": 23575 }, { "epoch": 1.862154745217271, "grad_norm": 0.07345137031253397, "learning_rate": 1.4378493689132188e-05, "loss": 2.6643, "step": 23580 }, { "epoch": 1.862549604153916, "grad_norm": 0.11961758713650301, "learning_rate": 1.42965609833704e-05, "loss": 2.7966, "step": 23585 }, { "epoch": 1.862944463090561, "grad_norm": 0.07681688648977546, "learning_rate": 1.4214858997029511e-05, "loss": 2.6678, "step": 23590 }, { "epoch": 1.8633393220272056, "grad_norm": 0.0913406761249919, "learning_rate": 1.4133387768919815e-05, "loss": 2.6964, "step": 23595 }, { "epoch": 1.8637341809638506, "grad_norm": 0.07481047843370492, "learning_rate": 1.4052147337741795e-05, "loss": 2.7938, "step": 23600 }, { "epoch": 1.8641290399004955, "grad_norm": 0.08201373081847675, "learning_rate": 1.397113774208647e-05, "loss": 2.735, "step": 23605 }, { "epoch": 1.8645238988371404, "grad_norm": 0.09409454805048793, "learning_rate": 1.3890359020435007e-05, "loss": 2.6756, "step": 23610 }, { "epoch": 1.8649187577737854, "grad_norm": 0.07538995922844616, "learning_rate": 1.3809811211159096e-05, "loss": 2.8482, "step": 23615 }, { "epoch": 1.8653136167104303, "grad_norm": 0.08232351495807654, "learning_rate": 1.3729494352520577e-05, "loss": 2.6527, "step": 23620 }, { "epoch": 1.8657084756470752, "grad_norm": 0.07907761250638716, "learning_rate": 1.364940848267171e-05, "loss": 2.6753, "step": 23625 }, { "epoch": 1.8661033345837201, "grad_norm": 0.08195604127818877, "learning_rate": 1.3569553639655009e-05, "loss": 2.8029, "step": 23630 }, { "epoch": 1.8664981935203648, "grad_norm": 0.07744162816241637, "learning_rate": 1.3489929861403128e-05, "loss": 2.6225, "step": 23635 }, { "epoch": 1.8668930524570098, "grad_norm": 0.07515618880080999, "learning_rate": 1.3410537185739147e-05, "loss": 2.7012, "step": 23640 }, { "epoch": 1.8672879113936545, "grad_norm": 0.0735462829776204, "learning_rate": 1.3331375650376121e-05, "loss": 2.6991, "step": 23645 }, { "epoch": 1.8676827703302994, "grad_norm": 0.08034174587688309, "learning_rate": 1.3252445292917636e-05, "loss": 2.8174, "step": 23650 }, { "epoch": 1.8680776292669443, "grad_norm": 0.08082361421795191, "learning_rate": 1.3173746150857147e-05, "loss": 2.808, "step": 23655 }, { "epoch": 1.8684724882035892, "grad_norm": 0.08586081056095116, "learning_rate": 1.3095278261578526e-05, "loss": 3.0832, "step": 23660 }, { "epoch": 1.8688673471402342, "grad_norm": 0.07662405233459393, "learning_rate": 1.3017041662355566e-05, "loss": 2.6684, "step": 23665 }, { "epoch": 1.869262206076879, "grad_norm": 0.07638864425629166, "learning_rate": 1.2939036390352321e-05, "loss": 2.4357, "step": 23670 }, { "epoch": 1.869657065013524, "grad_norm": 0.08373430676315274, "learning_rate": 1.2861262482623149e-05, "loss": 2.7074, "step": 23675 }, { "epoch": 1.870051923950169, "grad_norm": 0.075656810389313, "learning_rate": 1.2783719976111996e-05, "loss": 2.9977, "step": 23680 }, { "epoch": 1.8704467828868137, "grad_norm": 0.12025943987093554, "learning_rate": 1.2706408907653399e-05, "loss": 2.7306, "step": 23685 }, { "epoch": 1.8708416418234586, "grad_norm": 0.07103802315710735, "learning_rate": 1.2629329313971705e-05, "loss": 3.0721, "step": 23690 }, { "epoch": 1.8712365007601035, "grad_norm": 0.07524014956710556, "learning_rate": 1.255248123168129e-05, "loss": 2.7216, "step": 23695 }, { "epoch": 1.8716313596967482, "grad_norm": 0.0780659785823803, "learning_rate": 1.2475864697286731e-05, "loss": 2.6765, "step": 23700 }, { "epoch": 1.8720262186333931, "grad_norm": 0.0718289295038227, "learning_rate": 1.2399479747182418e-05, "loss": 2.594, "step": 23705 }, { "epoch": 1.872421077570038, "grad_norm": 0.08004545959805585, "learning_rate": 1.232332641765288e-05, "loss": 2.7713, "step": 23710 }, { "epoch": 1.872815936506683, "grad_norm": 0.07394100858441723, "learning_rate": 1.2247404744872515e-05, "loss": 2.681, "step": 23715 }, { "epoch": 1.873210795443328, "grad_norm": 0.1064006225895074, "learning_rate": 1.217171476490575e-05, "loss": 2.7304, "step": 23720 }, { "epoch": 1.8736056543799728, "grad_norm": 0.07626485183155576, "learning_rate": 1.2096256513706883e-05, "loss": 2.6349, "step": 23725 }, { "epoch": 1.8740005133166178, "grad_norm": 0.07413271841035204, "learning_rate": 1.2021030027120295e-05, "loss": 2.7023, "step": 23730 }, { "epoch": 1.8743953722532625, "grad_norm": 0.07912166101164952, "learning_rate": 1.1946035340880013e-05, "loss": 2.6653, "step": 23735 }, { "epoch": 1.8747902311899074, "grad_norm": 0.0763039694774085, "learning_rate": 1.1871272490610207e-05, "loss": 2.6815, "step": 23740 }, { "epoch": 1.8751850901265523, "grad_norm": 0.08923123329906449, "learning_rate": 1.1796741511824804e-05, "loss": 2.6226, "step": 23745 }, { "epoch": 1.875579949063197, "grad_norm": 0.07685063338263023, "learning_rate": 1.1722442439927538e-05, "loss": 2.7548, "step": 23750 }, { "epoch": 1.875974807999842, "grad_norm": 0.07172521012768839, "learning_rate": 1.1648375310212178e-05, "loss": 2.6256, "step": 23755 }, { "epoch": 1.8763696669364869, "grad_norm": 0.07700384749873246, "learning_rate": 1.1574540157862023e-05, "loss": 2.6991, "step": 23760 }, { "epoch": 1.8767645258731318, "grad_norm": 0.08728610478821461, "learning_rate": 1.1500937017950353e-05, "loss": 2.5809, "step": 23765 }, { "epoch": 1.8771593848097767, "grad_norm": 0.07998570561877576, "learning_rate": 1.1427565925440309e-05, "loss": 2.7561, "step": 23770 }, { "epoch": 1.8775542437464217, "grad_norm": 0.07480704760252636, "learning_rate": 1.1354426915184624e-05, "loss": 2.7774, "step": 23775 }, { "epoch": 1.8779491026830666, "grad_norm": 0.07565617086152615, "learning_rate": 1.1281520021925951e-05, "loss": 2.7266, "step": 23780 }, { "epoch": 1.8783439616197115, "grad_norm": 0.09388450322776319, "learning_rate": 1.1208845280296531e-05, "loss": 3.0822, "step": 23785 }, { "epoch": 1.8787388205563562, "grad_norm": 0.07479279022788213, "learning_rate": 1.1136402724818473e-05, "loss": 2.7516, "step": 23790 }, { "epoch": 1.8791336794930011, "grad_norm": 0.0736354936602538, "learning_rate": 1.1064192389903472e-05, "loss": 2.7646, "step": 23795 }, { "epoch": 1.8795285384296458, "grad_norm": 0.08056552734556717, "learning_rate": 1.0992214309853088e-05, "loss": 2.7157, "step": 23800 }, { "epoch": 1.8799233973662908, "grad_norm": 0.13402120717628294, "learning_rate": 1.092046851885825e-05, "loss": 2.6444, "step": 23805 }, { "epoch": 1.8803182563029357, "grad_norm": 0.07695156267491107, "learning_rate": 1.0848955050999809e-05, "loss": 2.6565, "step": 23810 }, { "epoch": 1.8807131152395806, "grad_norm": 0.06987433478234362, "learning_rate": 1.0777673940248255e-05, "loss": 2.7464, "step": 23815 }, { "epoch": 1.8811079741762256, "grad_norm": 0.09279720014533127, "learning_rate": 1.070662522046345e-05, "loss": 2.7929, "step": 23820 }, { "epoch": 1.8815028331128705, "grad_norm": 0.0868535110626254, "learning_rate": 1.0635808925395285e-05, "loss": 2.6866, "step": 23825 }, { "epoch": 1.8818976920495154, "grad_norm": 0.0694261016787167, "learning_rate": 1.0565225088682739e-05, "loss": 2.7349, "step": 23830 }, { "epoch": 1.8822925509861603, "grad_norm": 0.10686495514158795, "learning_rate": 1.0494873743854883e-05, "loss": 2.6537, "step": 23835 }, { "epoch": 1.882687409922805, "grad_norm": 0.09930236675615406, "learning_rate": 1.0424754924329872e-05, "loss": 2.6163, "step": 23840 }, { "epoch": 1.88308226885945, "grad_norm": 0.0738789838798379, "learning_rate": 1.0354868663415729e-05, "loss": 2.7888, "step": 23845 }, { "epoch": 1.8834771277960949, "grad_norm": 0.07467443227354287, "learning_rate": 1.0285214994309955e-05, "loss": 2.7083, "step": 23850 }, { "epoch": 1.8838719867327396, "grad_norm": 0.08121528259017834, "learning_rate": 1.0215793950099417e-05, "loss": 2.7521, "step": 23855 }, { "epoch": 1.8842668456693845, "grad_norm": 0.08664309119338216, "learning_rate": 1.014660556376068e-05, "loss": 2.7108, "step": 23860 }, { "epoch": 1.8846617046060294, "grad_norm": 0.07693791504973585, "learning_rate": 1.0077649868159622e-05, "loss": 2.763, "step": 23865 }, { "epoch": 1.8850565635426744, "grad_norm": 0.09157965346183627, "learning_rate": 1.0008926896051707e-05, "loss": 2.9039, "step": 23870 }, { "epoch": 1.8854514224793193, "grad_norm": 0.07537821380001912, "learning_rate": 9.94043668008171e-06, "loss": 2.7036, "step": 23875 }, { "epoch": 1.8858462814159642, "grad_norm": 0.0906480952055381, "learning_rate": 9.872179252784053e-06, "loss": 2.6241, "step": 23880 }, { "epoch": 1.8862411403526091, "grad_norm": 0.07883369109658066, "learning_rate": 9.804154646582352e-06, "loss": 2.83, "step": 23885 }, { "epoch": 1.886635999289254, "grad_norm": 0.08726189473033132, "learning_rate": 9.736362893789818e-06, "loss": 2.643, "step": 23890 }, { "epoch": 1.8870308582258988, "grad_norm": 0.07865684391006612, "learning_rate": 9.668804026608968e-06, "loss": 2.638, "step": 23895 }, { "epoch": 1.8874257171625437, "grad_norm": 0.07662204145091442, "learning_rate": 9.601478077131631e-06, "loss": 2.6884, "step": 23900 }, { "epoch": 1.8878205760991884, "grad_norm": 0.07388252884100656, "learning_rate": 9.534385077339169e-06, "loss": 2.6458, "step": 23905 }, { "epoch": 1.8882154350358333, "grad_norm": 0.07792558139314947, "learning_rate": 9.467525059102033e-06, "loss": 2.736, "step": 23910 }, { "epoch": 1.8886102939724783, "grad_norm": 0.07487346910125088, "learning_rate": 9.400898054180262e-06, "loss": 2.6499, "step": 23915 }, { "epoch": 1.8890051529091232, "grad_norm": 0.11139984344903944, "learning_rate": 9.33450409422304e-06, "loss": 2.8312, "step": 23920 }, { "epoch": 1.8894000118457681, "grad_norm": 0.09760601808693481, "learning_rate": 9.268343210768914e-06, "loss": 2.7892, "step": 23925 }, { "epoch": 1.889794870782413, "grad_norm": 0.07220266261000702, "learning_rate": 9.202415435245692e-06, "loss": 2.8334, "step": 23930 }, { "epoch": 1.890189729719058, "grad_norm": 0.07564444501328213, "learning_rate": 9.136720798970488e-06, "loss": 2.815, "step": 23935 }, { "epoch": 1.890584588655703, "grad_norm": 0.07335291001864817, "learning_rate": 9.071259333149729e-06, "loss": 2.7865, "step": 23940 }, { "epoch": 1.8909794475923476, "grad_norm": 0.08359300519191358, "learning_rate": 9.006031068878816e-06, "loss": 2.7103, "step": 23945 }, { "epoch": 1.8913743065289925, "grad_norm": 0.07084210147011713, "learning_rate": 8.941036037142691e-06, "loss": 2.7249, "step": 23950 }, { "epoch": 1.8917691654656374, "grad_norm": 0.07231624170333395, "learning_rate": 8.876274268815266e-06, "loss": 2.7349, "step": 23955 }, { "epoch": 1.8921640244022822, "grad_norm": 0.08864117643281896, "learning_rate": 8.81174579465982e-06, "loss": 2.672, "step": 23960 }, { "epoch": 1.892558883338927, "grad_norm": 0.0716015391715051, "learning_rate": 8.74745064532878e-06, "loss": 2.7243, "step": 23965 }, { "epoch": 1.892953742275572, "grad_norm": 0.08533358216620035, "learning_rate": 8.68338885136366e-06, "loss": 2.6518, "step": 23970 }, { "epoch": 1.893348601212217, "grad_norm": 0.07641124307602568, "learning_rate": 8.619560443195174e-06, "loss": 2.6605, "step": 23975 }, { "epoch": 1.8937434601488619, "grad_norm": 0.07243311939352388, "learning_rate": 8.555965451143122e-06, "loss": 2.6094, "step": 23980 }, { "epoch": 1.8941383190855068, "grad_norm": 0.06654367072825267, "learning_rate": 8.492603905416562e-06, "loss": 2.7498, "step": 23985 }, { "epoch": 1.8945331780221517, "grad_norm": 0.07641881797096237, "learning_rate": 8.429475836113476e-06, "loss": 2.6219, "step": 23990 }, { "epoch": 1.8949280369587966, "grad_norm": 0.07388435800054635, "learning_rate": 8.366581273221152e-06, "loss": 2.6425, "step": 23995 }, { "epoch": 1.8953228958954413, "grad_norm": 0.09398562135298047, "learning_rate": 8.303920246615692e-06, "loss": 2.698, "step": 24000 }, { "epoch": 1.8957177548320863, "grad_norm": 0.07653629898397889, "learning_rate": 8.24149278606251e-06, "loss": 2.6619, "step": 24005 }, { "epoch": 1.896112613768731, "grad_norm": 0.08515326130724889, "learning_rate": 8.179298921215994e-06, "loss": 2.7291, "step": 24010 }, { "epoch": 1.896507472705376, "grad_norm": 0.08481865064717349, "learning_rate": 8.117338681619457e-06, "loss": 2.7334, "step": 24015 }, { "epoch": 1.8969023316420208, "grad_norm": 0.07289307804551258, "learning_rate": 8.055612096705412e-06, "loss": 2.8558, "step": 24020 }, { "epoch": 1.8972971905786657, "grad_norm": 0.07586060685522941, "learning_rate": 7.994119195795236e-06, "loss": 2.7823, "step": 24025 }, { "epoch": 1.8976920495153107, "grad_norm": 0.11054579814394934, "learning_rate": 7.932860008099341e-06, "loss": 2.7298, "step": 24030 }, { "epoch": 1.8980869084519556, "grad_norm": 0.0794504916248923, "learning_rate": 7.871834562717229e-06, "loss": 2.6616, "step": 24035 }, { "epoch": 1.8984817673886005, "grad_norm": 0.08295567315949257, "learning_rate": 7.81104288863721e-06, "loss": 2.7961, "step": 24040 }, { "epoch": 1.8988766263252455, "grad_norm": 0.07160214674875182, "learning_rate": 7.75048501473663e-06, "loss": 2.6068, "step": 24045 }, { "epoch": 1.8992714852618902, "grad_norm": 0.08362888278781593, "learning_rate": 7.690160969781757e-06, "loss": 2.7822, "step": 24050 }, { "epoch": 1.899666344198535, "grad_norm": 0.0742306880725671, "learning_rate": 7.630070782427779e-06, "loss": 2.7566, "step": 24055 }, { "epoch": 1.90006120313518, "grad_norm": 0.09771541333484375, "learning_rate": 7.570214481218863e-06, "loss": 2.9685, "step": 24060 }, { "epoch": 1.9004560620718247, "grad_norm": 0.07964446193428228, "learning_rate": 7.510592094587987e-06, "loss": 2.7365, "step": 24065 }, { "epoch": 1.9008509210084696, "grad_norm": 0.07468797728509038, "learning_rate": 7.451203650856997e-06, "loss": 2.7116, "step": 24070 }, { "epoch": 1.9012457799451146, "grad_norm": 0.07653683344576702, "learning_rate": 7.392049178236715e-06, "loss": 2.7977, "step": 24075 }, { "epoch": 1.9016406388817595, "grad_norm": 0.08380467435854973, "learning_rate": 7.3331287048267745e-06, "loss": 2.6874, "step": 24080 }, { "epoch": 1.9020354978184044, "grad_norm": 0.07526109140718211, "learning_rate": 7.274442258615566e-06, "loss": 2.8226, "step": 24085 }, { "epoch": 1.9024303567550493, "grad_norm": 0.07262574152868298, "learning_rate": 7.215989867480566e-06, "loss": 2.7651, "step": 24090 }, { "epoch": 1.9028252156916943, "grad_norm": 0.1468881039018909, "learning_rate": 7.1577715591877315e-06, "loss": 2.9811, "step": 24095 }, { "epoch": 1.903220074628339, "grad_norm": 0.10563223275209309, "learning_rate": 7.099787361392107e-06, "loss": 2.8372, "step": 24100 }, { "epoch": 1.903614933564984, "grad_norm": 0.09183343475155882, "learning_rate": 7.042037301637327e-06, "loss": 2.7148, "step": 24105 }, { "epoch": 1.9040097925016288, "grad_norm": 0.7803829548026732, "learning_rate": 6.984521407355948e-06, "loss": 2.7109, "step": 24110 }, { "epoch": 1.9044046514382735, "grad_norm": 0.07405439906525761, "learning_rate": 6.927239705869226e-06, "loss": 2.7169, "step": 24115 }, { "epoch": 1.9047995103749185, "grad_norm": 0.08988575107790145, "learning_rate": 6.8701922243871725e-06, "loss": 2.8639, "step": 24120 }, { "epoch": 1.9051943693115634, "grad_norm": 0.08259644216166473, "learning_rate": 6.813378990008556e-06, "loss": 2.8885, "step": 24125 }, { "epoch": 1.9055892282482083, "grad_norm": 0.07782018384563479, "learning_rate": 6.756800029720789e-06, "loss": 2.7114, "step": 24130 }, { "epoch": 1.9059840871848532, "grad_norm": 0.07414658233127049, "learning_rate": 6.7004553704002605e-06, "loss": 2.6652, "step": 24135 }, { "epoch": 1.9063789461214982, "grad_norm": 0.07062393972734503, "learning_rate": 6.644345038811672e-06, "loss": 2.695, "step": 24140 }, { "epoch": 1.906773805058143, "grad_norm": 0.08144887303383952, "learning_rate": 6.588469061608815e-06, "loss": 2.8441, "step": 24145 }, { "epoch": 1.907168663994788, "grad_norm": 0.07690986960826707, "learning_rate": 6.532827465333735e-06, "loss": 2.7757, "step": 24150 }, { "epoch": 1.9075635229314327, "grad_norm": 0.1256920488522962, "learning_rate": 6.477420276417567e-06, "loss": 2.8101, "step": 24155 }, { "epoch": 1.9079583818680776, "grad_norm": 0.07769495109921315, "learning_rate": 6.422247521179814e-06, "loss": 2.7621, "step": 24160 }, { "epoch": 1.9083532408047224, "grad_norm": 0.08885666665218923, "learning_rate": 6.367309225828733e-06, "loss": 2.5878, "step": 24165 }, { "epoch": 1.9087480997413673, "grad_norm": 0.11755691169373937, "learning_rate": 6.31260541646117e-06, "loss": 2.8992, "step": 24170 }, { "epoch": 1.9091429586780122, "grad_norm": 0.07092031881927206, "learning_rate": 6.258136119062563e-06, "loss": 2.8904, "step": 24175 }, { "epoch": 1.9095378176146571, "grad_norm": 0.0774054307849344, "learning_rate": 6.203901359506991e-06, "loss": 2.575, "step": 24180 }, { "epoch": 1.909932676551302, "grad_norm": 0.0748661718375051, "learning_rate": 6.149901163557125e-06, "loss": 2.7205, "step": 24185 }, { "epoch": 1.910327535487947, "grad_norm": 0.08807739583364517, "learning_rate": 6.096135556864279e-06, "loss": 2.7449, "step": 24190 }, { "epoch": 1.910722394424592, "grad_norm": 0.12483222184121842, "learning_rate": 6.0426045649680795e-06, "loss": 2.9791, "step": 24195 }, { "epoch": 1.9111172533612368, "grad_norm": 0.07871847968861795, "learning_rate": 5.989308213297018e-06, "loss": 2.8437, "step": 24200 }, { "epoch": 1.9115121122978815, "grad_norm": 0.0773823922115625, "learning_rate": 5.93624652716801e-06, "loss": 2.7999, "step": 24205 }, { "epoch": 1.9119069712345265, "grad_norm": 0.11476792331200004, "learning_rate": 5.883419531786338e-06, "loss": 2.6866, "step": 24210 }, { "epoch": 1.9123018301711714, "grad_norm": 0.09121358372284201, "learning_rate": 5.830827252246096e-06, "loss": 2.7481, "step": 24215 }, { "epoch": 1.912696689107816, "grad_norm": 0.07885671413594177, "learning_rate": 5.778469713529577e-06, "loss": 2.6221, "step": 24220 }, { "epoch": 1.913091548044461, "grad_norm": 0.09564935534563482, "learning_rate": 5.726346940507887e-06, "loss": 2.9226, "step": 24225 }, { "epoch": 1.913486406981106, "grad_norm": 0.0723615215153162, "learning_rate": 5.674458957940332e-06, "loss": 2.6547, "step": 24230 }, { "epoch": 1.9138812659177509, "grad_norm": 0.09645183078833831, "learning_rate": 5.622805790474805e-06, "loss": 2.7677, "step": 24235 }, { "epoch": 1.9142761248543958, "grad_norm": 0.08390100703904636, "learning_rate": 5.571387462647792e-06, "loss": 2.8554, "step": 24240 }, { "epoch": 1.9146709837910407, "grad_norm": 0.08929466235887087, "learning_rate": 5.520203998883866e-06, "loss": 2.9658, "step": 24245 }, { "epoch": 1.9150658427276857, "grad_norm": 0.07165633997193073, "learning_rate": 5.469255423496466e-06, "loss": 2.8456, "step": 24250 }, { "epoch": 1.9154607016643306, "grad_norm": 0.07337136774637891, "learning_rate": 5.418541760687124e-06, "loss": 2.8308, "step": 24255 }, { "epoch": 1.9158555606009753, "grad_norm": 0.08781144000377682, "learning_rate": 5.3680630345459e-06, "loss": 2.7823, "step": 24260 }, { "epoch": 1.9162504195376202, "grad_norm": 0.12071295677588502, "learning_rate": 5.317819269051338e-06, "loss": 2.6296, "step": 24265 }, { "epoch": 1.916645278474265, "grad_norm": 0.07349032602020748, "learning_rate": 5.26781048807018e-06, "loss": 2.6812, "step": 24270 }, { "epoch": 1.9170401374109098, "grad_norm": 0.11110291534306499, "learning_rate": 5.21803671535781e-06, "loss": 2.7879, "step": 24275 }, { "epoch": 1.9174349963475548, "grad_norm": 0.07244475494075854, "learning_rate": 5.168497974557651e-06, "loss": 2.7226, "step": 24280 }, { "epoch": 1.9178298552841997, "grad_norm": 0.07371436981465995, "learning_rate": 5.1191942892017675e-06, "loss": 2.7606, "step": 24285 }, { "epoch": 1.9182247142208446, "grad_norm": 0.07165151809291921, "learning_rate": 5.07012568271048e-06, "loss": 2.726, "step": 24290 }, { "epoch": 1.9186195731574895, "grad_norm": 0.08289940039874973, "learning_rate": 5.021292178392312e-06, "loss": 2.6042, "step": 24295 }, { "epoch": 1.9190144320941345, "grad_norm": 0.0853133347247558, "learning_rate": 4.972693799444206e-06, "loss": 2.6416, "step": 24300 }, { "epoch": 1.9194092910307794, "grad_norm": 0.07338770644163133, "learning_rate": 4.924330568951529e-06, "loss": 2.7616, "step": 24305 }, { "epoch": 1.919804149967424, "grad_norm": 0.10554414596779178, "learning_rate": 4.876202509887795e-06, "loss": 2.9338, "step": 24310 }, { "epoch": 1.920199008904069, "grad_norm": 0.07908327070108233, "learning_rate": 4.828309645114826e-06, "loss": 2.8074, "step": 24315 }, { "epoch": 1.920593867840714, "grad_norm": 0.0812050016815435, "learning_rate": 4.7806519973827565e-06, "loss": 2.7786, "step": 24320 }, { "epoch": 1.9209887267773587, "grad_norm": 0.07032866717848792, "learning_rate": 4.733229589329979e-06, "loss": 2.8404, "step": 24325 }, { "epoch": 1.9213835857140036, "grad_norm": 0.07259412585689687, "learning_rate": 4.686042443483196e-06, "loss": 2.8092, "step": 24330 }, { "epoch": 1.9217784446506485, "grad_norm": 0.07381100454347934, "learning_rate": 4.639090582257199e-06, "loss": 2.7448, "step": 24335 }, { "epoch": 1.9221733035872934, "grad_norm": 0.07658871280160819, "learning_rate": 4.592374027955259e-06, "loss": 2.636, "step": 24340 }, { "epoch": 1.9225681625239384, "grad_norm": 0.07093627437534325, "learning_rate": 4.54589280276857e-06, "loss": 2.5869, "step": 24345 }, { "epoch": 1.9229630214605833, "grad_norm": 0.07320364084990547, "learning_rate": 4.499646928776746e-06, "loss": 2.6784, "step": 24350 }, { "epoch": 1.9233578803972282, "grad_norm": 0.09876989267210005, "learning_rate": 4.453636427947605e-06, "loss": 2.6725, "step": 24355 }, { "epoch": 1.923752739333873, "grad_norm": 0.07569332341480714, "learning_rate": 4.407861322137107e-06, "loss": 2.6226, "step": 24360 }, { "epoch": 1.9241475982705178, "grad_norm": 0.08376785587237294, "learning_rate": 4.362321633089361e-06, "loss": 2.8529, "step": 24365 }, { "epoch": 1.9245424572071628, "grad_norm": 0.07196540506315549, "learning_rate": 4.317017382436672e-06, "loss": 2.7406, "step": 24370 }, { "epoch": 1.9249373161438075, "grad_norm": 0.07942019305911092, "learning_rate": 4.271948591699604e-06, "loss": 2.747, "step": 24375 }, { "epoch": 1.9253321750804524, "grad_norm": 0.07133639523793824, "learning_rate": 4.227115282286698e-06, "loss": 2.7539, "step": 24380 }, { "epoch": 1.9257270340170973, "grad_norm": 0.07071911638536241, "learning_rate": 4.182517475494751e-06, "loss": 2.7288, "step": 24385 }, { "epoch": 1.9261218929537423, "grad_norm": 0.07535474165975227, "learning_rate": 4.138155192508708e-06, "loss": 2.769, "step": 24390 }, { "epoch": 1.9265167518903872, "grad_norm": 0.07540592450866726, "learning_rate": 4.094028454401488e-06, "loss": 2.7356, "step": 24395 }, { "epoch": 1.926911610827032, "grad_norm": 0.07723418051581755, "learning_rate": 4.050137282134325e-06, "loss": 2.65, "step": 24400 }, { "epoch": 1.927306469763677, "grad_norm": 0.12767164548326257, "learning_rate": 4.00648169655643e-06, "loss": 2.7369, "step": 24405 }, { "epoch": 1.927701328700322, "grad_norm": 0.07240981922523305, "learning_rate": 3.963061718405103e-06, "loss": 2.8214, "step": 24410 }, { "epoch": 1.9280961876369667, "grad_norm": 0.07575385201173272, "learning_rate": 3.919877368305791e-06, "loss": 2.7626, "step": 24415 }, { "epoch": 1.9284910465736116, "grad_norm": 0.12974313959550754, "learning_rate": 3.876928666771917e-06, "loss": 2.6975, "step": 24420 }, { "epoch": 1.9288859055102565, "grad_norm": 0.0822041777865161, "learning_rate": 3.834215634205163e-06, "loss": 2.7092, "step": 24425 }, { "epoch": 1.9292807644469012, "grad_norm": 0.08732564757188106, "learning_rate": 3.7917382908949083e-06, "loss": 2.6024, "step": 24430 }, { "epoch": 1.9296756233835461, "grad_norm": 0.1173501106292977, "learning_rate": 3.7494966570190137e-06, "loss": 2.639, "step": 24435 }, { "epoch": 1.930070482320191, "grad_norm": 0.08919708078275897, "learning_rate": 3.7074907526429835e-06, "loss": 2.8486, "step": 24440 }, { "epoch": 1.930465341256836, "grad_norm": 0.08041669555376323, "learning_rate": 3.665720597720579e-06, "loss": 2.7525, "step": 24445 }, { "epoch": 1.930860200193481, "grad_norm": 0.0825809375385202, "learning_rate": 3.624186212093483e-06, "loss": 2.958, "step": 24450 }, { "epoch": 1.9312550591301259, "grad_norm": 0.09301550123672254, "learning_rate": 3.5828876154915257e-06, "loss": 2.795, "step": 24455 }, { "epoch": 1.9316499180667708, "grad_norm": 0.09270184638435369, "learning_rate": 3.541824827532236e-06, "loss": 2.6186, "step": 24460 }, { "epoch": 1.9320447770034155, "grad_norm": 0.07353638704633121, "learning_rate": 3.500997867721345e-06, "loss": 2.736, "step": 24465 }, { "epoch": 1.9324396359400604, "grad_norm": 0.08370974129407874, "learning_rate": 3.4604067554526718e-06, "loss": 2.7493, "step": 24470 }, { "epoch": 1.9328344948767053, "grad_norm": 0.06827256180845145, "learning_rate": 3.420051510007738e-06, "loss": 2.6015, "step": 24475 }, { "epoch": 1.93322935381335, "grad_norm": 0.07732525448718137, "learning_rate": 3.3799321505560974e-06, "loss": 2.7513, "step": 24480 }, { "epoch": 1.933624212749995, "grad_norm": 0.08357562410089053, "learning_rate": 3.3400486961553956e-06, "loss": 2.8386, "step": 24485 }, { "epoch": 1.93401907168664, "grad_norm": 0.0738787753342308, "learning_rate": 3.3004011657510324e-06, "loss": 2.791, "step": 24490 }, { "epoch": 1.9344139306232848, "grad_norm": 0.07154568124690865, "learning_rate": 3.2609895781764987e-06, "loss": 2.6338, "step": 24495 }, { "epoch": 1.9348087895599297, "grad_norm": 0.07559030442561479, "learning_rate": 3.2218139521530966e-06, "loss": 2.6626, "step": 24500 }, { "epoch": 1.9352036484965747, "grad_norm": 0.07946926624385636, "learning_rate": 3.182874306290051e-06, "loss": 2.6729, "step": 24505 }, { "epoch": 1.9355985074332196, "grad_norm": 0.08160012405864821, "learning_rate": 3.1441706590845664e-06, "loss": 2.6384, "step": 24510 }, { "epoch": 1.9359933663698645, "grad_norm": 0.07896030006322499, "learning_rate": 3.1057030289217135e-06, "loss": 2.6333, "step": 24515 }, { "epoch": 1.9363882253065092, "grad_norm": 0.07415011959227098, "learning_rate": 3.0674714340743206e-06, "loss": 2.7443, "step": 24520 }, { "epoch": 1.9367830842431542, "grad_norm": 0.07528157645973499, "learning_rate": 3.0294758927033594e-06, "loss": 2.7149, "step": 24525 }, { "epoch": 1.9371779431797989, "grad_norm": 0.0775850000434117, "learning_rate": 2.9917164228573936e-06, "loss": 2.7176, "step": 24530 }, { "epoch": 1.9375728021164438, "grad_norm": 0.0772814118860802, "learning_rate": 2.954193042473019e-06, "loss": 2.7034, "step": 24535 }, { "epoch": 1.9379676610530887, "grad_norm": 0.08880123386871687, "learning_rate": 2.916905769374645e-06, "loss": 2.7884, "step": 24540 }, { "epoch": 1.9383625199897336, "grad_norm": 0.08602631914427268, "learning_rate": 2.8798546212744914e-06, "loss": 2.8936, "step": 24545 }, { "epoch": 1.9387573789263786, "grad_norm": 0.11057743317945608, "learning_rate": 2.8430396157727024e-06, "loss": 2.8649, "step": 24550 }, { "epoch": 1.9391522378630235, "grad_norm": 0.07319011867786818, "learning_rate": 2.8064607703570667e-06, "loss": 2.6642, "step": 24555 }, { "epoch": 1.9395470967996684, "grad_norm": 0.0762085139283887, "learning_rate": 2.7701181024034073e-06, "loss": 2.7522, "step": 24560 }, { "epoch": 1.9399419557363133, "grad_norm": 0.07401847002383842, "learning_rate": 2.7340116291752483e-06, "loss": 2.9216, "step": 24565 }, { "epoch": 1.940336814672958, "grad_norm": 0.07469171989361391, "learning_rate": 2.69814136782387e-06, "loss": 2.7539, "step": 24570 }, { "epoch": 1.940731673609603, "grad_norm": 0.07274679963867973, "learning_rate": 2.6625073353884755e-06, "loss": 2.8014, "step": 24575 }, { "epoch": 1.941126532546248, "grad_norm": 0.0760038299210634, "learning_rate": 2.6271095487959142e-06, "loss": 2.7126, "step": 24580 }, { "epoch": 1.9415213914828926, "grad_norm": 0.07042575976651143, "learning_rate": 2.591948024860957e-06, "loss": 2.6062, "step": 24585 }, { "epoch": 1.9419162504195375, "grad_norm": 0.07920779795104264, "learning_rate": 2.557022780286078e-06, "loss": 2.785, "step": 24590 }, { "epoch": 1.9423111093561825, "grad_norm": 0.07641481013988867, "learning_rate": 2.5223338316614497e-06, "loss": 2.6867, "step": 24595 }, { "epoch": 1.9427059682928274, "grad_norm": 0.09240267602124376, "learning_rate": 2.4878811954650584e-06, "loss": 2.7602, "step": 24600 }, { "epoch": 1.9431008272294723, "grad_norm": 0.08451077758368135, "learning_rate": 2.453664888062701e-06, "loss": 2.8836, "step": 24605 }, { "epoch": 1.9434956861661172, "grad_norm": 0.10389408594540528, "learning_rate": 2.4196849257077656e-06, "loss": 2.7368, "step": 24610 }, { "epoch": 1.9438905451027622, "grad_norm": 0.07965033126641885, "learning_rate": 2.385941324541507e-06, "loss": 2.6722, "step": 24615 }, { "epoch": 1.944285404039407, "grad_norm": 0.07353205248604955, "learning_rate": 2.3524341005928262e-06, "loss": 2.9683, "step": 24620 }, { "epoch": 1.9446802629760518, "grad_norm": 0.09932082080692763, "learning_rate": 2.3191632697783795e-06, "loss": 2.6635, "step": 24625 }, { "epoch": 1.9450751219126967, "grad_norm": 0.07525454947301137, "learning_rate": 2.28612884790258e-06, "loss": 2.9801, "step": 24630 }, { "epoch": 1.9454699808493414, "grad_norm": 0.07437725871490934, "learning_rate": 2.2533308506573204e-06, "loss": 2.6616, "step": 24635 }, { "epoch": 1.9458648397859863, "grad_norm": 0.07084542418794892, "learning_rate": 2.2207692936224713e-06, "loss": 2.6859, "step": 24640 }, { "epoch": 1.9462596987226313, "grad_norm": 0.0730060167628626, "learning_rate": 2.1884441922654376e-06, "loss": 2.8669, "step": 24645 }, { "epoch": 1.9466545576592762, "grad_norm": 0.09059117696037353, "learning_rate": 2.1563555619413254e-06, "loss": 2.7659, "step": 24650 }, { "epoch": 1.9470494165959211, "grad_norm": 0.07348302640729523, "learning_rate": 2.124503417892887e-06, "loss": 2.7062, "step": 24655 }, { "epoch": 1.947444275532566, "grad_norm": 0.07995367617647985, "learning_rate": 2.0928877752505737e-06, "loss": 2.7154, "step": 24660 }, { "epoch": 1.947839134469211, "grad_norm": 0.09808235947868552, "learning_rate": 2.061508649032484e-06, "loss": 2.7837, "step": 24665 }, { "epoch": 1.948233993405856, "grad_norm": 0.07118903279239089, "learning_rate": 2.0303660541443615e-06, "loss": 2.978, "step": 24670 }, { "epoch": 1.9486288523425006, "grad_norm": 0.12833197645534786, "learning_rate": 1.9994600053796496e-06, "loss": 2.6203, "step": 24675 }, { "epoch": 1.9490237112791455, "grad_norm": 0.07785861088924108, "learning_rate": 1.968790517419328e-06, "loss": 2.6134, "step": 24680 }, { "epoch": 1.9494185702157905, "grad_norm": 0.07773716941966659, "learning_rate": 1.938357604832075e-06, "loss": 2.784, "step": 24685 }, { "epoch": 1.9498134291524352, "grad_norm": 0.08871603614693542, "learning_rate": 1.9081612820741613e-06, "loss": 2.8523, "step": 24690 }, { "epoch": 1.95020828808908, "grad_norm": 0.06972207324958245, "learning_rate": 1.8782015634894456e-06, "loss": 2.7868, "step": 24695 }, { "epoch": 1.950603147025725, "grad_norm": 0.07738757683692282, "learning_rate": 1.8484784633094886e-06, "loss": 2.734, "step": 24700 }, { "epoch": 1.95099800596237, "grad_norm": 0.10080007781064398, "learning_rate": 1.8189919956533296e-06, "loss": 2.5643, "step": 24705 }, { "epoch": 1.9513928648990149, "grad_norm": 0.08921274166969667, "learning_rate": 1.7897421745277643e-06, "loss": 2.8086, "step": 24710 }, { "epoch": 1.9517877238356598, "grad_norm": 0.07379455989253768, "learning_rate": 1.760729013826956e-06, "loss": 2.6902, "step": 24715 }, { "epoch": 1.9521825827723047, "grad_norm": 0.0909250818888278, "learning_rate": 1.7319525273328807e-06, "loss": 2.898, "step": 24720 }, { "epoch": 1.9525774417089494, "grad_norm": 0.08339214641685365, "learning_rate": 1.7034127287148815e-06, "loss": 2.6421, "step": 24725 }, { "epoch": 1.9529723006455944, "grad_norm": 0.09757727020565943, "learning_rate": 1.6751096315300029e-06, "loss": 2.7969, "step": 24730 }, { "epoch": 1.9533671595822393, "grad_norm": 0.11125022290136342, "learning_rate": 1.6470432492228793e-06, "loss": 2.8129, "step": 24735 }, { "epoch": 1.953762018518884, "grad_norm": 0.07004614827208423, "learning_rate": 1.6192135951255127e-06, "loss": 2.7066, "step": 24740 }, { "epoch": 1.954156877455529, "grad_norm": 0.08180731818714876, "learning_rate": 1.5916206824577173e-06, "loss": 2.629, "step": 24745 }, { "epoch": 1.9545517363921738, "grad_norm": 0.08697058723534679, "learning_rate": 1.564264524326564e-06, "loss": 2.6926, "step": 24750 }, { "epoch": 1.9549465953288188, "grad_norm": 0.07117803956036921, "learning_rate": 1.537145133726936e-06, "loss": 2.5379, "step": 24755 }, { "epoch": 1.9553414542654637, "grad_norm": 0.07414890057638254, "learning_rate": 1.5102625235410284e-06, "loss": 2.7447, "step": 24760 }, { "epoch": 1.9557363132021086, "grad_norm": 0.07444083419146257, "learning_rate": 1.4836167065387374e-06, "loss": 2.8873, "step": 24765 }, { "epoch": 1.9561311721387535, "grad_norm": 0.08199009998876006, "learning_rate": 1.4572076953773272e-06, "loss": 2.8924, "step": 24770 }, { "epoch": 1.9565260310753985, "grad_norm": 0.07017518095333725, "learning_rate": 1.431035502601652e-06, "loss": 2.5672, "step": 24775 }, { "epoch": 1.9569208900120432, "grad_norm": 0.06977996470724532, "learning_rate": 1.4051001406440444e-06, "loss": 2.8598, "step": 24780 }, { "epoch": 1.957315748948688, "grad_norm": 0.07399699613247768, "learning_rate": 1.3794016218243722e-06, "loss": 2.6987, "step": 24785 }, { "epoch": 1.9577106078853328, "grad_norm": 0.07208754616736676, "learning_rate": 1.3539399583499256e-06, "loss": 2.6232, "step": 24790 }, { "epoch": 1.9581054668219777, "grad_norm": 0.0969905096374674, "learning_rate": 1.3287151623156414e-06, "loss": 2.662, "step": 24795 }, { "epoch": 1.9585003257586227, "grad_norm": 0.079608320527157, "learning_rate": 1.303727245703712e-06, "loss": 2.7224, "step": 24800 }, { "epoch": 1.9588951846952676, "grad_norm": 0.07709506157614698, "learning_rate": 1.2789762203840317e-06, "loss": 2.912, "step": 24805 }, { "epoch": 1.9592900436319125, "grad_norm": 0.12550482570352692, "learning_rate": 1.2544620981137512e-06, "loss": 3.0182, "step": 24810 }, { "epoch": 1.9596849025685574, "grad_norm": 0.08594193453201042, "learning_rate": 1.2301848905377223e-06, "loss": 2.5471, "step": 24815 }, { "epoch": 1.9600797615052024, "grad_norm": 0.07429468726159091, "learning_rate": 1.2061446091880535e-06, "loss": 2.6766, "step": 24820 }, { "epoch": 1.9604746204418473, "grad_norm": 0.08921539625077582, "learning_rate": 1.1823412654843878e-06, "loss": 2.8314, "step": 24825 }, { "epoch": 1.960869479378492, "grad_norm": 0.0902402450559629, "learning_rate": 1.1587748707338475e-06, "loss": 2.6649, "step": 24830 }, { "epoch": 1.961264338315137, "grad_norm": 0.08137008504297588, "learning_rate": 1.1354454361309219e-06, "loss": 2.6954, "step": 24835 }, { "epoch": 1.9616591972517818, "grad_norm": 0.07899336102541107, "learning_rate": 1.1123529727576909e-06, "loss": 2.8025, "step": 24840 }, { "epoch": 1.9620540561884265, "grad_norm": 0.07807002911507947, "learning_rate": 1.0894974915834355e-06, "loss": 2.5862, "step": 24845 }, { "epoch": 1.9624489151250715, "grad_norm": 0.08582534164024529, "learning_rate": 1.0668790034650267e-06, "loss": 2.91, "step": 24850 }, { "epoch": 1.9628437740617164, "grad_norm": 0.08608977340404482, "learning_rate": 1.0444975191467587e-06, "loss": 2.8229, "step": 24855 }, { "epoch": 1.9632386329983613, "grad_norm": 0.08319412426046184, "learning_rate": 1.0223530492603495e-06, "loss": 2.9028, "step": 24860 }, { "epoch": 1.9636334919350062, "grad_norm": 0.08338277329567184, "learning_rate": 1.000445604324829e-06, "loss": 2.5681, "step": 24865 }, { "epoch": 1.9640283508716512, "grad_norm": 0.08503823982476681, "learning_rate": 9.78775194746706e-07, "loss": 2.71, "step": 24870 }, { "epoch": 1.964423209808296, "grad_norm": 0.07241549101552638, "learning_rate": 9.573418308198578e-07, "loss": 2.6777, "step": 24875 }, { "epoch": 1.964818068744941, "grad_norm": 0.07280646587845142, "learning_rate": 9.361455227256399e-07, "loss": 2.7988, "step": 24880 }, { "epoch": 1.9652129276815857, "grad_norm": 0.07310716587503663, "learning_rate": 9.151862805327204e-07, "loss": 2.7609, "step": 24885 }, { "epoch": 1.9656077866182307, "grad_norm": 0.0835899441396717, "learning_rate": 8.94464114197191e-07, "loss": 2.8525, "step": 24890 }, { "epoch": 1.9660026455548754, "grad_norm": 0.0873811219537528, "learning_rate": 8.739790335625109e-07, "loss": 2.5992, "step": 24895 }, { "epoch": 1.9663975044915203, "grad_norm": 0.07635472236171058, "learning_rate": 8.537310483595628e-07, "loss": 2.5741, "step": 24900 }, { "epoch": 1.9667923634281652, "grad_norm": 0.07174601775977947, "learning_rate": 8.337201682064866e-07, "loss": 2.7259, "step": 24905 }, { "epoch": 1.9671872223648101, "grad_norm": 0.09756673097301995, "learning_rate": 8.13946402608956e-07, "loss": 2.7964, "step": 24910 }, { "epoch": 1.967582081301455, "grad_norm": 0.08590387442158547, "learning_rate": 7.944097609599021e-07, "loss": 2.7385, "step": 24915 }, { "epoch": 1.9679769402381, "grad_norm": 0.07917716846720398, "learning_rate": 7.751102525396791e-07, "loss": 2.7572, "step": 24920 }, { "epoch": 1.968371799174745, "grad_norm": 0.07103853273183494, "learning_rate": 7.560478865158426e-07, "loss": 2.9733, "step": 24925 }, { "epoch": 1.9687666581113898, "grad_norm": 0.07409288884899382, "learning_rate": 7.372226719435383e-07, "loss": 2.6989, "step": 24930 }, { "epoch": 1.9691615170480345, "grad_norm": 0.09357878278261646, "learning_rate": 7.186346177651127e-07, "loss": 2.6758, "step": 24935 }, { "epoch": 1.9695563759846795, "grad_norm": 0.07375351882860702, "learning_rate": 7.002837328102807e-07, "loss": 2.7916, "step": 24940 }, { "epoch": 1.9699512349213244, "grad_norm": 0.07992538832770203, "learning_rate": 6.821700257960694e-07, "loss": 2.6161, "step": 24945 }, { "epoch": 1.970346093857969, "grad_norm": 0.10240202697315577, "learning_rate": 6.642935053269294e-07, "loss": 2.7734, "step": 24950 }, { "epoch": 1.970740952794614, "grad_norm": 0.07189323889590259, "learning_rate": 6.46654179894568e-07, "loss": 2.7252, "step": 24955 }, { "epoch": 1.971135811731259, "grad_norm": 0.07614527513905872, "learning_rate": 6.292520578780048e-07, "loss": 2.609, "step": 24960 }, { "epoch": 1.9715306706679039, "grad_norm": 0.08619856425360374, "learning_rate": 6.120871475436273e-07, "loss": 2.7245, "step": 24965 }, { "epoch": 1.9719255296045488, "grad_norm": 0.07739861405924951, "learning_rate": 5.951594570451358e-07, "loss": 2.7971, "step": 24970 }, { "epoch": 1.9723203885411937, "grad_norm": 0.07063211903164894, "learning_rate": 5.784689944235977e-07, "loss": 2.6702, "step": 24975 }, { "epoch": 1.9727152474778387, "grad_norm": 0.07397402327677972, "learning_rate": 5.620157676072824e-07, "loss": 2.6392, "step": 24980 }, { "epoch": 1.9731101064144836, "grad_norm": 0.08193245966708008, "learning_rate": 5.457997844118268e-07, "loss": 2.7895, "step": 24985 }, { "epoch": 1.9735049653511283, "grad_norm": 0.08249083719973176, "learning_rate": 5.298210525401248e-07, "loss": 2.816, "step": 24990 }, { "epoch": 1.9738998242877732, "grad_norm": 0.08719521073835113, "learning_rate": 5.140795795824938e-07, "loss": 2.8367, "step": 24995 }, { "epoch": 1.974294683224418, "grad_norm": 0.08025100437317612, "learning_rate": 4.985753730164521e-07, "loss": 2.6574, "step": 25000 }, { "epoch": 1.9746895421610628, "grad_norm": 0.07647947625527814, "learning_rate": 4.833084402067756e-07, "loss": 2.7066, "step": 25005 }, { "epoch": 1.9750844010977078, "grad_norm": 0.09964906175255048, "learning_rate": 4.6827878840560724e-07, "loss": 2.8189, "step": 25010 }, { "epoch": 1.9754792600343527, "grad_norm": 0.07694107935139992, "learning_rate": 4.5348642475234733e-07, "loss": 2.7919, "step": 25015 }, { "epoch": 1.9758741189709976, "grad_norm": 0.07817519872095169, "learning_rate": 4.3893135627365297e-07, "loss": 2.6535, "step": 25020 }, { "epoch": 1.9762689779076426, "grad_norm": 0.07204459617082937, "learning_rate": 4.2461358988354905e-07, "loss": 2.7175, "step": 25025 }, { "epoch": 1.9766638368442875, "grad_norm": 0.08593316667924294, "learning_rate": 4.1053313238326175e-07, "loss": 2.7913, "step": 25030 }, { "epoch": 1.9770586957809324, "grad_norm": 0.0750604072035662, "learning_rate": 3.966899904613297e-07, "loss": 2.6646, "step": 25035 }, { "epoch": 1.9774535547175771, "grad_norm": 0.09066234298517911, "learning_rate": 3.830841706934374e-07, "loss": 2.6937, "step": 25040 }, { "epoch": 1.977848413654222, "grad_norm": 0.08286938419085992, "learning_rate": 3.697156795427481e-07, "loss": 2.8218, "step": 25045 }, { "epoch": 1.978243272590867, "grad_norm": 0.0796665392587086, "learning_rate": 3.5658452335951554e-07, "loss": 2.8269, "step": 25050 }, { "epoch": 1.9786381315275117, "grad_norm": 0.12773645252719834, "learning_rate": 3.4369070838130565e-07, "loss": 2.5879, "step": 25055 }, { "epoch": 1.9790329904641566, "grad_norm": 0.1458989601417717, "learning_rate": 3.3103424073305243e-07, "loss": 2.8938, "step": 25060 }, { "epoch": 1.9794278494008015, "grad_norm": 0.07373596627161572, "learning_rate": 3.1861512642672454e-07, "loss": 2.96, "step": 25065 }, { "epoch": 1.9798227083374464, "grad_norm": 0.10308627346522653, "learning_rate": 3.0643337136176954e-07, "loss": 2.6331, "step": 25070 }, { "epoch": 1.9802175672740914, "grad_norm": 0.07055421677474061, "learning_rate": 2.9448898132466984e-07, "loss": 2.8093, "step": 25075 }, { "epoch": 1.9806124262107363, "grad_norm": 0.09217242154979717, "learning_rate": 2.8278196198933127e-07, "loss": 2.532, "step": 25080 }, { "epoch": 1.9810072851473812, "grad_norm": 0.07290893272023248, "learning_rate": 2.713123189168609e-07, "loss": 2.8117, "step": 25085 }, { "epoch": 1.981402144084026, "grad_norm": 0.07452943984699842, "learning_rate": 2.6008005755551177e-07, "loss": 2.8046, "step": 25090 }, { "epoch": 1.9817970030206709, "grad_norm": 0.0819802853641276, "learning_rate": 2.4908518324090465e-07, "loss": 2.7468, "step": 25095 }, { "epoch": 1.9821918619573158, "grad_norm": 0.0782213972461553, "learning_rate": 2.3832770119580627e-07, "loss": 2.9544, "step": 25100 }, { "epoch": 1.9825867208939605, "grad_norm": 0.0747614943262162, "learning_rate": 2.2780761653024006e-07, "loss": 2.731, "step": 25105 }, { "epoch": 1.9829815798306054, "grad_norm": 0.08477610677459325, "learning_rate": 2.1752493424148646e-07, "loss": 2.861, "step": 25110 }, { "epoch": 1.9833764387672503, "grad_norm": 0.08730112495574302, "learning_rate": 2.0747965921408264e-07, "loss": 2.7811, "step": 25115 }, { "epoch": 1.9837712977038953, "grad_norm": 0.08088241860598569, "learning_rate": 1.9767179621965613e-07, "loss": 2.793, "step": 25120 }, { "epoch": 1.9841661566405402, "grad_norm": 0.07199427945799326, "learning_rate": 1.881013499172024e-07, "loss": 2.6555, "step": 25125 }, { "epoch": 1.9845610155771851, "grad_norm": 0.07082776527624986, "learning_rate": 1.7876832485291817e-07, "loss": 2.7965, "step": 25130 }, { "epoch": 1.98495587451383, "grad_norm": 0.08879225300155273, "learning_rate": 1.696727254600905e-07, "loss": 2.6082, "step": 25135 }, { "epoch": 1.985350733450475, "grad_norm": 0.0701651712428147, "learning_rate": 1.6081455605937434e-07, "loss": 2.6361, "step": 25140 }, { "epoch": 1.9857455923871197, "grad_norm": 0.08182266457986256, "learning_rate": 1.52193820858626e-07, "loss": 2.8048, "step": 25145 }, { "epoch": 1.9861404513237646, "grad_norm": 0.08091078660334133, "learning_rate": 1.4381052395284754e-07, "loss": 2.6403, "step": 25150 }, { "epoch": 1.9865353102604093, "grad_norm": 0.10335327112447167, "learning_rate": 1.3566466932424247e-07, "loss": 2.9398, "step": 25155 }, { "epoch": 1.9869301691970542, "grad_norm": 0.10126373667159651, "learning_rate": 1.277562608423266e-07, "loss": 2.7965, "step": 25160 }, { "epoch": 1.9873250281336992, "grad_norm": 0.07440660407080041, "learning_rate": 1.2008530226376158e-07, "loss": 3.0345, "step": 25165 }, { "epoch": 1.987719887070344, "grad_norm": 0.08749406555041453, "learning_rate": 1.1265179723235486e-07, "loss": 2.6118, "step": 25170 }, { "epoch": 1.988114746006989, "grad_norm": 0.0738082276037783, "learning_rate": 1.054557492792263e-07, "loss": 2.6591, "step": 25175 }, { "epoch": 1.988509604943634, "grad_norm": 0.07729526845801082, "learning_rate": 9.849716182264156e-08, "loss": 2.7592, "step": 25180 }, { "epoch": 1.9889044638802789, "grad_norm": 0.07607773630850263, "learning_rate": 9.177603816806767e-08, "loss": 2.6978, "step": 25185 }, { "epoch": 1.9892993228169238, "grad_norm": 0.07581911647385317, "learning_rate": 8.529238150817298e-08, "loss": 2.74, "step": 25190 }, { "epoch": 1.9896941817535685, "grad_norm": 0.0733345793688011, "learning_rate": 7.904619492282717e-08, "loss": 2.6454, "step": 25195 }, { "epoch": 1.9900890406902134, "grad_norm": 0.0730774881403392, "learning_rate": 7.303748137915678e-08, "loss": 2.5652, "step": 25200 }, { "epoch": 1.9904838996268583, "grad_norm": 0.07926201460598244, "learning_rate": 6.726624373137868e-08, "loss": 2.657, "step": 25205 }, { "epoch": 1.990878758563503, "grad_norm": 0.07606165915464592, "learning_rate": 6.17324847209666e-08, "loss": 2.8496, "step": 25210 }, { "epoch": 1.991273617500148, "grad_norm": 0.08107618728460587, "learning_rate": 5.643620697659557e-08, "loss": 2.8115, "step": 25215 }, { "epoch": 1.991668476436793, "grad_norm": 0.07297956453566638, "learning_rate": 5.137741301403098e-08, "loss": 2.6268, "step": 25220 }, { "epoch": 1.9920633353734378, "grad_norm": 0.07338698565365954, "learning_rate": 4.6556105236406076e-08, "loss": 2.6013, "step": 25225 }, { "epoch": 1.9924581943100828, "grad_norm": 0.07642608342586356, "learning_rate": 4.197228593388891e-08, "loss": 2.738, "step": 25230 }, { "epoch": 1.9928530532467277, "grad_norm": 0.07176558106041922, "learning_rate": 3.762595728384888e-08, "loss": 2.6643, "step": 25235 }, { "epoch": 1.9932479121833726, "grad_norm": 0.07196839570791697, "learning_rate": 3.351712135102325e-08, "loss": 2.6532, "step": 25240 }, { "epoch": 1.9936427711200175, "grad_norm": 0.07135720228264007, "learning_rate": 2.9645780087073083e-08, "loss": 2.6611, "step": 25245 }, { "epoch": 1.9940376300566622, "grad_norm": 0.07858686022076009, "learning_rate": 2.6011935330971792e-08, "loss": 2.6308, "step": 25250 }, { "epoch": 1.9944324889933072, "grad_norm": 0.07555479497652494, "learning_rate": 2.261558880894965e-08, "loss": 2.6823, "step": 25255 }, { "epoch": 1.9948273479299519, "grad_norm": 0.08461653771876904, "learning_rate": 1.9456742134271733e-08, "loss": 2.6261, "step": 25260 }, { "epoch": 1.9952222068665968, "grad_norm": 0.12155911073774207, "learning_rate": 1.6535396807515478e-08, "loss": 2.5915, "step": 25265 }, { "epoch": 1.9956170658032417, "grad_norm": 0.0896137921918128, "learning_rate": 1.3851554216293138e-08, "loss": 3.0912, "step": 25270 }, { "epoch": 1.9960119247398866, "grad_norm": 0.07723114149468442, "learning_rate": 1.1405215635584831e-08, "loss": 2.6806, "step": 25275 }, { "epoch": 1.9964067836765316, "grad_norm": 0.07641996667056386, "learning_rate": 9.196382227405486e-09, "loss": 2.7386, "step": 25280 }, { "epoch": 1.9968016426131765, "grad_norm": 0.07206534450173274, "learning_rate": 7.225055041026885e-09, "loss": 3.0121, "step": 25285 }, { "epoch": 1.9971965015498214, "grad_norm": 0.07497964948277552, "learning_rate": 5.491235012811124e-09, "loss": 2.6048, "step": 25290 }, { "epoch": 1.9975913604864663, "grad_norm": 0.07247433468722958, "learning_rate": 3.994922966432668e-09, "loss": 2.7723, "step": 25295 }, { "epoch": 1.997986219423111, "grad_norm": 0.08143713248397856, "learning_rate": 2.736119612600785e-09, "loss": 2.6834, "step": 25300 }, { "epoch": 1.998381078359756, "grad_norm": 0.09589176193654186, "learning_rate": 1.7148255492815957e-09, "loss": 2.9012, "step": 25305 }, { "epoch": 1.998775937296401, "grad_norm": 0.09232012960288359, "learning_rate": 9.310412616980734e-10, "loss": 2.6357, "step": 25310 }, { "epoch": 1.9991707962330456, "grad_norm": 0.07205025703143203, "learning_rate": 3.8476712210799844e-10, "loss": 2.801, "step": 25315 }, { "epoch": 1.9995656551696905, "grad_norm": 0.07006547845501511, "learning_rate": 7.60033899704915e-11, "loss": 2.8761, "step": 25320 }, { "epoch": 1.9998815423190064, "eval_loss": 2.735769510269165, "eval_runtime": 128.2666, "eval_samples_per_second": 20.652, "eval_steps_per_second": 20.652, "step": 25324 }, { "epoch": 1.9998815423190064, "step": 25324, "total_flos": 1.83007472173056e+16, "train_loss": 3.3040246668631688, "train_runtime": 19562.1005, "train_samples_per_second": 5.178, "train_steps_per_second": 1.295 } ], "logging_steps": 5, "max_steps": 25324, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.83007472173056e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }