diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,228438 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 32628, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 3.0648522741203875e-05, + "grad_norm": 89.08560970136007, + "learning_rate": 1.0214504596527069e-08, + "loss": 2.0546, + "step": 1 + }, + { + "epoch": 6.129704548240775e-05, + "grad_norm": 63.6346443415281, + "learning_rate": 2.0429009193054138e-08, + "loss": 1.9353, + "step": 2 + }, + { + "epoch": 9.194556822361162e-05, + "grad_norm": 7.008418468016836, + "learning_rate": 3.0643513789581204e-08, + "loss": 0.7687, + "step": 3 + }, + { + "epoch": 0.0001225940909648155, + "grad_norm": 78.63474792396211, + "learning_rate": 4.0858018386108276e-08, + "loss": 2.0782, + "step": 4 + }, + { + "epoch": 0.00015324261370601937, + "grad_norm": 77.75173234271104, + "learning_rate": 5.107252298263535e-08, + "loss": 2.0403, + "step": 5 + }, + { + "epoch": 0.00018389113644722325, + "grad_norm": 80.9344025842625, + "learning_rate": 6.128702757916241e-08, + "loss": 2.0655, + "step": 6 + }, + { + "epoch": 0.00021453965918842712, + "grad_norm": 77.39658974042007, + "learning_rate": 7.150153217568949e-08, + "loss": 1.9496, + "step": 7 + }, + { + "epoch": 0.000245188181929631, + "grad_norm": 68.55102766587733, + "learning_rate": 8.171603677221655e-08, + "loss": 1.9041, + "step": 8 + }, + { + "epoch": 0.0002758367046708349, + "grad_norm": 76.97069918578106, + "learning_rate": 9.193054136874362e-08, + "loss": 2.0512, + "step": 9 + }, + { + "epoch": 0.00030648522741203875, + "grad_norm": 77.26026967769032, + "learning_rate": 1.021450459652707e-07, + "loss": 2.1065, + "step": 10 + }, + { + "epoch": 0.0003371337501532426, + "grad_norm": 71.98054745241204, + "learning_rate": 1.1235955056179776e-07, + "loss": 1.963, + "step": 11 + }, + { + "epoch": 0.0003677822728944465, + "grad_norm": 59.54901532655414, + "learning_rate": 1.2257405515832481e-07, + "loss": 1.8054, + "step": 12 + }, + { + "epoch": 0.0003984307956356504, + "grad_norm": 92.31521671616618, + "learning_rate": 1.327885597548519e-07, + "loss": 2.5807, + "step": 13 + }, + { + "epoch": 0.00042907931837685425, + "grad_norm": 6.900672339908472, + "learning_rate": 1.4300306435137899e-07, + "loss": 0.7186, + "step": 14 + }, + { + "epoch": 0.0004597278411180581, + "grad_norm": 70.58978240813653, + "learning_rate": 1.5321756894790606e-07, + "loss": 2.0012, + "step": 15 + }, + { + "epoch": 0.000490376363859262, + "grad_norm": 7.124069940215887, + "learning_rate": 1.634320735444331e-07, + "loss": 0.7339, + "step": 16 + }, + { + "epoch": 0.0005210248866004658, + "grad_norm": 7.244685119281876, + "learning_rate": 1.7364657814096015e-07, + "loss": 0.7621, + "step": 17 + }, + { + "epoch": 0.0005516734093416697, + "grad_norm": 7.005677202133421, + "learning_rate": 1.8386108273748725e-07, + "loss": 0.7716, + "step": 18 + }, + { + "epoch": 0.0005823219320828736, + "grad_norm": 63.72202525143337, + "learning_rate": 1.9407558733401432e-07, + "loss": 1.9179, + "step": 19 + }, + { + "epoch": 0.0006129704548240775, + "grad_norm": 59.88329132065079, + "learning_rate": 2.042900919305414e-07, + "loss": 1.7654, + "step": 20 + }, + { + "epoch": 0.0006436189775652813, + "grad_norm": 58.72977527997002, + "learning_rate": 2.1450459652706847e-07, + "loss": 1.8379, + "step": 21 + }, + { + "epoch": 
0.0006742675003064852, + "grad_norm": 60.31981818331502, + "learning_rate": 2.247191011235955e-07, + "loss": 1.9848, + "step": 22 + }, + { + "epoch": 0.0007049160230476891, + "grad_norm": 51.278640317574606, + "learning_rate": 2.349336057201226e-07, + "loss": 1.6186, + "step": 23 + }, + { + "epoch": 0.000735564545788893, + "grad_norm": 46.151723122145654, + "learning_rate": 2.4514811031664963e-07, + "loss": 1.7823, + "step": 24 + }, + { + "epoch": 0.0007662130685300968, + "grad_norm": 41.04689363866026, + "learning_rate": 2.5536261491317673e-07, + "loss": 1.6774, + "step": 25 + }, + { + "epoch": 0.0007968615912713007, + "grad_norm": 6.550244340599658, + "learning_rate": 2.655771195097038e-07, + "loss": 0.7199, + "step": 26 + }, + { + "epoch": 0.0008275101140125046, + "grad_norm": 40.55751608188138, + "learning_rate": 2.7579162410623087e-07, + "loss": 1.6197, + "step": 27 + }, + { + "epoch": 0.0008581586367537085, + "grad_norm": 41.62926943478123, + "learning_rate": 2.8600612870275797e-07, + "loss": 1.7437, + "step": 28 + }, + { + "epoch": 0.0008888071594949123, + "grad_norm": 46.30795670862777, + "learning_rate": 2.96220633299285e-07, + "loss": 1.6424, + "step": 29 + }, + { + "epoch": 0.0009194556822361162, + "grad_norm": 45.21336117318005, + "learning_rate": 3.064351378958121e-07, + "loss": 1.6123, + "step": 30 + }, + { + "epoch": 0.0009501042049773201, + "grad_norm": 38.24930164273746, + "learning_rate": 3.1664964249233916e-07, + "loss": 1.6661, + "step": 31 + }, + { + "epoch": 0.000980752727718524, + "grad_norm": 25.653989230191055, + "learning_rate": 3.268641470888662e-07, + "loss": 1.4691, + "step": 32 + }, + { + "epoch": 0.001011401250459728, + "grad_norm": 6.648868830006903, + "learning_rate": 3.3707865168539325e-07, + "loss": 0.7461, + "step": 33 + }, + { + "epoch": 0.0010420497732009316, + "grad_norm": 6.425606260315896, + "learning_rate": 3.472931562819203e-07, + "loss": 0.745, + "step": 34 + }, + { + "epoch": 0.0010726982959421356, + "grad_norm": 44.71723681485436, + "learning_rate": 3.5750766087844745e-07, + "loss": 1.3927, + "step": 35 + }, + { + "epoch": 0.0011033468186833395, + "grad_norm": 22.519576527379883, + "learning_rate": 3.677221654749745e-07, + "loss": 1.3519, + "step": 36 + }, + { + "epoch": 0.0011339953414245434, + "grad_norm": 25.31883674917661, + "learning_rate": 3.7793667007150154e-07, + "loss": 1.4207, + "step": 37 + }, + { + "epoch": 0.0011646438641657471, + "grad_norm": 6.611587362543892, + "learning_rate": 3.8815117466802864e-07, + "loss": 0.7743, + "step": 38 + }, + { + "epoch": 0.001195292386906951, + "grad_norm": 20.153455522684244, + "learning_rate": 3.9836567926455574e-07, + "loss": 1.2387, + "step": 39 + }, + { + "epoch": 0.001225940909648155, + "grad_norm": 16.787039031749995, + "learning_rate": 4.085801838610828e-07, + "loss": 1.3068, + "step": 40 + }, + { + "epoch": 0.001256589432389359, + "grad_norm": 14.418101616000696, + "learning_rate": 4.1879468845760983e-07, + "loss": 1.3356, + "step": 41 + }, + { + "epoch": 0.0012872379551305626, + "grad_norm": 12.486959274707939, + "learning_rate": 4.2900919305413693e-07, + "loss": 1.3184, + "step": 42 + }, + { + "epoch": 0.0013178864778717666, + "grad_norm": 11.031494308816299, + "learning_rate": 4.39223697650664e-07, + "loss": 1.2462, + "step": 43 + }, + { + "epoch": 0.0013485350006129705, + "grad_norm": 9.251975380193947, + "learning_rate": 4.49438202247191e-07, + "loss": 1.3023, + "step": 44 + }, + { + "epoch": 0.0013791835233541744, + "grad_norm": 9.299659249810281, + "learning_rate": 
4.5965270684371807e-07, + "loss": 1.1836, + "step": 45 + }, + { + "epoch": 0.0014098320460953781, + "grad_norm": 7.791049211266313, + "learning_rate": 4.698672114402452e-07, + "loss": 1.2752, + "step": 46 + }, + { + "epoch": 0.001440480568836582, + "grad_norm": 6.724387816941555, + "learning_rate": 4.800817160367723e-07, + "loss": 1.2019, + "step": 47 + }, + { + "epoch": 0.001471129091577786, + "grad_norm": 7.926236892978478, + "learning_rate": 4.902962206332993e-07, + "loss": 1.1857, + "step": 48 + }, + { + "epoch": 0.00150177761431899, + "grad_norm": 5.4109546665507064, + "learning_rate": 5.005107252298265e-07, + "loss": 0.7119, + "step": 49 + }, + { + "epoch": 0.0015324261370601936, + "grad_norm": 8.049658477838308, + "learning_rate": 5.107252298263535e-07, + "loss": 1.1436, + "step": 50 + }, + { + "epoch": 0.0015630746598013976, + "grad_norm": 6.1160754568602815, + "learning_rate": 5.209397344228806e-07, + "loss": 1.1807, + "step": 51 + }, + { + "epoch": 0.0015937231825426015, + "grad_norm": 5.7258944741099, + "learning_rate": 5.311542390194075e-07, + "loss": 1.2282, + "step": 52 + }, + { + "epoch": 0.0016243717052838054, + "grad_norm": 5.203516672024508, + "learning_rate": 5.413687436159346e-07, + "loss": 1.1893, + "step": 53 + }, + { + "epoch": 0.0016550202280250091, + "grad_norm": 5.98723842711262, + "learning_rate": 5.515832482124617e-07, + "loss": 1.2128, + "step": 54 + }, + { + "epoch": 0.001685668750766213, + "grad_norm": 5.253032263527259, + "learning_rate": 5.617977528089888e-07, + "loss": 1.1633, + "step": 55 + }, + { + "epoch": 0.001716317273507417, + "grad_norm": 5.6500920181636864, + "learning_rate": 5.720122574055159e-07, + "loss": 1.105, + "step": 56 + }, + { + "epoch": 0.001746965796248621, + "grad_norm": 5.151414897007073, + "learning_rate": 5.822267620020429e-07, + "loss": 0.7191, + "step": 57 + }, + { + "epoch": 0.0017776143189898246, + "grad_norm": 4.943378448535567, + "learning_rate": 5.9244126659857e-07, + "loss": 1.1309, + "step": 58 + }, + { + "epoch": 0.0018082628417310286, + "grad_norm": 4.456925974953797, + "learning_rate": 6.02655771195097e-07, + "loss": 1.0566, + "step": 59 + }, + { + "epoch": 0.0018389113644722325, + "grad_norm": 4.516446150963775, + "learning_rate": 6.128702757916242e-07, + "loss": 1.2222, + "step": 60 + }, + { + "epoch": 0.0018695598872134364, + "grad_norm": 5.198275128022614, + "learning_rate": 6.230847803881512e-07, + "loss": 0.727, + "step": 61 + }, + { + "epoch": 0.0019002084099546401, + "grad_norm": 3.990987502049036, + "learning_rate": 6.332992849846783e-07, + "loss": 1.1488, + "step": 62 + }, + { + "epoch": 0.001930856932695844, + "grad_norm": 5.313102010861105, + "learning_rate": 6.435137895812053e-07, + "loss": 1.1725, + "step": 63 + }, + { + "epoch": 0.001961505455437048, + "grad_norm": 4.367896524099123, + "learning_rate": 6.537282941777324e-07, + "loss": 1.2599, + "step": 64 + }, + { + "epoch": 0.001992153978178252, + "grad_norm": 3.807555030072833, + "learning_rate": 6.639427987742594e-07, + "loss": 1.0266, + "step": 65 + }, + { + "epoch": 0.002022802500919456, + "grad_norm": 3.4054192704996717, + "learning_rate": 6.741573033707865e-07, + "loss": 1.0558, + "step": 66 + }, + { + "epoch": 0.0020534510236606593, + "grad_norm": 4.974995302858568, + "learning_rate": 6.843718079673137e-07, + "loss": 0.7192, + "step": 67 + }, + { + "epoch": 0.0020840995464018633, + "grad_norm": 4.254291294935033, + "learning_rate": 6.945863125638406e-07, + "loss": 1.0923, + "step": 68 + }, + { + "epoch": 0.002114748069143067, + "grad_norm": 
4.467974972326297, + "learning_rate": 7.048008171603678e-07, + "loss": 1.088, + "step": 69 + }, + { + "epoch": 0.002145396591884271, + "grad_norm": 5.416955287481584, + "learning_rate": 7.150153217568949e-07, + "loss": 1.1006, + "step": 70 + }, + { + "epoch": 0.002176045114625475, + "grad_norm": 3.786110278434636, + "learning_rate": 7.252298263534219e-07, + "loss": 1.0699, + "step": 71 + }, + { + "epoch": 0.002206693637366679, + "grad_norm": 3.781489762655271, + "learning_rate": 7.35444330949949e-07, + "loss": 1.01, + "step": 72 + }, + { + "epoch": 0.002237342160107883, + "grad_norm": 3.996822165102075, + "learning_rate": 7.456588355464761e-07, + "loss": 1.1456, + "step": 73 + }, + { + "epoch": 0.002267990682849087, + "grad_norm": 3.834302254983052, + "learning_rate": 7.558733401430031e-07, + "loss": 0.9744, + "step": 74 + }, + { + "epoch": 0.0022986392055902903, + "grad_norm": 4.10052613048204, + "learning_rate": 7.660878447395302e-07, + "loss": 0.9851, + "step": 75 + }, + { + "epoch": 0.0023292877283314943, + "grad_norm": 3.6942561266403184, + "learning_rate": 7.763023493360573e-07, + "loss": 1.035, + "step": 76 + }, + { + "epoch": 0.002359936251072698, + "grad_norm": 3.4161485471016335, + "learning_rate": 7.865168539325843e-07, + "loss": 1.0135, + "step": 77 + }, + { + "epoch": 0.002390584773813902, + "grad_norm": 4.356088598469908, + "learning_rate": 7.967313585291115e-07, + "loss": 1.1297, + "step": 78 + }, + { + "epoch": 0.002421233296555106, + "grad_norm": 3.4434990810076056, + "learning_rate": 8.069458631256384e-07, + "loss": 1.0819, + "step": 79 + }, + { + "epoch": 0.00245188181929631, + "grad_norm": 3.880491700089945, + "learning_rate": 8.171603677221656e-07, + "loss": 1.1027, + "step": 80 + }, + { + "epoch": 0.002482530342037514, + "grad_norm": 3.9487785004894023, + "learning_rate": 8.273748723186927e-07, + "loss": 0.6709, + "step": 81 + }, + { + "epoch": 0.002513178864778718, + "grad_norm": 3.9416434036269496, + "learning_rate": 8.375893769152197e-07, + "loss": 0.6872, + "step": 82 + }, + { + "epoch": 0.0025438273875199213, + "grad_norm": 3.6516709397826217, + "learning_rate": 8.478038815117468e-07, + "loss": 1.0285, + "step": 83 + }, + { + "epoch": 0.0025744759102611253, + "grad_norm": 3.4981395610762496, + "learning_rate": 8.580183861082739e-07, + "loss": 1.0998, + "step": 84 + }, + { + "epoch": 0.002605124433002329, + "grad_norm": 4.095477268478066, + "learning_rate": 8.682328907048009e-07, + "loss": 1.1315, + "step": 85 + }, + { + "epoch": 0.002635772955743533, + "grad_norm": 3.4390679199229917, + "learning_rate": 8.78447395301328e-07, + "loss": 0.9974, + "step": 86 + }, + { + "epoch": 0.002666421478484737, + "grad_norm": 3.9140690373969047, + "learning_rate": 8.886618998978551e-07, + "loss": 1.0622, + "step": 87 + }, + { + "epoch": 0.002697070001225941, + "grad_norm": 3.2268371006486403, + "learning_rate": 8.98876404494382e-07, + "loss": 0.6661, + "step": 88 + }, + { + "epoch": 0.002727718523967145, + "grad_norm": 3.6742147642940775, + "learning_rate": 9.090909090909091e-07, + "loss": 1.0184, + "step": 89 + }, + { + "epoch": 0.002758367046708349, + "grad_norm": 3.2300214225135604, + "learning_rate": 9.193054136874361e-07, + "loss": 0.9918, + "step": 90 + }, + { + "epoch": 0.0027890155694495523, + "grad_norm": 3.744473705561353, + "learning_rate": 9.295199182839632e-07, + "loss": 1.1148, + "step": 91 + }, + { + "epoch": 0.0028196640921907563, + "grad_norm": 3.6200197727657066, + "learning_rate": 9.397344228804904e-07, + "loss": 1.0265, + "step": 92 + }, + { + "epoch": 
0.00285031261493196, + "grad_norm": 3.192295139375867, + "learning_rate": 9.499489274770174e-07, + "loss": 0.6898, + "step": 93 + }, + { + "epoch": 0.002880961137673164, + "grad_norm": 3.477295683764482, + "learning_rate": 9.601634320735445e-07, + "loss": 1.0053, + "step": 94 + }, + { + "epoch": 0.002911609660414368, + "grad_norm": 3.4875838217496598, + "learning_rate": 9.703779366700715e-07, + "loss": 1.065, + "step": 95 + }, + { + "epoch": 0.002942258183155572, + "grad_norm": 4.005275121542596, + "learning_rate": 9.805924412665985e-07, + "loss": 1.0819, + "step": 96 + }, + { + "epoch": 0.002972906705896776, + "grad_norm": 3.5313105915912755, + "learning_rate": 9.908069458631257e-07, + "loss": 1.0474, + "step": 97 + }, + { + "epoch": 0.00300355522863798, + "grad_norm": 4.459997023500788, + "learning_rate": 1.001021450459653e-06, + "loss": 1.0753, + "step": 98 + }, + { + "epoch": 0.0030342037513791833, + "grad_norm": 3.693073495477769, + "learning_rate": 1.01123595505618e-06, + "loss": 1.0984, + "step": 99 + }, + { + "epoch": 0.0030648522741203873, + "grad_norm": 3.6723368677483874, + "learning_rate": 1.021450459652707e-06, + "loss": 1.0721, + "step": 100 + }, + { + "epoch": 0.003095500796861591, + "grad_norm": 3.6373269065618143, + "learning_rate": 1.031664964249234e-06, + "loss": 1.1199, + "step": 101 + }, + { + "epoch": 0.003126149319602795, + "grad_norm": 3.489422135604586, + "learning_rate": 1.0418794688457611e-06, + "loss": 1.0281, + "step": 102 + }, + { + "epoch": 0.003156797842343999, + "grad_norm": 3.9190214097913025, + "learning_rate": 1.052093973442288e-06, + "loss": 1.1071, + "step": 103 + }, + { + "epoch": 0.003187446365085203, + "grad_norm": 3.1445518105414694, + "learning_rate": 1.062308478038815e-06, + "loss": 0.9801, + "step": 104 + }, + { + "epoch": 0.003218094887826407, + "grad_norm": 2.820514905590706, + "learning_rate": 1.0725229826353423e-06, + "loss": 0.6845, + "step": 105 + }, + { + "epoch": 0.003248743410567611, + "grad_norm": 3.1696283254436404, + "learning_rate": 1.0827374872318693e-06, + "loss": 1.0023, + "step": 106 + }, + { + "epoch": 0.0032793919333088143, + "grad_norm": 4.108702066866853, + "learning_rate": 1.0929519918283963e-06, + "loss": 1.047, + "step": 107 + }, + { + "epoch": 0.0033100404560500183, + "grad_norm": 3.4279209112489344, + "learning_rate": 1.1031664964249235e-06, + "loss": 1.0335, + "step": 108 + }, + { + "epoch": 0.003340688978791222, + "grad_norm": 3.428199449631726, + "learning_rate": 1.1133810010214507e-06, + "loss": 1.0308, + "step": 109 + }, + { + "epoch": 0.003371337501532426, + "grad_norm": 3.388233010784284, + "learning_rate": 1.1235955056179777e-06, + "loss": 1.0805, + "step": 110 + }, + { + "epoch": 0.00340198602427363, + "grad_norm": 3.9520612051111272, + "learning_rate": 1.1338100102145047e-06, + "loss": 1.0727, + "step": 111 + }, + { + "epoch": 0.003432634547014834, + "grad_norm": 3.4480720647634446, + "learning_rate": 1.1440245148110319e-06, + "loss": 0.9411, + "step": 112 + }, + { + "epoch": 0.003463283069756038, + "grad_norm": 3.3047390323141888, + "learning_rate": 1.1542390194075589e-06, + "loss": 1.1894, + "step": 113 + }, + { + "epoch": 0.003493931592497242, + "grad_norm": 3.0477168052165986, + "learning_rate": 1.1644535240040859e-06, + "loss": 1.1532, + "step": 114 + }, + { + "epoch": 0.0035245801152384453, + "grad_norm": 3.2078393845179685, + "learning_rate": 1.1746680286006129e-06, + "loss": 1.0153, + "step": 115 + }, + { + "epoch": 0.0035552286379796493, + "grad_norm": 3.5674454274202096, + "learning_rate": 
1.18488253319714e-06, + "loss": 1.051, + "step": 116 + }, + { + "epoch": 0.003585877160720853, + "grad_norm": 3.0624871255854025, + "learning_rate": 1.195097037793667e-06, + "loss": 1.0116, + "step": 117 + }, + { + "epoch": 0.003616525683462057, + "grad_norm": 3.439434271516326, + "learning_rate": 1.205311542390194e-06, + "loss": 0.9855, + "step": 118 + }, + { + "epoch": 0.003647174206203261, + "grad_norm": 3.3519180895008263, + "learning_rate": 1.2155260469867213e-06, + "loss": 1.1147, + "step": 119 + }, + { + "epoch": 0.003677822728944465, + "grad_norm": 2.583955506410239, + "learning_rate": 1.2257405515832485e-06, + "loss": 0.677, + "step": 120 + }, + { + "epoch": 0.003708471251685669, + "grad_norm": 2.3050586894934626, + "learning_rate": 1.2359550561797752e-06, + "loss": 0.6313, + "step": 121 + }, + { + "epoch": 0.003739119774426873, + "grad_norm": 2.8360220758966252, + "learning_rate": 1.2461695607763025e-06, + "loss": 1.0149, + "step": 122 + }, + { + "epoch": 0.0037697682971680763, + "grad_norm": 3.7682033879316164, + "learning_rate": 1.2563840653728297e-06, + "loss": 0.9588, + "step": 123 + }, + { + "epoch": 0.0038004168199092803, + "grad_norm": 4.153984443659916, + "learning_rate": 1.2665985699693567e-06, + "loss": 0.9886, + "step": 124 + }, + { + "epoch": 0.003831065342650484, + "grad_norm": 3.8028541140199548, + "learning_rate": 1.2768130745658836e-06, + "loss": 1.04, + "step": 125 + }, + { + "epoch": 0.003861713865391688, + "grad_norm": 3.4603010940565406, + "learning_rate": 1.2870275791624106e-06, + "loss": 0.9819, + "step": 126 + }, + { + "epoch": 0.003892362388132892, + "grad_norm": 3.43598074030124, + "learning_rate": 1.2972420837589378e-06, + "loss": 1.0121, + "step": 127 + }, + { + "epoch": 0.003923010910874096, + "grad_norm": 3.246986790784606, + "learning_rate": 1.3074565883554648e-06, + "loss": 1.0699, + "step": 128 + }, + { + "epoch": 0.0039536594336153, + "grad_norm": 3.4927772671567725, + "learning_rate": 1.317671092951992e-06, + "loss": 0.9985, + "step": 129 + }, + { + "epoch": 0.003984307956356504, + "grad_norm": 4.25878679662059, + "learning_rate": 1.3278855975485188e-06, + "loss": 1.0501, + "step": 130 + }, + { + "epoch": 0.004014956479097708, + "grad_norm": 3.3799398458565593, + "learning_rate": 1.338100102145046e-06, + "loss": 0.9657, + "step": 131 + }, + { + "epoch": 0.004045605001838912, + "grad_norm": 2.487454345743188, + "learning_rate": 1.348314606741573e-06, + "loss": 0.683, + "step": 132 + }, + { + "epoch": 0.004076253524580116, + "grad_norm": 2.8012874772465004, + "learning_rate": 1.3585291113381002e-06, + "loss": 0.9319, + "step": 133 + }, + { + "epoch": 0.004106902047321319, + "grad_norm": 3.3680665460644397, + "learning_rate": 1.3687436159346274e-06, + "loss": 1.0171, + "step": 134 + }, + { + "epoch": 0.004137550570062523, + "grad_norm": 3.252892830992809, + "learning_rate": 1.3789581205311544e-06, + "loss": 0.9732, + "step": 135 + }, + { + "epoch": 0.0041681990928037265, + "grad_norm": 3.802640287618769, + "learning_rate": 1.3891726251276812e-06, + "loss": 1.0121, + "step": 136 + }, + { + "epoch": 0.0041988476155449305, + "grad_norm": 3.351631669051486, + "learning_rate": 1.3993871297242084e-06, + "loss": 0.9784, + "step": 137 + }, + { + "epoch": 0.004229496138286134, + "grad_norm": 3.251398157849799, + "learning_rate": 1.4096016343207356e-06, + "loss": 1.0134, + "step": 138 + }, + { + "epoch": 0.004260144661027338, + "grad_norm": 3.4009372770795108, + "learning_rate": 1.4198161389172626e-06, + "loss": 1.0324, + "step": 139 + }, + { + "epoch": 
0.004290793183768542, + "grad_norm": 2.3125921602601633, + "learning_rate": 1.4300306435137898e-06, + "loss": 0.6819, + "step": 140 + }, + { + "epoch": 0.004321441706509746, + "grad_norm": 2.9100493595076244, + "learning_rate": 1.4402451481103168e-06, + "loss": 0.9165, + "step": 141 + }, + { + "epoch": 0.00435209022925095, + "grad_norm": 3.2115352293155897, + "learning_rate": 1.4504596527068438e-06, + "loss": 0.9862, + "step": 142 + }, + { + "epoch": 0.004382738751992154, + "grad_norm": 3.199105355709511, + "learning_rate": 1.4606741573033708e-06, + "loss": 1.0018, + "step": 143 + }, + { + "epoch": 0.004413387274733358, + "grad_norm": 3.4748128155602838, + "learning_rate": 1.470888661899898e-06, + "loss": 0.9747, + "step": 144 + }, + { + "epoch": 0.004444035797474562, + "grad_norm": 3.2891305674130873, + "learning_rate": 1.4811031664964252e-06, + "loss": 0.9475, + "step": 145 + }, + { + "epoch": 0.004474684320215766, + "grad_norm": 3.1560075133340275, + "learning_rate": 1.4913176710929522e-06, + "loss": 0.9506, + "step": 146 + }, + { + "epoch": 0.00450533284295697, + "grad_norm": 3.1761117994147328, + "learning_rate": 1.501532175689479e-06, + "loss": 0.9519, + "step": 147 + }, + { + "epoch": 0.004535981365698174, + "grad_norm": 3.4436202554982906, + "learning_rate": 1.5117466802860062e-06, + "loss": 1.0429, + "step": 148 + }, + { + "epoch": 0.004566629888439378, + "grad_norm": 3.4715026779583393, + "learning_rate": 1.5219611848825334e-06, + "loss": 1.0815, + "step": 149 + }, + { + "epoch": 0.004597278411180581, + "grad_norm": 2.170857072885516, + "learning_rate": 1.5321756894790604e-06, + "loss": 0.6939, + "step": 150 + }, + { + "epoch": 0.004627926933921785, + "grad_norm": 2.212521131912681, + "learning_rate": 1.5423901940755876e-06, + "loss": 0.6511, + "step": 151 + }, + { + "epoch": 0.0046585754566629885, + "grad_norm": 4.466721883647417, + "learning_rate": 1.5526046986721146e-06, + "loss": 0.9391, + "step": 152 + }, + { + "epoch": 0.0046892239794041925, + "grad_norm": 2.134647841855301, + "learning_rate": 1.5628192032686416e-06, + "loss": 0.6774, + "step": 153 + }, + { + "epoch": 0.004719872502145396, + "grad_norm": 2.115748716997937, + "learning_rate": 1.5730337078651686e-06, + "loss": 0.658, + "step": 154 + }, + { + "epoch": 0.0047505210248866, + "grad_norm": 2.0260612108613874, + "learning_rate": 1.5832482124616958e-06, + "loss": 0.6654, + "step": 155 + }, + { + "epoch": 0.004781169547627804, + "grad_norm": 4.272038030535424, + "learning_rate": 1.593462717058223e-06, + "loss": 0.8623, + "step": 156 + }, + { + "epoch": 0.004811818070369008, + "grad_norm": 3.343379292852195, + "learning_rate": 1.60367722165475e-06, + "loss": 0.9184, + "step": 157 + }, + { + "epoch": 0.004842466593110212, + "grad_norm": 3.491992769755343, + "learning_rate": 1.6138917262512767e-06, + "loss": 0.9747, + "step": 158 + }, + { + "epoch": 0.004873115115851416, + "grad_norm": 4.724495102420808, + "learning_rate": 1.624106230847804e-06, + "loss": 1.0154, + "step": 159 + }, + { + "epoch": 0.00490376363859262, + "grad_norm": 3.3513672103385725, + "learning_rate": 1.6343207354443311e-06, + "loss": 0.9418, + "step": 160 + }, + { + "epoch": 0.004934412161333824, + "grad_norm": 3.824925191693266, + "learning_rate": 1.6445352400408581e-06, + "loss": 0.9909, + "step": 161 + }, + { + "epoch": 0.004965060684075028, + "grad_norm": 3.3744440391641835, + "learning_rate": 1.6547497446373853e-06, + "loss": 0.9643, + "step": 162 + }, + { + "epoch": 0.004995709206816232, + "grad_norm": 3.4173097168613684, + "learning_rate": 
1.6649642492339123e-06, + "loss": 0.912, + "step": 163 + }, + { + "epoch": 0.005026357729557436, + "grad_norm": 3.019044387227756, + "learning_rate": 1.6751787538304393e-06, + "loss": 0.8663, + "step": 164 + }, + { + "epoch": 0.00505700625229864, + "grad_norm": 3.1455003067520955, + "learning_rate": 1.6853932584269663e-06, + "loss": 0.8966, + "step": 165 + }, + { + "epoch": 0.005087654775039843, + "grad_norm": 3.108785130598784, + "learning_rate": 1.6956077630234935e-06, + "loss": 0.957, + "step": 166 + }, + { + "epoch": 0.005118303297781047, + "grad_norm": 3.5075835728701654, + "learning_rate": 1.7058222676200205e-06, + "loss": 0.9482, + "step": 167 + }, + { + "epoch": 0.0051489518205222505, + "grad_norm": 2.9487671285535444, + "learning_rate": 1.7160367722165477e-06, + "loss": 1.001, + "step": 168 + }, + { + "epoch": 0.0051796003432634545, + "grad_norm": 2.9554458147756106, + "learning_rate": 1.7262512768130745e-06, + "loss": 1.0078, + "step": 169 + }, + { + "epoch": 0.005210248866004658, + "grad_norm": 4.155998320226826, + "learning_rate": 1.7364657814096017e-06, + "loss": 0.9881, + "step": 170 + }, + { + "epoch": 0.005240897388745862, + "grad_norm": 4.07957118138219, + "learning_rate": 1.746680286006129e-06, + "loss": 0.8711, + "step": 171 + }, + { + "epoch": 0.005271545911487066, + "grad_norm": 3.617947824051078, + "learning_rate": 1.756894790602656e-06, + "loss": 0.8177, + "step": 172 + }, + { + "epoch": 0.00530219443422827, + "grad_norm": 3.6989039342917334, + "learning_rate": 1.7671092951991831e-06, + "loss": 0.9532, + "step": 173 + }, + { + "epoch": 0.005332842956969474, + "grad_norm": 3.473308204810775, + "learning_rate": 1.7773237997957101e-06, + "loss": 0.9801, + "step": 174 + }, + { + "epoch": 0.005363491479710678, + "grad_norm": 1.70651540295496, + "learning_rate": 1.787538304392237e-06, + "loss": 0.6389, + "step": 175 + }, + { + "epoch": 0.005394140002451882, + "grad_norm": 3.1052002028436134, + "learning_rate": 1.797752808988764e-06, + "loss": 0.9566, + "step": 176 + }, + { + "epoch": 0.005424788525193086, + "grad_norm": 3.2451167116582544, + "learning_rate": 1.8079673135852913e-06, + "loss": 0.8613, + "step": 177 + }, + { + "epoch": 0.00545543704793429, + "grad_norm": 3.2279097139257, + "learning_rate": 1.8181818181818183e-06, + "loss": 0.958, + "step": 178 + }, + { + "epoch": 0.005486085570675494, + "grad_norm": 3.577130990597013, + "learning_rate": 1.8283963227783455e-06, + "loss": 1.0718, + "step": 179 + }, + { + "epoch": 0.005516734093416698, + "grad_norm": 2.8163857524374936, + "learning_rate": 1.8386108273748723e-06, + "loss": 0.8969, + "step": 180 + }, + { + "epoch": 0.005547382616157902, + "grad_norm": 3.396610291986765, + "learning_rate": 1.8488253319713995e-06, + "loss": 0.8887, + "step": 181 + }, + { + "epoch": 0.005578031138899105, + "grad_norm": 3.5220756541384075, + "learning_rate": 1.8590398365679265e-06, + "loss": 0.9661, + "step": 182 + }, + { + "epoch": 0.005608679661640309, + "grad_norm": 1.8072339180147845, + "learning_rate": 1.8692543411644537e-06, + "loss": 0.6554, + "step": 183 + }, + { + "epoch": 0.0056393281843815125, + "grad_norm": 1.7086600964575829, + "learning_rate": 1.8794688457609809e-06, + "loss": 0.6525, + "step": 184 + }, + { + "epoch": 0.0056699767071227165, + "grad_norm": 3.8021707259666675, + "learning_rate": 1.8896833503575079e-06, + "loss": 1.0149, + "step": 185 + }, + { + "epoch": 0.00570062522986392, + "grad_norm": 3.0824341123917347, + "learning_rate": 1.8998978549540349e-06, + "loss": 0.9505, + "step": 186 + }, + { + "epoch": 
0.005731273752605124, + "grad_norm": 3.2195912419120627, + "learning_rate": 1.910112359550562e-06, + "loss": 0.9943, + "step": 187 + }, + { + "epoch": 0.005761922275346328, + "grad_norm": 1.584748377553122, + "learning_rate": 1.920326864147089e-06, + "loss": 0.6274, + "step": 188 + }, + { + "epoch": 0.005792570798087532, + "grad_norm": 3.402958847184411, + "learning_rate": 1.9305413687436163e-06, + "loss": 0.9569, + "step": 189 + }, + { + "epoch": 0.005823219320828736, + "grad_norm": 2.945358184897761, + "learning_rate": 1.940755873340143e-06, + "loss": 1.051, + "step": 190 + }, + { + "epoch": 0.00585386784356994, + "grad_norm": 2.8915254148956167, + "learning_rate": 1.9509703779366703e-06, + "loss": 0.9587, + "step": 191 + }, + { + "epoch": 0.005884516366311144, + "grad_norm": 3.8891486234004753, + "learning_rate": 1.961184882533197e-06, + "loss": 0.8889, + "step": 192 + }, + { + "epoch": 0.005915164889052348, + "grad_norm": 3.895325232768456, + "learning_rate": 1.9713993871297242e-06, + "loss": 0.9869, + "step": 193 + }, + { + "epoch": 0.005945813411793552, + "grad_norm": 3.354893261249651, + "learning_rate": 1.9816138917262514e-06, + "loss": 0.9624, + "step": 194 + }, + { + "epoch": 0.005976461934534756, + "grad_norm": 3.628338176896766, + "learning_rate": 1.9918283963227787e-06, + "loss": 0.9495, + "step": 195 + }, + { + "epoch": 0.00600711045727596, + "grad_norm": 3.5323553071453873, + "learning_rate": 2.002042900919306e-06, + "loss": 0.8969, + "step": 196 + }, + { + "epoch": 0.006037758980017163, + "grad_norm": 3.031344466866221, + "learning_rate": 2.0122574055158326e-06, + "loss": 0.8913, + "step": 197 + }, + { + "epoch": 0.006068407502758367, + "grad_norm": 4.11802180589706, + "learning_rate": 2.02247191011236e-06, + "loss": 0.9613, + "step": 198 + }, + { + "epoch": 0.006099056025499571, + "grad_norm": 3.492610491947565, + "learning_rate": 2.0326864147088866e-06, + "loss": 1.023, + "step": 199 + }, + { + "epoch": 0.0061297045482407745, + "grad_norm": 4.6942275026520806, + "learning_rate": 2.042900919305414e-06, + "loss": 1.1647, + "step": 200 + }, + { + "epoch": 0.0061603530709819785, + "grad_norm": 3.4565299014965656, + "learning_rate": 2.053115423901941e-06, + "loss": 0.8833, + "step": 201 + }, + { + "epoch": 0.006191001593723182, + "grad_norm": 3.644230511302545, + "learning_rate": 2.063329928498468e-06, + "loss": 0.9712, + "step": 202 + }, + { + "epoch": 0.006221650116464386, + "grad_norm": 1.6968571303555406, + "learning_rate": 2.073544433094995e-06, + "loss": 0.6351, + "step": 203 + }, + { + "epoch": 0.00625229863920559, + "grad_norm": 3.1848286499077387, + "learning_rate": 2.0837589376915222e-06, + "loss": 0.8456, + "step": 204 + }, + { + "epoch": 0.006282947161946794, + "grad_norm": 3.78546693636232, + "learning_rate": 2.0939734422880494e-06, + "loss": 0.9182, + "step": 205 + }, + { + "epoch": 0.006313595684687998, + "grad_norm": 3.175916076681233, + "learning_rate": 2.104187946884576e-06, + "loss": 0.9616, + "step": 206 + }, + { + "epoch": 0.006344244207429202, + "grad_norm": 2.7917080235606226, + "learning_rate": 2.1144024514811034e-06, + "loss": 0.9161, + "step": 207 + }, + { + "epoch": 0.006374892730170406, + "grad_norm": 1.5695735120259882, + "learning_rate": 2.12461695607763e-06, + "loss": 0.6476, + "step": 208 + }, + { + "epoch": 0.00640554125291161, + "grad_norm": 3.7340818230175086, + "learning_rate": 2.1348314606741574e-06, + "loss": 1.0016, + "step": 209 + }, + { + "epoch": 0.006436189775652814, + "grad_norm": 3.5509391533608863, + "learning_rate": 
2.1450459652706846e-06, + "loss": 0.9495, + "step": 210 + }, + { + "epoch": 0.006466838298394018, + "grad_norm": 3.4104898301445354, + "learning_rate": 2.155260469867212e-06, + "loss": 0.9637, + "step": 211 + }, + { + "epoch": 0.006497486821135222, + "grad_norm": 3.4367476129750654, + "learning_rate": 2.1654749744637386e-06, + "loss": 0.9581, + "step": 212 + }, + { + "epoch": 0.006528135343876425, + "grad_norm": 3.4871202243056563, + "learning_rate": 2.175689479060266e-06, + "loss": 0.9322, + "step": 213 + }, + { + "epoch": 0.006558783866617629, + "grad_norm": 3.1154715314840016, + "learning_rate": 2.1859039836567926e-06, + "loss": 0.9855, + "step": 214 + }, + { + "epoch": 0.006589432389358833, + "grad_norm": 3.3157629195787273, + "learning_rate": 2.1961184882533198e-06, + "loss": 0.8119, + "step": 215 + }, + { + "epoch": 0.0066200809121000365, + "grad_norm": 2.912465812604796, + "learning_rate": 2.206332992849847e-06, + "loss": 0.8865, + "step": 216 + }, + { + "epoch": 0.0066507294348412405, + "grad_norm": 3.0255597024987932, + "learning_rate": 2.216547497446374e-06, + "loss": 0.9906, + "step": 217 + }, + { + "epoch": 0.006681377957582444, + "grad_norm": 3.7896284807404204, + "learning_rate": 2.2267620020429014e-06, + "loss": 0.9427, + "step": 218 + }, + { + "epoch": 0.006712026480323648, + "grad_norm": 1.5126739039127572, + "learning_rate": 2.236976506639428e-06, + "loss": 0.6023, + "step": 219 + }, + { + "epoch": 0.006742675003064852, + "grad_norm": 1.4478259315800626, + "learning_rate": 2.2471910112359554e-06, + "loss": 0.6374, + "step": 220 + }, + { + "epoch": 0.006773323525806056, + "grad_norm": 3.128091088580022, + "learning_rate": 2.257405515832482e-06, + "loss": 0.9313, + "step": 221 + }, + { + "epoch": 0.00680397204854726, + "grad_norm": 1.570123120001384, + "learning_rate": 2.2676200204290094e-06, + "loss": 0.6458, + "step": 222 + }, + { + "epoch": 0.006834620571288464, + "grad_norm": 4.4883386692537774, + "learning_rate": 2.2778345250255366e-06, + "loss": 0.9949, + "step": 223 + }, + { + "epoch": 0.006865269094029668, + "grad_norm": 3.5275024342403483, + "learning_rate": 2.2880490296220638e-06, + "loss": 0.9457, + "step": 224 + }, + { + "epoch": 0.006895917616770872, + "grad_norm": 3.838269511990043, + "learning_rate": 2.2982635342185906e-06, + "loss": 0.9286, + "step": 225 + }, + { + "epoch": 0.006926566139512076, + "grad_norm": 2.969449117064357, + "learning_rate": 2.3084780388151178e-06, + "loss": 0.8549, + "step": 226 + }, + { + "epoch": 0.00695721466225328, + "grad_norm": 1.3529663649608028, + "learning_rate": 2.3186925434116445e-06, + "loss": 0.6271, + "step": 227 + }, + { + "epoch": 0.006987863184994484, + "grad_norm": 4.831412189045911, + "learning_rate": 2.3289070480081717e-06, + "loss": 0.9244, + "step": 228 + }, + { + "epoch": 0.007018511707735687, + "grad_norm": 3.4068773358338404, + "learning_rate": 2.339121552604699e-06, + "loss": 1.0225, + "step": 229 + }, + { + "epoch": 0.007049160230476891, + "grad_norm": 3.1162020144088878, + "learning_rate": 2.3493360572012257e-06, + "loss": 0.953, + "step": 230 + }, + { + "epoch": 0.007079808753218095, + "grad_norm": 1.31876673780315, + "learning_rate": 2.359550561797753e-06, + "loss": 0.623, + "step": 231 + }, + { + "epoch": 0.0071104572759592985, + "grad_norm": 3.963931707540173, + "learning_rate": 2.36976506639428e-06, + "loss": 0.9648, + "step": 232 + }, + { + "epoch": 0.0071411057987005025, + "grad_norm": 3.2611983131442823, + "learning_rate": 2.3799795709908073e-06, + "loss": 0.7852, + "step": 233 + }, + { + "epoch": 
0.007171754321441706, + "grad_norm": 3.1143191308964107, + "learning_rate": 2.390194075587334e-06, + "loss": 0.9833, + "step": 234 + }, + { + "epoch": 0.00720240284418291, + "grad_norm": 3.563077023898005, + "learning_rate": 2.4004085801838613e-06, + "loss": 0.8967, + "step": 235 + }, + { + "epoch": 0.007233051366924114, + "grad_norm": 2.9768192555020683, + "learning_rate": 2.410623084780388e-06, + "loss": 0.8465, + "step": 236 + }, + { + "epoch": 0.007263699889665318, + "grad_norm": 3.3347625979665017, + "learning_rate": 2.4208375893769153e-06, + "loss": 0.976, + "step": 237 + }, + { + "epoch": 0.007294348412406522, + "grad_norm": 2.7833419732233824, + "learning_rate": 2.4310520939734425e-06, + "loss": 0.9077, + "step": 238 + }, + { + "epoch": 0.007324996935147726, + "grad_norm": 3.668609639664664, + "learning_rate": 2.4412665985699697e-06, + "loss": 0.9406, + "step": 239 + }, + { + "epoch": 0.00735564545788893, + "grad_norm": 3.8137923072932165, + "learning_rate": 2.451481103166497e-06, + "loss": 0.8719, + "step": 240 + }, + { + "epoch": 0.007386293980630134, + "grad_norm": 2.947444139435356, + "learning_rate": 2.4616956077630237e-06, + "loss": 0.932, + "step": 241 + }, + { + "epoch": 0.007416942503371338, + "grad_norm": 3.7768728770534783, + "learning_rate": 2.4719101123595505e-06, + "loss": 0.8959, + "step": 242 + }, + { + "epoch": 0.007447591026112542, + "grad_norm": 3.284586616508773, + "learning_rate": 2.4821246169560777e-06, + "loss": 0.8629, + "step": 243 + }, + { + "epoch": 0.007478239548853746, + "grad_norm": 3.3649909671723415, + "learning_rate": 2.492339121552605e-06, + "loss": 0.8381, + "step": 244 + }, + { + "epoch": 0.007508888071594949, + "grad_norm": 3.496289795428584, + "learning_rate": 2.5025536261491317e-06, + "loss": 0.9189, + "step": 245 + }, + { + "epoch": 0.007539536594336153, + "grad_norm": 3.0710547598716618, + "learning_rate": 2.5127681307456593e-06, + "loss": 0.8905, + "step": 246 + }, + { + "epoch": 0.007570185117077357, + "grad_norm": 3.838751065413459, + "learning_rate": 2.522982635342186e-06, + "loss": 0.8679, + "step": 247 + }, + { + "epoch": 0.0076008336398185605, + "grad_norm": 3.062461013469598, + "learning_rate": 2.5331971399387133e-06, + "loss": 0.9768, + "step": 248 + }, + { + "epoch": 0.0076314821625597645, + "grad_norm": 3.3316521886569803, + "learning_rate": 2.54341164453524e-06, + "loss": 0.8299, + "step": 249 + }, + { + "epoch": 0.007662130685300968, + "grad_norm": 3.0248590818593146, + "learning_rate": 2.5536261491317673e-06, + "loss": 0.8794, + "step": 250 + }, + { + "epoch": 0.007692779208042172, + "grad_norm": 3.456784157041569, + "learning_rate": 2.5638406537282945e-06, + "loss": 0.8729, + "step": 251 + }, + { + "epoch": 0.007723427730783376, + "grad_norm": 2.761647645926438, + "learning_rate": 2.5740551583248213e-06, + "loss": 0.9185, + "step": 252 + }, + { + "epoch": 0.00775407625352458, + "grad_norm": 2.950582587174075, + "learning_rate": 2.584269662921349e-06, + "loss": 0.9503, + "step": 253 + }, + { + "epoch": 0.007784724776265784, + "grad_norm": 3.4234213708522345, + "learning_rate": 2.5944841675178757e-06, + "loss": 0.9499, + "step": 254 + }, + { + "epoch": 0.007815373299006988, + "grad_norm": 3.368769285818423, + "learning_rate": 2.6046986721144025e-06, + "loss": 0.8869, + "step": 255 + }, + { + "epoch": 0.007846021821748192, + "grad_norm": 3.956938858049432, + "learning_rate": 2.6149131767109297e-06, + "loss": 0.8755, + "step": 256 + }, + { + "epoch": 0.007876670344489396, + "grad_norm": 3.3683244053666113, + "learning_rate": 
2.6251276813074565e-06, + "loss": 0.9338, + "step": 257 + }, + { + "epoch": 0.0079073188672306, + "grad_norm": 3.1117512713898416, + "learning_rate": 2.635342185903984e-06, + "loss": 0.8871, + "step": 258 + }, + { + "epoch": 0.007937967389971804, + "grad_norm": 3.0936219587603393, + "learning_rate": 2.645556690500511e-06, + "loss": 0.8698, + "step": 259 + }, + { + "epoch": 0.007968615912713008, + "grad_norm": 3.3080286477749463, + "learning_rate": 2.6557711950970376e-06, + "loss": 0.9051, + "step": 260 + }, + { + "epoch": 0.007999264435454212, + "grad_norm": 1.325321405348781, + "learning_rate": 2.6659856996935653e-06, + "loss": 0.6003, + "step": 261 + }, + { + "epoch": 0.008029912958195416, + "grad_norm": 3.0774188858172233, + "learning_rate": 2.676200204290092e-06, + "loss": 0.8745, + "step": 262 + }, + { + "epoch": 0.00806056148093662, + "grad_norm": 2.9238034143692473, + "learning_rate": 2.6864147088866193e-06, + "loss": 0.9636, + "step": 263 + }, + { + "epoch": 0.008091210003677823, + "grad_norm": 1.3877663216331586, + "learning_rate": 2.696629213483146e-06, + "loss": 0.6274, + "step": 264 + }, + { + "epoch": 0.008121858526419027, + "grad_norm": 3.5530050725438946, + "learning_rate": 2.7068437180796737e-06, + "loss": 0.9121, + "step": 265 + }, + { + "epoch": 0.008152507049160231, + "grad_norm": 2.9744806372221286, + "learning_rate": 2.7170582226762004e-06, + "loss": 0.8906, + "step": 266 + }, + { + "epoch": 0.008183155571901435, + "grad_norm": 2.9868029297560006, + "learning_rate": 2.7272727272727272e-06, + "loss": 0.9627, + "step": 267 + }, + { + "epoch": 0.008213804094642637, + "grad_norm": 1.2133921082687236, + "learning_rate": 2.737487231869255e-06, + "loss": 0.5921, + "step": 268 + }, + { + "epoch": 0.008244452617383841, + "grad_norm": 3.408567737654011, + "learning_rate": 2.7477017364657816e-06, + "loss": 1.027, + "step": 269 + }, + { + "epoch": 0.008275101140125045, + "grad_norm": 3.136925372366743, + "learning_rate": 2.757916241062309e-06, + "loss": 0.9307, + "step": 270 + }, + { + "epoch": 0.00830574966286625, + "grad_norm": 1.202703434706634, + "learning_rate": 2.7681307456588356e-06, + "loss": 0.6134, + "step": 271 + }, + { + "epoch": 0.008336398185607453, + "grad_norm": 1.4491471908613063, + "learning_rate": 2.7783452502553624e-06, + "loss": 0.6243, + "step": 272 + }, + { + "epoch": 0.008367046708348657, + "grad_norm": 3.644689750424839, + "learning_rate": 2.78855975485189e-06, + "loss": 0.9169, + "step": 273 + }, + { + "epoch": 0.008397695231089861, + "grad_norm": 3.124196865258082, + "learning_rate": 2.798774259448417e-06, + "loss": 0.8765, + "step": 274 + }, + { + "epoch": 0.008428343753831065, + "grad_norm": 2.980329100992928, + "learning_rate": 2.8089887640449444e-06, + "loss": 0.8624, + "step": 275 + }, + { + "epoch": 0.008458992276572269, + "grad_norm": 1.254692338126859, + "learning_rate": 2.8192032686414712e-06, + "loss": 0.6215, + "step": 276 + }, + { + "epoch": 0.008489640799313473, + "grad_norm": 3.432027650921869, + "learning_rate": 2.829417773237998e-06, + "loss": 0.9062, + "step": 277 + }, + { + "epoch": 0.008520289322054677, + "grad_norm": 3.4231401879558176, + "learning_rate": 2.839632277834525e-06, + "loss": 0.969, + "step": 278 + }, + { + "epoch": 0.00855093784479588, + "grad_norm": 1.196910987376432, + "learning_rate": 2.849846782431052e-06, + "loss": 0.6257, + "step": 279 + }, + { + "epoch": 0.008581586367537085, + "grad_norm": 2.905759025137128, + "learning_rate": 2.8600612870275796e-06, + "loss": 0.876, + "step": 280 + }, + { + "epoch": 
0.008612234890278288, + "grad_norm": 3.1684751541148772, + "learning_rate": 2.8702757916241064e-06, + "loss": 0.8966, + "step": 281 + }, + { + "epoch": 0.008642883413019492, + "grad_norm": 3.2479875806829988, + "learning_rate": 2.8804902962206336e-06, + "loss": 0.9053, + "step": 282 + }, + { + "epoch": 0.008673531935760696, + "grad_norm": 3.861480654797683, + "learning_rate": 2.890704800817161e-06, + "loss": 1.0196, + "step": 283 + }, + { + "epoch": 0.0087041804585019, + "grad_norm": 2.6276883618947537, + "learning_rate": 2.9009193054136876e-06, + "loss": 0.9337, + "step": 284 + }, + { + "epoch": 0.008734828981243104, + "grad_norm": 3.1526301701410064, + "learning_rate": 2.911133810010215e-06, + "loss": 0.9101, + "step": 285 + }, + { + "epoch": 0.008765477503984308, + "grad_norm": 3.2400978103925566, + "learning_rate": 2.9213483146067416e-06, + "loss": 0.8961, + "step": 286 + }, + { + "epoch": 0.008796126026725512, + "grad_norm": 3.0005165824272835, + "learning_rate": 2.931562819203269e-06, + "loss": 0.9566, + "step": 287 + }, + { + "epoch": 0.008826774549466716, + "grad_norm": 2.9487717797007957, + "learning_rate": 2.941777323799796e-06, + "loss": 0.9033, + "step": 288 + }, + { + "epoch": 0.00885742307220792, + "grad_norm": 3.730179151234975, + "learning_rate": 2.9519918283963228e-06, + "loss": 0.8556, + "step": 289 + }, + { + "epoch": 0.008888071594949124, + "grad_norm": 3.7700974050309193, + "learning_rate": 2.9622063329928504e-06, + "loss": 0.8963, + "step": 290 + }, + { + "epoch": 0.008918720117690328, + "grad_norm": 3.169834681090144, + "learning_rate": 2.972420837589377e-06, + "loss": 0.8297, + "step": 291 + }, + { + "epoch": 0.008949368640431532, + "grad_norm": 3.1715819801310867, + "learning_rate": 2.9826353421859044e-06, + "loss": 0.781, + "step": 292 + }, + { + "epoch": 0.008980017163172736, + "grad_norm": 1.196888349087265, + "learning_rate": 2.992849846782431e-06, + "loss": 0.6063, + "step": 293 + }, + { + "epoch": 0.00901066568591394, + "grad_norm": 3.2023828031615325, + "learning_rate": 3.003064351378958e-06, + "loss": 0.9106, + "step": 294 + }, + { + "epoch": 0.009041314208655143, + "grad_norm": 3.206293503182386, + "learning_rate": 3.0132788559754856e-06, + "loss": 0.9296, + "step": 295 + }, + { + "epoch": 0.009071962731396347, + "grad_norm": 2.720214226482726, + "learning_rate": 3.0234933605720124e-06, + "loss": 0.8801, + "step": 296 + }, + { + "epoch": 0.009102611254137551, + "grad_norm": 1.256799602771568, + "learning_rate": 3.03370786516854e-06, + "loss": 0.6041, + "step": 297 + }, + { + "epoch": 0.009133259776878755, + "grad_norm": 1.1542869945939802, + "learning_rate": 3.0439223697650668e-06, + "loss": 0.5804, + "step": 298 + }, + { + "epoch": 0.00916390829961996, + "grad_norm": 3.3196795926538893, + "learning_rate": 3.0541368743615935e-06, + "loss": 0.9078, + "step": 299 + }, + { + "epoch": 0.009194556822361161, + "grad_norm": 3.285544830679664, + "learning_rate": 3.0643513789581207e-06, + "loss": 0.7894, + "step": 300 + }, + { + "epoch": 0.009225205345102365, + "grad_norm": 3.2963910390492788, + "learning_rate": 3.0745658835546475e-06, + "loss": 0.8708, + "step": 301 + }, + { + "epoch": 0.00925585386784357, + "grad_norm": 2.9521139312987987, + "learning_rate": 3.084780388151175e-06, + "loss": 0.868, + "step": 302 + }, + { + "epoch": 0.009286502390584773, + "grad_norm": 1.145885534241748, + "learning_rate": 3.094994892747702e-06, + "loss": 0.6004, + "step": 303 + }, + { + "epoch": 0.009317150913325977, + "grad_norm": 3.4916511900121714, + "learning_rate": 
3.105209397344229e-06, + "loss": 0.8709, + "step": 304 + }, + { + "epoch": 0.009347799436067181, + "grad_norm": 3.530825277936294, + "learning_rate": 3.1154239019407563e-06, + "loss": 0.8898, + "step": 305 + }, + { + "epoch": 0.009378447958808385, + "grad_norm": 3.1063487790596156, + "learning_rate": 3.125638406537283e-06, + "loss": 0.7664, + "step": 306 + }, + { + "epoch": 0.009409096481549589, + "grad_norm": 3.0430864825715553, + "learning_rate": 3.1358529111338103e-06, + "loss": 0.9034, + "step": 307 + }, + { + "epoch": 0.009439745004290793, + "grad_norm": 2.9103349357785797, + "learning_rate": 3.146067415730337e-06, + "loss": 0.9009, + "step": 308 + }, + { + "epoch": 0.009470393527031997, + "grad_norm": 3.4237820164263493, + "learning_rate": 3.1562819203268647e-06, + "loss": 1.0147, + "step": 309 + }, + { + "epoch": 0.0095010420497732, + "grad_norm": 1.1021551560504406, + "learning_rate": 3.1664964249233915e-06, + "loss": 0.5914, + "step": 310 + }, + { + "epoch": 0.009531690572514405, + "grad_norm": 3.323931624517119, + "learning_rate": 3.1767109295199183e-06, + "loss": 0.8696, + "step": 311 + }, + { + "epoch": 0.009562339095255609, + "grad_norm": 3.492881975811172, + "learning_rate": 3.186925434116446e-06, + "loss": 0.8417, + "step": 312 + }, + { + "epoch": 0.009592987617996812, + "grad_norm": 3.4256018723459283, + "learning_rate": 3.1971399387129727e-06, + "loss": 1.016, + "step": 313 + }, + { + "epoch": 0.009623636140738016, + "grad_norm": 1.174838930809781, + "learning_rate": 3.2073544433095e-06, + "loss": 0.6117, + "step": 314 + }, + { + "epoch": 0.00965428466347922, + "grad_norm": 3.9507426029881403, + "learning_rate": 3.2175689479060267e-06, + "loss": 0.9215, + "step": 315 + }, + { + "epoch": 0.009684933186220424, + "grad_norm": 4.443852053675296, + "learning_rate": 3.2277834525025535e-06, + "loss": 0.7801, + "step": 316 + }, + { + "epoch": 0.009715581708961628, + "grad_norm": 3.548063932940883, + "learning_rate": 3.237997957099081e-06, + "loss": 1.0531, + "step": 317 + }, + { + "epoch": 0.009746230231702832, + "grad_norm": 3.4526896883745177, + "learning_rate": 3.248212461695608e-06, + "loss": 0.9079, + "step": 318 + }, + { + "epoch": 0.009776878754444036, + "grad_norm": 3.203770670419235, + "learning_rate": 3.258426966292135e-06, + "loss": 0.8072, + "step": 319 + }, + { + "epoch": 0.00980752727718524, + "grad_norm": 3.123528345930958, + "learning_rate": 3.2686414708886623e-06, + "loss": 0.7823, + "step": 320 + }, + { + "epoch": 0.009838175799926444, + "grad_norm": 3.762634285651869, + "learning_rate": 3.278855975485189e-06, + "loss": 0.9414, + "step": 321 + }, + { + "epoch": 0.009868824322667648, + "grad_norm": 3.6338575050821036, + "learning_rate": 3.2890704800817163e-06, + "loss": 0.8684, + "step": 322 + }, + { + "epoch": 0.009899472845408852, + "grad_norm": 3.28653890140348, + "learning_rate": 3.299284984678243e-06, + "loss": 0.893, + "step": 323 + }, + { + "epoch": 0.009930121368150056, + "grad_norm": 2.7837310717604447, + "learning_rate": 3.3094994892747707e-06, + "loss": 0.7737, + "step": 324 + }, + { + "epoch": 0.00996076989089126, + "grad_norm": 3.3372920338551104, + "learning_rate": 3.3197139938712975e-06, + "loss": 0.9097, + "step": 325 + }, + { + "epoch": 0.009991418413632464, + "grad_norm": 3.2469963777045976, + "learning_rate": 3.3299284984678247e-06, + "loss": 0.8756, + "step": 326 + }, + { + "epoch": 0.010022066936373667, + "grad_norm": 3.2128483631098845, + "learning_rate": 3.340143003064352e-06, + "loss": 0.9081, + "step": 327 + }, + { + "epoch": 
0.010052715459114871, + "grad_norm": 3.8513228597554354, + "learning_rate": 3.3503575076608787e-06, + "loss": 0.9289, + "step": 328 + }, + { + "epoch": 0.010083363981856075, + "grad_norm": 3.088653553752994, + "learning_rate": 3.360572012257406e-06, + "loss": 0.7953, + "step": 329 + }, + { + "epoch": 0.01011401250459728, + "grad_norm": 2.967548543873015, + "learning_rate": 3.3707865168539327e-06, + "loss": 0.8443, + "step": 330 + }, + { + "epoch": 0.010144661027338481, + "grad_norm": 2.9524092822709673, + "learning_rate": 3.3810010214504603e-06, + "loss": 0.8719, + "step": 331 + }, + { + "epoch": 0.010175309550079685, + "grad_norm": 3.4395108755500416, + "learning_rate": 3.391215526046987e-06, + "loss": 0.9401, + "step": 332 + }, + { + "epoch": 0.01020595807282089, + "grad_norm": 2.794432689185305, + "learning_rate": 3.401430030643514e-06, + "loss": 0.8456, + "step": 333 + }, + { + "epoch": 0.010236606595562093, + "grad_norm": 3.1290674602092174, + "learning_rate": 3.411644535240041e-06, + "loss": 0.9206, + "step": 334 + }, + { + "epoch": 0.010267255118303297, + "grad_norm": 2.8996280117902753, + "learning_rate": 3.4218590398365683e-06, + "loss": 0.8386, + "step": 335 + }, + { + "epoch": 0.010297903641044501, + "grad_norm": 2.9679830803308827, + "learning_rate": 3.4320735444330955e-06, + "loss": 0.8024, + "step": 336 + }, + { + "epoch": 0.010328552163785705, + "grad_norm": 3.2427858686387094, + "learning_rate": 3.4422880490296222e-06, + "loss": 0.9389, + "step": 337 + }, + { + "epoch": 0.010359200686526909, + "grad_norm": 3.62830992137235, + "learning_rate": 3.452502553626149e-06, + "loss": 0.8063, + "step": 338 + }, + { + "epoch": 0.010389849209268113, + "grad_norm": 3.6016582424842363, + "learning_rate": 3.4627170582226766e-06, + "loss": 0.8186, + "step": 339 + }, + { + "epoch": 0.010420497732009317, + "grad_norm": 2.8348699545735205, + "learning_rate": 3.4729315628192034e-06, + "loss": 0.8903, + "step": 340 + }, + { + "epoch": 0.01045114625475052, + "grad_norm": 2.923216936924085, + "learning_rate": 3.4831460674157306e-06, + "loss": 0.9081, + "step": 341 + }, + { + "epoch": 0.010481794777491725, + "grad_norm": 3.148903452330976, + "learning_rate": 3.493360572012258e-06, + "loss": 0.8502, + "step": 342 + }, + { + "epoch": 0.010512443300232929, + "grad_norm": 3.9910381398825026, + "learning_rate": 3.5035750766087846e-06, + "loss": 0.954, + "step": 343 + }, + { + "epoch": 0.010543091822974133, + "grad_norm": 3.084968035257475, + "learning_rate": 3.513789581205312e-06, + "loss": 0.9003, + "step": 344 + }, + { + "epoch": 0.010573740345715336, + "grad_norm": 2.845978021479907, + "learning_rate": 3.5240040858018386e-06, + "loss": 0.9905, + "step": 345 + }, + { + "epoch": 0.01060438886845654, + "grad_norm": 1.209140998227631, + "learning_rate": 3.5342185903983662e-06, + "loss": 0.611, + "step": 346 + }, + { + "epoch": 0.010635037391197744, + "grad_norm": 2.887730978389089, + "learning_rate": 3.544433094994893e-06, + "loss": 0.8483, + "step": 347 + }, + { + "epoch": 0.010665685913938948, + "grad_norm": 3.2585524233911145, + "learning_rate": 3.5546475995914202e-06, + "loss": 0.892, + "step": 348 + }, + { + "epoch": 0.010696334436680152, + "grad_norm": 3.6005153222646533, + "learning_rate": 3.564862104187947e-06, + "loss": 0.92, + "step": 349 + }, + { + "epoch": 0.010726982959421356, + "grad_norm": 3.497421380095859, + "learning_rate": 3.575076608784474e-06, + "loss": 0.8971, + "step": 350 + }, + { + "epoch": 0.01075763148216256, + "grad_norm": 2.820905183354147, + "learning_rate": 
3.5852911133810014e-06, + "loss": 0.8145, + "step": 351 + }, + { + "epoch": 0.010788280004903764, + "grad_norm": 3.2562703727312052, + "learning_rate": 3.595505617977528e-06, + "loss": 0.9036, + "step": 352 + }, + { + "epoch": 0.010818928527644968, + "grad_norm": 2.820818421620116, + "learning_rate": 3.605720122574056e-06, + "loss": 0.8782, + "step": 353 + }, + { + "epoch": 0.010849577050386172, + "grad_norm": 3.054396546470862, + "learning_rate": 3.6159346271705826e-06, + "loss": 0.9613, + "step": 354 + }, + { + "epoch": 0.010880225573127376, + "grad_norm": 2.8020513916779604, + "learning_rate": 3.6261491317671094e-06, + "loss": 0.8948, + "step": 355 + }, + { + "epoch": 0.01091087409586858, + "grad_norm": 3.1691640543283333, + "learning_rate": 3.6363636363636366e-06, + "loss": 0.8595, + "step": 356 + }, + { + "epoch": 0.010941522618609784, + "grad_norm": 2.755821426341309, + "learning_rate": 3.646578140960164e-06, + "loss": 0.9276, + "step": 357 + }, + { + "epoch": 0.010972171141350988, + "grad_norm": 3.0467755189675088, + "learning_rate": 3.656792645556691e-06, + "loss": 0.9529, + "step": 358 + }, + { + "epoch": 0.011002819664092191, + "grad_norm": 2.8463240178459355, + "learning_rate": 3.6670071501532178e-06, + "loss": 0.8382, + "step": 359 + }, + { + "epoch": 0.011033468186833395, + "grad_norm": 2.9422459128005247, + "learning_rate": 3.6772216547497446e-06, + "loss": 0.8948, + "step": 360 + }, + { + "epoch": 0.0110641167095746, + "grad_norm": 3.1603288049184464, + "learning_rate": 3.687436159346272e-06, + "loss": 0.8442, + "step": 361 + }, + { + "epoch": 0.011094765232315803, + "grad_norm": 3.164658989542033, + "learning_rate": 3.697650663942799e-06, + "loss": 0.89, + "step": 362 + }, + { + "epoch": 0.011125413755057005, + "grad_norm": 1.1409301753838441, + "learning_rate": 3.707865168539326e-06, + "loss": 0.5911, + "step": 363 + }, + { + "epoch": 0.01115606227779821, + "grad_norm": 2.7879491279335746, + "learning_rate": 3.718079673135853e-06, + "loss": 0.8923, + "step": 364 + }, + { + "epoch": 0.011186710800539413, + "grad_norm": 2.9077301280810604, + "learning_rate": 3.7282941777323806e-06, + "loss": 0.8232, + "step": 365 + }, + { + "epoch": 0.011217359323280617, + "grad_norm": 1.1461361593937374, + "learning_rate": 3.7385086823289074e-06, + "loss": 0.5908, + "step": 366 + }, + { + "epoch": 0.011248007846021821, + "grad_norm": 2.8411152333722143, + "learning_rate": 3.748723186925434e-06, + "loss": 0.8451, + "step": 367 + }, + { + "epoch": 0.011278656368763025, + "grad_norm": 1.1012079176578276, + "learning_rate": 3.7589376915219618e-06, + "loss": 0.5916, + "step": 368 + }, + { + "epoch": 0.011309304891504229, + "grad_norm": 3.240576902151979, + "learning_rate": 3.7691521961184886e-06, + "loss": 0.9554, + "step": 369 + }, + { + "epoch": 0.011339953414245433, + "grad_norm": 3.242171668872021, + "learning_rate": 3.7793667007150158e-06, + "loss": 0.8115, + "step": 370 + }, + { + "epoch": 0.011370601936986637, + "grad_norm": 1.097455905226801, + "learning_rate": 3.7895812053115425e-06, + "loss": 0.617, + "step": 371 + }, + { + "epoch": 0.01140125045972784, + "grad_norm": 3.06185297784939, + "learning_rate": 3.7997957099080697e-06, + "loss": 0.8526, + "step": 372 + }, + { + "epoch": 0.011431898982469045, + "grad_norm": 2.6647185164077887, + "learning_rate": 3.810010214504597e-06, + "loss": 0.8372, + "step": 373 + }, + { + "epoch": 0.011462547505210249, + "grad_norm": 2.7828450164086127, + "learning_rate": 3.820224719101124e-06, + "loss": 0.8637, + "step": 374 + }, + { + "epoch": 
0.011493196027951453, + "grad_norm": 2.907795184765188, + "learning_rate": 3.830439223697651e-06, + "loss": 0.8525, + "step": 375 + }, + { + "epoch": 0.011523844550692657, + "grad_norm": 1.1270691778333481, + "learning_rate": 3.840653728294178e-06, + "loss": 0.571, + "step": 376 + }, + { + "epoch": 0.01155449307343386, + "grad_norm": 2.954371068526859, + "learning_rate": 3.850868232890705e-06, + "loss": 0.8246, + "step": 377 + }, + { + "epoch": 0.011585141596175064, + "grad_norm": 1.1165992979946426, + "learning_rate": 3.8610827374872325e-06, + "loss": 0.5577, + "step": 378 + }, + { + "epoch": 0.011615790118916268, + "grad_norm": 1.1587227705176206, + "learning_rate": 3.871297242083759e-06, + "loss": 0.6087, + "step": 379 + }, + { + "epoch": 0.011646438641657472, + "grad_norm": 3.1812256190265473, + "learning_rate": 3.881511746680286e-06, + "loss": 0.8291, + "step": 380 + }, + { + "epoch": 0.011677087164398676, + "grad_norm": 2.8417051005728804, + "learning_rate": 3.891726251276814e-06, + "loss": 0.9287, + "step": 381 + }, + { + "epoch": 0.01170773568713988, + "grad_norm": 3.089530794182103, + "learning_rate": 3.9019407558733405e-06, + "loss": 0.8419, + "step": 382 + }, + { + "epoch": 0.011738384209881084, + "grad_norm": 2.9460995591158525, + "learning_rate": 3.912155260469867e-06, + "loss": 0.9367, + "step": 383 + }, + { + "epoch": 0.011769032732622288, + "grad_norm": 3.0032635416848623, + "learning_rate": 3.922369765066394e-06, + "loss": 0.86, + "step": 384 + }, + { + "epoch": 0.011799681255363492, + "grad_norm": 3.130418104080654, + "learning_rate": 3.932584269662922e-06, + "loss": 0.9164, + "step": 385 + }, + { + "epoch": 0.011830329778104696, + "grad_norm": 3.2874669135575876, + "learning_rate": 3.9427987742594485e-06, + "loss": 0.882, + "step": 386 + }, + { + "epoch": 0.0118609783008459, + "grad_norm": 2.7462663469800606, + "learning_rate": 3.953013278855976e-06, + "loss": 0.8852, + "step": 387 + }, + { + "epoch": 0.011891626823587104, + "grad_norm": 3.0404997266571034, + "learning_rate": 3.963227783452503e-06, + "loss": 0.9054, + "step": 388 + }, + { + "epoch": 0.011922275346328308, + "grad_norm": 2.646623183660799, + "learning_rate": 3.97344228804903e-06, + "loss": 0.8949, + "step": 389 + }, + { + "epoch": 0.011952923869069512, + "grad_norm": 2.878337501045697, + "learning_rate": 3.983656792645557e-06, + "loss": 0.9149, + "step": 390 + }, + { + "epoch": 0.011983572391810715, + "grad_norm": 2.817348320722246, + "learning_rate": 3.993871297242084e-06, + "loss": 0.8229, + "step": 391 + }, + { + "epoch": 0.01201422091455192, + "grad_norm": 3.1649329560698107, + "learning_rate": 4.004085801838612e-06, + "loss": 0.8127, + "step": 392 + }, + { + "epoch": 0.012044869437293123, + "grad_norm": 3.09471142301152, + "learning_rate": 4.0143003064351385e-06, + "loss": 0.7908, + "step": 393 + }, + { + "epoch": 0.012075517960034325, + "grad_norm": 3.097526676154771, + "learning_rate": 4.024514811031665e-06, + "loss": 0.8022, + "step": 394 + }, + { + "epoch": 0.01210616648277553, + "grad_norm": 2.836267512199918, + "learning_rate": 4.034729315628192e-06, + "loss": 0.9257, + "step": 395 + }, + { + "epoch": 0.012136815005516733, + "grad_norm": 2.8740920124367495, + "learning_rate": 4.04494382022472e-06, + "loss": 0.9031, + "step": 396 + }, + { + "epoch": 0.012167463528257937, + "grad_norm": 2.899631215834397, + "learning_rate": 4.0551583248212465e-06, + "loss": 0.9323, + "step": 397 + }, + { + "epoch": 0.012198112050999141, + "grad_norm": 1.180737801166812, + "learning_rate": 4.065372829417773e-06, + 
"loss": 0.5903, + "step": 398 + }, + { + "epoch": 0.012228760573740345, + "grad_norm": 3.3154310082963176, + "learning_rate": 4.0755873340143e-06, + "loss": 0.9241, + "step": 399 + }, + { + "epoch": 0.012259409096481549, + "grad_norm": 1.2495535692961328, + "learning_rate": 4.085801838610828e-06, + "loss": 0.5871, + "step": 400 + }, + { + "epoch": 0.012290057619222753, + "grad_norm": 3.145058778429256, + "learning_rate": 4.0960163432073544e-06, + "loss": 0.8722, + "step": 401 + }, + { + "epoch": 0.012320706141963957, + "grad_norm": 1.1473574529907897, + "learning_rate": 4.106230847803882e-06, + "loss": 0.5696, + "step": 402 + }, + { + "epoch": 0.01235135466470516, + "grad_norm": 3.1611926936694053, + "learning_rate": 4.116445352400409e-06, + "loss": 0.8728, + "step": 403 + }, + { + "epoch": 0.012382003187446365, + "grad_norm": 3.147789428183099, + "learning_rate": 4.126659856996936e-06, + "loss": 0.9528, + "step": 404 + }, + { + "epoch": 0.012412651710187569, + "grad_norm": 2.5184813900633, + "learning_rate": 4.136874361593463e-06, + "loss": 0.8784, + "step": 405 + }, + { + "epoch": 0.012443300232928773, + "grad_norm": 5.731046367423496, + "learning_rate": 4.14708886618999e-06, + "loss": 0.9508, + "step": 406 + }, + { + "epoch": 0.012473948755669977, + "grad_norm": 2.788289113040058, + "learning_rate": 4.157303370786518e-06, + "loss": 0.8914, + "step": 407 + }, + { + "epoch": 0.01250459727841118, + "grad_norm": 3.0283790110999744, + "learning_rate": 4.1675178753830445e-06, + "loss": 0.8298, + "step": 408 + }, + { + "epoch": 0.012535245801152384, + "grad_norm": 2.778791133345945, + "learning_rate": 4.177732379979571e-06, + "loss": 0.9067, + "step": 409 + }, + { + "epoch": 0.012565894323893588, + "grad_norm": 2.651708445690339, + "learning_rate": 4.187946884576099e-06, + "loss": 0.9043, + "step": 410 + }, + { + "epoch": 0.012596542846634792, + "grad_norm": 3.2551252660566967, + "learning_rate": 4.198161389172626e-06, + "loss": 0.9046, + "step": 411 + }, + { + "epoch": 0.012627191369375996, + "grad_norm": 2.7536016669389407, + "learning_rate": 4.208375893769152e-06, + "loss": 0.8608, + "step": 412 + }, + { + "epoch": 0.0126578398921172, + "grad_norm": 2.7224451511172276, + "learning_rate": 4.218590398365679e-06, + "loss": 0.819, + "step": 413 + }, + { + "epoch": 0.012688488414858404, + "grad_norm": 2.932661532139795, + "learning_rate": 4.228804902962207e-06, + "loss": 0.8893, + "step": 414 + }, + { + "epoch": 0.012719136937599608, + "grad_norm": 3.2867140708217484, + "learning_rate": 4.239019407558734e-06, + "loss": 0.8556, + "step": 415 + }, + { + "epoch": 0.012749785460340812, + "grad_norm": 2.960744005422641, + "learning_rate": 4.24923391215526e-06, + "loss": 0.8267, + "step": 416 + }, + { + "epoch": 0.012780433983082016, + "grad_norm": 1.107527803270888, + "learning_rate": 4.259448416751788e-06, + "loss": 0.571, + "step": 417 + }, + { + "epoch": 0.01281108250582322, + "grad_norm": 1.128221203923007, + "learning_rate": 4.269662921348315e-06, + "loss": 0.5564, + "step": 418 + }, + { + "epoch": 0.012841731028564424, + "grad_norm": 3.042497745475226, + "learning_rate": 4.2798774259448424e-06, + "loss": 0.9101, + "step": 419 + }, + { + "epoch": 0.012872379551305628, + "grad_norm": 3.096263008958437, + "learning_rate": 4.290091930541369e-06, + "loss": 0.9037, + "step": 420 + }, + { + "epoch": 0.012903028074046832, + "grad_norm": 3.338341059506719, + "learning_rate": 4.300306435137896e-06, + "loss": 0.7318, + "step": 421 + }, + { + "epoch": 0.012933676596788036, + "grad_norm": 
3.1289975038719504, + "learning_rate": 4.310520939734424e-06, + "loss": 0.9266, + "step": 422 + }, + { + "epoch": 0.01296432511952924, + "grad_norm": 2.9784029503771117, + "learning_rate": 4.32073544433095e-06, + "loss": 0.9363, + "step": 423 + }, + { + "epoch": 0.012994973642270443, + "grad_norm": 1.09953541753196, + "learning_rate": 4.330949948927477e-06, + "loss": 0.6152, + "step": 424 + }, + { + "epoch": 0.013025622165011647, + "grad_norm": 2.5610935319953896, + "learning_rate": 4.341164453524005e-06, + "loss": 0.7599, + "step": 425 + }, + { + "epoch": 0.01305627068775285, + "grad_norm": 2.938096626867143, + "learning_rate": 4.351378958120532e-06, + "loss": 0.9027, + "step": 426 + }, + { + "epoch": 0.013086919210494053, + "grad_norm": 3.1339434744162538, + "learning_rate": 4.361593462717058e-06, + "loss": 0.8451, + "step": 427 + }, + { + "epoch": 0.013117567733235257, + "grad_norm": 3.195467497167443, + "learning_rate": 4.371807967313585e-06, + "loss": 0.8248, + "step": 428 + }, + { + "epoch": 0.013148216255976461, + "grad_norm": 5.0019649027346995, + "learning_rate": 4.382022471910113e-06, + "loss": 0.938, + "step": 429 + }, + { + "epoch": 0.013178864778717665, + "grad_norm": 3.285980925845122, + "learning_rate": 4.3922369765066396e-06, + "loss": 0.7918, + "step": 430 + }, + { + "epoch": 0.01320951330145887, + "grad_norm": 2.9571937939593105, + "learning_rate": 4.402451481103167e-06, + "loss": 0.9191, + "step": 431 + }, + { + "epoch": 0.013240161824200073, + "grad_norm": 2.8765206307111084, + "learning_rate": 4.412665985699694e-06, + "loss": 0.806, + "step": 432 + }, + { + "epoch": 0.013270810346941277, + "grad_norm": 3.584199807678198, + "learning_rate": 4.422880490296221e-06, + "loss": 0.8353, + "step": 433 + }, + { + "epoch": 0.013301458869682481, + "grad_norm": 3.262627043539584, + "learning_rate": 4.433094994892748e-06, + "loss": 0.7976, + "step": 434 + }, + { + "epoch": 0.013332107392423685, + "grad_norm": 2.8098804035975817, + "learning_rate": 4.443309499489275e-06, + "loss": 0.9153, + "step": 435 + }, + { + "epoch": 0.013362755915164889, + "grad_norm": 3.949785753745224, + "learning_rate": 4.453524004085803e-06, + "loss": 0.856, + "step": 436 + }, + { + "epoch": 0.013393404437906093, + "grad_norm": 2.9203226284938357, + "learning_rate": 4.4637385086823296e-06, + "loss": 0.9262, + "step": 437 + }, + { + "epoch": 0.013424052960647297, + "grad_norm": 3.094861590254716, + "learning_rate": 4.473953013278856e-06, + "loss": 0.9133, + "step": 438 + }, + { + "epoch": 0.0134547014833885, + "grad_norm": 3.354187999142606, + "learning_rate": 4.484167517875383e-06, + "loss": 0.9474, + "step": 439 + }, + { + "epoch": 0.013485350006129704, + "grad_norm": 2.970212581147025, + "learning_rate": 4.494382022471911e-06, + "loss": 0.776, + "step": 440 + }, + { + "epoch": 0.013515998528870908, + "grad_norm": 2.8504655623526363, + "learning_rate": 4.5045965270684375e-06, + "loss": 0.8646, + "step": 441 + }, + { + "epoch": 0.013546647051612112, + "grad_norm": 2.8898271419501995, + "learning_rate": 4.514811031664964e-06, + "loss": 0.9165, + "step": 442 + }, + { + "epoch": 0.013577295574353316, + "grad_norm": 2.6853383411158136, + "learning_rate": 4.525025536261491e-06, + "loss": 0.8422, + "step": 443 + }, + { + "epoch": 0.01360794409709452, + "grad_norm": 3.2520016520407817, + "learning_rate": 4.535240040858019e-06, + "loss": 0.7906, + "step": 444 + }, + { + "epoch": 0.013638592619835724, + "grad_norm": 2.796046099350526, + "learning_rate": 4.5454545454545455e-06, + "loss": 0.7794, + "step": 445 + }, + 
{ + "epoch": 0.013669241142576928, + "grad_norm": 2.8459727573133713, + "learning_rate": 4.555669050051073e-06, + "loss": 0.9145, + "step": 446 + }, + { + "epoch": 0.013699889665318132, + "grad_norm": 2.8567568211903827, + "learning_rate": 4.5658835546476e-06, + "loss": 0.915, + "step": 447 + }, + { + "epoch": 0.013730538188059336, + "grad_norm": 2.5996944607252166, + "learning_rate": 4.5760980592441276e-06, + "loss": 0.8483, + "step": 448 + }, + { + "epoch": 0.01376118671080054, + "grad_norm": 3.0929327072666957, + "learning_rate": 4.586312563840654e-06, + "loss": 0.8443, + "step": 449 + }, + { + "epoch": 0.013791835233541744, + "grad_norm": 3.14269667687473, + "learning_rate": 4.596527068437181e-06, + "loss": 0.8169, + "step": 450 + }, + { + "epoch": 0.013822483756282948, + "grad_norm": 1.1130164188890606, + "learning_rate": 4.606741573033709e-06, + "loss": 0.5867, + "step": 451 + }, + { + "epoch": 0.013853132279024152, + "grad_norm": 3.08583715579591, + "learning_rate": 4.6169560776302355e-06, + "loss": 0.9182, + "step": 452 + }, + { + "epoch": 0.013883780801765356, + "grad_norm": 1.0242526582946794, + "learning_rate": 4.627170582226762e-06, + "loss": 0.5963, + "step": 453 + }, + { + "epoch": 0.01391442932450656, + "grad_norm": 3.2229578706655357, + "learning_rate": 4.637385086823289e-06, + "loss": 0.9167, + "step": 454 + }, + { + "epoch": 0.013945077847247763, + "grad_norm": 3.448179635243589, + "learning_rate": 4.647599591419817e-06, + "loss": 0.8293, + "step": 455 + }, + { + "epoch": 0.013975726369988967, + "grad_norm": 3.1820848899471215, + "learning_rate": 4.6578140960163435e-06, + "loss": 0.7903, + "step": 456 + }, + { + "epoch": 0.01400637489273017, + "grad_norm": 3.471281418457695, + "learning_rate": 4.66802860061287e-06, + "loss": 0.7659, + "step": 457 + }, + { + "epoch": 0.014037023415471373, + "grad_norm": 3.3992705417565103, + "learning_rate": 4.678243105209398e-06, + "loss": 0.9349, + "step": 458 + }, + { + "epoch": 0.014067671938212577, + "grad_norm": 2.980343166661902, + "learning_rate": 4.688457609805925e-06, + "loss": 0.8362, + "step": 459 + }, + { + "epoch": 0.014098320460953781, + "grad_norm": 2.952374666583926, + "learning_rate": 4.6986721144024515e-06, + "loss": 0.8013, + "step": 460 + }, + { + "epoch": 0.014128968983694985, + "grad_norm": 1.0328460162888402, + "learning_rate": 4.708886618998979e-06, + "loss": 0.582, + "step": 461 + }, + { + "epoch": 0.01415961750643619, + "grad_norm": 2.8696244504384825, + "learning_rate": 4.719101123595506e-06, + "loss": 0.8233, + "step": 462 + }, + { + "epoch": 0.014190266029177393, + "grad_norm": 1.0826929109753551, + "learning_rate": 4.7293156281920335e-06, + "loss": 0.5767, + "step": 463 + }, + { + "epoch": 0.014220914551918597, + "grad_norm": 2.8960959539110016, + "learning_rate": 4.73953013278856e-06, + "loss": 0.9009, + "step": 464 + }, + { + "epoch": 0.014251563074659801, + "grad_norm": 3.666953827474016, + "learning_rate": 4.749744637385087e-06, + "loss": 0.8415, + "step": 465 + }, + { + "epoch": 0.014282211597401005, + "grad_norm": 2.7528536539261355, + "learning_rate": 4.759959141981615e-06, + "loss": 0.9336, + "step": 466 + }, + { + "epoch": 0.014312860120142209, + "grad_norm": 2.9914210745375525, + "learning_rate": 4.7701736465781415e-06, + "loss": 0.7582, + "step": 467 + }, + { + "epoch": 0.014343508642883413, + "grad_norm": 2.7279115162474215, + "learning_rate": 4.780388151174668e-06, + "loss": 0.8423, + "step": 468 + }, + { + "epoch": 0.014374157165624617, + "grad_norm": 3.57434907291983, + "learning_rate": 
4.790602655771195e-06, + "loss": 0.9274, + "step": 469 + }, + { + "epoch": 0.01440480568836582, + "grad_norm": 2.8104257408627484, + "learning_rate": 4.800817160367723e-06, + "loss": 0.927, + "step": 470 + }, + { + "epoch": 0.014435454211107025, + "grad_norm": 3.1718354878775212, + "learning_rate": 4.8110316649642495e-06, + "loss": 0.8937, + "step": 471 + }, + { + "epoch": 0.014466102733848228, + "grad_norm": 3.027503797024198, + "learning_rate": 4.821246169560776e-06, + "loss": 0.8591, + "step": 472 + }, + { + "epoch": 0.014496751256589432, + "grad_norm": 3.340988495033096, + "learning_rate": 4.831460674157304e-06, + "loss": 0.8902, + "step": 473 + }, + { + "epoch": 0.014527399779330636, + "grad_norm": 3.0542153550347995, + "learning_rate": 4.841675178753831e-06, + "loss": 0.8944, + "step": 474 + }, + { + "epoch": 0.01455804830207184, + "grad_norm": 3.1332988398310095, + "learning_rate": 4.851889683350358e-06, + "loss": 0.813, + "step": 475 + }, + { + "epoch": 0.014588696824813044, + "grad_norm": 2.4289495181070624, + "learning_rate": 4.862104187946885e-06, + "loss": 0.7932, + "step": 476 + }, + { + "epoch": 0.014619345347554248, + "grad_norm": 2.9236133797987716, + "learning_rate": 4.872318692543412e-06, + "loss": 0.8317, + "step": 477 + }, + { + "epoch": 0.014649993870295452, + "grad_norm": 2.8958325070107396, + "learning_rate": 4.8825331971399395e-06, + "loss": 0.9119, + "step": 478 + }, + { + "epoch": 0.014680642393036656, + "grad_norm": 3.2694425317001894, + "learning_rate": 4.892747701736466e-06, + "loss": 0.9648, + "step": 479 + }, + { + "epoch": 0.01471129091577786, + "grad_norm": 2.888550415345471, + "learning_rate": 4.902962206332994e-06, + "loss": 0.852, + "step": 480 + }, + { + "epoch": 0.014741939438519064, + "grad_norm": 1.0386803967777178, + "learning_rate": 4.913176710929521e-06, + "loss": 0.5676, + "step": 481 + }, + { + "epoch": 0.014772587961260268, + "grad_norm": 2.9655098120878662, + "learning_rate": 4.9233912155260474e-06, + "loss": 0.9501, + "step": 482 + }, + { + "epoch": 0.014803236484001472, + "grad_norm": 3.083486146377912, + "learning_rate": 4.933605720122574e-06, + "loss": 0.8985, + "step": 483 + }, + { + "epoch": 0.014833885006742676, + "grad_norm": 2.8526146425085024, + "learning_rate": 4.943820224719101e-06, + "loss": 0.8646, + "step": 484 + }, + { + "epoch": 0.01486453352948388, + "grad_norm": 2.9473752036235723, + "learning_rate": 4.954034729315629e-06, + "loss": 0.9521, + "step": 485 + }, + { + "epoch": 0.014895182052225083, + "grad_norm": 1.0444232929877322, + "learning_rate": 4.964249233912155e-06, + "loss": 0.5651, + "step": 486 + }, + { + "epoch": 0.014925830574966287, + "grad_norm": 2.795716172907219, + "learning_rate": 4.974463738508682e-06, + "loss": 0.7568, + "step": 487 + }, + { + "epoch": 0.014956479097707491, + "grad_norm": 2.9334155532877366, + "learning_rate": 4.98467824310521e-06, + "loss": 0.8369, + "step": 488 + }, + { + "epoch": 0.014987127620448694, + "grad_norm": 2.9299257923105584, + "learning_rate": 4.994892747701737e-06, + "loss": 0.6903, + "step": 489 + }, + { + "epoch": 0.015017776143189897, + "grad_norm": 3.2340468250860788, + "learning_rate": 5.005107252298263e-06, + "loss": 0.9858, + "step": 490 + }, + { + "epoch": 0.015048424665931101, + "grad_norm": 2.6522099811443067, + "learning_rate": 5.015321756894791e-06, + "loss": 0.9162, + "step": 491 + }, + { + "epoch": 0.015079073188672305, + "grad_norm": 2.685659940829006, + "learning_rate": 5.025536261491319e-06, + "loss": 0.8806, + "step": 492 + }, + { + "epoch": 
0.01510972171141351, + "grad_norm": 3.2233546086130502, + "learning_rate": 5.0357507660878446e-06, + "loss": 0.9236, + "step": 493 + }, + { + "epoch": 0.015140370234154713, + "grad_norm": 2.628225516132047, + "learning_rate": 5.045965270684372e-06, + "loss": 0.9174, + "step": 494 + }, + { + "epoch": 0.015171018756895917, + "grad_norm": 2.767364830457831, + "learning_rate": 5.0561797752809e-06, + "loss": 0.8704, + "step": 495 + }, + { + "epoch": 0.015201667279637121, + "grad_norm": 2.5565222313772944, + "learning_rate": 5.066394279877427e-06, + "loss": 0.7171, + "step": 496 + }, + { + "epoch": 0.015232315802378325, + "grad_norm": 2.770539276188391, + "learning_rate": 5.076608784473953e-06, + "loss": 0.8492, + "step": 497 + }, + { + "epoch": 0.015262964325119529, + "grad_norm": 3.170705818121551, + "learning_rate": 5.08682328907048e-06, + "loss": 0.8421, + "step": 498 + }, + { + "epoch": 0.015293612847860733, + "grad_norm": 2.9768001338826675, + "learning_rate": 5.097037793667008e-06, + "loss": 0.8912, + "step": 499 + }, + { + "epoch": 0.015324261370601937, + "grad_norm": 3.007056567795919, + "learning_rate": 5.1072522982635346e-06, + "loss": 0.8174, + "step": 500 + }, + { + "epoch": 0.01535490989334314, + "grad_norm": 2.4140079863420816, + "learning_rate": 5.117466802860061e-06, + "loss": 0.8324, + "step": 501 + }, + { + "epoch": 0.015385558416084345, + "grad_norm": 2.823686715642901, + "learning_rate": 5.127681307456589e-06, + "loss": 0.8449, + "step": 502 + }, + { + "epoch": 0.015416206938825549, + "grad_norm": 2.663099685568847, + "learning_rate": 5.137895812053117e-06, + "loss": 0.9823, + "step": 503 + }, + { + "epoch": 0.015446855461566752, + "grad_norm": 3.054160764382166, + "learning_rate": 5.1481103166496425e-06, + "loss": 0.8442, + "step": 504 + }, + { + "epoch": 0.015477503984307956, + "grad_norm": 1.1161419776729158, + "learning_rate": 5.15832482124617e-06, + "loss": 0.579, + "step": 505 + }, + { + "epoch": 0.01550815250704916, + "grad_norm": 2.8038774265936084, + "learning_rate": 5.168539325842698e-06, + "loss": 0.8442, + "step": 506 + }, + { + "epoch": 0.015538801029790364, + "grad_norm": 3.0875416984118056, + "learning_rate": 5.178753830439224e-06, + "loss": 0.8803, + "step": 507 + }, + { + "epoch": 0.015569449552531568, + "grad_norm": 2.790366391996132, + "learning_rate": 5.188968335035751e-06, + "loss": 0.7933, + "step": 508 + }, + { + "epoch": 0.015600098075272772, + "grad_norm": 2.6929937146763905, + "learning_rate": 5.199182839632278e-06, + "loss": 0.7835, + "step": 509 + }, + { + "epoch": 0.015630746598013976, + "grad_norm": 2.768157017632214, + "learning_rate": 5.209397344228805e-06, + "loss": 0.8846, + "step": 510 + }, + { + "epoch": 0.015661395120755178, + "grad_norm": 1.1662187660538215, + "learning_rate": 5.2196118488253326e-06, + "loss": 0.583, + "step": 511 + }, + { + "epoch": 0.015692043643496384, + "grad_norm": 1.2090409097308505, + "learning_rate": 5.229826353421859e-06, + "loss": 0.5691, + "step": 512 + }, + { + "epoch": 0.015722692166237586, + "grad_norm": 0.997411867649856, + "learning_rate": 5.240040858018387e-06, + "loss": 0.5758, + "step": 513 + }, + { + "epoch": 0.015753340688978792, + "grad_norm": 3.263583392402083, + "learning_rate": 5.250255362614913e-06, + "loss": 0.8199, + "step": 514 + }, + { + "epoch": 0.015783989211719994, + "grad_norm": 2.718189600887344, + "learning_rate": 5.2604698672114405e-06, + "loss": 0.8913, + "step": 515 + }, + { + "epoch": 0.0158146377344612, + "grad_norm": 2.7347809995400114, + "learning_rate": 5.270684371807968e-06, + 
"loss": 0.8671, + "step": 516 + }, + { + "epoch": 0.015845286257202402, + "grad_norm": 2.5213211971330147, + "learning_rate": 5.280898876404494e-06, + "loss": 0.8832, + "step": 517 + }, + { + "epoch": 0.015875934779943607, + "grad_norm": 3.1528725876060646, + "learning_rate": 5.291113381001022e-06, + "loss": 0.942, + "step": 518 + }, + { + "epoch": 0.01590658330268481, + "grad_norm": 2.703919785733534, + "learning_rate": 5.301327885597549e-06, + "loss": 0.8025, + "step": 519 + }, + { + "epoch": 0.015937231825426015, + "grad_norm": 2.8737458766193194, + "learning_rate": 5.311542390194075e-06, + "loss": 0.9299, + "step": 520 + }, + { + "epoch": 0.015967880348167218, + "grad_norm": 3.1358694944620766, + "learning_rate": 5.321756894790603e-06, + "loss": 0.9045, + "step": 521 + }, + { + "epoch": 0.015998528870908423, + "grad_norm": 2.8664733049171014, + "learning_rate": 5.3319713993871305e-06, + "loss": 0.9052, + "step": 522 + }, + { + "epoch": 0.016029177393649625, + "grad_norm": 3.1698280642171426, + "learning_rate": 5.342185903983657e-06, + "loss": 0.8567, + "step": 523 + }, + { + "epoch": 0.01605982591639083, + "grad_norm": 2.719269023736026, + "learning_rate": 5.352400408580184e-06, + "loss": 0.8134, + "step": 524 + }, + { + "epoch": 0.016090474439132033, + "grad_norm": 2.8280406450324787, + "learning_rate": 5.362614913176712e-06, + "loss": 0.8276, + "step": 525 + }, + { + "epoch": 0.01612112296187324, + "grad_norm": 3.0346837862716938, + "learning_rate": 5.3728294177732385e-06, + "loss": 0.8867, + "step": 526 + }, + { + "epoch": 0.01615177148461444, + "grad_norm": 4.002814559771493, + "learning_rate": 5.383043922369765e-06, + "loss": 0.8305, + "step": 527 + }, + { + "epoch": 0.016182420007355647, + "grad_norm": 2.5514522139142883, + "learning_rate": 5.393258426966292e-06, + "loss": 0.8009, + "step": 528 + }, + { + "epoch": 0.01621306853009685, + "grad_norm": 2.633835597420876, + "learning_rate": 5.40347293156282e-06, + "loss": 0.8301, + "step": 529 + }, + { + "epoch": 0.016243717052838055, + "grad_norm": 1.1754344140812931, + "learning_rate": 5.413687436159347e-06, + "loss": 0.6064, + "step": 530 + }, + { + "epoch": 0.016274365575579257, + "grad_norm": 1.260386288407654, + "learning_rate": 5.423901940755873e-06, + "loss": 0.5556, + "step": 531 + }, + { + "epoch": 0.016305014098320463, + "grad_norm": 3.188025585464316, + "learning_rate": 5.434116445352401e-06, + "loss": 0.8698, + "step": 532 + }, + { + "epoch": 0.016335662621061665, + "grad_norm": 1.3281892469177676, + "learning_rate": 5.4443309499489285e-06, + "loss": 0.5705, + "step": 533 + }, + { + "epoch": 0.01636631114380287, + "grad_norm": 2.7462738950625596, + "learning_rate": 5.4545454545454545e-06, + "loss": 0.8929, + "step": 534 + }, + { + "epoch": 0.016396959666544073, + "grad_norm": 2.904992001849432, + "learning_rate": 5.464759959141982e-06, + "loss": 0.8524, + "step": 535 + }, + { + "epoch": 0.016427608189285275, + "grad_norm": 2.7205965641653727, + "learning_rate": 5.47497446373851e-06, + "loss": 0.7128, + "step": 536 + }, + { + "epoch": 0.01645825671202648, + "grad_norm": 2.636917527970757, + "learning_rate": 5.485188968335036e-06, + "loss": 0.9121, + "step": 537 + }, + { + "epoch": 0.016488905234767683, + "grad_norm": 1.251555152883313, + "learning_rate": 5.495403472931563e-06, + "loss": 0.5828, + "step": 538 + }, + { + "epoch": 0.01651955375750889, + "grad_norm": 2.711524991219316, + "learning_rate": 5.50561797752809e-06, + "loss": 0.8441, + "step": 539 + }, + { + "epoch": 0.01655020228025009, + "grad_norm": 
2.701050640658386, + "learning_rate": 5.515832482124618e-06, + "loss": 0.7686, + "step": 540 + }, + { + "epoch": 0.016580850802991296, + "grad_norm": 3.4157544938738695, + "learning_rate": 5.5260469867211445e-06, + "loss": 0.8951, + "step": 541 + }, + { + "epoch": 0.0166114993257325, + "grad_norm": 1.1453271308755009, + "learning_rate": 5.536261491317671e-06, + "loss": 0.5797, + "step": 542 + }, + { + "epoch": 0.016642147848473704, + "grad_norm": 2.8687257310984715, + "learning_rate": 5.546475995914199e-06, + "loss": 0.8599, + "step": 543 + }, + { + "epoch": 0.016672796371214906, + "grad_norm": 2.9557796997833075, + "learning_rate": 5.556690500510725e-06, + "loss": 0.7395, + "step": 544 + }, + { + "epoch": 0.016703444893956112, + "grad_norm": 2.986792631796743, + "learning_rate": 5.5669050051072524e-06, + "loss": 0.8693, + "step": 545 + }, + { + "epoch": 0.016734093416697314, + "grad_norm": 3.117748474276798, + "learning_rate": 5.57711950970378e-06, + "loss": 0.8093, + "step": 546 + }, + { + "epoch": 0.01676474193943852, + "grad_norm": 1.0414129626698536, + "learning_rate": 5.587334014300308e-06, + "loss": 0.5861, + "step": 547 + }, + { + "epoch": 0.016795390462179722, + "grad_norm": 2.9857882252922066, + "learning_rate": 5.597548518896834e-06, + "loss": 0.9073, + "step": 548 + }, + { + "epoch": 0.016826038984920928, + "grad_norm": 2.9307943455619525, + "learning_rate": 5.607763023493361e-06, + "loss": 0.8396, + "step": 549 + }, + { + "epoch": 0.01685668750766213, + "grad_norm": 2.9659640198224815, + "learning_rate": 5.617977528089889e-06, + "loss": 0.803, + "step": 550 + }, + { + "epoch": 0.016887336030403335, + "grad_norm": 3.546023300442476, + "learning_rate": 5.628192032686415e-06, + "loss": 0.8525, + "step": 551 + }, + { + "epoch": 0.016917984553144538, + "grad_norm": 2.9537243378825835, + "learning_rate": 5.6384065372829424e-06, + "loss": 0.9181, + "step": 552 + }, + { + "epoch": 0.016948633075885743, + "grad_norm": 2.8132097850390707, + "learning_rate": 5.648621041879469e-06, + "loss": 0.8055, + "step": 553 + }, + { + "epoch": 0.016979281598626945, + "grad_norm": 2.9218232676186693, + "learning_rate": 5.658835546475996e-06, + "loss": 0.8023, + "step": 554 + }, + { + "epoch": 0.01700993012136815, + "grad_norm": 2.6295760018362127, + "learning_rate": 5.669050051072524e-06, + "loss": 0.7687, + "step": 555 + }, + { + "epoch": 0.017040578644109353, + "grad_norm": 1.0257433997359908, + "learning_rate": 5.67926455566905e-06, + "loss": 0.5758, + "step": 556 + }, + { + "epoch": 0.01707122716685056, + "grad_norm": 1.1531412642264425, + "learning_rate": 5.689479060265578e-06, + "loss": 0.5721, + "step": 557 + }, + { + "epoch": 0.01710187568959176, + "grad_norm": 2.6263829071353717, + "learning_rate": 5.699693564862104e-06, + "loss": 0.8577, + "step": 558 + }, + { + "epoch": 0.017132524212332967, + "grad_norm": 1.0171827490037695, + "learning_rate": 5.709908069458632e-06, + "loss": 0.5507, + "step": 559 + }, + { + "epoch": 0.01716317273507417, + "grad_norm": 1.03795608507198, + "learning_rate": 5.720122574055159e-06, + "loss": 0.5948, + "step": 560 + }, + { + "epoch": 0.017193821257815375, + "grad_norm": 2.598828276423245, + "learning_rate": 5.730337078651685e-06, + "loss": 0.8237, + "step": 561 + }, + { + "epoch": 0.017224469780556577, + "grad_norm": 3.1653601544801253, + "learning_rate": 5.740551583248213e-06, + "loss": 0.7955, + "step": 562 + }, + { + "epoch": 0.017255118303297783, + "grad_norm": 3.2619636937465057, + "learning_rate": 5.7507660878447404e-06, + "loss": 0.879, + "step": 563 + 
}, + { + "epoch": 0.017285766826038985, + "grad_norm": 2.861099540146627, + "learning_rate": 5.760980592441267e-06, + "loss": 0.8892, + "step": 564 + }, + { + "epoch": 0.01731641534878019, + "grad_norm": 2.894414129241923, + "learning_rate": 5.771195097037794e-06, + "loss": 0.8186, + "step": 565 + }, + { + "epoch": 0.017347063871521393, + "grad_norm": 2.392148124633032, + "learning_rate": 5.781409601634322e-06, + "loss": 0.8013, + "step": 566 + }, + { + "epoch": 0.017377712394262595, + "grad_norm": 2.899982590344122, + "learning_rate": 5.791624106230848e-06, + "loss": 0.8622, + "step": 567 + }, + { + "epoch": 0.0174083609170038, + "grad_norm": 3.7612736050275792, + "learning_rate": 5.801838610827375e-06, + "loss": 0.8666, + "step": 568 + }, + { + "epoch": 0.017439009439745003, + "grad_norm": 3.3171995030274952, + "learning_rate": 5.812053115423902e-06, + "loss": 0.8687, + "step": 569 + }, + { + "epoch": 0.01746965796248621, + "grad_norm": 2.7945019573265273, + "learning_rate": 5.82226762002043e-06, + "loss": 0.8247, + "step": 570 + }, + { + "epoch": 0.01750030648522741, + "grad_norm": 3.458234741947401, + "learning_rate": 5.832482124616956e-06, + "loss": 0.8104, + "step": 571 + }, + { + "epoch": 0.017530955007968616, + "grad_norm": 3.0583534966361148, + "learning_rate": 5.842696629213483e-06, + "loss": 0.8188, + "step": 572 + }, + { + "epoch": 0.01756160353070982, + "grad_norm": 2.810251125210679, + "learning_rate": 5.852911133810011e-06, + "loss": 0.7986, + "step": 573 + }, + { + "epoch": 0.017592252053451024, + "grad_norm": 1.2484028505736617, + "learning_rate": 5.863125638406538e-06, + "loss": 0.5671, + "step": 574 + }, + { + "epoch": 0.017622900576192226, + "grad_norm": 1.2033147952600964, + "learning_rate": 5.873340143003064e-06, + "loss": 0.57, + "step": 575 + }, + { + "epoch": 0.017653549098933432, + "grad_norm": 3.2793462920357803, + "learning_rate": 5.883554647599592e-06, + "loss": 0.9753, + "step": 576 + }, + { + "epoch": 0.017684197621674634, + "grad_norm": 3.0974791169839637, + "learning_rate": 5.89376915219612e-06, + "loss": 0.8733, + "step": 577 + }, + { + "epoch": 0.01771484614441584, + "grad_norm": 2.749834964833258, + "learning_rate": 5.9039836567926455e-06, + "loss": 0.948, + "step": 578 + }, + { + "epoch": 0.017745494667157042, + "grad_norm": 2.85623502138601, + "learning_rate": 5.914198161389173e-06, + "loss": 0.8174, + "step": 579 + }, + { + "epoch": 0.017776143189898248, + "grad_norm": 2.6678156643880655, + "learning_rate": 5.924412665985701e-06, + "loss": 0.7862, + "step": 580 + }, + { + "epoch": 0.01780679171263945, + "grad_norm": 2.761894998742353, + "learning_rate": 5.934627170582227e-06, + "loss": 0.9069, + "step": 581 + }, + { + "epoch": 0.017837440235380655, + "grad_norm": 2.6215354906156514, + "learning_rate": 5.944841675178754e-06, + "loss": 0.8139, + "step": 582 + }, + { + "epoch": 0.017868088758121858, + "grad_norm": 3.2719839757402185, + "learning_rate": 5.955056179775281e-06, + "loss": 0.7598, + "step": 583 + }, + { + "epoch": 0.017898737280863063, + "grad_norm": 2.5819031582809235, + "learning_rate": 5.965270684371809e-06, + "loss": 0.8935, + "step": 584 + }, + { + "epoch": 0.017929385803604266, + "grad_norm": 3.1348148453851077, + "learning_rate": 5.9754851889683355e-06, + "loss": 0.729, + "step": 585 + }, + { + "epoch": 0.01796003432634547, + "grad_norm": 2.774641341373821, + "learning_rate": 5.985699693564862e-06, + "loss": 0.8368, + "step": 586 + }, + { + "epoch": 0.017990682849086673, + "grad_norm": 2.884278109874713, + "learning_rate": 
5.99591419816139e-06, + "loss": 0.8472, + "step": 587 + }, + { + "epoch": 0.01802133137182788, + "grad_norm": 2.76196141967181, + "learning_rate": 6.006128702757916e-06, + "loss": 0.7917, + "step": 588 + }, + { + "epoch": 0.01805197989456908, + "grad_norm": 3.572557269279883, + "learning_rate": 6.0163432073544435e-06, + "loss": 0.876, + "step": 589 + }, + { + "epoch": 0.018082628417310287, + "grad_norm": 2.7195154064941773, + "learning_rate": 6.026557711950971e-06, + "loss": 0.8523, + "step": 590 + }, + { + "epoch": 0.01811327694005149, + "grad_norm": 2.603345553891082, + "learning_rate": 6.036772216547499e-06, + "loss": 0.848, + "step": 591 + }, + { + "epoch": 0.018143925462792695, + "grad_norm": 2.772086847124186, + "learning_rate": 6.046986721144025e-06, + "loss": 0.9359, + "step": 592 + }, + { + "epoch": 0.018174573985533897, + "grad_norm": 2.7969987348880316, + "learning_rate": 6.057201225740552e-06, + "loss": 0.8389, + "step": 593 + }, + { + "epoch": 0.018205222508275103, + "grad_norm": 1.6176007860306407, + "learning_rate": 6.06741573033708e-06, + "loss": 0.5551, + "step": 594 + }, + { + "epoch": 0.018235871031016305, + "grad_norm": 1.5136407140855554, + "learning_rate": 6.077630234933606e-06, + "loss": 0.5612, + "step": 595 + }, + { + "epoch": 0.01826651955375751, + "grad_norm": 2.8026386450997585, + "learning_rate": 6.0878447395301335e-06, + "loss": 0.9343, + "step": 596 + }, + { + "epoch": 0.018297168076498713, + "grad_norm": 2.8882919707140675, + "learning_rate": 6.09805924412666e-06, + "loss": 0.7715, + "step": 597 + }, + { + "epoch": 0.01832781659923992, + "grad_norm": 0.9996479262653694, + "learning_rate": 6.108273748723187e-06, + "loss": 0.5539, + "step": 598 + }, + { + "epoch": 0.01835846512198112, + "grad_norm": 2.768014806606586, + "learning_rate": 6.118488253319715e-06, + "loss": 0.8599, + "step": 599 + }, + { + "epoch": 0.018389113644722323, + "grad_norm": 3.1423799684036924, + "learning_rate": 6.1287027579162415e-06, + "loss": 0.8895, + "step": 600 + }, + { + "epoch": 0.01841976216746353, + "grad_norm": 2.730528612615601, + "learning_rate": 6.138917262512769e-06, + "loss": 0.7893, + "step": 601 + }, + { + "epoch": 0.01845041069020473, + "grad_norm": 3.276221868193066, + "learning_rate": 6.149131767109295e-06, + "loss": 0.9391, + "step": 602 + }, + { + "epoch": 0.018481059212945936, + "grad_norm": 2.630175617409509, + "learning_rate": 6.159346271705823e-06, + "loss": 0.8645, + "step": 603 + }, + { + "epoch": 0.01851170773568714, + "grad_norm": 2.99143463139179, + "learning_rate": 6.16956077630235e-06, + "loss": 0.8033, + "step": 604 + }, + { + "epoch": 0.018542356258428344, + "grad_norm": 2.8508012371824725, + "learning_rate": 6.179775280898876e-06, + "loss": 0.8066, + "step": 605 + }, + { + "epoch": 0.018573004781169546, + "grad_norm": 2.89285752515505, + "learning_rate": 6.189989785495404e-06, + "loss": 0.8479, + "step": 606 + }, + { + "epoch": 0.018603653303910752, + "grad_norm": 2.9528851383172654, + "learning_rate": 6.2002042900919315e-06, + "loss": 0.821, + "step": 607 + }, + { + "epoch": 0.018634301826651954, + "grad_norm": 2.0526909725217926, + "learning_rate": 6.210418794688458e-06, + "loss": 0.5781, + "step": 608 + }, + { + "epoch": 0.01866495034939316, + "grad_norm": 3.460760246833261, + "learning_rate": 6.220633299284985e-06, + "loss": 0.6983, + "step": 609 + }, + { + "epoch": 0.018695598872134362, + "grad_norm": 1.403228363636226, + "learning_rate": 6.230847803881513e-06, + "loss": 0.5758, + "step": 610 + }, + { + "epoch": 0.018726247394875568, + 
"grad_norm": 2.21442261225321, + "learning_rate": 6.2410623084780395e-06, + "loss": 0.7274, + "step": 611 + }, + { + "epoch": 0.01875689591761677, + "grad_norm": 1.0077094847678891, + "learning_rate": 6.251276813074566e-06, + "loss": 0.5477, + "step": 612 + }, + { + "epoch": 0.018787544440357976, + "grad_norm": 2.852900006230864, + "learning_rate": 6.261491317671093e-06, + "loss": 0.8067, + "step": 613 + }, + { + "epoch": 0.018818192963099178, + "grad_norm": 2.7407433017281573, + "learning_rate": 6.271705822267621e-06, + "loss": 0.9382, + "step": 614 + }, + { + "epoch": 0.018848841485840383, + "grad_norm": 1.4432757695325467, + "learning_rate": 6.2819203268641474e-06, + "loss": 0.5788, + "step": 615 + }, + { + "epoch": 0.018879490008581586, + "grad_norm": 2.6831020226317905, + "learning_rate": 6.292134831460674e-06, + "loss": 0.8722, + "step": 616 + }, + { + "epoch": 0.01891013853132279, + "grad_norm": 1.3067365892669007, + "learning_rate": 6.302349336057202e-06, + "loss": 0.543, + "step": 617 + }, + { + "epoch": 0.018940787054063993, + "grad_norm": 1.2654551436564394, + "learning_rate": 6.3125638406537295e-06, + "loss": 0.5682, + "step": 618 + }, + { + "epoch": 0.0189714355768052, + "grad_norm": 2.6700287262027214, + "learning_rate": 6.322778345250255e-06, + "loss": 0.9075, + "step": 619 + }, + { + "epoch": 0.0190020840995464, + "grad_norm": 3.164946438256333, + "learning_rate": 6.332992849846783e-06, + "loss": 0.7347, + "step": 620 + }, + { + "epoch": 0.019032732622287607, + "grad_norm": 0.9984360528793536, + "learning_rate": 6.343207354443311e-06, + "loss": 0.5557, + "step": 621 + }, + { + "epoch": 0.01906338114502881, + "grad_norm": 2.807612021980321, + "learning_rate": 6.353421859039837e-06, + "loss": 0.8786, + "step": 622 + }, + { + "epoch": 0.019094029667770015, + "grad_norm": 3.550384550850249, + "learning_rate": 6.363636363636364e-06, + "loss": 0.9101, + "step": 623 + }, + { + "epoch": 0.019124678190511217, + "grad_norm": 2.9589132313521525, + "learning_rate": 6.373850868232892e-06, + "loss": 0.9171, + "step": 624 + }, + { + "epoch": 0.019155326713252423, + "grad_norm": 1.2355936921188393, + "learning_rate": 6.384065372829419e-06, + "loss": 0.5483, + "step": 625 + }, + { + "epoch": 0.019185975235993625, + "grad_norm": 1.2562870119108291, + "learning_rate": 6.3942798774259454e-06, + "loss": 0.5616, + "step": 626 + }, + { + "epoch": 0.01921662375873483, + "grad_norm": 3.189611270872149, + "learning_rate": 6.404494382022472e-06, + "loss": 0.8781, + "step": 627 + }, + { + "epoch": 0.019247272281476033, + "grad_norm": 2.7894275274347624, + "learning_rate": 6.414708886619e-06, + "loss": 0.7685, + "step": 628 + }, + { + "epoch": 0.01927792080421724, + "grad_norm": 2.8242915971659026, + "learning_rate": 6.424923391215527e-06, + "loss": 0.8482, + "step": 629 + }, + { + "epoch": 0.01930856932695844, + "grad_norm": 2.7991407439137745, + "learning_rate": 6.435137895812053e-06, + "loss": 0.9109, + "step": 630 + }, + { + "epoch": 0.019339217849699643, + "grad_norm": 2.6843293521114377, + "learning_rate": 6.445352400408581e-06, + "loss": 0.8108, + "step": 631 + }, + { + "epoch": 0.01936986637244085, + "grad_norm": 1.0283206808343337, + "learning_rate": 6.455566905005107e-06, + "loss": 0.5456, + "step": 632 + }, + { + "epoch": 0.01940051489518205, + "grad_norm": 2.8247267185715086, + "learning_rate": 6.465781409601635e-06, + "loss": 0.8759, + "step": 633 + }, + { + "epoch": 0.019431163417923256, + "grad_norm": 2.517981728055398, + "learning_rate": 6.475995914198162e-06, + "loss": 0.8541, + 
"step": 634 + }, + { + "epoch": 0.01946181194066446, + "grad_norm": 2.7346535408829284, + "learning_rate": 6.48621041879469e-06, + "loss": 0.8419, + "step": 635 + }, + { + "epoch": 0.019492460463405664, + "grad_norm": 2.7057011425086874, + "learning_rate": 6.496424923391216e-06, + "loss": 0.7794, + "step": 636 + }, + { + "epoch": 0.019523108986146866, + "grad_norm": 2.7885735566767282, + "learning_rate": 6.506639427987743e-06, + "loss": 0.7967, + "step": 637 + }, + { + "epoch": 0.019553757508888072, + "grad_norm": 3.041792150725988, + "learning_rate": 6.51685393258427e-06, + "loss": 0.9763, + "step": 638 + }, + { + "epoch": 0.019584406031629274, + "grad_norm": 2.6413226799912017, + "learning_rate": 6.527068437180797e-06, + "loss": 0.7732, + "step": 639 + }, + { + "epoch": 0.01961505455437048, + "grad_norm": 2.7773728180774815, + "learning_rate": 6.537282941777325e-06, + "loss": 0.7418, + "step": 640 + }, + { + "epoch": 0.019645703077111682, + "grad_norm": 1.1934708388950996, + "learning_rate": 6.547497446373851e-06, + "loss": 0.5592, + "step": 641 + }, + { + "epoch": 0.019676351599852888, + "grad_norm": 1.112349364020128, + "learning_rate": 6.557711950970378e-06, + "loss": 0.5809, + "step": 642 + }, + { + "epoch": 0.01970700012259409, + "grad_norm": 2.6273367435610187, + "learning_rate": 6.567926455566905e-06, + "loss": 0.8465, + "step": 643 + }, + { + "epoch": 0.019737648645335296, + "grad_norm": 2.8488234349273176, + "learning_rate": 6.5781409601634326e-06, + "loss": 0.8247, + "step": 644 + }, + { + "epoch": 0.019768297168076498, + "grad_norm": 2.949485471383936, + "learning_rate": 6.58835546475996e-06, + "loss": 0.8318, + "step": 645 + }, + { + "epoch": 0.019798945690817703, + "grad_norm": 2.6173325685527837, + "learning_rate": 6.598569969356486e-06, + "loss": 0.7565, + "step": 646 + }, + { + "epoch": 0.019829594213558906, + "grad_norm": 2.6205432344550603, + "learning_rate": 6.608784473953014e-06, + "loss": 0.8091, + "step": 647 + }, + { + "epoch": 0.01986024273630011, + "grad_norm": 2.715241952471307, + "learning_rate": 6.618998978549541e-06, + "loss": 0.8474, + "step": 648 + }, + { + "epoch": 0.019890891259041314, + "grad_norm": 2.83073456259497, + "learning_rate": 6.629213483146067e-06, + "loss": 0.926, + "step": 649 + }, + { + "epoch": 0.01992153978178252, + "grad_norm": 3.425251747818931, + "learning_rate": 6.639427987742595e-06, + "loss": 0.9049, + "step": 650 + }, + { + "epoch": 0.01995218830452372, + "grad_norm": 3.0564889529392376, + "learning_rate": 6.649642492339123e-06, + "loss": 0.8952, + "step": 651 + }, + { + "epoch": 0.019982836827264927, + "grad_norm": 2.9826188894107597, + "learning_rate": 6.659856996935649e-06, + "loss": 0.8468, + "step": 652 + }, + { + "epoch": 0.02001348535000613, + "grad_norm": 3.1087379914693116, + "learning_rate": 6.670071501532176e-06, + "loss": 0.8054, + "step": 653 + }, + { + "epoch": 0.020044133872747335, + "grad_norm": 1.213320805171772, + "learning_rate": 6.680286006128704e-06, + "loss": 0.5433, + "step": 654 + }, + { + "epoch": 0.020074782395488537, + "grad_norm": 2.5524467000391, + "learning_rate": 6.6905005107252305e-06, + "loss": 0.8343, + "step": 655 + }, + { + "epoch": 0.020105430918229743, + "grad_norm": 2.4466106410054977, + "learning_rate": 6.700715015321757e-06, + "loss": 0.8358, + "step": 656 + }, + { + "epoch": 0.020136079440970945, + "grad_norm": 2.8915819804631195, + "learning_rate": 6.710929519918284e-06, + "loss": 0.9236, + "step": 657 + }, + { + "epoch": 0.02016672796371215, + "grad_norm": 2.7561821256361974, + 
"learning_rate": 6.721144024514812e-06, + "loss": 0.845, + "step": 658 + }, + { + "epoch": 0.020197376486453353, + "grad_norm": 2.536582865711595, + "learning_rate": 6.7313585291113385e-06, + "loss": 0.7972, + "step": 659 + }, + { + "epoch": 0.02022802500919456, + "grad_norm": 2.673525040282449, + "learning_rate": 6.741573033707865e-06, + "loss": 0.8967, + "step": 660 + }, + { + "epoch": 0.02025867353193576, + "grad_norm": 2.331188146839733, + "learning_rate": 6.751787538304393e-06, + "loss": 0.861, + "step": 661 + }, + { + "epoch": 0.020289322054676963, + "grad_norm": 2.953796728373752, + "learning_rate": 6.7620020429009206e-06, + "loss": 0.8163, + "step": 662 + }, + { + "epoch": 0.02031997057741817, + "grad_norm": 2.2868528541214146, + "learning_rate": 6.7722165474974465e-06, + "loss": 0.7793, + "step": 663 + }, + { + "epoch": 0.02035061910015937, + "grad_norm": 1.0639715278901762, + "learning_rate": 6.782431052093974e-06, + "loss": 0.5647, + "step": 664 + }, + { + "epoch": 0.020381267622900576, + "grad_norm": 2.7423584796980314, + "learning_rate": 6.792645556690502e-06, + "loss": 0.894, + "step": 665 + }, + { + "epoch": 0.02041191614564178, + "grad_norm": 2.2674153252479523, + "learning_rate": 6.802860061287028e-06, + "loss": 0.7764, + "step": 666 + }, + { + "epoch": 0.020442564668382984, + "grad_norm": 2.6683260263425566, + "learning_rate": 6.813074565883555e-06, + "loss": 0.8451, + "step": 667 + }, + { + "epoch": 0.020473213191124186, + "grad_norm": 3.096224768372658, + "learning_rate": 6.823289070480082e-06, + "loss": 0.8003, + "step": 668 + }, + { + "epoch": 0.020503861713865392, + "grad_norm": 3.460956544474347, + "learning_rate": 6.83350357507661e-06, + "loss": 0.8372, + "step": 669 + }, + { + "epoch": 0.020534510236606594, + "grad_norm": 2.789233343735804, + "learning_rate": 6.8437180796731365e-06, + "loss": 0.8058, + "step": 670 + }, + { + "epoch": 0.0205651587593478, + "grad_norm": 2.783762162725732, + "learning_rate": 6.853932584269663e-06, + "loss": 0.7723, + "step": 671 + }, + { + "epoch": 0.020595807282089002, + "grad_norm": 2.7222759403234984, + "learning_rate": 6.864147088866191e-06, + "loss": 0.8587, + "step": 672 + }, + { + "epoch": 0.020626455804830208, + "grad_norm": 2.786726760322622, + "learning_rate": 6.874361593462717e-06, + "loss": 0.8461, + "step": 673 + }, + { + "epoch": 0.02065710432757141, + "grad_norm": 2.8801111140139772, + "learning_rate": 6.8845760980592445e-06, + "loss": 0.8033, + "step": 674 + }, + { + "epoch": 0.020687752850312616, + "grad_norm": 2.817091054288429, + "learning_rate": 6.894790602655772e-06, + "loss": 0.7478, + "step": 675 + }, + { + "epoch": 0.020718401373053818, + "grad_norm": 2.669438590994199, + "learning_rate": 6.905005107252298e-06, + "loss": 0.8775, + "step": 676 + }, + { + "epoch": 0.020749049895795024, + "grad_norm": 2.5952635953450893, + "learning_rate": 6.915219611848826e-06, + "loss": 0.8536, + "step": 677 + }, + { + "epoch": 0.020779698418536226, + "grad_norm": 2.8293359197967454, + "learning_rate": 6.925434116445353e-06, + "loss": 0.8485, + "step": 678 + }, + { + "epoch": 0.02081034694127743, + "grad_norm": 2.598128121441444, + "learning_rate": 6.935648621041881e-06, + "loss": 0.8077, + "step": 679 + }, + { + "epoch": 0.020840995464018634, + "grad_norm": 2.835352084378578, + "learning_rate": 6.945863125638407e-06, + "loss": 0.8177, + "step": 680 + }, + { + "epoch": 0.02087164398675984, + "grad_norm": 2.9649314779219784, + "learning_rate": 6.9560776302349345e-06, + "loss": 0.7993, + "step": 681 + }, + { + "epoch": 
0.02090229250950104, + "grad_norm": 2.656014321498177, + "learning_rate": 6.966292134831461e-06, + "loss": 0.8394, + "step": 682 + }, + { + "epoch": 0.020932941032242247, + "grad_norm": 2.5038050851265297, + "learning_rate": 6.976506639427988e-06, + "loss": 0.7937, + "step": 683 + }, + { + "epoch": 0.02096358955498345, + "grad_norm": 2.6303175152933833, + "learning_rate": 6.986721144024516e-06, + "loss": 0.8562, + "step": 684 + }, + { + "epoch": 0.020994238077724655, + "grad_norm": 2.8313430829237207, + "learning_rate": 6.9969356486210425e-06, + "loss": 0.8633, + "step": 685 + }, + { + "epoch": 0.021024886600465857, + "grad_norm": 2.4205584412312073, + "learning_rate": 7.007150153217569e-06, + "loss": 0.8408, + "step": 686 + }, + { + "epoch": 0.021055535123207063, + "grad_norm": 2.284994523179326, + "learning_rate": 7.017364657814096e-06, + "loss": 0.8391, + "step": 687 + }, + { + "epoch": 0.021086183645948265, + "grad_norm": 2.717770440063039, + "learning_rate": 7.027579162410624e-06, + "loss": 0.7649, + "step": 688 + }, + { + "epoch": 0.02111683216868947, + "grad_norm": 2.626154837044685, + "learning_rate": 7.037793667007151e-06, + "loss": 0.7578, + "step": 689 + }, + { + "epoch": 0.021147480691430673, + "grad_norm": 2.769978419899632, + "learning_rate": 7.048008171603677e-06, + "loss": 0.8155, + "step": 690 + }, + { + "epoch": 0.02117812921417188, + "grad_norm": 2.7721533299361587, + "learning_rate": 7.058222676200205e-06, + "loss": 0.8543, + "step": 691 + }, + { + "epoch": 0.02120877773691308, + "grad_norm": 1.1440968068972854, + "learning_rate": 7.0684371807967325e-06, + "loss": 0.5798, + "step": 692 + }, + { + "epoch": 0.021239426259654283, + "grad_norm": 2.9201016320741013, + "learning_rate": 7.078651685393258e-06, + "loss": 0.8378, + "step": 693 + }, + { + "epoch": 0.02127007478239549, + "grad_norm": 2.5047227255074316, + "learning_rate": 7.088866189989786e-06, + "loss": 0.8512, + "step": 694 + }, + { + "epoch": 0.02130072330513669, + "grad_norm": 2.5520623067700643, + "learning_rate": 7.099080694586314e-06, + "loss": 0.8053, + "step": 695 + }, + { + "epoch": 0.021331371827877896, + "grad_norm": 2.8273848300911806, + "learning_rate": 7.1092951991828404e-06, + "loss": 0.8415, + "step": 696 + }, + { + "epoch": 0.0213620203506191, + "grad_norm": 2.800361618206784, + "learning_rate": 7.119509703779367e-06, + "loss": 0.857, + "step": 697 + }, + { + "epoch": 0.021392668873360304, + "grad_norm": 2.6708916406627683, + "learning_rate": 7.129724208375894e-06, + "loss": 0.7528, + "step": 698 + }, + { + "epoch": 0.021423317396101507, + "grad_norm": 1.0163027499694144, + "learning_rate": 7.139938712972422e-06, + "loss": 0.5596, + "step": 699 + }, + { + "epoch": 0.021453965918842712, + "grad_norm": 2.714056486084067, + "learning_rate": 7.150153217568948e-06, + "loss": 0.8528, + "step": 700 + }, + { + "epoch": 0.021484614441583914, + "grad_norm": 2.938723866408357, + "learning_rate": 7.160367722165475e-06, + "loss": 0.8392, + "step": 701 + }, + { + "epoch": 0.02151526296432512, + "grad_norm": 2.6717498622124007, + "learning_rate": 7.170582226762003e-06, + "loss": 0.8429, + "step": 702 + }, + { + "epoch": 0.021545911487066322, + "grad_norm": 2.612437461807258, + "learning_rate": 7.180796731358529e-06, + "loss": 0.8979, + "step": 703 + }, + { + "epoch": 0.021576560009807528, + "grad_norm": 2.664763030538786, + "learning_rate": 7.191011235955056e-06, + "loss": 0.7547, + "step": 704 + }, + { + "epoch": 0.02160720853254873, + "grad_norm": 2.5189912052957264, + "learning_rate": 7.201225740551584e-06, + 
"loss": 0.7004, + "step": 705 + }, + { + "epoch": 0.021637857055289936, + "grad_norm": 2.874670182387559, + "learning_rate": 7.211440245148112e-06, + "loss": 0.8955, + "step": 706 + }, + { + "epoch": 0.021668505578031138, + "grad_norm": 2.401389247097066, + "learning_rate": 7.2216547497446376e-06, + "loss": 0.8334, + "step": 707 + }, + { + "epoch": 0.021699154100772344, + "grad_norm": 2.4694361220469965, + "learning_rate": 7.231869254341165e-06, + "loss": 0.8837, + "step": 708 + }, + { + "epoch": 0.021729802623513546, + "grad_norm": 2.580423395889507, + "learning_rate": 7.242083758937693e-06, + "loss": 0.8679, + "step": 709 + }, + { + "epoch": 0.02176045114625475, + "grad_norm": 2.6574643390849877, + "learning_rate": 7.252298263534219e-06, + "loss": 0.8469, + "step": 710 + }, + { + "epoch": 0.021791099668995954, + "grad_norm": 2.5824103450648526, + "learning_rate": 7.262512768130746e-06, + "loss": 0.8683, + "step": 711 + }, + { + "epoch": 0.02182174819173716, + "grad_norm": 2.459152536997915, + "learning_rate": 7.272727272727273e-06, + "loss": 0.8139, + "step": 712 + }, + { + "epoch": 0.02185239671447836, + "grad_norm": 2.4166547444867503, + "learning_rate": 7.282941777323801e-06, + "loss": 0.8002, + "step": 713 + }, + { + "epoch": 0.021883045237219567, + "grad_norm": 2.7827364521945226, + "learning_rate": 7.293156281920328e-06, + "loss": 0.8186, + "step": 714 + }, + { + "epoch": 0.02191369375996077, + "grad_norm": 2.8855699938925845, + "learning_rate": 7.303370786516854e-06, + "loss": 0.8956, + "step": 715 + }, + { + "epoch": 0.021944342282701975, + "grad_norm": 2.4432746684770983, + "learning_rate": 7.313585291113382e-06, + "loss": 0.8015, + "step": 716 + }, + { + "epoch": 0.021974990805443177, + "grad_norm": 2.6078550849793642, + "learning_rate": 7.323799795709908e-06, + "loss": 0.9135, + "step": 717 + }, + { + "epoch": 0.022005639328184383, + "grad_norm": 2.519133670855868, + "learning_rate": 7.3340143003064355e-06, + "loss": 0.8627, + "step": 718 + }, + { + "epoch": 0.022036287850925585, + "grad_norm": 2.694599306419332, + "learning_rate": 7.344228804902963e-06, + "loss": 0.8938, + "step": 719 + }, + { + "epoch": 0.02206693637366679, + "grad_norm": 2.9385697498089747, + "learning_rate": 7.354443309499489e-06, + "loss": 0.8138, + "step": 720 + }, + { + "epoch": 0.022097584896407993, + "grad_norm": 3.909301591578764, + "learning_rate": 7.364657814096017e-06, + "loss": 0.7889, + "step": 721 + }, + { + "epoch": 0.0221282334191492, + "grad_norm": 2.6572416916499892, + "learning_rate": 7.374872318692544e-06, + "loss": 0.7914, + "step": 722 + }, + { + "epoch": 0.0221588819418904, + "grad_norm": 2.55358037634514, + "learning_rate": 7.385086823289071e-06, + "loss": 0.8487, + "step": 723 + }, + { + "epoch": 0.022189530464631606, + "grad_norm": 2.642090787198194, + "learning_rate": 7.395301327885598e-06, + "loss": 0.7767, + "step": 724 + }, + { + "epoch": 0.02222017898737281, + "grad_norm": 2.793396334148574, + "learning_rate": 7.4055158324821256e-06, + "loss": 0.8561, + "step": 725 + }, + { + "epoch": 0.02225082751011401, + "grad_norm": 2.9901552475169764, + "learning_rate": 7.415730337078652e-06, + "loss": 0.9384, + "step": 726 + }, + { + "epoch": 0.022281476032855217, + "grad_norm": 1.1491853303491966, + "learning_rate": 7.425944841675179e-06, + "loss": 0.5879, + "step": 727 + }, + { + "epoch": 0.02231212455559642, + "grad_norm": 2.675949687916952, + "learning_rate": 7.436159346271706e-06, + "loss": 0.8772, + "step": 728 + }, + { + "epoch": 0.022342773078337624, + "grad_norm": 
2.4527923846353454, + "learning_rate": 7.4463738508682335e-06, + "loss": 0.8013, + "step": 729 + }, + { + "epoch": 0.022373421601078827, + "grad_norm": 2.772561984425348, + "learning_rate": 7.456588355464761e-06, + "loss": 0.8045, + "step": 730 + }, + { + "epoch": 0.022404070123820032, + "grad_norm": 2.3679091558353984, + "learning_rate": 7.466802860061287e-06, + "loss": 0.8082, + "step": 731 + }, + { + "epoch": 0.022434718646561234, + "grad_norm": 2.424979845145818, + "learning_rate": 7.477017364657815e-06, + "loss": 0.8533, + "step": 732 + }, + { + "epoch": 0.02246536716930244, + "grad_norm": 2.94724103547818, + "learning_rate": 7.487231869254342e-06, + "loss": 0.7972, + "step": 733 + }, + { + "epoch": 0.022496015692043642, + "grad_norm": 2.556031971029626, + "learning_rate": 7.497446373850868e-06, + "loss": 0.8536, + "step": 734 + }, + { + "epoch": 0.022526664214784848, + "grad_norm": 3.050099004606253, + "learning_rate": 7.507660878447396e-06, + "loss": 0.7765, + "step": 735 + }, + { + "epoch": 0.02255731273752605, + "grad_norm": 1.1364557491821465, + "learning_rate": 7.5178753830439235e-06, + "loss": 0.5574, + "step": 736 + }, + { + "epoch": 0.022587961260267256, + "grad_norm": 2.430942630719383, + "learning_rate": 7.5280898876404495e-06, + "loss": 0.8897, + "step": 737 + }, + { + "epoch": 0.022618609783008458, + "grad_norm": 2.6203708101857206, + "learning_rate": 7.538304392236977e-06, + "loss": 0.8645, + "step": 738 + }, + { + "epoch": 0.022649258305749664, + "grad_norm": 2.7324650267593076, + "learning_rate": 7.548518896833505e-06, + "loss": 0.7692, + "step": 739 + }, + { + "epoch": 0.022679906828490866, + "grad_norm": 2.7644798057649145, + "learning_rate": 7.5587334014300315e-06, + "loss": 0.7196, + "step": 740 + }, + { + "epoch": 0.02271055535123207, + "grad_norm": 1.0458700929406632, + "learning_rate": 7.568947906026558e-06, + "loss": 0.5399, + "step": 741 + }, + { + "epoch": 0.022741203873973274, + "grad_norm": 3.067656427248182, + "learning_rate": 7.579162410623085e-06, + "loss": 0.8563, + "step": 742 + }, + { + "epoch": 0.02277185239671448, + "grad_norm": 2.5480879320346483, + "learning_rate": 7.589376915219613e-06, + "loss": 0.8486, + "step": 743 + }, + { + "epoch": 0.02280250091945568, + "grad_norm": 2.8913791709970704, + "learning_rate": 7.5995914198161395e-06, + "loss": 0.9296, + "step": 744 + }, + { + "epoch": 0.022833149442196887, + "grad_norm": 2.7166576621215692, + "learning_rate": 7.609805924412666e-06, + "loss": 0.8437, + "step": 745 + }, + { + "epoch": 0.02286379796493809, + "grad_norm": 2.868842318468182, + "learning_rate": 7.620020429009194e-06, + "loss": 0.9104, + "step": 746 + }, + { + "epoch": 0.022894446487679295, + "grad_norm": 2.7230078276092735, + "learning_rate": 7.63023493360572e-06, + "loss": 0.8531, + "step": 747 + }, + { + "epoch": 0.022925095010420497, + "grad_norm": 2.461871043070862, + "learning_rate": 7.640449438202247e-06, + "loss": 0.7421, + "step": 748 + }, + { + "epoch": 0.022955743533161703, + "grad_norm": 2.554270964492917, + "learning_rate": 7.650663942798775e-06, + "loss": 0.8275, + "step": 749 + }, + { + "epoch": 0.022986392055902905, + "grad_norm": 2.6039206706155626, + "learning_rate": 7.660878447395303e-06, + "loss": 0.8352, + "step": 750 + }, + { + "epoch": 0.02301704057864411, + "grad_norm": 2.540145656757064, + "learning_rate": 7.671092951991829e-06, + "loss": 0.8256, + "step": 751 + }, + { + "epoch": 0.023047689101385313, + "grad_norm": 2.7214261591154734, + "learning_rate": 7.681307456588356e-06, + "loss": 0.8706, + "step": 752 + 
}, + { + "epoch": 0.02307833762412652, + "grad_norm": 2.7130829383392885, + "learning_rate": 7.691521961184884e-06, + "loss": 0.7934, + "step": 753 + }, + { + "epoch": 0.02310898614686772, + "grad_norm": 2.8708390296858273, + "learning_rate": 7.70173646578141e-06, + "loss": 0.8044, + "step": 754 + }, + { + "epoch": 0.023139634669608927, + "grad_norm": 3.0505888869524775, + "learning_rate": 7.711950970377937e-06, + "loss": 0.8254, + "step": 755 + }, + { + "epoch": 0.02317028319235013, + "grad_norm": 3.6816750037513226, + "learning_rate": 7.722165474974465e-06, + "loss": 0.7782, + "step": 756 + }, + { + "epoch": 0.02320093171509133, + "grad_norm": 2.789910921702266, + "learning_rate": 7.732379979570993e-06, + "loss": 0.7376, + "step": 757 + }, + { + "epoch": 0.023231580237832537, + "grad_norm": 2.9090057000385916, + "learning_rate": 7.742594484167519e-06, + "loss": 0.8103, + "step": 758 + }, + { + "epoch": 0.02326222876057374, + "grad_norm": 2.6081153061363542, + "learning_rate": 7.752808988764046e-06, + "loss": 0.7587, + "step": 759 + }, + { + "epoch": 0.023292877283314944, + "grad_norm": 2.6110908510103603, + "learning_rate": 7.763023493360572e-06, + "loss": 0.909, + "step": 760 + }, + { + "epoch": 0.023323525806056147, + "grad_norm": 2.9344419178507217, + "learning_rate": 7.7732379979571e-06, + "loss": 0.7932, + "step": 761 + }, + { + "epoch": 0.023354174328797352, + "grad_norm": 2.7542743692200546, + "learning_rate": 7.783452502553627e-06, + "loss": 0.8531, + "step": 762 + }, + { + "epoch": 0.023384822851538555, + "grad_norm": 2.871857832188165, + "learning_rate": 7.793667007150153e-06, + "loss": 0.9389, + "step": 763 + }, + { + "epoch": 0.02341547137427976, + "grad_norm": 1.2972420445787027, + "learning_rate": 7.803881511746681e-06, + "loss": 0.5451, + "step": 764 + }, + { + "epoch": 0.023446119897020962, + "grad_norm": 2.663419980525243, + "learning_rate": 7.814096016343207e-06, + "loss": 0.8129, + "step": 765 + }, + { + "epoch": 0.023476768419762168, + "grad_norm": 1.207231539583585, + "learning_rate": 7.824310520939735e-06, + "loss": 0.5618, + "step": 766 + }, + { + "epoch": 0.02350741694250337, + "grad_norm": 2.899009709608621, + "learning_rate": 7.834525025536262e-06, + "loss": 0.9287, + "step": 767 + }, + { + "epoch": 0.023538065465244576, + "grad_norm": 2.494082803195622, + "learning_rate": 7.844739530132788e-06, + "loss": 0.8086, + "step": 768 + }, + { + "epoch": 0.023568713987985778, + "grad_norm": 2.623792300924052, + "learning_rate": 7.854954034729316e-06, + "loss": 0.7774, + "step": 769 + }, + { + "epoch": 0.023599362510726984, + "grad_norm": 2.610385170073888, + "learning_rate": 7.865168539325843e-06, + "loss": 0.7268, + "step": 770 + }, + { + "epoch": 0.023630011033468186, + "grad_norm": 2.633520870832457, + "learning_rate": 7.87538304392237e-06, + "loss": 0.7838, + "step": 771 + }, + { + "epoch": 0.02366065955620939, + "grad_norm": 2.624973703497371, + "learning_rate": 7.885597548518897e-06, + "loss": 0.9103, + "step": 772 + }, + { + "epoch": 0.023691308078950594, + "grad_norm": 1.5227679154016525, + "learning_rate": 7.895812053115425e-06, + "loss": 0.5488, + "step": 773 + }, + { + "epoch": 0.0237219566016918, + "grad_norm": 2.479268014090245, + "learning_rate": 7.906026557711952e-06, + "loss": 0.8612, + "step": 774 + }, + { + "epoch": 0.023752605124433, + "grad_norm": 2.8903049664040714, + "learning_rate": 7.916241062308478e-06, + "loss": 0.8059, + "step": 775 + }, + { + "epoch": 0.023783253647174207, + "grad_norm": 2.6500010261562834, + "learning_rate": 
7.926455566905006e-06, + "loss": 0.8792, + "step": 776 + }, + { + "epoch": 0.02381390216991541, + "grad_norm": 2.480893290639923, + "learning_rate": 7.936670071501533e-06, + "loss": 0.7673, + "step": 777 + }, + { + "epoch": 0.023844550692656615, + "grad_norm": 2.815091717878676, + "learning_rate": 7.94688457609806e-06, + "loss": 0.8229, + "step": 778 + }, + { + "epoch": 0.023875199215397817, + "grad_norm": 2.7115926648353224, + "learning_rate": 7.957099080694587e-06, + "loss": 0.8917, + "step": 779 + }, + { + "epoch": 0.023905847738139023, + "grad_norm": 3.023148704765526, + "learning_rate": 7.967313585291115e-06, + "loss": 0.7239, + "step": 780 + }, + { + "epoch": 0.023936496260880225, + "grad_norm": 1.272846888223202, + "learning_rate": 7.97752808988764e-06, + "loss": 0.5541, + "step": 781 + }, + { + "epoch": 0.02396714478362143, + "grad_norm": 2.3781680149348223, + "learning_rate": 7.987742594484168e-06, + "loss": 0.7803, + "step": 782 + }, + { + "epoch": 0.023997793306362633, + "grad_norm": 2.7809861263277096, + "learning_rate": 7.997957099080696e-06, + "loss": 0.8305, + "step": 783 + }, + { + "epoch": 0.02402844182910384, + "grad_norm": 3.686101341042195, + "learning_rate": 8.008171603677223e-06, + "loss": 0.8876, + "step": 784 + }, + { + "epoch": 0.02405909035184504, + "grad_norm": 2.3595418307545817, + "learning_rate": 8.01838610827375e-06, + "loss": 0.7125, + "step": 785 + }, + { + "epoch": 0.024089738874586247, + "grad_norm": 2.362173168076082, + "learning_rate": 8.028600612870277e-06, + "loss": 0.8467, + "step": 786 + }, + { + "epoch": 0.02412038739732745, + "grad_norm": 2.5116109332963856, + "learning_rate": 8.038815117466805e-06, + "loss": 0.8069, + "step": 787 + }, + { + "epoch": 0.02415103592006865, + "grad_norm": 2.6575777419493827, + "learning_rate": 8.04902962206333e-06, + "loss": 0.9022, + "step": 788 + }, + { + "epoch": 0.024181684442809857, + "grad_norm": 2.57174966345801, + "learning_rate": 8.059244126659858e-06, + "loss": 0.8693, + "step": 789 + }, + { + "epoch": 0.02421233296555106, + "grad_norm": 2.3741826054283584, + "learning_rate": 8.069458631256384e-06, + "loss": 0.8724, + "step": 790 + }, + { + "epoch": 0.024242981488292265, + "grad_norm": 1.3082340538282995, + "learning_rate": 8.079673135852912e-06, + "loss": 0.5586, + "step": 791 + }, + { + "epoch": 0.024273630011033467, + "grad_norm": 2.7828894181453383, + "learning_rate": 8.08988764044944e-06, + "loss": 0.8388, + "step": 792 + }, + { + "epoch": 0.024304278533774672, + "grad_norm": 2.427629568699509, + "learning_rate": 8.100102145045965e-06, + "loss": 0.8701, + "step": 793 + }, + { + "epoch": 0.024334927056515875, + "grad_norm": 2.7552297337204688, + "learning_rate": 8.110316649642493e-06, + "loss": 0.8267, + "step": 794 + }, + { + "epoch": 0.02436557557925708, + "grad_norm": 2.573239912320061, + "learning_rate": 8.120531154239019e-06, + "loss": 0.8433, + "step": 795 + }, + { + "epoch": 0.024396224101998282, + "grad_norm": 1.0246386722689858, + "learning_rate": 8.130745658835547e-06, + "loss": 0.5294, + "step": 796 + }, + { + "epoch": 0.024426872624739488, + "grad_norm": 2.432423185662239, + "learning_rate": 8.140960163432074e-06, + "loss": 0.8441, + "step": 797 + }, + { + "epoch": 0.02445752114748069, + "grad_norm": 2.3419335503581187, + "learning_rate": 8.1511746680286e-06, + "loss": 0.7905, + "step": 798 + }, + { + "epoch": 0.024488169670221896, + "grad_norm": 2.5703489722906836, + "learning_rate": 8.161389172625128e-06, + "loss": 0.826, + "step": 799 + }, + { + "epoch": 0.024518818192963098, + 
"grad_norm": 2.548085923395877, + "learning_rate": 8.171603677221655e-06, + "loss": 0.8292, + "step": 800 + }, + { + "epoch": 0.024549466715704304, + "grad_norm": 2.574537986347276, + "learning_rate": 8.181818181818183e-06, + "loss": 0.8566, + "step": 801 + }, + { + "epoch": 0.024580115238445506, + "grad_norm": 2.798053270216666, + "learning_rate": 8.192032686414709e-06, + "loss": 0.7926, + "step": 802 + }, + { + "epoch": 0.02461076376118671, + "grad_norm": 2.816179015560676, + "learning_rate": 8.202247191011237e-06, + "loss": 0.8615, + "step": 803 + }, + { + "epoch": 0.024641412283927914, + "grad_norm": 2.7135238536985917, + "learning_rate": 8.212461695607764e-06, + "loss": 0.8315, + "step": 804 + }, + { + "epoch": 0.02467206080666912, + "grad_norm": 2.3489425466151452, + "learning_rate": 8.22267620020429e-06, + "loss": 0.7628, + "step": 805 + }, + { + "epoch": 0.02470270932941032, + "grad_norm": 2.2727347399448843, + "learning_rate": 8.232890704800818e-06, + "loss": 0.7686, + "step": 806 + }, + { + "epoch": 0.024733357852151527, + "grad_norm": 2.650219634323542, + "learning_rate": 8.243105209397345e-06, + "loss": 0.8165, + "step": 807 + }, + { + "epoch": 0.02476400637489273, + "grad_norm": 2.570779255473059, + "learning_rate": 8.253319713993871e-06, + "loss": 0.8439, + "step": 808 + }, + { + "epoch": 0.024794654897633935, + "grad_norm": 1.147276085612331, + "learning_rate": 8.263534218590399e-06, + "loss": 0.5719, + "step": 809 + }, + { + "epoch": 0.024825303420375137, + "grad_norm": 1.041089518221371, + "learning_rate": 8.273748723186927e-06, + "loss": 0.5477, + "step": 810 + }, + { + "epoch": 0.024855951943116343, + "grad_norm": 2.5837806173658864, + "learning_rate": 8.283963227783454e-06, + "loss": 0.8565, + "step": 811 + }, + { + "epoch": 0.024886600465857545, + "grad_norm": 2.3330345787058993, + "learning_rate": 8.29417773237998e-06, + "loss": 0.7899, + "step": 812 + }, + { + "epoch": 0.02491724898859875, + "grad_norm": 2.484462669048185, + "learning_rate": 8.304392236976508e-06, + "loss": 0.9207, + "step": 813 + }, + { + "epoch": 0.024947897511339953, + "grad_norm": 2.596343764073533, + "learning_rate": 8.314606741573035e-06, + "loss": 0.832, + "step": 814 + }, + { + "epoch": 0.02497854603408116, + "grad_norm": 2.5174269083317236, + "learning_rate": 8.324821246169561e-06, + "loss": 0.8807, + "step": 815 + }, + { + "epoch": 0.02500919455682236, + "grad_norm": 2.4532373482671597, + "learning_rate": 8.335035750766089e-06, + "loss": 0.6986, + "step": 816 + }, + { + "epoch": 0.025039843079563567, + "grad_norm": 2.4661005564791347, + "learning_rate": 8.345250255362617e-06, + "loss": 0.7601, + "step": 817 + }, + { + "epoch": 0.02507049160230477, + "grad_norm": 2.6323898354850357, + "learning_rate": 8.355464759959142e-06, + "loss": 0.8352, + "step": 818 + }, + { + "epoch": 0.02510114012504597, + "grad_norm": 1.2874777248094558, + "learning_rate": 8.36567926455567e-06, + "loss": 0.5838, + "step": 819 + }, + { + "epoch": 0.025131788647787177, + "grad_norm": 1.125204128242292, + "learning_rate": 8.375893769152198e-06, + "loss": 0.546, + "step": 820 + }, + { + "epoch": 0.02516243717052838, + "grad_norm": 2.900243627582199, + "learning_rate": 8.386108273748724e-06, + "loss": 0.788, + "step": 821 + }, + { + "epoch": 0.025193085693269585, + "grad_norm": 2.7760036339171745, + "learning_rate": 8.396322778345251e-06, + "loss": 0.8666, + "step": 822 + }, + { + "epoch": 0.025223734216010787, + "grad_norm": 3.1126109144635428, + "learning_rate": 8.406537282941777e-06, + "loss": 0.8769, + "step": 823 + 
}, + { + "epoch": 0.025254382738751992, + "grad_norm": 2.5415993714914227, + "learning_rate": 8.416751787538305e-06, + "loss": 0.8567, + "step": 824 + }, + { + "epoch": 0.025285031261493195, + "grad_norm": 2.6340756389736084, + "learning_rate": 8.426966292134832e-06, + "loss": 0.8887, + "step": 825 + }, + { + "epoch": 0.0253156797842344, + "grad_norm": 2.508698032242236, + "learning_rate": 8.437180796731358e-06, + "loss": 0.7914, + "step": 826 + }, + { + "epoch": 0.025346328306975602, + "grad_norm": 2.5311781038835712, + "learning_rate": 8.447395301327886e-06, + "loss": 0.8157, + "step": 827 + }, + { + "epoch": 0.025376976829716808, + "grad_norm": 2.4099391775740786, + "learning_rate": 8.457609805924414e-06, + "loss": 0.8328, + "step": 828 + }, + { + "epoch": 0.02540762535245801, + "grad_norm": 2.8262460628277672, + "learning_rate": 8.46782431052094e-06, + "loss": 0.8953, + "step": 829 + }, + { + "epoch": 0.025438273875199216, + "grad_norm": 2.6268744350574695, + "learning_rate": 8.478038815117467e-06, + "loss": 0.8844, + "step": 830 + }, + { + "epoch": 0.025468922397940418, + "grad_norm": 2.775673570115626, + "learning_rate": 8.488253319713995e-06, + "loss": 0.9974, + "step": 831 + }, + { + "epoch": 0.025499570920681624, + "grad_norm": 2.7274378400206225, + "learning_rate": 8.49846782431052e-06, + "loss": 0.8877, + "step": 832 + }, + { + "epoch": 0.025530219443422826, + "grad_norm": 2.6667591907622525, + "learning_rate": 8.508682328907048e-06, + "loss": 0.8432, + "step": 833 + }, + { + "epoch": 0.02556086796616403, + "grad_norm": 2.6132873214559322, + "learning_rate": 8.518896833503576e-06, + "loss": 0.8647, + "step": 834 + }, + { + "epoch": 0.025591516488905234, + "grad_norm": 3.010548082282907, + "learning_rate": 8.529111338100104e-06, + "loss": 0.7312, + "step": 835 + }, + { + "epoch": 0.02562216501164644, + "grad_norm": 2.6632537774454246, + "learning_rate": 8.53932584269663e-06, + "loss": 0.9057, + "step": 836 + }, + { + "epoch": 0.025652813534387642, + "grad_norm": 2.246768439379882, + "learning_rate": 8.549540347293157e-06, + "loss": 0.822, + "step": 837 + }, + { + "epoch": 0.025683462057128847, + "grad_norm": 2.854966289986523, + "learning_rate": 8.559754851889685e-06, + "loss": 0.9023, + "step": 838 + }, + { + "epoch": 0.02571411057987005, + "grad_norm": 2.613408934721483, + "learning_rate": 8.56996935648621e-06, + "loss": 0.8117, + "step": 839 + }, + { + "epoch": 0.025744759102611255, + "grad_norm": 2.3556484466166387, + "learning_rate": 8.580183861082738e-06, + "loss": 0.8202, + "step": 840 + }, + { + "epoch": 0.025775407625352457, + "grad_norm": 2.532871511557852, + "learning_rate": 8.590398365679266e-06, + "loss": 0.8054, + "step": 841 + }, + { + "epoch": 0.025806056148093663, + "grad_norm": 2.559305045378276, + "learning_rate": 8.600612870275792e-06, + "loss": 0.7622, + "step": 842 + }, + { + "epoch": 0.025836704670834865, + "grad_norm": 3.1797481867617288, + "learning_rate": 8.61082737487232e-06, + "loss": 0.7494, + "step": 843 + }, + { + "epoch": 0.02586735319357607, + "grad_norm": 2.551453691760712, + "learning_rate": 8.621041879468847e-06, + "loss": 0.9095, + "step": 844 + }, + { + "epoch": 0.025898001716317273, + "grad_norm": 2.632911451559825, + "learning_rate": 8.631256384065375e-06, + "loss": 0.8499, + "step": 845 + }, + { + "epoch": 0.02592865023905848, + "grad_norm": 2.660944977332849, + "learning_rate": 8.6414708886619e-06, + "loss": 0.7536, + "step": 846 + }, + { + "epoch": 0.02595929876179968, + "grad_norm": 1.9569939502631544, + "learning_rate": 
8.651685393258428e-06, + "loss": 0.5495, + "step": 847 + }, + { + "epoch": 0.025989947284540887, + "grad_norm": 3.0209608680331264, + "learning_rate": 8.661899897854954e-06, + "loss": 0.772, + "step": 848 + }, + { + "epoch": 0.02602059580728209, + "grad_norm": 2.7434468475357403, + "learning_rate": 8.672114402451482e-06, + "loss": 0.7995, + "step": 849 + }, + { + "epoch": 0.026051244330023295, + "grad_norm": 2.650065750088603, + "learning_rate": 8.68232890704801e-06, + "loss": 0.905, + "step": 850 + }, + { + "epoch": 0.026081892852764497, + "grad_norm": 2.6140517642519487, + "learning_rate": 8.692543411644536e-06, + "loss": 0.8161, + "step": 851 + }, + { + "epoch": 0.0261125413755057, + "grad_norm": 2.6583794182956657, + "learning_rate": 8.702757916241063e-06, + "loss": 0.7274, + "step": 852 + }, + { + "epoch": 0.026143189898246905, + "grad_norm": 2.8924937098838837, + "learning_rate": 8.712972420837589e-06, + "loss": 0.8863, + "step": 853 + }, + { + "epoch": 0.026173838420988107, + "grad_norm": 2.5322419179387703, + "learning_rate": 8.723186925434117e-06, + "loss": 0.8366, + "step": 854 + }, + { + "epoch": 0.026204486943729313, + "grad_norm": 2.6668562751637626, + "learning_rate": 8.733401430030644e-06, + "loss": 0.8899, + "step": 855 + }, + { + "epoch": 0.026235135466470515, + "grad_norm": 3.1105253346763884, + "learning_rate": 8.74361593462717e-06, + "loss": 0.8593, + "step": 856 + }, + { + "epoch": 0.02626578398921172, + "grad_norm": 2.745522928599825, + "learning_rate": 8.753830439223698e-06, + "loss": 0.8762, + "step": 857 + }, + { + "epoch": 0.026296432511952923, + "grad_norm": 2.820371672904367, + "learning_rate": 8.764044943820226e-06, + "loss": 0.9065, + "step": 858 + }, + { + "epoch": 0.026327081034694128, + "grad_norm": 2.4258146135636274, + "learning_rate": 8.774259448416752e-06, + "loss": 0.771, + "step": 859 + }, + { + "epoch": 0.02635772955743533, + "grad_norm": 2.7328001343894956, + "learning_rate": 8.784473953013279e-06, + "loss": 0.7946, + "step": 860 + }, + { + "epoch": 0.026388378080176536, + "grad_norm": 3.093589809640212, + "learning_rate": 8.794688457609807e-06, + "loss": 0.8917, + "step": 861 + }, + { + "epoch": 0.02641902660291774, + "grad_norm": 2.673877697109671, + "learning_rate": 8.804902962206334e-06, + "loss": 0.8392, + "step": 862 + }, + { + "epoch": 0.026449675125658944, + "grad_norm": 1.7171565847418424, + "learning_rate": 8.81511746680286e-06, + "loss": 0.554, + "step": 863 + }, + { + "epoch": 0.026480323648400146, + "grad_norm": 2.8339918016366, + "learning_rate": 8.825331971399388e-06, + "loss": 0.8552, + "step": 864 + }, + { + "epoch": 0.026510972171141352, + "grad_norm": 2.758202190881158, + "learning_rate": 8.835546475995916e-06, + "loss": 0.8577, + "step": 865 + }, + { + "epoch": 0.026541620693882554, + "grad_norm": 2.666596170857121, + "learning_rate": 8.845760980592442e-06, + "loss": 0.8614, + "step": 866 + }, + { + "epoch": 0.02657226921662376, + "grad_norm": 2.4626059530983433, + "learning_rate": 8.855975485188969e-06, + "loss": 0.8282, + "step": 867 + }, + { + "epoch": 0.026602917739364962, + "grad_norm": 2.7101234752439294, + "learning_rate": 8.866189989785497e-06, + "loss": 0.8276, + "step": 868 + }, + { + "epoch": 0.026633566262106168, + "grad_norm": 2.587988197149573, + "learning_rate": 8.876404494382023e-06, + "loss": 0.8328, + "step": 869 + }, + { + "epoch": 0.02666421478484737, + "grad_norm": 2.6950444477476068, + "learning_rate": 8.88661899897855e-06, + "loss": 0.7748, + "step": 870 + }, + { + "epoch": 0.026694863307588575, + 
"grad_norm": 2.354779587495378, + "learning_rate": 8.896833503575078e-06, + "loss": 0.7574, + "step": 871 + }, + { + "epoch": 0.026725511830329778, + "grad_norm": 2.8575796679015713, + "learning_rate": 8.907048008171606e-06, + "loss": 0.8717, + "step": 872 + }, + { + "epoch": 0.026756160353070983, + "grad_norm": 2.5830526486645824, + "learning_rate": 8.917262512768132e-06, + "loss": 0.8297, + "step": 873 + }, + { + "epoch": 0.026786808875812185, + "grad_norm": 1.6344762189202922, + "learning_rate": 8.927477017364659e-06, + "loss": 0.5805, + "step": 874 + }, + { + "epoch": 0.02681745739855339, + "grad_norm": 2.7164994883880635, + "learning_rate": 8.937691521961187e-06, + "loss": 0.8196, + "step": 875 + }, + { + "epoch": 0.026848105921294593, + "grad_norm": 2.6181483221024937, + "learning_rate": 8.947906026557713e-06, + "loss": 0.7712, + "step": 876 + }, + { + "epoch": 0.0268787544440358, + "grad_norm": 3.0806438010104786, + "learning_rate": 8.95812053115424e-06, + "loss": 0.8944, + "step": 877 + }, + { + "epoch": 0.026909402966777, + "grad_norm": 2.5834843493950617, + "learning_rate": 8.968335035750766e-06, + "loss": 0.757, + "step": 878 + }, + { + "epoch": 0.026940051489518207, + "grad_norm": 2.728155069145252, + "learning_rate": 8.978549540347294e-06, + "loss": 0.8445, + "step": 879 + }, + { + "epoch": 0.02697070001225941, + "grad_norm": 2.4450448692081115, + "learning_rate": 8.988764044943822e-06, + "loss": 0.8559, + "step": 880 + }, + { + "epoch": 0.027001348535000615, + "grad_norm": 2.3397505420037823, + "learning_rate": 8.998978549540347e-06, + "loss": 0.7991, + "step": 881 + }, + { + "epoch": 0.027031997057741817, + "grad_norm": 1.3677263325671185, + "learning_rate": 9.009193054136875e-06, + "loss": 0.5804, + "step": 882 + }, + { + "epoch": 0.02706264558048302, + "grad_norm": 2.6644083202892404, + "learning_rate": 9.019407558733401e-06, + "loss": 0.8311, + "step": 883 + }, + { + "epoch": 0.027093294103224225, + "grad_norm": 2.6036955467444565, + "learning_rate": 9.029622063329929e-06, + "loss": 0.8554, + "step": 884 + }, + { + "epoch": 0.027123942625965427, + "grad_norm": 2.724944389249544, + "learning_rate": 9.039836567926456e-06, + "loss": 0.825, + "step": 885 + }, + { + "epoch": 0.027154591148706633, + "grad_norm": 2.414120821805667, + "learning_rate": 9.050051072522982e-06, + "loss": 0.9189, + "step": 886 + }, + { + "epoch": 0.027185239671447835, + "grad_norm": 2.3614865687060163, + "learning_rate": 9.06026557711951e-06, + "loss": 0.7455, + "step": 887 + }, + { + "epoch": 0.02721588819418904, + "grad_norm": 2.705471842485433, + "learning_rate": 9.070480081716037e-06, + "loss": 0.8847, + "step": 888 + }, + { + "epoch": 0.027246536716930243, + "grad_norm": 2.2502470681806126, + "learning_rate": 9.080694586312565e-06, + "loss": 0.7791, + "step": 889 + }, + { + "epoch": 0.02727718523967145, + "grad_norm": 2.8895300054915243, + "learning_rate": 9.090909090909091e-06, + "loss": 0.8423, + "step": 890 + }, + { + "epoch": 0.02730783376241265, + "grad_norm": 2.400942036621106, + "learning_rate": 9.101123595505619e-06, + "loss": 0.7522, + "step": 891 + }, + { + "epoch": 0.027338482285153856, + "grad_norm": 2.6433642995371707, + "learning_rate": 9.111338100102146e-06, + "loss": 0.825, + "step": 892 + }, + { + "epoch": 0.02736913080789506, + "grad_norm": 2.393791008434286, + "learning_rate": 9.121552604698672e-06, + "loss": 0.8107, + "step": 893 + }, + { + "epoch": 0.027399779330636264, + "grad_norm": 2.5776436532258318, + "learning_rate": 9.1317671092952e-06, + "loss": 0.7972, + "step": 894 
+ }, + { + "epoch": 0.027430427853377466, + "grad_norm": 2.4560225081013414, + "learning_rate": 9.141981613891727e-06, + "loss": 0.7217, + "step": 895 + }, + { + "epoch": 0.027461076376118672, + "grad_norm": 2.741831782917869, + "learning_rate": 9.152196118488255e-06, + "loss": 0.8873, + "step": 896 + }, + { + "epoch": 0.027491724898859874, + "grad_norm": 2.612857117184384, + "learning_rate": 9.162410623084781e-06, + "loss": 0.7401, + "step": 897 + }, + { + "epoch": 0.02752237342160108, + "grad_norm": 2.5049418916516837, + "learning_rate": 9.172625127681309e-06, + "loss": 0.8022, + "step": 898 + }, + { + "epoch": 0.027553021944342282, + "grad_norm": 1.2746541085167293, + "learning_rate": 9.182839632277836e-06, + "loss": 0.5643, + "step": 899 + }, + { + "epoch": 0.027583670467083488, + "grad_norm": 2.5069582853042007, + "learning_rate": 9.193054136874362e-06, + "loss": 0.7851, + "step": 900 + }, + { + "epoch": 0.02761431898982469, + "grad_norm": 2.576669871917871, + "learning_rate": 9.20326864147089e-06, + "loss": 0.8603, + "step": 901 + }, + { + "epoch": 0.027644967512565895, + "grad_norm": 2.450572685709724, + "learning_rate": 9.213483146067417e-06, + "loss": 0.8692, + "step": 902 + }, + { + "epoch": 0.027675616035307098, + "grad_norm": 3.0014619683246617, + "learning_rate": 9.223697650663943e-06, + "loss": 0.8959, + "step": 903 + }, + { + "epoch": 0.027706264558048303, + "grad_norm": 2.6948085119350242, + "learning_rate": 9.233912155260471e-06, + "loss": 0.8897, + "step": 904 + }, + { + "epoch": 0.027736913080789505, + "grad_norm": 2.3499546042625745, + "learning_rate": 9.244126659856999e-06, + "loss": 0.8778, + "step": 905 + }, + { + "epoch": 0.02776756160353071, + "grad_norm": 2.598433280340729, + "learning_rate": 9.254341164453525e-06, + "loss": 0.8472, + "step": 906 + }, + { + "epoch": 0.027798210126271913, + "grad_norm": 2.511034311848889, + "learning_rate": 9.264555669050052e-06, + "loss": 0.7848, + "step": 907 + }, + { + "epoch": 0.02782885864901312, + "grad_norm": 2.542738924306037, + "learning_rate": 9.274770173646578e-06, + "loss": 0.8482, + "step": 908 + }, + { + "epoch": 0.02785950717175432, + "grad_norm": 1.2571803581608227, + "learning_rate": 9.284984678243106e-06, + "loss": 0.5664, + "step": 909 + }, + { + "epoch": 0.027890155694495527, + "grad_norm": 2.540279628831256, + "learning_rate": 9.295199182839633e-06, + "loss": 0.729, + "step": 910 + }, + { + "epoch": 0.02792080421723673, + "grad_norm": 2.7514703075998033, + "learning_rate": 9.30541368743616e-06, + "loss": 0.749, + "step": 911 + }, + { + "epoch": 0.027951452739977935, + "grad_norm": 2.69877353940744, + "learning_rate": 9.315628192032687e-06, + "loss": 0.9042, + "step": 912 + }, + { + "epoch": 0.027982101262719137, + "grad_norm": 2.61050720609846, + "learning_rate": 9.325842696629213e-06, + "loss": 0.8512, + "step": 913 + }, + { + "epoch": 0.02801274978546034, + "grad_norm": 2.3527178696530133, + "learning_rate": 9.33605720122574e-06, + "loss": 0.9047, + "step": 914 + }, + { + "epoch": 0.028043398308201545, + "grad_norm": 2.334150231626858, + "learning_rate": 9.346271705822268e-06, + "loss": 0.8385, + "step": 915 + }, + { + "epoch": 0.028074046830942747, + "grad_norm": 2.8023560630501705, + "learning_rate": 9.356486210418796e-06, + "loss": 0.8636, + "step": 916 + }, + { + "epoch": 0.028104695353683953, + "grad_norm": 2.3222114960685065, + "learning_rate": 9.366700715015322e-06, + "loss": 0.7902, + "step": 917 + }, + { + "epoch": 0.028135343876425155, + "grad_norm": 2.359591455597825, + "learning_rate": 
9.37691521961185e-06, + "loss": 0.8404, + "step": 918 + }, + { + "epoch": 0.02816599239916636, + "grad_norm": 2.3866818116315662, + "learning_rate": 9.387129724208377e-06, + "loss": 0.8796, + "step": 919 + }, + { + "epoch": 0.028196640921907563, + "grad_norm": 2.5353210704461837, + "learning_rate": 9.397344228804903e-06, + "loss": 0.8145, + "step": 920 + }, + { + "epoch": 0.02822728944464877, + "grad_norm": 2.695016878806716, + "learning_rate": 9.40755873340143e-06, + "loss": 0.8114, + "step": 921 + }, + { + "epoch": 0.02825793796738997, + "grad_norm": 2.465155074941893, + "learning_rate": 9.417773237997958e-06, + "loss": 0.8238, + "step": 922 + }, + { + "epoch": 0.028288586490131176, + "grad_norm": 2.5536123728138254, + "learning_rate": 9.427987742594486e-06, + "loss": 0.7399, + "step": 923 + }, + { + "epoch": 0.02831923501287238, + "grad_norm": 1.0604879350764393, + "learning_rate": 9.438202247191012e-06, + "loss": 0.5711, + "step": 924 + }, + { + "epoch": 0.028349883535613584, + "grad_norm": 1.1363205112623014, + "learning_rate": 9.44841675178754e-06, + "loss": 0.5669, + "step": 925 + }, + { + "epoch": 0.028380532058354786, + "grad_norm": 2.780920729399713, + "learning_rate": 9.458631256384067e-06, + "loss": 0.7395, + "step": 926 + }, + { + "epoch": 0.028411180581095992, + "grad_norm": 2.5964547159078673, + "learning_rate": 9.468845760980593e-06, + "loss": 0.8742, + "step": 927 + }, + { + "epoch": 0.028441829103837194, + "grad_norm": 2.5366869086366983, + "learning_rate": 9.47906026557712e-06, + "loss": 0.9066, + "step": 928 + }, + { + "epoch": 0.0284724776265784, + "grad_norm": 2.4149473626153317, + "learning_rate": 9.489274770173648e-06, + "loss": 0.7666, + "step": 929 + }, + { + "epoch": 0.028503126149319602, + "grad_norm": 2.5407076053745072, + "learning_rate": 9.499489274770174e-06, + "loss": 0.8175, + "step": 930 + }, + { + "epoch": 0.028533774672060808, + "grad_norm": 2.3090942332571154, + "learning_rate": 9.509703779366702e-06, + "loss": 0.6784, + "step": 931 + }, + { + "epoch": 0.02856442319480201, + "grad_norm": 2.5700660692300517, + "learning_rate": 9.51991828396323e-06, + "loss": 0.8682, + "step": 932 + }, + { + "epoch": 0.028595071717543216, + "grad_norm": 2.517546560768532, + "learning_rate": 9.530132788559755e-06, + "loss": 0.7301, + "step": 933 + }, + { + "epoch": 0.028625720240284418, + "grad_norm": 2.650916379972764, + "learning_rate": 9.540347293156283e-06, + "loss": 0.8852, + "step": 934 + }, + { + "epoch": 0.028656368763025623, + "grad_norm": 1.4876408056573778, + "learning_rate": 9.55056179775281e-06, + "loss": 0.5577, + "step": 935 + }, + { + "epoch": 0.028687017285766826, + "grad_norm": 2.7786317429846834, + "learning_rate": 9.560776302349337e-06, + "loss": 0.8532, + "step": 936 + }, + { + "epoch": 0.02871766580850803, + "grad_norm": 2.549107278732885, + "learning_rate": 9.570990806945864e-06, + "loss": 0.7941, + "step": 937 + }, + { + "epoch": 0.028748314331249233, + "grad_norm": 2.621561689797684, + "learning_rate": 9.58120531154239e-06, + "loss": 0.9105, + "step": 938 + }, + { + "epoch": 0.02877896285399044, + "grad_norm": 2.621723014722738, + "learning_rate": 9.591419816138918e-06, + "loss": 0.6723, + "step": 939 + }, + { + "epoch": 0.02880961137673164, + "grad_norm": 2.824985080602391, + "learning_rate": 9.601634320735445e-06, + "loss": 0.8584, + "step": 940 + }, + { + "epoch": 0.028840259899472847, + "grad_norm": 2.646898070762316, + "learning_rate": 9.611848825331971e-06, + "loss": 0.8113, + "step": 941 + }, + { + "epoch": 0.02887090842221405, + 
"grad_norm": 2.960425582015106, + "learning_rate": 9.622063329928499e-06, + "loss": 0.8674, + "step": 942 + }, + { + "epoch": 0.028901556944955255, + "grad_norm": 1.4109932533501508, + "learning_rate": 9.632277834525027e-06, + "loss": 0.5681, + "step": 943 + }, + { + "epoch": 0.028932205467696457, + "grad_norm": 2.2937909185956955, + "learning_rate": 9.642492339121552e-06, + "loss": 0.8622, + "step": 944 + }, + { + "epoch": 0.02896285399043766, + "grad_norm": 2.4541806609726713, + "learning_rate": 9.65270684371808e-06, + "loss": 0.8571, + "step": 945 + }, + { + "epoch": 0.028993502513178865, + "grad_norm": 2.494082243658257, + "learning_rate": 9.662921348314608e-06, + "loss": 0.8313, + "step": 946 + }, + { + "epoch": 0.029024151035920067, + "grad_norm": 2.5137246009355603, + "learning_rate": 9.673135852911134e-06, + "loss": 0.8655, + "step": 947 + }, + { + "epoch": 0.029054799558661273, + "grad_norm": 2.2016583627213824, + "learning_rate": 9.683350357507661e-06, + "loss": 0.7857, + "step": 948 + }, + { + "epoch": 0.029085448081402475, + "grad_norm": 2.525177199514098, + "learning_rate": 9.693564862104189e-06, + "loss": 0.7948, + "step": 949 + }, + { + "epoch": 0.02911609660414368, + "grad_norm": 2.2685819687037676, + "learning_rate": 9.703779366700717e-06, + "loss": 0.8242, + "step": 950 + }, + { + "epoch": 0.029146745126884883, + "grad_norm": 2.250225463776507, + "learning_rate": 9.713993871297242e-06, + "loss": 0.7679, + "step": 951 + }, + { + "epoch": 0.02917739364962609, + "grad_norm": 2.5462848063137478, + "learning_rate": 9.72420837589377e-06, + "loss": 0.8051, + "step": 952 + }, + { + "epoch": 0.02920804217236729, + "grad_norm": 2.9267964873938292, + "learning_rate": 9.734422880490298e-06, + "loss": 0.8373, + "step": 953 + }, + { + "epoch": 0.029238690695108496, + "grad_norm": 2.5980641880267843, + "learning_rate": 9.744637385086824e-06, + "loss": 0.9197, + "step": 954 + }, + { + "epoch": 0.0292693392178497, + "grad_norm": 2.7070564447246355, + "learning_rate": 9.754851889683351e-06, + "loss": 0.9031, + "step": 955 + }, + { + "epoch": 0.029299987740590904, + "grad_norm": 2.2661360942059723, + "learning_rate": 9.765066394279879e-06, + "loss": 0.8534, + "step": 956 + }, + { + "epoch": 0.029330636263332106, + "grad_norm": 2.7702331342658035, + "learning_rate": 9.775280898876405e-06, + "loss": 0.8182, + "step": 957 + }, + { + "epoch": 0.029361284786073312, + "grad_norm": 2.3820111219518667, + "learning_rate": 9.785495403472932e-06, + "loss": 0.8924, + "step": 958 + }, + { + "epoch": 0.029391933308814514, + "grad_norm": 2.5425465214931693, + "learning_rate": 9.79570990806946e-06, + "loss": 0.8118, + "step": 959 + }, + { + "epoch": 0.02942258183155572, + "grad_norm": 2.484143220617581, + "learning_rate": 9.805924412665988e-06, + "loss": 0.8123, + "step": 960 + }, + { + "epoch": 0.029453230354296922, + "grad_norm": 2.8482979859598587, + "learning_rate": 9.816138917262514e-06, + "loss": 0.8288, + "step": 961 + }, + { + "epoch": 0.029483878877038128, + "grad_norm": 1.8653644994384166, + "learning_rate": 9.826353421859041e-06, + "loss": 0.5886, + "step": 962 + }, + { + "epoch": 0.02951452739977933, + "grad_norm": 2.454158731946066, + "learning_rate": 9.836567926455567e-06, + "loss": 0.8418, + "step": 963 + }, + { + "epoch": 0.029545175922520536, + "grad_norm": 2.279549380638653, + "learning_rate": 9.846782431052095e-06, + "loss": 0.7624, + "step": 964 + }, + { + "epoch": 0.029575824445261738, + "grad_norm": 2.2876376037345514, + "learning_rate": 9.856996935648622e-06, + "loss": 0.8286, + 
"step": 965 + }, + { + "epoch": 0.029606472968002943, + "grad_norm": 2.3732154998779738, + "learning_rate": 9.867211440245148e-06, + "loss": 0.8217, + "step": 966 + }, + { + "epoch": 0.029637121490744146, + "grad_norm": 2.5155757813672475, + "learning_rate": 9.877425944841676e-06, + "loss": 0.8498, + "step": 967 + }, + { + "epoch": 0.02966777001348535, + "grad_norm": 2.4272298626355946, + "learning_rate": 9.887640449438202e-06, + "loss": 0.723, + "step": 968 + }, + { + "epoch": 0.029698418536226553, + "grad_norm": 2.709633377377744, + "learning_rate": 9.89785495403473e-06, + "loss": 0.8837, + "step": 969 + }, + { + "epoch": 0.02972906705896776, + "grad_norm": 2.4198269388239413, + "learning_rate": 9.908069458631257e-06, + "loss": 0.7477, + "step": 970 + }, + { + "epoch": 0.02975971558170896, + "grad_norm": 3.224906379810377, + "learning_rate": 9.918283963227783e-06, + "loss": 0.8943, + "step": 971 + }, + { + "epoch": 0.029790364104450167, + "grad_norm": 2.481287823904104, + "learning_rate": 9.92849846782431e-06, + "loss": 0.892, + "step": 972 + }, + { + "epoch": 0.02982101262719137, + "grad_norm": 2.345120480787265, + "learning_rate": 9.938712972420838e-06, + "loss": 0.7125, + "step": 973 + }, + { + "epoch": 0.029851661149932575, + "grad_norm": 1.766245848929486, + "learning_rate": 9.948927477017364e-06, + "loss": 0.582, + "step": 974 + }, + { + "epoch": 0.029882309672673777, + "grad_norm": 2.3253765514266984, + "learning_rate": 9.959141981613892e-06, + "loss": 0.8224, + "step": 975 + }, + { + "epoch": 0.029912958195414983, + "grad_norm": 2.592912671791218, + "learning_rate": 9.96935648621042e-06, + "loss": 0.8634, + "step": 976 + }, + { + "epoch": 0.029943606718156185, + "grad_norm": 2.259563972118137, + "learning_rate": 9.979570990806947e-06, + "loss": 0.8094, + "step": 977 + }, + { + "epoch": 0.029974255240897387, + "grad_norm": 2.512323072459396, + "learning_rate": 9.989785495403473e-06, + "loss": 0.8641, + "step": 978 + }, + { + "epoch": 0.030004903763638593, + "grad_norm": 2.526037549441196, + "learning_rate": 1e-05, + "loss": 0.8622, + "step": 979 + }, + { + "epoch": 0.030035552286379795, + "grad_norm": 2.352533466709703, + "learning_rate": 9.999999975366861e-06, + "loss": 0.9115, + "step": 980 + }, + { + "epoch": 0.030066200809121, + "grad_norm": 2.5336464647919263, + "learning_rate": 9.999999901467443e-06, + "loss": 0.7955, + "step": 981 + }, + { + "epoch": 0.030096849331862203, + "grad_norm": 2.372539279245691, + "learning_rate": 9.999999778301746e-06, + "loss": 0.8814, + "step": 982 + }, + { + "epoch": 0.03012749785460341, + "grad_norm": 2.526050673424826, + "learning_rate": 9.999999605869772e-06, + "loss": 0.9574, + "step": 983 + }, + { + "epoch": 0.03015814637734461, + "grad_norm": 2.7399580387122997, + "learning_rate": 9.999999384171522e-06, + "loss": 0.8249, + "step": 984 + }, + { + "epoch": 0.030188794900085816, + "grad_norm": 1.5262334685412926, + "learning_rate": 9.999999113207e-06, + "loss": 0.5813, + "step": 985 + }, + { + "epoch": 0.03021944342282702, + "grad_norm": 2.5409253727367975, + "learning_rate": 9.999998792976206e-06, + "loss": 0.8519, + "step": 986 + }, + { + "epoch": 0.030250091945568224, + "grad_norm": 2.7311688988171263, + "learning_rate": 9.999998423479145e-06, + "loss": 0.7932, + "step": 987 + }, + { + "epoch": 0.030280740468309426, + "grad_norm": 2.7220181968091333, + "learning_rate": 9.99999800471582e-06, + "loss": 0.8214, + "step": 988 + }, + { + "epoch": 0.030311388991050632, + "grad_norm": 1.060146330888957, + "learning_rate": 9.999997536686236e-06, 
+ "loss": 0.568, + "step": 989 + }, + { + "epoch": 0.030342037513791834, + "grad_norm": 2.4304142914011804, + "learning_rate": 9.999997019390398e-06, + "loss": 0.8263, + "step": 990 + }, + { + "epoch": 0.03037268603653304, + "grad_norm": 2.7619616639983797, + "learning_rate": 9.999996452828306e-06, + "loss": 0.8919, + "step": 991 + }, + { + "epoch": 0.030403334559274242, + "grad_norm": 1.0750799408669391, + "learning_rate": 9.999995836999975e-06, + "loss": 0.5395, + "step": 992 + }, + { + "epoch": 0.030433983082015448, + "grad_norm": 2.5877142451049595, + "learning_rate": 9.999995171905401e-06, + "loss": 0.727, + "step": 993 + }, + { + "epoch": 0.03046463160475665, + "grad_norm": 2.479254893717689, + "learning_rate": 9.999994457544599e-06, + "loss": 0.9414, + "step": 994 + }, + { + "epoch": 0.030495280127497856, + "grad_norm": 1.1023587352338724, + "learning_rate": 9.99999369391757e-06, + "loss": 0.5826, + "step": 995 + }, + { + "epoch": 0.030525928650239058, + "grad_norm": 2.5982040603898953, + "learning_rate": 9.999992881024326e-06, + "loss": 0.7893, + "step": 996 + }, + { + "epoch": 0.030556577172980263, + "grad_norm": 2.53764418194585, + "learning_rate": 9.999992018864871e-06, + "loss": 0.9024, + "step": 997 + }, + { + "epoch": 0.030587225695721466, + "grad_norm": 2.313628360783178, + "learning_rate": 9.999991107439215e-06, + "loss": 0.8027, + "step": 998 + }, + { + "epoch": 0.03061787421846267, + "grad_norm": 2.760143594788861, + "learning_rate": 9.999990146747369e-06, + "loss": 0.8229, + "step": 999 + }, + { + "epoch": 0.030648522741203874, + "grad_norm": 1.217348630970878, + "learning_rate": 9.999989136789339e-06, + "loss": 0.5669, + "step": 1000 + }, + { + "epoch": 0.03067917126394508, + "grad_norm": 2.2392593287604647, + "learning_rate": 9.999988077565138e-06, + "loss": 0.8416, + "step": 1001 + }, + { + "epoch": 0.03070981978668628, + "grad_norm": 3.2186820645809084, + "learning_rate": 9.999986969074775e-06, + "loss": 0.7788, + "step": 1002 + }, + { + "epoch": 0.030740468309427487, + "grad_norm": 2.524674290767132, + "learning_rate": 9.99998581131826e-06, + "loss": 0.772, + "step": 1003 + }, + { + "epoch": 0.03077111683216869, + "grad_norm": 2.4988370697004454, + "learning_rate": 9.999984604295606e-06, + "loss": 0.8432, + "step": 1004 + }, + { + "epoch": 0.030801765354909895, + "grad_norm": 2.4719805826059726, + "learning_rate": 9.999983348006825e-06, + "loss": 0.816, + "step": 1005 + }, + { + "epoch": 0.030832413877651097, + "grad_norm": 2.379912027316148, + "learning_rate": 9.999982042451927e-06, + "loss": 0.8783, + "step": 1006 + }, + { + "epoch": 0.030863062400392303, + "grad_norm": 2.529928388286035, + "learning_rate": 9.999980687630931e-06, + "loss": 0.7689, + "step": 1007 + }, + { + "epoch": 0.030893710923133505, + "grad_norm": 2.338995616980002, + "learning_rate": 9.999979283543842e-06, + "loss": 0.8624, + "step": 1008 + }, + { + "epoch": 0.030924359445874707, + "grad_norm": 2.1692702523563065, + "learning_rate": 9.99997783019068e-06, + "loss": 0.8628, + "step": 1009 + }, + { + "epoch": 0.030955007968615913, + "grad_norm": 2.09877885626247, + "learning_rate": 9.999976327571454e-06, + "loss": 0.7293, + "step": 1010 + }, + { + "epoch": 0.030985656491357115, + "grad_norm": 1.9866275813199457, + "learning_rate": 9.999974775686186e-06, + "loss": 0.7178, + "step": 1011 + }, + { + "epoch": 0.03101630501409832, + "grad_norm": 2.5039500270241555, + "learning_rate": 9.999973174534885e-06, + "loss": 0.7968, + "step": 1012 + }, + { + "epoch": 0.031046953536839523, + "grad_norm": 
2.4827946679429136, + "learning_rate": 9.999971524117569e-06, + "loss": 0.8554, + "step": 1013 + }, + { + "epoch": 0.03107760205958073, + "grad_norm": 2.416495488939969, + "learning_rate": 9.999969824434255e-06, + "loss": 0.8244, + "step": 1014 + }, + { + "epoch": 0.03110825058232193, + "grad_norm": 2.6942390535072938, + "learning_rate": 9.999968075484959e-06, + "loss": 0.8268, + "step": 1015 + }, + { + "epoch": 0.031138899105063136, + "grad_norm": 2.187838748640899, + "learning_rate": 9.999966277269697e-06, + "loss": 0.7432, + "step": 1016 + }, + { + "epoch": 0.03116954762780434, + "grad_norm": 2.528431435558222, + "learning_rate": 9.999964429788487e-06, + "loss": 0.85, + "step": 1017 + }, + { + "epoch": 0.031200196150545544, + "grad_norm": 2.6710595856670913, + "learning_rate": 9.999962533041352e-06, + "loss": 0.9239, + "step": 1018 + }, + { + "epoch": 0.031230844673286746, + "grad_norm": 2.2519832162019995, + "learning_rate": 9.999960587028303e-06, + "loss": 0.853, + "step": 1019 + }, + { + "epoch": 0.03126149319602795, + "grad_norm": 2.3440894648736084, + "learning_rate": 9.999958591749366e-06, + "loss": 0.8245, + "step": 1020 + }, + { + "epoch": 0.03129214171876916, + "grad_norm": 2.5666871892799215, + "learning_rate": 9.999956547204557e-06, + "loss": 0.892, + "step": 1021 + }, + { + "epoch": 0.031322790241510357, + "grad_norm": 2.0626268537706745, + "learning_rate": 9.999954453393896e-06, + "loss": 0.7472, + "step": 1022 + }, + { + "epoch": 0.03135343876425156, + "grad_norm": 2.6684115925220686, + "learning_rate": 9.999952310317404e-06, + "loss": 0.8973, + "step": 1023 + }, + { + "epoch": 0.03138408728699277, + "grad_norm": 2.7668706306313298, + "learning_rate": 9.999950117975104e-06, + "loss": 0.824, + "step": 1024 + }, + { + "epoch": 0.031414735809733974, + "grad_norm": 2.4535671063428826, + "learning_rate": 9.999947876367015e-06, + "loss": 0.8186, + "step": 1025 + }, + { + "epoch": 0.03144538433247517, + "grad_norm": 2.2294805398623327, + "learning_rate": 9.999945585493163e-06, + "loss": 0.7607, + "step": 1026 + }, + { + "epoch": 0.03147603285521638, + "grad_norm": 2.3351619034187125, + "learning_rate": 9.999943245353566e-06, + "loss": 0.8186, + "step": 1027 + }, + { + "epoch": 0.031506681377957584, + "grad_norm": 2.787552634649161, + "learning_rate": 9.999940855948247e-06, + "loss": 0.8853, + "step": 1028 + }, + { + "epoch": 0.03153732990069879, + "grad_norm": 2.3361469019785694, + "learning_rate": 9.999938417277234e-06, + "loss": 0.8638, + "step": 1029 + }, + { + "epoch": 0.03156797842343999, + "grad_norm": 2.491584992556915, + "learning_rate": 9.999935929340548e-06, + "loss": 0.8948, + "step": 1030 + }, + { + "epoch": 0.031598626946181194, + "grad_norm": 2.6965460057727935, + "learning_rate": 9.999933392138212e-06, + "loss": 0.8461, + "step": 1031 + }, + { + "epoch": 0.0316292754689224, + "grad_norm": 2.725897196398422, + "learning_rate": 9.999930805670256e-06, + "loss": 0.8945, + "step": 1032 + }, + { + "epoch": 0.031659923991663605, + "grad_norm": 3.126771538097249, + "learning_rate": 9.9999281699367e-06, + "loss": 0.9015, + "step": 1033 + }, + { + "epoch": 0.031690572514404804, + "grad_norm": 2.4226596940477476, + "learning_rate": 9.999925484937574e-06, + "loss": 0.871, + "step": 1034 + }, + { + "epoch": 0.03172122103714601, + "grad_norm": 2.491777224082894, + "learning_rate": 9.999922750672903e-06, + "loss": 0.8139, + "step": 1035 + }, + { + "epoch": 0.031751869559887215, + "grad_norm": 2.505264345182828, + "learning_rate": 9.999919967142713e-06, + "loss": 0.8192, + "step": 
1036 + }, + { + "epoch": 0.03178251808262842, + "grad_norm": 1.1807428169900465, + "learning_rate": 9.999917134347032e-06, + "loss": 0.5624, + "step": 1037 + }, + { + "epoch": 0.03181316660536962, + "grad_norm": 1.147289844532225, + "learning_rate": 9.999914252285889e-06, + "loss": 0.5456, + "step": 1038 + }, + { + "epoch": 0.031843815128110825, + "grad_norm": 2.2551127186372715, + "learning_rate": 9.999911320959312e-06, + "loss": 0.8384, + "step": 1039 + }, + { + "epoch": 0.03187446365085203, + "grad_norm": 2.3824806922140302, + "learning_rate": 9.999908340367331e-06, + "loss": 0.9253, + "step": 1040 + }, + { + "epoch": 0.03190511217359323, + "grad_norm": 2.4208312575025377, + "learning_rate": 9.99990531050997e-06, + "loss": 0.806, + "step": 1041 + }, + { + "epoch": 0.031935760696334435, + "grad_norm": 2.3540003256942157, + "learning_rate": 9.999902231387264e-06, + "loss": 0.8191, + "step": 1042 + }, + { + "epoch": 0.03196640921907564, + "grad_norm": 2.2293741864851926, + "learning_rate": 9.999899102999243e-06, + "loss": 0.7228, + "step": 1043 + }, + { + "epoch": 0.031997057741816846, + "grad_norm": 1.6249912318772541, + "learning_rate": 9.999895925345938e-06, + "loss": 0.5705, + "step": 1044 + }, + { + "epoch": 0.032027706264558045, + "grad_norm": 2.4995487209998064, + "learning_rate": 9.999892698427377e-06, + "loss": 0.8498, + "step": 1045 + }, + { + "epoch": 0.03205835478729925, + "grad_norm": 2.3392331234514514, + "learning_rate": 9.999889422243594e-06, + "loss": 0.8853, + "step": 1046 + }, + { + "epoch": 0.032089003310040456, + "grad_norm": 2.3207338200493877, + "learning_rate": 9.999886096794623e-06, + "loss": 0.8725, + "step": 1047 + }, + { + "epoch": 0.03211965183278166, + "grad_norm": 0.9930054374497782, + "learning_rate": 9.999882722080491e-06, + "loss": 0.5614, + "step": 1048 + }, + { + "epoch": 0.03215030035552286, + "grad_norm": 2.725149701687121, + "learning_rate": 9.99987929810124e-06, + "loss": 0.8051, + "step": 1049 + }, + { + "epoch": 0.032180948878264067, + "grad_norm": 2.1615724413614257, + "learning_rate": 9.999875824856897e-06, + "loss": 0.8215, + "step": 1050 + }, + { + "epoch": 0.03221159740100527, + "grad_norm": 2.61061790777966, + "learning_rate": 9.999872302347498e-06, + "loss": 0.8924, + "step": 1051 + }, + { + "epoch": 0.03224224592374648, + "grad_norm": 2.4432782558867374, + "learning_rate": 9.999868730573078e-06, + "loss": 0.8192, + "step": 1052 + }, + { + "epoch": 0.03227289444648768, + "grad_norm": 2.4316508050580032, + "learning_rate": 9.999865109533672e-06, + "loss": 0.8641, + "step": 1053 + }, + { + "epoch": 0.03230354296922888, + "grad_norm": 1.2184657279460587, + "learning_rate": 9.999861439229317e-06, + "loss": 0.558, + "step": 1054 + }, + { + "epoch": 0.03233419149197009, + "grad_norm": 1.1262646298539005, + "learning_rate": 9.999857719660047e-06, + "loss": 0.5644, + "step": 1055 + }, + { + "epoch": 0.032364840014711294, + "grad_norm": 2.226972248339666, + "learning_rate": 9.999853950825898e-06, + "loss": 0.7586, + "step": 1056 + }, + { + "epoch": 0.03239548853745249, + "grad_norm": 2.7764263722418314, + "learning_rate": 9.999850132726912e-06, + "loss": 0.8724, + "step": 1057 + }, + { + "epoch": 0.0324261370601937, + "grad_norm": 2.3376827236125806, + "learning_rate": 9.999846265363121e-06, + "loss": 0.7445, + "step": 1058 + }, + { + "epoch": 0.032456785582934904, + "grad_norm": 2.4856492994213837, + "learning_rate": 9.999842348734566e-06, + "loss": 0.8068, + "step": 1059 + }, + { + "epoch": 0.03248743410567611, + "grad_norm": 2.3770349804539737, + 
"learning_rate": 9.999838382841285e-06, + "loss": 0.8208, + "step": 1060 + }, + { + "epoch": 0.03251808262841731, + "grad_norm": 2.3568466755180855, + "learning_rate": 9.99983436768332e-06, + "loss": 0.7486, + "step": 1061 + }, + { + "epoch": 0.032548731151158514, + "grad_norm": 1.6642085284394468, + "learning_rate": 9.999830303260703e-06, + "loss": 0.549, + "step": 1062 + }, + { + "epoch": 0.03257937967389972, + "grad_norm": 2.3632521691491744, + "learning_rate": 9.999826189573482e-06, + "loss": 0.854, + "step": 1063 + }, + { + "epoch": 0.032610028196640925, + "grad_norm": 2.5920999059132614, + "learning_rate": 9.999822026621692e-06, + "loss": 0.8114, + "step": 1064 + }, + { + "epoch": 0.032640676719382124, + "grad_norm": 2.2699637742764334, + "learning_rate": 9.999817814405379e-06, + "loss": 0.748, + "step": 1065 + }, + { + "epoch": 0.03267132524212333, + "grad_norm": 2.5939944981720746, + "learning_rate": 9.99981355292458e-06, + "loss": 0.9342, + "step": 1066 + }, + { + "epoch": 0.032701973764864535, + "grad_norm": 2.2155506579062596, + "learning_rate": 9.999809242179339e-06, + "loss": 0.7644, + "step": 1067 + }, + { + "epoch": 0.03273262228760574, + "grad_norm": 2.5201942129859325, + "learning_rate": 9.9998048821697e-06, + "loss": 0.9008, + "step": 1068 + }, + { + "epoch": 0.03276327081034694, + "grad_norm": 2.472011512745823, + "learning_rate": 9.999800472895701e-06, + "loss": 0.8835, + "step": 1069 + }, + { + "epoch": 0.032793919333088145, + "grad_norm": 2.494947955662713, + "learning_rate": 9.99979601435739e-06, + "loss": 0.8022, + "step": 1070 + }, + { + "epoch": 0.03282456785582935, + "grad_norm": 2.040271663123092, + "learning_rate": 9.99979150655481e-06, + "loss": 0.748, + "step": 1071 + }, + { + "epoch": 0.03285521637857055, + "grad_norm": 2.2710535630077446, + "learning_rate": 9.999786949488007e-06, + "loss": 0.7861, + "step": 1072 + }, + { + "epoch": 0.032885864901311755, + "grad_norm": 2.646934598491452, + "learning_rate": 9.99978234315702e-06, + "loss": 0.8808, + "step": 1073 + }, + { + "epoch": 0.03291651342405296, + "grad_norm": 2.525620782873754, + "learning_rate": 9.999777687561901e-06, + "loss": 0.789, + "step": 1074 + }, + { + "epoch": 0.032947161946794166, + "grad_norm": 2.3310707702527593, + "learning_rate": 9.999772982702694e-06, + "loss": 0.8988, + "step": 1075 + }, + { + "epoch": 0.032977810469535365, + "grad_norm": 2.6458181105796834, + "learning_rate": 9.999768228579442e-06, + "loss": 0.7743, + "step": 1076 + }, + { + "epoch": 0.03300845899227657, + "grad_norm": 2.1170640293649536, + "learning_rate": 9.999763425192195e-06, + "loss": 0.5613, + "step": 1077 + }, + { + "epoch": 0.03303910751501778, + "grad_norm": 2.400128379003866, + "learning_rate": 9.999758572541001e-06, + "loss": 0.8, + "step": 1078 + }, + { + "epoch": 0.03306975603775898, + "grad_norm": 2.8142451399364368, + "learning_rate": 9.999753670625905e-06, + "loss": 0.6803, + "step": 1079 + }, + { + "epoch": 0.03310040456050018, + "grad_norm": 2.6209025591595725, + "learning_rate": 9.999748719446958e-06, + "loss": 0.9168, + "step": 1080 + }, + { + "epoch": 0.03313105308324139, + "grad_norm": 2.6515758826281237, + "learning_rate": 9.999743719004206e-06, + "loss": 0.8237, + "step": 1081 + }, + { + "epoch": 0.03316170160598259, + "grad_norm": 2.3292769181387554, + "learning_rate": 9.999738669297702e-06, + "loss": 0.7961, + "step": 1082 + }, + { + "epoch": 0.0331923501287238, + "grad_norm": 1.2490564090991496, + "learning_rate": 9.999733570327492e-06, + "loss": 0.566, + "step": 1083 + }, + { + "epoch": 
0.033222998651465, + "grad_norm": 2.3137931009194235, + "learning_rate": 9.99972842209363e-06, + "loss": 0.7587, + "step": 1084 + }, + { + "epoch": 0.0332536471742062, + "grad_norm": 2.507608927343861, + "learning_rate": 9.999723224596163e-06, + "loss": 0.8198, + "step": 1085 + }, + { + "epoch": 0.03328429569694741, + "grad_norm": 2.430802025399781, + "learning_rate": 9.999717977835144e-06, + "loss": 0.7868, + "step": 1086 + }, + { + "epoch": 0.033314944219688614, + "grad_norm": 2.5033238227369923, + "learning_rate": 9.999712681810626e-06, + "loss": 0.9802, + "step": 1087 + }, + { + "epoch": 0.03334559274242981, + "grad_norm": 2.5769256893074446, + "learning_rate": 9.999707336522657e-06, + "loss": 0.6047, + "step": 1088 + }, + { + "epoch": 0.03337624126517102, + "grad_norm": 2.458828999837031, + "learning_rate": 9.999701941971293e-06, + "loss": 0.9125, + "step": 1089 + }, + { + "epoch": 0.033406889787912224, + "grad_norm": 2.403726057655127, + "learning_rate": 9.999696498156588e-06, + "loss": 0.7567, + "step": 1090 + }, + { + "epoch": 0.03343753831065343, + "grad_norm": 1.9811919812786096, + "learning_rate": 9.999691005078593e-06, + "loss": 0.7911, + "step": 1091 + }, + { + "epoch": 0.03346818683339463, + "grad_norm": 1.035247123478199, + "learning_rate": 9.999685462737364e-06, + "loss": 0.5371, + "step": 1092 + }, + { + "epoch": 0.033498835356135834, + "grad_norm": 2.158110953273258, + "learning_rate": 9.999679871132955e-06, + "loss": 0.824, + "step": 1093 + }, + { + "epoch": 0.03352948387887704, + "grad_norm": 2.2691176151907677, + "learning_rate": 9.999674230265421e-06, + "loss": 0.7653, + "step": 1094 + }, + { + "epoch": 0.033560132401618245, + "grad_norm": 2.293793414169252, + "learning_rate": 9.999668540134817e-06, + "loss": 0.867, + "step": 1095 + }, + { + "epoch": 0.033590780924359444, + "grad_norm": 2.4699018487889095, + "learning_rate": 9.9996628007412e-06, + "loss": 0.918, + "step": 1096 + }, + { + "epoch": 0.03362142944710065, + "grad_norm": 2.572940617252387, + "learning_rate": 9.999657012084627e-06, + "loss": 0.8195, + "step": 1097 + }, + { + "epoch": 0.033652077969841855, + "grad_norm": 2.0490720523299393, + "learning_rate": 9.999651174165152e-06, + "loss": 0.7412, + "step": 1098 + }, + { + "epoch": 0.03368272649258306, + "grad_norm": 2.262082679125583, + "learning_rate": 9.999645286982838e-06, + "loss": 0.8366, + "step": 1099 + }, + { + "epoch": 0.03371337501532426, + "grad_norm": 2.348774998926491, + "learning_rate": 9.999639350537736e-06, + "loss": 0.8933, + "step": 1100 + }, + { + "epoch": 0.033744023538065465, + "grad_norm": 2.1547763670433606, + "learning_rate": 9.99963336482991e-06, + "loss": 0.7725, + "step": 1101 + }, + { + "epoch": 0.03377467206080667, + "grad_norm": 1.0132699143945691, + "learning_rate": 9.999627329859418e-06, + "loss": 0.5595, + "step": 1102 + }, + { + "epoch": 0.03380532058354787, + "grad_norm": 2.557938089332334, + "learning_rate": 9.999621245626318e-06, + "loss": 0.8552, + "step": 1103 + }, + { + "epoch": 0.033835969106289075, + "grad_norm": 2.46586989608345, + "learning_rate": 9.999615112130671e-06, + "loss": 0.8697, + "step": 1104 + }, + { + "epoch": 0.03386661762903028, + "grad_norm": 2.439887275933391, + "learning_rate": 9.999608929372537e-06, + "loss": 0.7895, + "step": 1105 + }, + { + "epoch": 0.03389726615177149, + "grad_norm": 2.2902043558871767, + "learning_rate": 9.999602697351978e-06, + "loss": 0.8417, + "step": 1106 + }, + { + "epoch": 0.033927914674512685, + "grad_norm": 2.6627709575004044, + "learning_rate": 
9.999596416069052e-06, + "loss": 0.7699, + "step": 1107 + }, + { + "epoch": 0.03395856319725389, + "grad_norm": 2.568585532926355, + "learning_rate": 9.999590085523825e-06, + "loss": 0.7871, + "step": 1108 + }, + { + "epoch": 0.0339892117199951, + "grad_norm": 1.989612905280849, + "learning_rate": 9.999583705716357e-06, + "loss": 0.7543, + "step": 1109 + }, + { + "epoch": 0.0340198602427363, + "grad_norm": 2.1448993456587457, + "learning_rate": 9.999577276646712e-06, + "loss": 0.794, + "step": 1110 + }, + { + "epoch": 0.0340505087654775, + "grad_norm": 2.023026456508637, + "learning_rate": 9.999570798314952e-06, + "loss": 0.8757, + "step": 1111 + }, + { + "epoch": 0.03408115728821871, + "grad_norm": 2.3941191718720733, + "learning_rate": 9.999564270721144e-06, + "loss": 0.7707, + "step": 1112 + }, + { + "epoch": 0.03411180581095991, + "grad_norm": 2.4323990874724357, + "learning_rate": 9.999557693865349e-06, + "loss": 0.7984, + "step": 1113 + }, + { + "epoch": 0.03414245433370112, + "grad_norm": 2.0600381754821258, + "learning_rate": 9.999551067747632e-06, + "loss": 0.7814, + "step": 1114 + }, + { + "epoch": 0.03417310285644232, + "grad_norm": 2.182277379463982, + "learning_rate": 9.999544392368059e-06, + "loss": 0.7568, + "step": 1115 + }, + { + "epoch": 0.03420375137918352, + "grad_norm": 2.1902019503343753, + "learning_rate": 9.999537667726697e-06, + "loss": 0.9036, + "step": 1116 + }, + { + "epoch": 0.03423439990192473, + "grad_norm": 2.5173505135317926, + "learning_rate": 9.99953089382361e-06, + "loss": 0.8676, + "step": 1117 + }, + { + "epoch": 0.034265048424665934, + "grad_norm": 1.1163431360204223, + "learning_rate": 9.999524070658865e-06, + "loss": 0.5419, + "step": 1118 + }, + { + "epoch": 0.03429569694740713, + "grad_norm": 2.4001408022031154, + "learning_rate": 9.999517198232533e-06, + "loss": 0.8907, + "step": 1119 + }, + { + "epoch": 0.03432634547014834, + "grad_norm": 2.2387242416523945, + "learning_rate": 9.999510276544677e-06, + "loss": 0.7529, + "step": 1120 + }, + { + "epoch": 0.034356993992889544, + "grad_norm": 3.2050856391832325, + "learning_rate": 9.999503305595369e-06, + "loss": 0.7505, + "step": 1121 + }, + { + "epoch": 0.03438764251563075, + "grad_norm": 2.3926212355398007, + "learning_rate": 9.999496285384674e-06, + "loss": 0.8693, + "step": 1122 + }, + { + "epoch": 0.03441829103837195, + "grad_norm": 2.6557710885195753, + "learning_rate": 9.999489215912664e-06, + "loss": 0.803, + "step": 1123 + }, + { + "epoch": 0.034448939561113154, + "grad_norm": 2.6874205758410135, + "learning_rate": 9.999482097179406e-06, + "loss": 0.8458, + "step": 1124 + }, + { + "epoch": 0.03447958808385436, + "grad_norm": 2.2936650368349456, + "learning_rate": 9.999474929184972e-06, + "loss": 0.7921, + "step": 1125 + }, + { + "epoch": 0.034510236606595565, + "grad_norm": 2.3147048994528663, + "learning_rate": 9.999467711929433e-06, + "loss": 0.7233, + "step": 1126 + }, + { + "epoch": 0.034540885129336764, + "grad_norm": 5.692900448486351, + "learning_rate": 9.99946044541286e-06, + "loss": 0.8591, + "step": 1127 + }, + { + "epoch": 0.03457153365207797, + "grad_norm": 2.189037285226436, + "learning_rate": 9.999453129635324e-06, + "loss": 0.7533, + "step": 1128 + }, + { + "epoch": 0.034602182174819175, + "grad_norm": 2.4674549585824375, + "learning_rate": 9.999445764596896e-06, + "loss": 0.7326, + "step": 1129 + }, + { + "epoch": 0.03463283069756038, + "grad_norm": 2.20437537645294, + "learning_rate": 9.99943835029765e-06, + "loss": 0.7303, + "step": 1130 + }, + { + "epoch": 
0.03466347922030158, + "grad_norm": 2.296370408322298, + "learning_rate": 9.99943088673766e-06, + "loss": 0.8341, + "step": 1131 + }, + { + "epoch": 0.034694127743042785, + "grad_norm": 1.340766069394169, + "learning_rate": 9.999423373916997e-06, + "loss": 0.5585, + "step": 1132 + }, + { + "epoch": 0.03472477626578399, + "grad_norm": 2.390635369259343, + "learning_rate": 9.999415811835737e-06, + "loss": 0.9172, + "step": 1133 + }, + { + "epoch": 0.03475542478852519, + "grad_norm": 2.4641885082240655, + "learning_rate": 9.999408200493954e-06, + "loss": 0.8435, + "step": 1134 + }, + { + "epoch": 0.034786073311266395, + "grad_norm": 2.279881692362399, + "learning_rate": 9.999400539891722e-06, + "loss": 0.8371, + "step": 1135 + }, + { + "epoch": 0.0348167218340076, + "grad_norm": 2.186729081161133, + "learning_rate": 9.999392830029118e-06, + "loss": 0.8186, + "step": 1136 + }, + { + "epoch": 0.03484737035674881, + "grad_norm": 17.21561035671076, + "learning_rate": 9.999385070906217e-06, + "loss": 0.8274, + "step": 1137 + }, + { + "epoch": 0.034878018879490005, + "grad_norm": 1.0693229787310432, + "learning_rate": 9.999377262523095e-06, + "loss": 0.5613, + "step": 1138 + }, + { + "epoch": 0.03490866740223121, + "grad_norm": 2.3126547719716504, + "learning_rate": 9.99936940487983e-06, + "loss": 0.8647, + "step": 1139 + }, + { + "epoch": 0.03493931592497242, + "grad_norm": 2.3307068656080365, + "learning_rate": 9.9993614979765e-06, + "loss": 0.8245, + "step": 1140 + }, + { + "epoch": 0.03496996444771362, + "grad_norm": 2.352976815356306, + "learning_rate": 9.999353541813182e-06, + "loss": 0.8728, + "step": 1141 + }, + { + "epoch": 0.03500061297045482, + "grad_norm": 2.165489248325459, + "learning_rate": 9.999345536389955e-06, + "loss": 0.7996, + "step": 1142 + }, + { + "epoch": 0.03503126149319603, + "grad_norm": 2.349564845976544, + "learning_rate": 9.999337481706897e-06, + "loss": 0.7857, + "step": 1143 + }, + { + "epoch": 0.03506191001593723, + "grad_norm": 2.587876007103602, + "learning_rate": 9.999329377764086e-06, + "loss": 0.7868, + "step": 1144 + }, + { + "epoch": 0.03509255853867844, + "grad_norm": 2.269658881688955, + "learning_rate": 9.999321224561604e-06, + "loss": 0.842, + "step": 1145 + }, + { + "epoch": 0.03512320706141964, + "grad_norm": 2.98957266016936, + "learning_rate": 9.999313022099533e-06, + "loss": 0.8809, + "step": 1146 + }, + { + "epoch": 0.03515385558416084, + "grad_norm": 2.6150043031132375, + "learning_rate": 9.999304770377948e-06, + "loss": 0.7714, + "step": 1147 + }, + { + "epoch": 0.03518450410690205, + "grad_norm": 2.578859124757857, + "learning_rate": 9.999296469396938e-06, + "loss": 0.8503, + "step": 1148 + }, + { + "epoch": 0.035215152629643254, + "grad_norm": 2.069880551153864, + "learning_rate": 9.999288119156578e-06, + "loss": 0.6836, + "step": 1149 + }, + { + "epoch": 0.03524580115238445, + "grad_norm": 2.508195734150611, + "learning_rate": 9.999279719656953e-06, + "loss": 0.908, + "step": 1150 + }, + { + "epoch": 0.03527644967512566, + "grad_norm": 1.114813611820268, + "learning_rate": 9.999271270898148e-06, + "loss": 0.5538, + "step": 1151 + }, + { + "epoch": 0.035307098197866864, + "grad_norm": 2.181033165165685, + "learning_rate": 9.999262772880241e-06, + "loss": 0.7737, + "step": 1152 + }, + { + "epoch": 0.03533774672060807, + "grad_norm": 2.3185263148704585, + "learning_rate": 9.99925422560332e-06, + "loss": 0.7584, + "step": 1153 + }, + { + "epoch": 0.03536839524334927, + "grad_norm": 2.2578022835719636, + "learning_rate": 9.999245629067469e-06, + 
"loss": 0.8076, + "step": 1154 + }, + { + "epoch": 0.035399043766090474, + "grad_norm": 2.52497922928254, + "learning_rate": 9.999236983272772e-06, + "loss": 0.7871, + "step": 1155 + }, + { + "epoch": 0.03542969228883168, + "grad_norm": 2.5642672420690715, + "learning_rate": 9.999228288219314e-06, + "loss": 0.8095, + "step": 1156 + }, + { + "epoch": 0.035460340811572885, + "grad_norm": 1.0931384078123485, + "learning_rate": 9.99921954390718e-06, + "loss": 0.5532, + "step": 1157 + }, + { + "epoch": 0.035490989334314084, + "grad_norm": 2.304652164571122, + "learning_rate": 9.999210750336455e-06, + "loss": 0.8847, + "step": 1158 + }, + { + "epoch": 0.03552163785705529, + "grad_norm": 2.1737805925639493, + "learning_rate": 9.999201907507227e-06, + "loss": 0.8093, + "step": 1159 + }, + { + "epoch": 0.035552286379796495, + "grad_norm": 2.3159717954284598, + "learning_rate": 9.999193015419586e-06, + "loss": 0.7536, + "step": 1160 + }, + { + "epoch": 0.0355829349025377, + "grad_norm": 2.2435102327799332, + "learning_rate": 9.999184074073618e-06, + "loss": 0.7864, + "step": 1161 + }, + { + "epoch": 0.0356135834252789, + "grad_norm": 2.7520783444048798, + "learning_rate": 9.999175083469407e-06, + "loss": 0.7822, + "step": 1162 + }, + { + "epoch": 0.035644231948020105, + "grad_norm": 2.6329354194589225, + "learning_rate": 9.999166043607048e-06, + "loss": 0.7942, + "step": 1163 + }, + { + "epoch": 0.03567488047076131, + "grad_norm": 2.309805879337428, + "learning_rate": 9.999156954486624e-06, + "loss": 0.77, + "step": 1164 + }, + { + "epoch": 0.03570552899350252, + "grad_norm": 2.435278576632006, + "learning_rate": 9.999147816108229e-06, + "loss": 0.7738, + "step": 1165 + }, + { + "epoch": 0.035736177516243715, + "grad_norm": 2.490029926748888, + "learning_rate": 9.999138628471951e-06, + "loss": 0.7987, + "step": 1166 + }, + { + "epoch": 0.03576682603898492, + "grad_norm": 1.3000128070090873, + "learning_rate": 9.999129391577882e-06, + "loss": 0.5664, + "step": 1167 + }, + { + "epoch": 0.03579747456172613, + "grad_norm": 2.5118180316722842, + "learning_rate": 9.99912010542611e-06, + "loss": 0.8497, + "step": 1168 + }, + { + "epoch": 0.035828123084467325, + "grad_norm": 2.343991069668834, + "learning_rate": 9.99911077001673e-06, + "loss": 0.8863, + "step": 1169 + }, + { + "epoch": 0.03585877160720853, + "grad_norm": 2.473300398434256, + "learning_rate": 9.999101385349831e-06, + "loss": 0.8104, + "step": 1170 + }, + { + "epoch": 0.03588942012994974, + "grad_norm": 2.4629907656042374, + "learning_rate": 9.99909195142551e-06, + "loss": 0.7638, + "step": 1171 + }, + { + "epoch": 0.03592006865269094, + "grad_norm": 2.2113459715155175, + "learning_rate": 9.999082468243855e-06, + "loss": 0.8351, + "step": 1172 + }, + { + "epoch": 0.03595071717543214, + "grad_norm": 2.2758296986961284, + "learning_rate": 9.99907293580496e-06, + "loss": 0.7766, + "step": 1173 + }, + { + "epoch": 0.03598136569817335, + "grad_norm": 2.4554081954752123, + "learning_rate": 9.999063354108923e-06, + "loss": 0.8179, + "step": 1174 + }, + { + "epoch": 0.03601201422091455, + "grad_norm": 2.394345240768447, + "learning_rate": 9.999053723155834e-06, + "loss": 0.8555, + "step": 1175 + }, + { + "epoch": 0.03604266274365576, + "grad_norm": 2.3045510599878245, + "learning_rate": 9.99904404294579e-06, + "loss": 0.7003, + "step": 1176 + }, + { + "epoch": 0.03607331126639696, + "grad_norm": 2.6574272492988604, + "learning_rate": 9.999034313478888e-06, + "loss": 0.7545, + "step": 1177 + }, + { + "epoch": 0.03610395978913816, + "grad_norm": 
2.611496629034802, + "learning_rate": 9.99902453475522e-06, + "loss": 0.8053, + "step": 1178 + }, + { + "epoch": 0.03613460831187937, + "grad_norm": 2.324609420303341, + "learning_rate": 9.999014706774883e-06, + "loss": 0.7734, + "step": 1179 + }, + { + "epoch": 0.036165256834620574, + "grad_norm": 2.1973597974679784, + "learning_rate": 9.999004829537976e-06, + "loss": 0.8306, + "step": 1180 + }, + { + "epoch": 0.03619590535736177, + "grad_norm": 2.5063279130867877, + "learning_rate": 9.998994903044596e-06, + "loss": 0.8456, + "step": 1181 + }, + { + "epoch": 0.03622655388010298, + "grad_norm": 2.2406035395671284, + "learning_rate": 9.99898492729484e-06, + "loss": 0.7568, + "step": 1182 + }, + { + "epoch": 0.036257202402844184, + "grad_norm": 2.3335594026309994, + "learning_rate": 9.998974902288805e-06, + "loss": 0.7324, + "step": 1183 + }, + { + "epoch": 0.03628785092558539, + "grad_norm": 2.3759307749183205, + "learning_rate": 9.998964828026594e-06, + "loss": 0.8163, + "step": 1184 + }, + { + "epoch": 0.03631849944832659, + "grad_norm": 3.1005749591746388, + "learning_rate": 9.998954704508301e-06, + "loss": 0.8395, + "step": 1185 + }, + { + "epoch": 0.036349147971067794, + "grad_norm": 2.216687360826011, + "learning_rate": 9.99894453173403e-06, + "loss": 0.8065, + "step": 1186 + }, + { + "epoch": 0.036379796493809, + "grad_norm": 2.56813275971025, + "learning_rate": 9.998934309703878e-06, + "loss": 0.7761, + "step": 1187 + }, + { + "epoch": 0.036410445016550205, + "grad_norm": 1.6231557064244992, + "learning_rate": 9.998924038417947e-06, + "loss": 0.5608, + "step": 1188 + }, + { + "epoch": 0.036441093539291404, + "grad_norm": 2.435695465148863, + "learning_rate": 9.99891371787634e-06, + "loss": 0.7934, + "step": 1189 + }, + { + "epoch": 0.03647174206203261, + "grad_norm": 2.337609218110758, + "learning_rate": 9.998903348079157e-06, + "loss": 0.8472, + "step": 1190 + }, + { + "epoch": 0.036502390584773815, + "grad_norm": 2.8174565275281527, + "learning_rate": 9.998892929026499e-06, + "loss": 0.7977, + "step": 1191 + }, + { + "epoch": 0.03653303910751502, + "grad_norm": 2.4528373992318606, + "learning_rate": 9.998882460718472e-06, + "loss": 0.8889, + "step": 1192 + }, + { + "epoch": 0.03656368763025622, + "grad_norm": 2.3138289630510727, + "learning_rate": 9.998871943155175e-06, + "loss": 0.853, + "step": 1193 + }, + { + "epoch": 0.036594336152997425, + "grad_norm": 4.452215825315945, + "learning_rate": 9.998861376336715e-06, + "loss": 0.8206, + "step": 1194 + }, + { + "epoch": 0.03662498467573863, + "grad_norm": 2.4563495698883955, + "learning_rate": 9.998850760263194e-06, + "loss": 0.8497, + "step": 1195 + }, + { + "epoch": 0.03665563319847984, + "grad_norm": 2.4991425664663405, + "learning_rate": 9.998840094934717e-06, + "loss": 0.8638, + "step": 1196 + }, + { + "epoch": 0.036686281721221035, + "grad_norm": 1.4002232172667024, + "learning_rate": 9.998829380351391e-06, + "loss": 0.5537, + "step": 1197 + }, + { + "epoch": 0.03671693024396224, + "grad_norm": 2.388427552701879, + "learning_rate": 9.99881861651332e-06, + "loss": 0.8026, + "step": 1198 + }, + { + "epoch": 0.03674757876670345, + "grad_norm": 2.521629931420432, + "learning_rate": 9.998807803420609e-06, + "loss": 0.819, + "step": 1199 + }, + { + "epoch": 0.036778227289444645, + "grad_norm": 2.4461891535913987, + "learning_rate": 9.998796941073365e-06, + "loss": 0.8504, + "step": 1200 + }, + { + "epoch": 0.03680887581218585, + "grad_norm": 2.491676746886056, + "learning_rate": 9.998786029471698e-06, + "loss": 0.8648, + "step": 
1201 + }, + { + "epoch": 0.03683952433492706, + "grad_norm": 2.385062744544643, + "learning_rate": 9.998775068615713e-06, + "loss": 0.8178, + "step": 1202 + }, + { + "epoch": 0.03687017285766826, + "grad_norm": 2.4137661580618714, + "learning_rate": 9.998764058505517e-06, + "loss": 0.7524, + "step": 1203 + }, + { + "epoch": 0.03690082138040946, + "grad_norm": 2.3732376372654964, + "learning_rate": 9.99875299914122e-06, + "loss": 0.7433, + "step": 1204 + }, + { + "epoch": 0.03693146990315067, + "grad_norm": 2.546819248563387, + "learning_rate": 9.99874189052293e-06, + "loss": 0.8167, + "step": 1205 + }, + { + "epoch": 0.03696211842589187, + "grad_norm": 3.39279348233648, + "learning_rate": 9.99873073265076e-06, + "loss": 0.8905, + "step": 1206 + }, + { + "epoch": 0.03699276694863308, + "grad_norm": 2.4951013129548003, + "learning_rate": 9.998719525524814e-06, + "loss": 0.7438, + "step": 1207 + }, + { + "epoch": 0.03702341547137428, + "grad_norm": 2.416194726365278, + "learning_rate": 9.998708269145207e-06, + "loss": 0.7208, + "step": 1208 + }, + { + "epoch": 0.03705406399411548, + "grad_norm": 1.4300394333266837, + "learning_rate": 9.99869696351205e-06, + "loss": 0.5918, + "step": 1209 + }, + { + "epoch": 0.03708471251685669, + "grad_norm": 2.458616117707772, + "learning_rate": 9.99868560862545e-06, + "loss": 0.8014, + "step": 1210 + }, + { + "epoch": 0.037115361039597894, + "grad_norm": 1.1679012695246136, + "learning_rate": 9.998674204485524e-06, + "loss": 0.5858, + "step": 1211 + }, + { + "epoch": 0.03714600956233909, + "grad_norm": 2.5346527295340935, + "learning_rate": 9.99866275109238e-06, + "loss": 0.7466, + "step": 1212 + }, + { + "epoch": 0.0371766580850803, + "grad_norm": 2.8537656031248653, + "learning_rate": 9.998651248446135e-06, + "loss": 0.9287, + "step": 1213 + }, + { + "epoch": 0.037207306607821504, + "grad_norm": 2.5152542815805483, + "learning_rate": 9.9986396965469e-06, + "loss": 0.8112, + "step": 1214 + }, + { + "epoch": 0.03723795513056271, + "grad_norm": 2.423784493233649, + "learning_rate": 9.998628095394786e-06, + "loss": 0.7984, + "step": 1215 + }, + { + "epoch": 0.03726860365330391, + "grad_norm": 2.2597857321255517, + "learning_rate": 9.998616444989912e-06, + "loss": 0.7576, + "step": 1216 + }, + { + "epoch": 0.037299252176045114, + "grad_norm": 2.446026061604031, + "learning_rate": 9.998604745332394e-06, + "loss": 0.8393, + "step": 1217 + }, + { + "epoch": 0.03732990069878632, + "grad_norm": 2.5362137685645663, + "learning_rate": 9.998592996422342e-06, + "loss": 0.7858, + "step": 1218 + }, + { + "epoch": 0.037360549221527525, + "grad_norm": 2.335274359108287, + "learning_rate": 9.998581198259875e-06, + "loss": 0.7597, + "step": 1219 + }, + { + "epoch": 0.037391197744268724, + "grad_norm": 2.7909850184132545, + "learning_rate": 9.998569350845107e-06, + "loss": 0.9007, + "step": 1220 + }, + { + "epoch": 0.03742184626700993, + "grad_norm": 4.575709528844049, + "learning_rate": 9.998557454178158e-06, + "loss": 0.8208, + "step": 1221 + }, + { + "epoch": 0.037452494789751135, + "grad_norm": 2.1997341708106632, + "learning_rate": 9.998545508259143e-06, + "loss": 0.5841, + "step": 1222 + }, + { + "epoch": 0.03748314331249234, + "grad_norm": 2.43952295338665, + "learning_rate": 9.99853351308818e-06, + "loss": 0.8382, + "step": 1223 + }, + { + "epoch": 0.03751379183523354, + "grad_norm": 2.68899216541948, + "learning_rate": 9.998521468665388e-06, + "loss": 0.7649, + "step": 1224 + }, + { + "epoch": 0.037544440357974745, + "grad_norm": 2.3047228575432652, + 
"learning_rate": 9.998509374990885e-06, + "loss": 0.7827, + "step": 1225 + }, + { + "epoch": 0.03757508888071595, + "grad_norm": 2.417226639677862, + "learning_rate": 9.998497232064789e-06, + "loss": 0.8708, + "step": 1226 + }, + { + "epoch": 0.03760573740345716, + "grad_norm": 2.298338055701502, + "learning_rate": 9.998485039887222e-06, + "loss": 0.841, + "step": 1227 + }, + { + "epoch": 0.037636385926198355, + "grad_norm": 2.7003783250773514, + "learning_rate": 9.998472798458302e-06, + "loss": 0.8449, + "step": 1228 + }, + { + "epoch": 0.03766703444893956, + "grad_norm": 2.642130170114504, + "learning_rate": 9.998460507778152e-06, + "loss": 0.9184, + "step": 1229 + }, + { + "epoch": 0.03769768297168077, + "grad_norm": 2.192112056969457, + "learning_rate": 9.99844816784689e-06, + "loss": 0.7915, + "step": 1230 + }, + { + "epoch": 0.037728331494421966, + "grad_norm": 1.872364878386262, + "learning_rate": 9.99843577866464e-06, + "loss": 0.5601, + "step": 1231 + }, + { + "epoch": 0.03775898001716317, + "grad_norm": 2.5319559474101285, + "learning_rate": 9.998423340231524e-06, + "loss": 0.8422, + "step": 1232 + }, + { + "epoch": 0.03778962853990438, + "grad_norm": 2.2141416119707724, + "learning_rate": 9.998410852547663e-06, + "loss": 0.7951, + "step": 1233 + }, + { + "epoch": 0.03782027706264558, + "grad_norm": 2.0029568131255986, + "learning_rate": 9.998398315613182e-06, + "loss": 0.7741, + "step": 1234 + }, + { + "epoch": 0.03785092558538678, + "grad_norm": 2.2734719889639528, + "learning_rate": 9.998385729428202e-06, + "loss": 0.8576, + "step": 1235 + }, + { + "epoch": 0.03788157410812799, + "grad_norm": 2.3087177324198107, + "learning_rate": 9.998373093992851e-06, + "loss": 0.847, + "step": 1236 + }, + { + "epoch": 0.03791222263086919, + "grad_norm": 2.5756232248342084, + "learning_rate": 9.998360409307248e-06, + "loss": 0.8389, + "step": 1237 + }, + { + "epoch": 0.0379428711536104, + "grad_norm": 2.2921474377958257, + "learning_rate": 9.998347675371523e-06, + "loss": 0.8194, + "step": 1238 + }, + { + "epoch": 0.0379735196763516, + "grad_norm": 2.271328827023043, + "learning_rate": 9.998334892185799e-06, + "loss": 0.7748, + "step": 1239 + }, + { + "epoch": 0.0380041681990928, + "grad_norm": 1.2352733126727184, + "learning_rate": 9.998322059750203e-06, + "loss": 0.5326, + "step": 1240 + }, + { + "epoch": 0.03803481672183401, + "grad_norm": 2.535866704124961, + "learning_rate": 9.99830917806486e-06, + "loss": 0.824, + "step": 1241 + }, + { + "epoch": 0.038065465244575214, + "grad_norm": 2.218663381781324, + "learning_rate": 9.998296247129897e-06, + "loss": 0.8023, + "step": 1242 + }, + { + "epoch": 0.03809611376731641, + "grad_norm": 2.3999192543366865, + "learning_rate": 9.998283266945444e-06, + "loss": 0.8693, + "step": 1243 + }, + { + "epoch": 0.03812676229005762, + "grad_norm": 2.280815314988098, + "learning_rate": 9.998270237511627e-06, + "loss": 0.7994, + "step": 1244 + }, + { + "epoch": 0.038157410812798824, + "grad_norm": 2.5630116463088775, + "learning_rate": 9.998257158828572e-06, + "loss": 0.743, + "step": 1245 + }, + { + "epoch": 0.03818805933554003, + "grad_norm": 2.5554817778268277, + "learning_rate": 9.998244030896413e-06, + "loss": 0.7884, + "step": 1246 + }, + { + "epoch": 0.03821870785828123, + "grad_norm": 1.076889476204951, + "learning_rate": 9.998230853715276e-06, + "loss": 0.5809, + "step": 1247 + }, + { + "epoch": 0.038249356381022434, + "grad_norm": 1.0757468502129648, + "learning_rate": 9.99821762728529e-06, + "loss": 0.538, + "step": 1248 + }, + { + "epoch": 
0.03828000490376364, + "grad_norm": 2.345118834006311, + "learning_rate": 9.998204351606591e-06, + "loss": 0.8564, + "step": 1249 + }, + { + "epoch": 0.038310653426504845, + "grad_norm": 2.226306501626551, + "learning_rate": 9.998191026679302e-06, + "loss": 0.6967, + "step": 1250 + }, + { + "epoch": 0.038341301949246044, + "grad_norm": 2.3457438917516202, + "learning_rate": 9.99817765250356e-06, + "loss": 0.7822, + "step": 1251 + }, + { + "epoch": 0.03837195047198725, + "grad_norm": 2.429551463701764, + "learning_rate": 9.998164229079491e-06, + "loss": 0.7951, + "step": 1252 + }, + { + "epoch": 0.038402598994728455, + "grad_norm": 2.7838052828345536, + "learning_rate": 9.998150756407236e-06, + "loss": 0.7795, + "step": 1253 + }, + { + "epoch": 0.03843324751746966, + "grad_norm": 2.0825728479181733, + "learning_rate": 9.99813723448692e-06, + "loss": 0.785, + "step": 1254 + }, + { + "epoch": 0.03846389604021086, + "grad_norm": 2.222400409695483, + "learning_rate": 9.99812366331868e-06, + "loss": 0.8755, + "step": 1255 + }, + { + "epoch": 0.038494544562952066, + "grad_norm": 1.5256205615734886, + "learning_rate": 9.998110042902648e-06, + "loss": 0.549, + "step": 1256 + }, + { + "epoch": 0.03852519308569327, + "grad_norm": 2.700863098160979, + "learning_rate": 9.998096373238958e-06, + "loss": 0.7568, + "step": 1257 + }, + { + "epoch": 0.03855584160843448, + "grad_norm": 2.089226073574537, + "learning_rate": 9.998082654327748e-06, + "loss": 0.7135, + "step": 1258 + }, + { + "epoch": 0.038586490131175676, + "grad_norm": 2.188881239896762, + "learning_rate": 9.99806888616915e-06, + "loss": 0.8559, + "step": 1259 + }, + { + "epoch": 0.03861713865391688, + "grad_norm": 2.7174378760192, + "learning_rate": 9.998055068763299e-06, + "loss": 0.8502, + "step": 1260 + }, + { + "epoch": 0.03864778717665809, + "grad_norm": 1.9949947306197129, + "learning_rate": 9.998041202110334e-06, + "loss": 0.7014, + "step": 1261 + }, + { + "epoch": 0.038678435699399286, + "grad_norm": 1.046443732324221, + "learning_rate": 9.99802728621039e-06, + "loss": 0.5601, + "step": 1262 + }, + { + "epoch": 0.03870908422214049, + "grad_norm": 2.435639508644738, + "learning_rate": 9.998013321063604e-06, + "loss": 0.9334, + "step": 1263 + }, + { + "epoch": 0.0387397327448817, + "grad_norm": 2.136018925602849, + "learning_rate": 9.997999306670114e-06, + "loss": 0.8217, + "step": 1264 + }, + { + "epoch": 0.0387703812676229, + "grad_norm": 2.2523974214495914, + "learning_rate": 9.997985243030058e-06, + "loss": 0.8331, + "step": 1265 + }, + { + "epoch": 0.0388010297903641, + "grad_norm": 2.469106404279449, + "learning_rate": 9.997971130143575e-06, + "loss": 0.924, + "step": 1266 + }, + { + "epoch": 0.03883167831310531, + "grad_norm": 2.585980535080345, + "learning_rate": 9.997956968010803e-06, + "loss": 0.7756, + "step": 1267 + }, + { + "epoch": 0.03886232683584651, + "grad_norm": 1.0478295308717978, + "learning_rate": 9.997942756631883e-06, + "loss": 0.5419, + "step": 1268 + }, + { + "epoch": 0.03889297535858772, + "grad_norm": 2.3001692835086898, + "learning_rate": 9.997928496006954e-06, + "loss": 0.8555, + "step": 1269 + }, + { + "epoch": 0.03892362388132892, + "grad_norm": 2.2760293739429667, + "learning_rate": 9.997914186136157e-06, + "loss": 0.862, + "step": 1270 + }, + { + "epoch": 0.03895427240407012, + "grad_norm": 2.56513413379102, + "learning_rate": 9.997899827019632e-06, + "loss": 0.857, + "step": 1271 + }, + { + "epoch": 0.03898492092681133, + "grad_norm": 2.35993494544197, + "learning_rate": 9.997885418657522e-06, + "loss": 
0.7945, + "step": 1272 + }, + { + "epoch": 0.039015569449552534, + "grad_norm": 2.606988138816629, + "learning_rate": 9.997870961049968e-06, + "loss": 0.8466, + "step": 1273 + }, + { + "epoch": 0.03904621797229373, + "grad_norm": 2.883900724456928, + "learning_rate": 9.997856454197112e-06, + "loss": 0.8733, + "step": 1274 + }, + { + "epoch": 0.03907686649503494, + "grad_norm": 2.2494859768605755, + "learning_rate": 9.997841898099098e-06, + "loss": 0.8349, + "step": 1275 + }, + { + "epoch": 0.039107515017776144, + "grad_norm": 0.9903774472244229, + "learning_rate": 9.99782729275607e-06, + "loss": 0.522, + "step": 1276 + }, + { + "epoch": 0.03913816354051735, + "grad_norm": 2.47503948228917, + "learning_rate": 9.997812638168169e-06, + "loss": 0.805, + "step": 1277 + }, + { + "epoch": 0.03916881206325855, + "grad_norm": 2.292389806535465, + "learning_rate": 9.997797934335542e-06, + "loss": 0.9322, + "step": 1278 + }, + { + "epoch": 0.039199460585999754, + "grad_norm": 2.3067161104310316, + "learning_rate": 9.997783181258335e-06, + "loss": 0.7436, + "step": 1279 + }, + { + "epoch": 0.03923010910874096, + "grad_norm": 2.4074077813182644, + "learning_rate": 9.99776837893669e-06, + "loss": 0.8355, + "step": 1280 + }, + { + "epoch": 0.039260757631482165, + "grad_norm": 2.651540927831436, + "learning_rate": 9.997753527370756e-06, + "loss": 0.7749, + "step": 1281 + }, + { + "epoch": 0.039291406154223364, + "grad_norm": 2.201433182875649, + "learning_rate": 9.997738626560676e-06, + "loss": 0.7422, + "step": 1282 + }, + { + "epoch": 0.03932205467696457, + "grad_norm": 2.448083058906365, + "learning_rate": 9.9977236765066e-06, + "loss": 0.841, + "step": 1283 + }, + { + "epoch": 0.039352703199705776, + "grad_norm": 2.4924173375472845, + "learning_rate": 9.99770867720867e-06, + "loss": 0.7892, + "step": 1284 + }, + { + "epoch": 0.03938335172244698, + "grad_norm": 2.50706164478169, + "learning_rate": 9.997693628667042e-06, + "loss": 0.7902, + "step": 1285 + }, + { + "epoch": 0.03941400024518818, + "grad_norm": 2.4094256889996544, + "learning_rate": 9.997678530881858e-06, + "loss": 0.6843, + "step": 1286 + }, + { + "epoch": 0.039444648767929386, + "grad_norm": 2.2500524446880372, + "learning_rate": 9.99766338385327e-06, + "loss": 0.868, + "step": 1287 + }, + { + "epoch": 0.03947529729067059, + "grad_norm": 5.412902297197953, + "learning_rate": 9.997648187581425e-06, + "loss": 0.9382, + "step": 1288 + }, + { + "epoch": 0.0395059458134118, + "grad_norm": 1.151884197170638, + "learning_rate": 9.997632942066473e-06, + "loss": 0.5495, + "step": 1289 + }, + { + "epoch": 0.039536594336152996, + "grad_norm": 2.299171579562735, + "learning_rate": 9.997617647308566e-06, + "loss": 0.7426, + "step": 1290 + }, + { + "epoch": 0.0395672428588942, + "grad_norm": 2.198921878256667, + "learning_rate": 9.997602303307854e-06, + "loss": 0.7182, + "step": 1291 + }, + { + "epoch": 0.03959789138163541, + "grad_norm": 1.8945985753091934, + "learning_rate": 9.997586910064488e-06, + "loss": 0.7775, + "step": 1292 + }, + { + "epoch": 0.039628539904376606, + "grad_norm": 2.170921857912556, + "learning_rate": 9.997571467578617e-06, + "loss": 0.7186, + "step": 1293 + }, + { + "epoch": 0.03965918842711781, + "grad_norm": 2.7025635450550927, + "learning_rate": 9.997555975850398e-06, + "loss": 0.7234, + "step": 1294 + }, + { + "epoch": 0.03968983694985902, + "grad_norm": 2.5696421616467693, + "learning_rate": 9.997540434879981e-06, + "loss": 0.8466, + "step": 1295 + }, + { + "epoch": 0.03972048547260022, + "grad_norm": 2.114008596553646, + 
"learning_rate": 9.997524844667519e-06, + "loss": 0.7869, + "step": 1296 + }, + { + "epoch": 0.03975113399534142, + "grad_norm": 2.418323548822359, + "learning_rate": 9.997509205213166e-06, + "loss": 0.6592, + "step": 1297 + }, + { + "epoch": 0.03978178251808263, + "grad_norm": 2.449355384008759, + "learning_rate": 9.997493516517076e-06, + "loss": 0.881, + "step": 1298 + }, + { + "epoch": 0.03981243104082383, + "grad_norm": 2.6470930498257816, + "learning_rate": 9.997477778579404e-06, + "loss": 0.7878, + "step": 1299 + }, + { + "epoch": 0.03984307956356504, + "grad_norm": 2.2706335094414407, + "learning_rate": 9.997461991400306e-06, + "loss": 0.8371, + "step": 1300 + }, + { + "epoch": 0.03987372808630624, + "grad_norm": 2.234291857707326, + "learning_rate": 9.997446154979936e-06, + "loss": 0.8774, + "step": 1301 + }, + { + "epoch": 0.03990437660904744, + "grad_norm": 1.2913561604202421, + "learning_rate": 9.99743026931845e-06, + "loss": 0.5811, + "step": 1302 + }, + { + "epoch": 0.03993502513178865, + "grad_norm": 2.335897382626118, + "learning_rate": 9.997414334416002e-06, + "loss": 0.8589, + "step": 1303 + }, + { + "epoch": 0.039965673654529854, + "grad_norm": 2.4296503278088224, + "learning_rate": 9.997398350272755e-06, + "loss": 0.8725, + "step": 1304 + }, + { + "epoch": 0.03999632217727105, + "grad_norm": 2.266641878471248, + "learning_rate": 9.997382316888864e-06, + "loss": 0.7512, + "step": 1305 + }, + { + "epoch": 0.04002697070001226, + "grad_norm": 2.6622273048903256, + "learning_rate": 9.997366234264484e-06, + "loss": 0.8638, + "step": 1306 + }, + { + "epoch": 0.040057619222753464, + "grad_norm": 2.648553669397978, + "learning_rate": 9.997350102399777e-06, + "loss": 0.7743, + "step": 1307 + }, + { + "epoch": 0.04008826774549467, + "grad_norm": 2.2589716161157196, + "learning_rate": 9.997333921294901e-06, + "loss": 0.7507, + "step": 1308 + }, + { + "epoch": 0.04011891626823587, + "grad_norm": 0.9563017228979653, + "learning_rate": 9.997317690950015e-06, + "loss": 0.5251, + "step": 1309 + }, + { + "epoch": 0.040149564790977074, + "grad_norm": 2.3757089249080217, + "learning_rate": 9.997301411365279e-06, + "loss": 0.839, + "step": 1310 + }, + { + "epoch": 0.04018021331371828, + "grad_norm": 1.9882448957519219, + "learning_rate": 9.997285082540854e-06, + "loss": 0.8511, + "step": 1311 + }, + { + "epoch": 0.040210861836459486, + "grad_norm": 2.682320876988619, + "learning_rate": 9.9972687044769e-06, + "loss": 0.9029, + "step": 1312 + }, + { + "epoch": 0.040241510359200684, + "grad_norm": 2.0655720205043875, + "learning_rate": 9.997252277173579e-06, + "loss": 0.7971, + "step": 1313 + }, + { + "epoch": 0.04027215888194189, + "grad_norm": 2.2462029332016065, + "learning_rate": 9.997235800631053e-06, + "loss": 0.8005, + "step": 1314 + }, + { + "epoch": 0.040302807404683096, + "grad_norm": 2.4437254505947057, + "learning_rate": 9.997219274849483e-06, + "loss": 0.8512, + "step": 1315 + }, + { + "epoch": 0.0403334559274243, + "grad_norm": 2.5546141514212732, + "learning_rate": 9.997202699829035e-06, + "loss": 0.8685, + "step": 1316 + }, + { + "epoch": 0.0403641044501655, + "grad_norm": 2.521213911435786, + "learning_rate": 9.997186075569869e-06, + "loss": 0.7944, + "step": 1317 + }, + { + "epoch": 0.040394752972906706, + "grad_norm": 2.1530617645070818, + "learning_rate": 9.99716940207215e-06, + "loss": 0.8684, + "step": 1318 + }, + { + "epoch": 0.04042540149564791, + "grad_norm": 2.2982018401602673, + "learning_rate": 9.997152679336041e-06, + "loss": 0.8592, + "step": 1319 + }, + { + 
"epoch": 0.04045605001838912, + "grad_norm": 2.3601117449170603, + "learning_rate": 9.99713590736171e-06, + "loss": 0.8102, + "step": 1320 + }, + { + "epoch": 0.040486698541130316, + "grad_norm": 2.33911517287537, + "learning_rate": 9.99711908614932e-06, + "loss": 0.8434, + "step": 1321 + }, + { + "epoch": 0.04051734706387152, + "grad_norm": 2.1711534775740686, + "learning_rate": 9.997102215699037e-06, + "loss": 0.8857, + "step": 1322 + }, + { + "epoch": 0.04054799558661273, + "grad_norm": 1.3884990181295693, + "learning_rate": 9.997085296011027e-06, + "loss": 0.5762, + "step": 1323 + }, + { + "epoch": 0.040578644109353926, + "grad_norm": 1.0255321809049662, + "learning_rate": 9.997068327085458e-06, + "loss": 0.5284, + "step": 1324 + }, + { + "epoch": 0.04060929263209513, + "grad_norm": 2.2383344000106526, + "learning_rate": 9.997051308922495e-06, + "loss": 0.8858, + "step": 1325 + }, + { + "epoch": 0.04063994115483634, + "grad_norm": 1.0157753160679976, + "learning_rate": 9.997034241522308e-06, + "loss": 0.5542, + "step": 1326 + }, + { + "epoch": 0.04067058967757754, + "grad_norm": 2.287691432985838, + "learning_rate": 9.997017124885063e-06, + "loss": 0.7466, + "step": 1327 + }, + { + "epoch": 0.04070123820031874, + "grad_norm": 2.267355793242582, + "learning_rate": 9.99699995901093e-06, + "loss": 0.7046, + "step": 1328 + }, + { + "epoch": 0.04073188672305995, + "grad_norm": 2.3838617122971693, + "learning_rate": 9.996982743900077e-06, + "loss": 0.816, + "step": 1329 + }, + { + "epoch": 0.04076253524580115, + "grad_norm": 2.5044310057175254, + "learning_rate": 9.996965479552675e-06, + "loss": 0.9252, + "step": 1330 + }, + { + "epoch": 0.04079318376854236, + "grad_norm": 2.120903324988358, + "learning_rate": 9.996948165968896e-06, + "loss": 0.7706, + "step": 1331 + }, + { + "epoch": 0.04082383229128356, + "grad_norm": 2.021878187893428, + "learning_rate": 9.996930803148905e-06, + "loss": 0.737, + "step": 1332 + }, + { + "epoch": 0.04085448081402476, + "grad_norm": 2.2292262827547837, + "learning_rate": 9.996913391092877e-06, + "loss": 0.8274, + "step": 1333 + }, + { + "epoch": 0.04088512933676597, + "grad_norm": 2.1845320944256033, + "learning_rate": 9.996895929800986e-06, + "loss": 0.9012, + "step": 1334 + }, + { + "epoch": 0.040915777859507174, + "grad_norm": 2.2303532427433983, + "learning_rate": 9.996878419273397e-06, + "loss": 0.8404, + "step": 1335 + }, + { + "epoch": 0.04094642638224837, + "grad_norm": 2.180591096674949, + "learning_rate": 9.996860859510286e-06, + "loss": 0.8249, + "step": 1336 + }, + { + "epoch": 0.04097707490498958, + "grad_norm": 1.6165071565845, + "learning_rate": 9.99684325051183e-06, + "loss": 0.5809, + "step": 1337 + }, + { + "epoch": 0.041007723427730784, + "grad_norm": 2.1970762115391547, + "learning_rate": 9.996825592278197e-06, + "loss": 0.7904, + "step": 1338 + }, + { + "epoch": 0.04103837195047199, + "grad_norm": 2.074499786084035, + "learning_rate": 9.996807884809563e-06, + "loss": 0.8054, + "step": 1339 + }, + { + "epoch": 0.04106902047321319, + "grad_norm": 2.1681083943906074, + "learning_rate": 9.996790128106101e-06, + "loss": 0.7352, + "step": 1340 + }, + { + "epoch": 0.041099668995954394, + "grad_norm": 2.5583417995452526, + "learning_rate": 9.99677232216799e-06, + "loss": 0.7199, + "step": 1341 + }, + { + "epoch": 0.0411303175186956, + "grad_norm": 2.248692509860964, + "learning_rate": 9.996754466995401e-06, + "loss": 0.7321, + "step": 1342 + }, + { + "epoch": 0.041160966041436806, + "grad_norm": 2.1762386678111723, + "learning_rate": 
9.996736562588513e-06, + "loss": 0.877, + "step": 1343 + }, + { + "epoch": 0.041191614564178004, + "grad_norm": 2.27860301457575, + "learning_rate": 9.9967186089475e-06, + "loss": 0.7848, + "step": 1344 + }, + { + "epoch": 0.04122226308691921, + "grad_norm": 2.1833397465462636, + "learning_rate": 9.996700606072542e-06, + "loss": 0.7928, + "step": 1345 + }, + { + "epoch": 0.041252911609660416, + "grad_norm": 2.5833073338876247, + "learning_rate": 9.996682553963813e-06, + "loss": 0.8765, + "step": 1346 + }, + { + "epoch": 0.04128356013240162, + "grad_norm": 1.1023062977183757, + "learning_rate": 9.996664452621492e-06, + "loss": 0.5374, + "step": 1347 + }, + { + "epoch": 0.04131420865514282, + "grad_norm": 2.3129657921060063, + "learning_rate": 9.996646302045758e-06, + "loss": 0.8798, + "step": 1348 + }, + { + "epoch": 0.041344857177884026, + "grad_norm": 2.405520708782671, + "learning_rate": 9.996628102236789e-06, + "loss": 0.8302, + "step": 1349 + }, + { + "epoch": 0.04137550570062523, + "grad_norm": 2.485070992643439, + "learning_rate": 9.996609853194766e-06, + "loss": 0.8567, + "step": 1350 + }, + { + "epoch": 0.04140615422336644, + "grad_norm": 2.145478298831264, + "learning_rate": 9.996591554919868e-06, + "loss": 0.8211, + "step": 1351 + }, + { + "epoch": 0.041436802746107636, + "grad_norm": 2.1331639661426074, + "learning_rate": 9.996573207412275e-06, + "loss": 0.9342, + "step": 1352 + }, + { + "epoch": 0.04146745126884884, + "grad_norm": 2.265424699159151, + "learning_rate": 9.996554810672165e-06, + "loss": 0.7994, + "step": 1353 + }, + { + "epoch": 0.04149809979159005, + "grad_norm": 2.167335018909853, + "learning_rate": 9.996536364699726e-06, + "loss": 0.8161, + "step": 1354 + }, + { + "epoch": 0.041528748314331246, + "grad_norm": 2.2302491093214933, + "learning_rate": 9.996517869495133e-06, + "loss": 0.8242, + "step": 1355 + }, + { + "epoch": 0.04155939683707245, + "grad_norm": 2.2984493643372486, + "learning_rate": 9.996499325058572e-06, + "loss": 0.7293, + "step": 1356 + }, + { + "epoch": 0.04159004535981366, + "grad_norm": 2.317884495280388, + "learning_rate": 9.996480731390224e-06, + "loss": 0.8205, + "step": 1357 + }, + { + "epoch": 0.04162069388255486, + "grad_norm": 1.9569711982748266, + "learning_rate": 9.996462088490273e-06, + "loss": 0.7441, + "step": 1358 + }, + { + "epoch": 0.04165134240529606, + "grad_norm": 2.10140415863538, + "learning_rate": 9.996443396358904e-06, + "loss": 0.7077, + "step": 1359 + }, + { + "epoch": 0.04168199092803727, + "grad_norm": 2.2649946248824153, + "learning_rate": 9.9964246549963e-06, + "loss": 0.7208, + "step": 1360 + }, + { + "epoch": 0.04171263945077847, + "grad_norm": 2.4327897745860576, + "learning_rate": 9.996405864402644e-06, + "loss": 0.824, + "step": 1361 + }, + { + "epoch": 0.04174328797351968, + "grad_norm": 2.012786778228896, + "learning_rate": 9.996387024578122e-06, + "loss": 0.7996, + "step": 1362 + }, + { + "epoch": 0.04177393649626088, + "grad_norm": 2.2117652007499617, + "learning_rate": 9.996368135522922e-06, + "loss": 0.7922, + "step": 1363 + }, + { + "epoch": 0.04180458501900208, + "grad_norm": 1.1369740159029451, + "learning_rate": 9.996349197237228e-06, + "loss": 0.5625, + "step": 1364 + }, + { + "epoch": 0.04183523354174329, + "grad_norm": 2.7350500753318467, + "learning_rate": 9.996330209721226e-06, + "loss": 0.8597, + "step": 1365 + }, + { + "epoch": 0.041865882064484494, + "grad_norm": 2.269044278748801, + "learning_rate": 9.996311172975105e-06, + "loss": 0.7632, + "step": 1366 + }, + { + "epoch": 
0.04189653058722569, + "grad_norm": 2.107673027868904, + "learning_rate": 9.996292086999051e-06, + "loss": 0.7702, + "step": 1367 + }, + { + "epoch": 0.0419271791099669, + "grad_norm": 2.2934655310278584, + "learning_rate": 9.996272951793253e-06, + "loss": 0.7708, + "step": 1368 + }, + { + "epoch": 0.041957827632708104, + "grad_norm": 2.082228108408857, + "learning_rate": 9.9962537673579e-06, + "loss": 0.7392, + "step": 1369 + }, + { + "epoch": 0.04198847615544931, + "grad_norm": 2.290297919512996, + "learning_rate": 9.99623453369318e-06, + "loss": 0.8138, + "step": 1370 + }, + { + "epoch": 0.04201912467819051, + "grad_norm": 2.088306810321175, + "learning_rate": 9.996215250799282e-06, + "loss": 0.8494, + "step": 1371 + }, + { + "epoch": 0.042049773200931714, + "grad_norm": 1.1728537512890453, + "learning_rate": 9.996195918676397e-06, + "loss": 0.5586, + "step": 1372 + }, + { + "epoch": 0.04208042172367292, + "grad_norm": 1.0562431578239788, + "learning_rate": 9.996176537324715e-06, + "loss": 0.5457, + "step": 1373 + }, + { + "epoch": 0.042111070246414126, + "grad_norm": 2.371750722825589, + "learning_rate": 9.996157106744429e-06, + "loss": 0.7425, + "step": 1374 + }, + { + "epoch": 0.042141718769155324, + "grad_norm": 2.3118923129917506, + "learning_rate": 9.996137626935727e-06, + "loss": 0.7678, + "step": 1375 + }, + { + "epoch": 0.04217236729189653, + "grad_norm": 2.1111055000079078, + "learning_rate": 9.996118097898804e-06, + "loss": 0.8184, + "step": 1376 + }, + { + "epoch": 0.042203015814637736, + "grad_norm": 2.2576479129112967, + "learning_rate": 9.99609851963385e-06, + "loss": 0.7555, + "step": 1377 + }, + { + "epoch": 0.04223366433737894, + "grad_norm": 2.5785019289216993, + "learning_rate": 9.996078892141059e-06, + "loss": 0.7113, + "step": 1378 + }, + { + "epoch": 0.04226431286012014, + "grad_norm": 1.3161941647233533, + "learning_rate": 9.996059215420625e-06, + "loss": 0.5456, + "step": 1379 + }, + { + "epoch": 0.042294961382861346, + "grad_norm": 2.374570111407064, + "learning_rate": 9.996039489472741e-06, + "loss": 0.7103, + "step": 1380 + }, + { + "epoch": 0.04232560990560255, + "grad_norm": 2.430827379566154, + "learning_rate": 9.996019714297601e-06, + "loss": 0.8432, + "step": 1381 + }, + { + "epoch": 0.04235625842834376, + "grad_norm": 2.433090693004498, + "learning_rate": 9.9959998898954e-06, + "loss": 0.7811, + "step": 1382 + }, + { + "epoch": 0.042386906951084956, + "grad_norm": 2.2120115232690187, + "learning_rate": 9.995980016266335e-06, + "loss": 0.7, + "step": 1383 + }, + { + "epoch": 0.04241755547382616, + "grad_norm": 2.127251025362776, + "learning_rate": 9.995960093410601e-06, + "loss": 0.8375, + "step": 1384 + }, + { + "epoch": 0.04244820399656737, + "grad_norm": 1.1493440284220235, + "learning_rate": 9.995940121328394e-06, + "loss": 0.5556, + "step": 1385 + }, + { + "epoch": 0.042478852519308566, + "grad_norm": 2.251593722468747, + "learning_rate": 9.995920100019909e-06, + "loss": 0.8324, + "step": 1386 + }, + { + "epoch": 0.04250950104204977, + "grad_norm": 2.444311317227807, + "learning_rate": 9.995900029485348e-06, + "loss": 0.7401, + "step": 1387 + }, + { + "epoch": 0.04254014956479098, + "grad_norm": 2.4633248781144035, + "learning_rate": 9.995879909724903e-06, + "loss": 0.7944, + "step": 1388 + }, + { + "epoch": 0.04257079808753218, + "grad_norm": 2.3453521941999234, + "learning_rate": 9.995859740738776e-06, + "loss": 0.8654, + "step": 1389 + }, + { + "epoch": 0.04260144661027338, + "grad_norm": 2.238733530861835, + "learning_rate": 
9.995839522527165e-06, + "loss": 0.8498, + "step": 1390 + }, + { + "epoch": 0.04263209513301459, + "grad_norm": 1.081993376456448, + "learning_rate": 9.995819255090266e-06, + "loss": 0.5451, + "step": 1391 + }, + { + "epoch": 0.04266274365575579, + "grad_norm": 1.0117959035805268, + "learning_rate": 9.995798938428285e-06, + "loss": 0.5476, + "step": 1392 + }, + { + "epoch": 0.042693392178497, + "grad_norm": 2.3287273825709915, + "learning_rate": 9.995778572541419e-06, + "loss": 0.8277, + "step": 1393 + }, + { + "epoch": 0.0427240407012382, + "grad_norm": 2.440742196848512, + "learning_rate": 9.995758157429867e-06, + "loss": 0.8061, + "step": 1394 + }, + { + "epoch": 0.0427546892239794, + "grad_norm": 2.284309967620969, + "learning_rate": 9.995737693093833e-06, + "loss": 0.8181, + "step": 1395 + }, + { + "epoch": 0.04278533774672061, + "grad_norm": 1.9865078880130782, + "learning_rate": 9.995717179533515e-06, + "loss": 0.7393, + "step": 1396 + }, + { + "epoch": 0.042815986269461814, + "grad_norm": 2.296326339696516, + "learning_rate": 9.99569661674912e-06, + "loss": 0.8189, + "step": 1397 + }, + { + "epoch": 0.04284663479220301, + "grad_norm": 2.4344669213455044, + "learning_rate": 9.995676004740846e-06, + "loss": 0.9175, + "step": 1398 + }, + { + "epoch": 0.04287728331494422, + "grad_norm": 2.143385920817151, + "learning_rate": 9.9956553435089e-06, + "loss": 0.8191, + "step": 1399 + }, + { + "epoch": 0.042907931837685424, + "grad_norm": 2.270847644983126, + "learning_rate": 9.995634633053481e-06, + "loss": 0.8519, + "step": 1400 + }, + { + "epoch": 0.04293858036042663, + "grad_norm": 2.1611776250322903, + "learning_rate": 9.995613873374798e-06, + "loss": 0.841, + "step": 1401 + }, + { + "epoch": 0.04296922888316783, + "grad_norm": 2.491905195313811, + "learning_rate": 9.995593064473053e-06, + "loss": 0.7982, + "step": 1402 + }, + { + "epoch": 0.042999877405909034, + "grad_norm": 2.214984256313521, + "learning_rate": 9.99557220634845e-06, + "loss": 0.689, + "step": 1403 + }, + { + "epoch": 0.04303052592865024, + "grad_norm": 2.291000847393793, + "learning_rate": 9.995551299001198e-06, + "loss": 0.593, + "step": 1404 + }, + { + "epoch": 0.043061174451391446, + "grad_norm": 2.685110065840251, + "learning_rate": 9.9955303424315e-06, + "loss": 0.8361, + "step": 1405 + }, + { + "epoch": 0.043091822974132644, + "grad_norm": 2.4583113073267433, + "learning_rate": 9.995509336639563e-06, + "loss": 0.86, + "step": 1406 + }, + { + "epoch": 0.04312247149687385, + "grad_norm": 2.2392325270173696, + "learning_rate": 9.995488281625594e-06, + "loss": 0.729, + "step": 1407 + }, + { + "epoch": 0.043153120019615056, + "grad_norm": 2.3396021412950936, + "learning_rate": 9.995467177389801e-06, + "loss": 0.7383, + "step": 1408 + }, + { + "epoch": 0.04318376854235626, + "grad_norm": 2.234101423941762, + "learning_rate": 9.995446023932394e-06, + "loss": 0.8028, + "step": 1409 + }, + { + "epoch": 0.04321441706509746, + "grad_norm": 2.143633974168245, + "learning_rate": 9.995424821253577e-06, + "loss": 0.8815, + "step": 1410 + }, + { + "epoch": 0.043245065587838666, + "grad_norm": 2.026892705377884, + "learning_rate": 9.99540356935356e-06, + "loss": 0.7441, + "step": 1411 + }, + { + "epoch": 0.04327571411057987, + "grad_norm": 2.304444362589145, + "learning_rate": 9.995382268232556e-06, + "loss": 0.9004, + "step": 1412 + }, + { + "epoch": 0.04330636263332108, + "grad_norm": 2.271113140149838, + "learning_rate": 9.99536091789077e-06, + "loss": 0.77, + "step": 1413 + }, + { + "epoch": 0.043337011156062276, + 
"grad_norm": 2.2995220035530974, + "learning_rate": 9.995339518328418e-06, + "loss": 0.826, + "step": 1414 + }, + { + "epoch": 0.04336765967880348, + "grad_norm": 2.2445720159718574, + "learning_rate": 9.995318069545706e-06, + "loss": 0.8447, + "step": 1415 + }, + { + "epoch": 0.04339830820154469, + "grad_norm": 2.170528717903499, + "learning_rate": 9.995296571542845e-06, + "loss": 0.5544, + "step": 1416 + }, + { + "epoch": 0.04342895672428589, + "grad_norm": 2.428881184332189, + "learning_rate": 9.995275024320051e-06, + "loss": 0.8313, + "step": 1417 + }, + { + "epoch": 0.04345960524702709, + "grad_norm": 2.16347878880223, + "learning_rate": 9.995253427877533e-06, + "loss": 0.786, + "step": 1418 + }, + { + "epoch": 0.0434902537697683, + "grad_norm": 2.271911459547079, + "learning_rate": 9.995231782215506e-06, + "loss": 0.7767, + "step": 1419 + }, + { + "epoch": 0.0435209022925095, + "grad_norm": 1.086860238089838, + "learning_rate": 9.995210087334182e-06, + "loss": 0.5389, + "step": 1420 + }, + { + "epoch": 0.0435515508152507, + "grad_norm": 2.2558505372937807, + "learning_rate": 9.995188343233775e-06, + "loss": 0.7467, + "step": 1421 + }, + { + "epoch": 0.04358219933799191, + "grad_norm": 2.486740161267048, + "learning_rate": 9.995166549914498e-06, + "loss": 0.7525, + "step": 1422 + }, + { + "epoch": 0.04361284786073311, + "grad_norm": 2.3366804349806993, + "learning_rate": 9.995144707376568e-06, + "loss": 0.8914, + "step": 1423 + }, + { + "epoch": 0.04364349638347432, + "grad_norm": 2.4044276647261777, + "learning_rate": 9.995122815620199e-06, + "loss": 0.7545, + "step": 1424 + }, + { + "epoch": 0.04367414490621552, + "grad_norm": 2.199760873419752, + "learning_rate": 9.995100874645605e-06, + "loss": 0.8445, + "step": 1425 + }, + { + "epoch": 0.04370479342895672, + "grad_norm": 1.5271576596091754, + "learning_rate": 9.995078884453006e-06, + "loss": 0.543, + "step": 1426 + }, + { + "epoch": 0.04373544195169793, + "grad_norm": 2.284250695425709, + "learning_rate": 9.995056845042616e-06, + "loss": 0.7912, + "step": 1427 + }, + { + "epoch": 0.043766090474439134, + "grad_norm": 2.4835710950920777, + "learning_rate": 9.995034756414655e-06, + "loss": 0.8613, + "step": 1428 + }, + { + "epoch": 0.04379673899718033, + "grad_norm": 2.064420580832537, + "learning_rate": 9.995012618569335e-06, + "loss": 0.8147, + "step": 1429 + }, + { + "epoch": 0.04382738751992154, + "grad_norm": 2.165787133989808, + "learning_rate": 9.99499043150688e-06, + "loss": 0.8102, + "step": 1430 + }, + { + "epoch": 0.043858036042662744, + "grad_norm": 2.179364396482116, + "learning_rate": 9.994968195227505e-06, + "loss": 0.8321, + "step": 1431 + }, + { + "epoch": 0.04388868456540395, + "grad_norm": 2.162212419591497, + "learning_rate": 9.994945909731432e-06, + "loss": 0.8131, + "step": 1432 + }, + { + "epoch": 0.04391933308814515, + "grad_norm": 1.0781343371685606, + "learning_rate": 9.994923575018878e-06, + "loss": 0.5648, + "step": 1433 + }, + { + "epoch": 0.043949981610886354, + "grad_norm": 2.2623780039184247, + "learning_rate": 9.994901191090063e-06, + "loss": 0.879, + "step": 1434 + }, + { + "epoch": 0.04398063013362756, + "grad_norm": 1.048867485391845, + "learning_rate": 9.99487875794521e-06, + "loss": 0.5571, + "step": 1435 + }, + { + "epoch": 0.044011278656368766, + "grad_norm": 2.205242750388276, + "learning_rate": 9.994856275584537e-06, + "loss": 0.8668, + "step": 1436 + }, + { + "epoch": 0.044041927179109965, + "grad_norm": 2.0703086928238106, + "learning_rate": 9.99483374400827e-06, + "loss": 0.7102, + 
"step": 1437 + }, + { + "epoch": 0.04407257570185117, + "grad_norm": 2.287740639221629, + "learning_rate": 9.994811163216625e-06, + "loss": 0.7592, + "step": 1438 + }, + { + "epoch": 0.044103224224592376, + "grad_norm": 2.2597116642212995, + "learning_rate": 9.994788533209829e-06, + "loss": 0.8037, + "step": 1439 + }, + { + "epoch": 0.04413387274733358, + "grad_norm": 2.249875692803309, + "learning_rate": 9.994765853988105e-06, + "loss": 0.8981, + "step": 1440 + }, + { + "epoch": 0.04416452127007478, + "grad_norm": 2.132558718081938, + "learning_rate": 9.994743125551672e-06, + "loss": 0.924, + "step": 1441 + }, + { + "epoch": 0.044195169792815986, + "grad_norm": 2.0367865499119304, + "learning_rate": 9.994720347900759e-06, + "loss": 0.7754, + "step": 1442 + }, + { + "epoch": 0.04422581831555719, + "grad_norm": 2.1349111231193003, + "learning_rate": 9.994697521035588e-06, + "loss": 0.7921, + "step": 1443 + }, + { + "epoch": 0.0442564668382984, + "grad_norm": 2.269534884113159, + "learning_rate": 9.994674644956385e-06, + "loss": 0.8185, + "step": 1444 + }, + { + "epoch": 0.044287115361039596, + "grad_norm": 2.6974020616837024, + "learning_rate": 9.994651719663373e-06, + "loss": 0.6834, + "step": 1445 + }, + { + "epoch": 0.0443177638837808, + "grad_norm": 2.2668536714564755, + "learning_rate": 9.994628745156782e-06, + "loss": 0.8087, + "step": 1446 + }, + { + "epoch": 0.04434841240652201, + "grad_norm": 2.242711689464131, + "learning_rate": 9.994605721436836e-06, + "loss": 0.7147, + "step": 1447 + }, + { + "epoch": 0.04437906092926321, + "grad_norm": 2.073926938551995, + "learning_rate": 9.99458264850376e-06, + "loss": 0.7147, + "step": 1448 + }, + { + "epoch": 0.04440970945200441, + "grad_norm": 1.9423028541071032, + "learning_rate": 9.994559526357785e-06, + "loss": 0.7545, + "step": 1449 + }, + { + "epoch": 0.04444035797474562, + "grad_norm": 2.456717880705304, + "learning_rate": 9.994536354999136e-06, + "loss": 0.7724, + "step": 1450 + }, + { + "epoch": 0.04447100649748682, + "grad_norm": 1.9300995677826185, + "learning_rate": 9.994513134428042e-06, + "loss": 0.734, + "step": 1451 + }, + { + "epoch": 0.04450165502022802, + "grad_norm": 2.3857486647138857, + "learning_rate": 9.994489864644733e-06, + "loss": 0.8506, + "step": 1452 + }, + { + "epoch": 0.04453230354296923, + "grad_norm": 1.9003265173836814, + "learning_rate": 9.994466545649437e-06, + "loss": 0.7814, + "step": 1453 + }, + { + "epoch": 0.04456295206571043, + "grad_norm": 2.330218483129496, + "learning_rate": 9.994443177442386e-06, + "loss": 0.704, + "step": 1454 + }, + { + "epoch": 0.04459360058845164, + "grad_norm": 2.1966224841284197, + "learning_rate": 9.994419760023806e-06, + "loss": 0.8376, + "step": 1455 + }, + { + "epoch": 0.04462424911119284, + "grad_norm": 2.1637051973842367, + "learning_rate": 9.994396293393932e-06, + "loss": 0.7836, + "step": 1456 + }, + { + "epoch": 0.04465489763393404, + "grad_norm": 1.9196486162317807, + "learning_rate": 9.994372777552992e-06, + "loss": 0.8039, + "step": 1457 + }, + { + "epoch": 0.04468554615667525, + "grad_norm": 1.5726101373380235, + "learning_rate": 9.99434921250122e-06, + "loss": 0.5665, + "step": 1458 + }, + { + "epoch": 0.044716194679416454, + "grad_norm": 2.194723795125612, + "learning_rate": 9.994325598238847e-06, + "loss": 0.7964, + "step": 1459 + }, + { + "epoch": 0.04474684320215765, + "grad_norm": 2.2432830321391712, + "learning_rate": 9.994301934766106e-06, + "loss": 0.767, + "step": 1460 + }, + { + "epoch": 0.04477749172489886, + "grad_norm": 2.244304738676307, + 
"learning_rate": 9.99427822208323e-06, + "loss": 0.7447, + "step": 1461 + }, + { + "epoch": 0.044808140247640064, + "grad_norm": 2.1290143570812665, + "learning_rate": 9.994254460190453e-06, + "loss": 0.7857, + "step": 1462 + }, + { + "epoch": 0.04483878877038127, + "grad_norm": 2.5212837321933, + "learning_rate": 9.99423064908801e-06, + "loss": 0.8543, + "step": 1463 + }, + { + "epoch": 0.04486943729312247, + "grad_norm": 2.3852472486068232, + "learning_rate": 9.994206788776133e-06, + "loss": 0.8734, + "step": 1464 + }, + { + "epoch": 0.044900085815863675, + "grad_norm": 2.428333902070855, + "learning_rate": 9.99418287925506e-06, + "loss": 0.7955, + "step": 1465 + }, + { + "epoch": 0.04493073433860488, + "grad_norm": 2.3330409583273095, + "learning_rate": 9.994158920525024e-06, + "loss": 0.8244, + "step": 1466 + }, + { + "epoch": 0.044961382861346086, + "grad_norm": 2.3431516393449083, + "learning_rate": 9.994134912586265e-06, + "loss": 0.7559, + "step": 1467 + }, + { + "epoch": 0.044992031384087285, + "grad_norm": 2.34794540919881, + "learning_rate": 9.994110855439014e-06, + "loss": 0.7574, + "step": 1468 + }, + { + "epoch": 0.04502267990682849, + "grad_norm": 2.4543615471522893, + "learning_rate": 9.994086749083512e-06, + "loss": 0.8522, + "step": 1469 + }, + { + "epoch": 0.045053328429569696, + "grad_norm": 1.5307031787226704, + "learning_rate": 9.994062593519995e-06, + "loss": 0.5441, + "step": 1470 + }, + { + "epoch": 0.0450839769523109, + "grad_norm": 2.8529408356706205, + "learning_rate": 9.994038388748702e-06, + "loss": 0.8658, + "step": 1471 + }, + { + "epoch": 0.0451146254750521, + "grad_norm": 2.157871156961857, + "learning_rate": 9.994014134769872e-06, + "loss": 0.7359, + "step": 1472 + }, + { + "epoch": 0.045145273997793306, + "grad_norm": 2.190216927047067, + "learning_rate": 9.993989831583742e-06, + "loss": 0.913, + "step": 1473 + }, + { + "epoch": 0.04517592252053451, + "grad_norm": 2.227293800496843, + "learning_rate": 9.99396547919055e-06, + "loss": 0.7547, + "step": 1474 + }, + { + "epoch": 0.04520657104327572, + "grad_norm": 2.2155770270069217, + "learning_rate": 9.99394107759054e-06, + "loss": 0.805, + "step": 1475 + }, + { + "epoch": 0.045237219566016916, + "grad_norm": 2.199937013071588, + "learning_rate": 9.993916626783952e-06, + "loss": 0.7912, + "step": 1476 + }, + { + "epoch": 0.04526786808875812, + "grad_norm": 2.347752082740451, + "learning_rate": 9.993892126771026e-06, + "loss": 0.9641, + "step": 1477 + }, + { + "epoch": 0.04529851661149933, + "grad_norm": 2.3351507612422657, + "learning_rate": 9.993867577552003e-06, + "loss": 0.7439, + "step": 1478 + }, + { + "epoch": 0.04532916513424053, + "grad_norm": 2.599036273224227, + "learning_rate": 9.993842979127124e-06, + "loss": 0.8478, + "step": 1479 + }, + { + "epoch": 0.04535981365698173, + "grad_norm": 2.18415008356805, + "learning_rate": 9.993818331496632e-06, + "loss": 0.8127, + "step": 1480 + }, + { + "epoch": 0.04539046217972294, + "grad_norm": 2.3255727244522713, + "learning_rate": 9.99379363466077e-06, + "loss": 0.8178, + "step": 1481 + }, + { + "epoch": 0.04542111070246414, + "grad_norm": 2.3563630573957375, + "learning_rate": 9.993768888619783e-06, + "loss": 0.7338, + "step": 1482 + }, + { + "epoch": 0.04545175922520534, + "grad_norm": 1.9087166312121584, + "learning_rate": 9.993744093373915e-06, + "loss": 0.7437, + "step": 1483 + }, + { + "epoch": 0.04548240774794655, + "grad_norm": 1.2022184464606387, + "learning_rate": 9.993719248923406e-06, + "loss": 0.5491, + "step": 1484 + }, + { + "epoch": 
0.04551305627068775, + "grad_norm": 2.434305421984262, + "learning_rate": 9.993694355268504e-06, + "loss": 0.7503, + "step": 1485 + }, + { + "epoch": 0.04554370479342896, + "grad_norm": 2.2345663334212547, + "learning_rate": 9.993669412409455e-06, + "loss": 0.7475, + "step": 1486 + }, + { + "epoch": 0.04557435331617016, + "grad_norm": 2.2580284428908657, + "learning_rate": 9.993644420346503e-06, + "loss": 0.91, + "step": 1487 + }, + { + "epoch": 0.04560500183891136, + "grad_norm": 2.0493193672796406, + "learning_rate": 9.993619379079893e-06, + "loss": 0.7917, + "step": 1488 + }, + { + "epoch": 0.04563565036165257, + "grad_norm": 2.0664686841549194, + "learning_rate": 9.993594288609876e-06, + "loss": 0.7948, + "step": 1489 + }, + { + "epoch": 0.045666298884393774, + "grad_norm": 2.247877835475181, + "learning_rate": 9.993569148936695e-06, + "loss": 0.7747, + "step": 1490 + }, + { + "epoch": 0.04569694740713497, + "grad_norm": 2.2076143114504223, + "learning_rate": 9.993543960060601e-06, + "loss": 0.7455, + "step": 1491 + }, + { + "epoch": 0.04572759592987618, + "grad_norm": 2.16360610159134, + "learning_rate": 9.99351872198184e-06, + "loss": 0.813, + "step": 1492 + }, + { + "epoch": 0.045758244452617385, + "grad_norm": 2.408060961697643, + "learning_rate": 9.99349343470066e-06, + "loss": 0.8755, + "step": 1493 + }, + { + "epoch": 0.04578889297535859, + "grad_norm": 2.1267682896780724, + "learning_rate": 9.993468098217313e-06, + "loss": 0.9123, + "step": 1494 + }, + { + "epoch": 0.04581954149809979, + "grad_norm": 2.3456013238850932, + "learning_rate": 9.993442712532048e-06, + "loss": 0.8164, + "step": 1495 + }, + { + "epoch": 0.045850190020840995, + "grad_norm": 2.278793091371842, + "learning_rate": 9.993417277645114e-06, + "loss": 0.7467, + "step": 1496 + }, + { + "epoch": 0.0458808385435822, + "grad_norm": 2.1641553133735867, + "learning_rate": 9.99339179355676e-06, + "loss": 0.7661, + "step": 1497 + }, + { + "epoch": 0.045911487066323406, + "grad_norm": 2.4047732457726303, + "learning_rate": 9.993366260267243e-06, + "loss": 0.8162, + "step": 1498 + }, + { + "epoch": 0.045942135589064605, + "grad_norm": 2.302629761806154, + "learning_rate": 9.993340677776809e-06, + "loss": 0.6933, + "step": 1499 + }, + { + "epoch": 0.04597278411180581, + "grad_norm": 1.3156016047843697, + "learning_rate": 9.99331504608571e-06, + "loss": 0.5364, + "step": 1500 + }, + { + "epoch": 0.046003432634547016, + "grad_norm": 2.278665084283029, + "learning_rate": 9.993289365194201e-06, + "loss": 0.8045, + "step": 1501 + }, + { + "epoch": 0.04603408115728822, + "grad_norm": 2.1464912345486153, + "learning_rate": 9.993263635102534e-06, + "loss": 0.8516, + "step": 1502 + }, + { + "epoch": 0.04606472968002942, + "grad_norm": 0.9557001457412279, + "learning_rate": 9.993237855810964e-06, + "loss": 0.5481, + "step": 1503 + }, + { + "epoch": 0.046095378202770626, + "grad_norm": 2.4669494850658342, + "learning_rate": 9.993212027319742e-06, + "loss": 0.8697, + "step": 1504 + }, + { + "epoch": 0.04612602672551183, + "grad_norm": 0.9780316888470426, + "learning_rate": 9.993186149629127e-06, + "loss": 0.5269, + "step": 1505 + }, + { + "epoch": 0.04615667524825304, + "grad_norm": 2.0146015978757417, + "learning_rate": 9.993160222739369e-06, + "loss": 0.8206, + "step": 1506 + }, + { + "epoch": 0.046187323770994236, + "grad_norm": 2.2619544828142946, + "learning_rate": 9.993134246650726e-06, + "loss": 0.7552, + "step": 1507 + }, + { + "epoch": 0.04621797229373544, + "grad_norm": 2.051845008989956, + "learning_rate": 
9.993108221363454e-06, + "loss": 0.7846, + "step": 1508 + }, + { + "epoch": 0.04624862081647665, + "grad_norm": 2.320196593352715, + "learning_rate": 9.99308214687781e-06, + "loss": 0.9886, + "step": 1509 + }, + { + "epoch": 0.04627926933921785, + "grad_norm": 1.0431476707966199, + "learning_rate": 9.99305602319405e-06, + "loss": 0.5663, + "step": 1510 + }, + { + "epoch": 0.04630991786195905, + "grad_norm": 2.1228387020743558, + "learning_rate": 9.99302985031243e-06, + "loss": 0.7548, + "step": 1511 + }, + { + "epoch": 0.04634056638470026, + "grad_norm": 2.0888640201066866, + "learning_rate": 9.993003628233211e-06, + "loss": 0.7293, + "step": 1512 + }, + { + "epoch": 0.04637121490744146, + "grad_norm": 2.272166700187362, + "learning_rate": 9.99297735695665e-06, + "loss": 0.862, + "step": 1513 + }, + { + "epoch": 0.04640186343018266, + "grad_norm": 2.1822570864932054, + "learning_rate": 9.992951036483003e-06, + "loss": 0.7515, + "step": 1514 + }, + { + "epoch": 0.04643251195292387, + "grad_norm": 1.0997606658180217, + "learning_rate": 9.992924666812533e-06, + "loss": 0.5433, + "step": 1515 + }, + { + "epoch": 0.04646316047566507, + "grad_norm": 2.2121041659383054, + "learning_rate": 9.9928982479455e-06, + "loss": 0.7945, + "step": 1516 + }, + { + "epoch": 0.04649380899840628, + "grad_norm": 2.109551214723022, + "learning_rate": 9.992871779882164e-06, + "loss": 0.7465, + "step": 1517 + }, + { + "epoch": 0.04652445752114748, + "grad_norm": 2.440433728922343, + "learning_rate": 9.992845262622782e-06, + "loss": 0.7555, + "step": 1518 + }, + { + "epoch": 0.04655510604388868, + "grad_norm": 1.0090824573448174, + "learning_rate": 9.99281869616762e-06, + "loss": 0.551, + "step": 1519 + }, + { + "epoch": 0.04658575456662989, + "grad_norm": 2.515344620787977, + "learning_rate": 9.992792080516938e-06, + "loss": 0.8497, + "step": 1520 + }, + { + "epoch": 0.046616403089371095, + "grad_norm": 1.0075554855056676, + "learning_rate": 9.992765415670998e-06, + "loss": 0.5551, + "step": 1521 + }, + { + "epoch": 0.04664705161211229, + "grad_norm": 2.080535328124425, + "learning_rate": 9.992738701630061e-06, + "loss": 0.76, + "step": 1522 + }, + { + "epoch": 0.0466777001348535, + "grad_norm": 0.9275864009661776, + "learning_rate": 9.992711938394394e-06, + "loss": 0.5329, + "step": 1523 + }, + { + "epoch": 0.046708348657594705, + "grad_norm": 2.216368739269394, + "learning_rate": 9.992685125964259e-06, + "loss": 0.8035, + "step": 1524 + }, + { + "epoch": 0.04673899718033591, + "grad_norm": 2.483526992787161, + "learning_rate": 9.992658264339918e-06, + "loss": 0.8702, + "step": 1525 + }, + { + "epoch": 0.04676964570307711, + "grad_norm": 2.282147697165149, + "learning_rate": 9.99263135352164e-06, + "loss": 0.8533, + "step": 1526 + }, + { + "epoch": 0.046800294225818315, + "grad_norm": 0.974270047040013, + "learning_rate": 9.992604393509687e-06, + "loss": 0.5266, + "step": 1527 + }, + { + "epoch": 0.04683094274855952, + "grad_norm": 0.9553261893417786, + "learning_rate": 9.992577384304325e-06, + "loss": 0.5344, + "step": 1528 + }, + { + "epoch": 0.046861591271300726, + "grad_norm": 2.256096841821275, + "learning_rate": 9.992550325905821e-06, + "loss": 0.7246, + "step": 1529 + }, + { + "epoch": 0.046892239794041925, + "grad_norm": 2.0533917774154364, + "learning_rate": 9.992523218314442e-06, + "loss": 0.7784, + "step": 1530 + }, + { + "epoch": 0.04692288831678313, + "grad_norm": 0.9825689084529211, + "learning_rate": 9.992496061530454e-06, + "loss": 0.5701, + "step": 1531 + }, + { + "epoch": 0.046953536839524336, + 
"grad_norm": 2.2499559575345747, + "learning_rate": 9.992468855554125e-06, + "loss": 0.865, + "step": 1532 + }, + { + "epoch": 0.04698418536226554, + "grad_norm": 2.0839156677819073, + "learning_rate": 9.992441600385721e-06, + "loss": 0.8337, + "step": 1533 + }, + { + "epoch": 0.04701483388500674, + "grad_norm": 0.9479263916345423, + "learning_rate": 9.992414296025514e-06, + "loss": 0.5539, + "step": 1534 + }, + { + "epoch": 0.047045482407747946, + "grad_norm": 2.24668634261317, + "learning_rate": 9.992386942473773e-06, + "loss": 0.7805, + "step": 1535 + }, + { + "epoch": 0.04707613093048915, + "grad_norm": 2.0119379477455377, + "learning_rate": 9.992359539730763e-06, + "loss": 0.8361, + "step": 1536 + }, + { + "epoch": 0.04710677945323036, + "grad_norm": 2.0590415058557467, + "learning_rate": 9.99233208779676e-06, + "loss": 0.7783, + "step": 1537 + }, + { + "epoch": 0.047137427975971556, + "grad_norm": 0.999341501350408, + "learning_rate": 9.99230458667203e-06, + "loss": 0.544, + "step": 1538 + }, + { + "epoch": 0.04716807649871276, + "grad_norm": 2.3464302413584472, + "learning_rate": 9.992277036356846e-06, + "loss": 0.8463, + "step": 1539 + }, + { + "epoch": 0.04719872502145397, + "grad_norm": 1.8873596136578552, + "learning_rate": 9.99224943685148e-06, + "loss": 0.779, + "step": 1540 + }, + { + "epoch": 0.04722937354419517, + "grad_norm": 2.259766077167158, + "learning_rate": 9.992221788156202e-06, + "loss": 0.7016, + "step": 1541 + }, + { + "epoch": 0.04726002206693637, + "grad_norm": 2.2468863703266684, + "learning_rate": 9.992194090271285e-06, + "loss": 0.7517, + "step": 1542 + }, + { + "epoch": 0.04729067058967758, + "grad_norm": 2.221028307623843, + "learning_rate": 9.992166343197002e-06, + "loss": 0.7601, + "step": 1543 + }, + { + "epoch": 0.04732131911241878, + "grad_norm": 2.011766617457771, + "learning_rate": 9.992138546933629e-06, + "loss": 0.7397, + "step": 1544 + }, + { + "epoch": 0.04735196763515998, + "grad_norm": 2.4961850559214325, + "learning_rate": 9.992110701481436e-06, + "loss": 0.79, + "step": 1545 + }, + { + "epoch": 0.04738261615790119, + "grad_norm": 2.268198373134644, + "learning_rate": 9.9920828068407e-06, + "loss": 0.8422, + "step": 1546 + }, + { + "epoch": 0.04741326468064239, + "grad_norm": 2.46609291328082, + "learning_rate": 9.992054863011693e-06, + "loss": 0.7151, + "step": 1547 + }, + { + "epoch": 0.0474439132033836, + "grad_norm": 2.0080438622222743, + "learning_rate": 9.992026869994694e-06, + "loss": 0.7491, + "step": 1548 + }, + { + "epoch": 0.0474745617261248, + "grad_norm": 2.5522209383293917, + "learning_rate": 9.991998827789975e-06, + "loss": 0.7906, + "step": 1549 + }, + { + "epoch": 0.047505210248866, + "grad_norm": 2.0941462289814163, + "learning_rate": 9.991970736397817e-06, + "loss": 0.7089, + "step": 1550 + }, + { + "epoch": 0.04753585877160721, + "grad_norm": 2.095478772482786, + "learning_rate": 9.991942595818491e-06, + "loss": 0.6989, + "step": 1551 + }, + { + "epoch": 0.047566507294348415, + "grad_norm": 2.176402416776282, + "learning_rate": 9.991914406052279e-06, + "loss": 0.8467, + "step": 1552 + }, + { + "epoch": 0.04759715581708961, + "grad_norm": 1.054218142627708, + "learning_rate": 9.991886167099456e-06, + "loss": 0.5438, + "step": 1553 + }, + { + "epoch": 0.04762780433983082, + "grad_norm": 2.188993094042433, + "learning_rate": 9.991857878960303e-06, + "loss": 0.7348, + "step": 1554 + }, + { + "epoch": 0.047658452862572025, + "grad_norm": 0.993328214613932, + "learning_rate": 9.991829541635097e-06, + "loss": 0.566, + "step": 
1555 + }, + { + "epoch": 0.04768910138531323, + "grad_norm": 0.8967822304498032, + "learning_rate": 9.991801155124116e-06, + "loss": 0.5353, + "step": 1556 + }, + { + "epoch": 0.04771974990805443, + "grad_norm": 0.9266881311772347, + "learning_rate": 9.991772719427642e-06, + "loss": 0.553, + "step": 1557 + }, + { + "epoch": 0.047750398430795635, + "grad_norm": 2.401205417688273, + "learning_rate": 9.991744234545952e-06, + "loss": 0.8428, + "step": 1558 + }, + { + "epoch": 0.04778104695353684, + "grad_norm": 2.4499191217310754, + "learning_rate": 9.991715700479333e-06, + "loss": 0.8473, + "step": 1559 + }, + { + "epoch": 0.047811695476278046, + "grad_norm": 2.008993083356474, + "learning_rate": 9.99168711722806e-06, + "loss": 0.8482, + "step": 1560 + }, + { + "epoch": 0.047842343999019245, + "grad_norm": 2.1722996376305774, + "learning_rate": 9.991658484792416e-06, + "loss": 0.7843, + "step": 1561 + }, + { + "epoch": 0.04787299252176045, + "grad_norm": 2.1830763529828268, + "learning_rate": 9.991629803172684e-06, + "loss": 0.8011, + "step": 1562 + }, + { + "epoch": 0.047903641044501656, + "grad_norm": 2.1150838657136104, + "learning_rate": 9.991601072369145e-06, + "loss": 0.7337, + "step": 1563 + }, + { + "epoch": 0.04793428956724286, + "grad_norm": 1.1899983551134405, + "learning_rate": 9.991572292382086e-06, + "loss": 0.5358, + "step": 1564 + }, + { + "epoch": 0.04796493808998406, + "grad_norm": 2.1726250695828613, + "learning_rate": 9.991543463211788e-06, + "loss": 0.7746, + "step": 1565 + }, + { + "epoch": 0.047995586612725266, + "grad_norm": 1.9268239508919724, + "learning_rate": 9.991514584858534e-06, + "loss": 0.7416, + "step": 1566 + }, + { + "epoch": 0.04802623513546647, + "grad_norm": 2.198511583009963, + "learning_rate": 9.991485657322609e-06, + "loss": 0.7508, + "step": 1567 + }, + { + "epoch": 0.04805688365820768, + "grad_norm": 1.025992046821334, + "learning_rate": 9.9914566806043e-06, + "loss": 0.5517, + "step": 1568 + }, + { + "epoch": 0.048087532180948876, + "grad_norm": 2.2796568240972164, + "learning_rate": 9.99142765470389e-06, + "loss": 0.8111, + "step": 1569 + }, + { + "epoch": 0.04811818070369008, + "grad_norm": 2.092582662968351, + "learning_rate": 9.991398579621668e-06, + "loss": 0.6874, + "step": 1570 + }, + { + "epoch": 0.04814882922643129, + "grad_norm": 1.0012775849066038, + "learning_rate": 9.991369455357918e-06, + "loss": 0.5236, + "step": 1571 + }, + { + "epoch": 0.04817947774917249, + "grad_norm": 0.9959983760549106, + "learning_rate": 9.991340281912927e-06, + "loss": 0.5481, + "step": 1572 + }, + { + "epoch": 0.04821012627191369, + "grad_norm": 2.6323082563565277, + "learning_rate": 9.991311059286984e-06, + "loss": 0.7552, + "step": 1573 + }, + { + "epoch": 0.0482407747946549, + "grad_norm": 1.9280676525071196, + "learning_rate": 9.991281787480377e-06, + "loss": 0.8073, + "step": 1574 + }, + { + "epoch": 0.0482714233173961, + "grad_norm": 1.284053196638117, + "learning_rate": 9.991252466493391e-06, + "loss": 0.5343, + "step": 1575 + }, + { + "epoch": 0.0483020718401373, + "grad_norm": 2.352242463053297, + "learning_rate": 9.991223096326316e-06, + "loss": 0.8454, + "step": 1576 + }, + { + "epoch": 0.04833272036287851, + "grad_norm": 2.4279787959102292, + "learning_rate": 9.991193676979448e-06, + "loss": 0.7335, + "step": 1577 + }, + { + "epoch": 0.04836336888561971, + "grad_norm": 2.283049715942153, + "learning_rate": 9.991164208453069e-06, + "loss": 0.846, + "step": 1578 + }, + { + "epoch": 0.04839401740836092, + "grad_norm": 2.1826357798186544, + 
"learning_rate": 9.99113469074747e-06, + "loss": 0.8429, + "step": 1579 + }, + { + "epoch": 0.04842466593110212, + "grad_norm": 2.284335451693949, + "learning_rate": 9.991105123862944e-06, + "loss": 0.7471, + "step": 1580 + }, + { + "epoch": 0.04845531445384332, + "grad_norm": 2.1670240159322476, + "learning_rate": 9.991075507799786e-06, + "loss": 0.7414, + "step": 1581 + }, + { + "epoch": 0.04848596297658453, + "grad_norm": 2.3133202903183676, + "learning_rate": 9.991045842558282e-06, + "loss": 0.9109, + "step": 1582 + }, + { + "epoch": 0.048516611499325735, + "grad_norm": 2.4600045382060802, + "learning_rate": 9.991016128138726e-06, + "loss": 0.871, + "step": 1583 + }, + { + "epoch": 0.04854726002206693, + "grad_norm": 2.3481497570688203, + "learning_rate": 9.990986364541411e-06, + "loss": 0.882, + "step": 1584 + }, + { + "epoch": 0.04857790854480814, + "grad_norm": 2.0809473054035585, + "learning_rate": 9.990956551766631e-06, + "loss": 0.7809, + "step": 1585 + }, + { + "epoch": 0.048608557067549345, + "grad_norm": 2.3638897517885242, + "learning_rate": 9.990926689814678e-06, + "loss": 0.7786, + "step": 1586 + }, + { + "epoch": 0.04863920559029055, + "grad_norm": 2.12584704120499, + "learning_rate": 9.990896778685847e-06, + "loss": 0.8139, + "step": 1587 + }, + { + "epoch": 0.04866985411303175, + "grad_norm": 2.4037723726716993, + "learning_rate": 9.990866818380436e-06, + "loss": 0.8011, + "step": 1588 + }, + { + "epoch": 0.048700502635772955, + "grad_norm": 2.3089305141418746, + "learning_rate": 9.990836808898736e-06, + "loss": 0.8645, + "step": 1589 + }, + { + "epoch": 0.04873115115851416, + "grad_norm": 1.9801498199323009, + "learning_rate": 9.990806750241043e-06, + "loss": 0.7068, + "step": 1590 + }, + { + "epoch": 0.048761799681255366, + "grad_norm": 2.2780555349419775, + "learning_rate": 9.990776642407653e-06, + "loss": 0.7517, + "step": 1591 + }, + { + "epoch": 0.048792448203996565, + "grad_norm": 2.352106120178732, + "learning_rate": 9.990746485398866e-06, + "loss": 0.7767, + "step": 1592 + }, + { + "epoch": 0.04882309672673777, + "grad_norm": 2.0606966572713925, + "learning_rate": 9.990716279214976e-06, + "loss": 0.8177, + "step": 1593 + }, + { + "epoch": 0.048853745249478976, + "grad_norm": 2.2766049026180784, + "learning_rate": 9.990686023856282e-06, + "loss": 0.8725, + "step": 1594 + }, + { + "epoch": 0.04888439377222018, + "grad_norm": 2.5015773213663612, + "learning_rate": 9.990655719323082e-06, + "loss": 0.9104, + "step": 1595 + }, + { + "epoch": 0.04891504229496138, + "grad_norm": 2.1851350333311266, + "learning_rate": 9.990625365615674e-06, + "loss": 0.7464, + "step": 1596 + }, + { + "epoch": 0.048945690817702586, + "grad_norm": 2.2561517502916244, + "learning_rate": 9.990594962734357e-06, + "loss": 0.6561, + "step": 1597 + }, + { + "epoch": 0.04897633934044379, + "grad_norm": 2.1135402182865946, + "learning_rate": 9.99056451067943e-06, + "loss": 0.8557, + "step": 1598 + }, + { + "epoch": 0.049006987863185, + "grad_norm": 2.489787353294821, + "learning_rate": 9.990534009451195e-06, + "loss": 0.8966, + "step": 1599 + }, + { + "epoch": 0.049037636385926196, + "grad_norm": 2.9350400113613277, + "learning_rate": 9.990503459049951e-06, + "loss": 0.8099, + "step": 1600 + }, + { + "epoch": 0.0490682849086674, + "grad_norm": 2.214705285770832, + "learning_rate": 9.990472859476002e-06, + "loss": 0.7401, + "step": 1601 + }, + { + "epoch": 0.04909893343140861, + "grad_norm": 2.2035408406340578, + "learning_rate": 9.990442210729646e-06, + "loss": 0.7346, + "step": 1602 + }, + { + 
"epoch": 0.04912958195414981, + "grad_norm": 2.131028543474141, + "learning_rate": 9.990411512811185e-06, + "loss": 0.8536, + "step": 1603 + }, + { + "epoch": 0.04916023047689101, + "grad_norm": 2.0136602050806953, + "learning_rate": 9.990380765720922e-06, + "loss": 0.7341, + "step": 1604 + }, + { + "epoch": 0.04919087899963222, + "grad_norm": 2.369504129203594, + "learning_rate": 9.990349969459162e-06, + "loss": 0.8422, + "step": 1605 + }, + { + "epoch": 0.04922152752237342, + "grad_norm": 1.4668070938316822, + "learning_rate": 9.990319124026205e-06, + "loss": 0.5428, + "step": 1606 + }, + { + "epoch": 0.04925217604511462, + "grad_norm": 2.1931353003361793, + "learning_rate": 9.990288229422357e-06, + "loss": 0.8014, + "step": 1607 + }, + { + "epoch": 0.04928282456785583, + "grad_norm": 2.4864880907607576, + "learning_rate": 9.990257285647923e-06, + "loss": 0.859, + "step": 1608 + }, + { + "epoch": 0.04931347309059703, + "grad_norm": 2.3182245745471253, + "learning_rate": 9.990226292703208e-06, + "loss": 0.7914, + "step": 1609 + }, + { + "epoch": 0.04934412161333824, + "grad_norm": 2.4210379523540206, + "learning_rate": 9.990195250588515e-06, + "loss": 0.8867, + "step": 1610 + }, + { + "epoch": 0.04937477013607944, + "grad_norm": 1.122151809964259, + "learning_rate": 9.990164159304152e-06, + "loss": 0.5537, + "step": 1611 + }, + { + "epoch": 0.04940541865882064, + "grad_norm": 2.413381521884658, + "learning_rate": 9.990133018850427e-06, + "loss": 0.8126, + "step": 1612 + }, + { + "epoch": 0.04943606718156185, + "grad_norm": 2.080186484748652, + "learning_rate": 9.990101829227643e-06, + "loss": 0.811, + "step": 1613 + }, + { + "epoch": 0.049466715704303055, + "grad_norm": 2.394258187418213, + "learning_rate": 9.990070590436107e-06, + "loss": 0.8895, + "step": 1614 + }, + { + "epoch": 0.049497364227044253, + "grad_norm": 2.414905787455873, + "learning_rate": 9.99003930247613e-06, + "loss": 0.7041, + "step": 1615 + }, + { + "epoch": 0.04952801274978546, + "grad_norm": 2.1419453036691576, + "learning_rate": 9.99000796534802e-06, + "loss": 0.7328, + "step": 1616 + }, + { + "epoch": 0.049558661272526665, + "grad_norm": 2.2456969224490777, + "learning_rate": 9.989976579052082e-06, + "loss": 0.8374, + "step": 1617 + }, + { + "epoch": 0.04958930979526787, + "grad_norm": 2.0993628422160526, + "learning_rate": 9.989945143588633e-06, + "loss": 0.7602, + "step": 1618 + }, + { + "epoch": 0.04961995831800907, + "grad_norm": 2.5397942722737152, + "learning_rate": 9.989913658957973e-06, + "loss": 0.7686, + "step": 1619 + }, + { + "epoch": 0.049650606840750275, + "grad_norm": 2.0159036222623605, + "learning_rate": 9.989882125160419e-06, + "loss": 0.7855, + "step": 1620 + }, + { + "epoch": 0.04968125536349148, + "grad_norm": 2.130360057141736, + "learning_rate": 9.98985054219628e-06, + "loss": 0.7982, + "step": 1621 + }, + { + "epoch": 0.049711903886232686, + "grad_norm": 2.1550804255299574, + "learning_rate": 9.989818910065868e-06, + "loss": 0.8883, + "step": 1622 + }, + { + "epoch": 0.049742552408973885, + "grad_norm": 2.363877776960879, + "learning_rate": 9.989787228769489e-06, + "loss": 0.7135, + "step": 1623 + }, + { + "epoch": 0.04977320093171509, + "grad_norm": 2.406258989014138, + "learning_rate": 9.989755498307464e-06, + "loss": 0.8858, + "step": 1624 + }, + { + "epoch": 0.049803849454456296, + "grad_norm": 2.1448915090205767, + "learning_rate": 9.9897237186801e-06, + "loss": 0.7907, + "step": 1625 + }, + { + "epoch": 0.0498344979771975, + "grad_norm": 2.3965375186235347, + "learning_rate": 
9.98969188988771e-06, + "loss": 0.8074, + "step": 1626 + }, + { + "epoch": 0.0498651464999387, + "grad_norm": 2.5125080880284436, + "learning_rate": 9.989660011930612e-06, + "loss": 0.834, + "step": 1627 + }, + { + "epoch": 0.049895795022679906, + "grad_norm": 1.9955916351227934, + "learning_rate": 9.989628084809116e-06, + "loss": 0.8276, + "step": 1628 + }, + { + "epoch": 0.04992644354542111, + "grad_norm": 2.3054380664028686, + "learning_rate": 9.989596108523537e-06, + "loss": 0.7996, + "step": 1629 + }, + { + "epoch": 0.04995709206816232, + "grad_norm": 2.2942126472285165, + "learning_rate": 9.989564083074193e-06, + "loss": 0.836, + "step": 1630 + }, + { + "epoch": 0.049987740590903516, + "grad_norm": 2.0814244723048114, + "learning_rate": 9.989532008461396e-06, + "loss": 0.8192, + "step": 1631 + }, + { + "epoch": 0.05001838911364472, + "grad_norm": 1.1166918793317753, + "learning_rate": 9.989499884685465e-06, + "loss": 0.5444, + "step": 1632 + }, + { + "epoch": 0.05004903763638593, + "grad_norm": 1.997778223768244, + "learning_rate": 9.989467711746711e-06, + "loss": 0.8126, + "step": 1633 + }, + { + "epoch": 0.05007968615912713, + "grad_norm": 2.3154032667508013, + "learning_rate": 9.98943548964546e-06, + "loss": 0.8258, + "step": 1634 + }, + { + "epoch": 0.05011033468186833, + "grad_norm": 0.9522792971073968, + "learning_rate": 9.98940321838202e-06, + "loss": 0.5454, + "step": 1635 + }, + { + "epoch": 0.05014098320460954, + "grad_norm": 2.40935311235054, + "learning_rate": 9.989370897956718e-06, + "loss": 0.8147, + "step": 1636 + }, + { + "epoch": 0.05017163172735074, + "grad_norm": 2.1397137449528354, + "learning_rate": 9.989338528369866e-06, + "loss": 0.8227, + "step": 1637 + }, + { + "epoch": 0.05020228025009194, + "grad_norm": 2.3852089045422975, + "learning_rate": 9.989306109621783e-06, + "loss": 0.7977, + "step": 1638 + }, + { + "epoch": 0.05023292877283315, + "grad_norm": 2.212449145128613, + "learning_rate": 9.989273641712792e-06, + "loss": 0.7656, + "step": 1639 + }, + { + "epoch": 0.05026357729557435, + "grad_norm": 1.1788598560851444, + "learning_rate": 9.989241124643212e-06, + "loss": 0.5516, + "step": 1640 + }, + { + "epoch": 0.05029422581831556, + "grad_norm": 1.0397141121237117, + "learning_rate": 9.989208558413361e-06, + "loss": 0.5518, + "step": 1641 + }, + { + "epoch": 0.05032487434105676, + "grad_norm": 2.233134655957945, + "learning_rate": 9.989175943023563e-06, + "loss": 0.8179, + "step": 1642 + }, + { + "epoch": 0.050355522863797963, + "grad_norm": 2.301555294017525, + "learning_rate": 9.989143278474136e-06, + "loss": 0.7698, + "step": 1643 + }, + { + "epoch": 0.05038617138653917, + "grad_norm": 2.2730910962278443, + "learning_rate": 9.989110564765406e-06, + "loss": 0.8133, + "step": 1644 + }, + { + "epoch": 0.050416819909280375, + "grad_norm": 2.056780503519396, + "learning_rate": 9.989077801897691e-06, + "loss": 0.7934, + "step": 1645 + }, + { + "epoch": 0.050447468432021574, + "grad_norm": 2.2604544305651992, + "learning_rate": 9.989044989871317e-06, + "loss": 0.7984, + "step": 1646 + }, + { + "epoch": 0.05047811695476278, + "grad_norm": 2.322907672732624, + "learning_rate": 9.989012128686609e-06, + "loss": 0.9275, + "step": 1647 + }, + { + "epoch": 0.050508765477503985, + "grad_norm": 2.0801735274727235, + "learning_rate": 9.988979218343884e-06, + "loss": 0.7525, + "step": 1648 + }, + { + "epoch": 0.05053941400024519, + "grad_norm": 2.4958763661431096, + "learning_rate": 9.988946258843471e-06, + "loss": 0.8154, + "step": 1649 + }, + { + "epoch": 
0.05057006252298639, + "grad_norm": 2.287989321116229, + "learning_rate": 9.988913250185695e-06, + "loss": 0.8532, + "step": 1650 + }, + { + "epoch": 0.050600711045727595, + "grad_norm": 2.0427526955648405, + "learning_rate": 9.98888019237088e-06, + "loss": 0.7055, + "step": 1651 + }, + { + "epoch": 0.0506313595684688, + "grad_norm": 2.2386462328154275, + "learning_rate": 9.988847085399351e-06, + "loss": 0.8376, + "step": 1652 + }, + { + "epoch": 0.050662008091210006, + "grad_norm": 2.465729614683851, + "learning_rate": 9.988813929271438e-06, + "loss": 0.8173, + "step": 1653 + }, + { + "epoch": 0.050692656613951205, + "grad_norm": 2.3268730890866225, + "learning_rate": 9.988780723987461e-06, + "loss": 0.785, + "step": 1654 + }, + { + "epoch": 0.05072330513669241, + "grad_norm": 1.5631221918645464, + "learning_rate": 9.988747469547752e-06, + "loss": 0.5295, + "step": 1655 + }, + { + "epoch": 0.050753953659433616, + "grad_norm": 2.3608964922170768, + "learning_rate": 9.98871416595264e-06, + "loss": 0.7881, + "step": 1656 + }, + { + "epoch": 0.05078460218217482, + "grad_norm": 2.3049122995143083, + "learning_rate": 9.98868081320245e-06, + "loss": 0.7638, + "step": 1657 + }, + { + "epoch": 0.05081525070491602, + "grad_norm": 2.346642591344067, + "learning_rate": 9.988647411297512e-06, + "loss": 0.8811, + "step": 1658 + }, + { + "epoch": 0.050845899227657226, + "grad_norm": 2.3568567156184512, + "learning_rate": 9.988613960238152e-06, + "loss": 0.7893, + "step": 1659 + }, + { + "epoch": 0.05087654775039843, + "grad_norm": 2.0398195348540633, + "learning_rate": 9.988580460024705e-06, + "loss": 0.7532, + "step": 1660 + }, + { + "epoch": 0.05090719627313964, + "grad_norm": 2.1985507845244205, + "learning_rate": 9.988546910657497e-06, + "loss": 0.8123, + "step": 1661 + }, + { + "epoch": 0.050937844795880836, + "grad_norm": 2.2159375477297596, + "learning_rate": 9.98851331213686e-06, + "loss": 0.7712, + "step": 1662 + }, + { + "epoch": 0.05096849331862204, + "grad_norm": 2.022708375571199, + "learning_rate": 9.988479664463127e-06, + "loss": 0.7265, + "step": 1663 + }, + { + "epoch": 0.05099914184136325, + "grad_norm": 2.3041414362477743, + "learning_rate": 9.988445967636624e-06, + "loss": 0.6888, + "step": 1664 + }, + { + "epoch": 0.05102979036410445, + "grad_norm": 1.2544221670707971, + "learning_rate": 9.988412221657689e-06, + "loss": 0.551, + "step": 1665 + }, + { + "epoch": 0.05106043888684565, + "grad_norm": 2.468830000630046, + "learning_rate": 9.988378426526653e-06, + "loss": 0.8172, + "step": 1666 + }, + { + "epoch": 0.05109108740958686, + "grad_norm": 2.127800561268766, + "learning_rate": 9.988344582243845e-06, + "loss": 0.6761, + "step": 1667 + }, + { + "epoch": 0.05112173593232806, + "grad_norm": 2.0751512867685467, + "learning_rate": 9.988310688809603e-06, + "loss": 0.7537, + "step": 1668 + }, + { + "epoch": 0.05115238445506927, + "grad_norm": 2.3757249901031696, + "learning_rate": 9.98827674622426e-06, + "loss": 0.742, + "step": 1669 + }, + { + "epoch": 0.05118303297781047, + "grad_norm": 2.167632815682499, + "learning_rate": 9.988242754488149e-06, + "loss": 0.7888, + "step": 1670 + }, + { + "epoch": 0.051213681500551674, + "grad_norm": 2.153341693175387, + "learning_rate": 9.988208713601606e-06, + "loss": 0.6858, + "step": 1671 + }, + { + "epoch": 0.05124433002329288, + "grad_norm": 1.0228534104961036, + "learning_rate": 9.988174623564967e-06, + "loss": 0.5454, + "step": 1672 + }, + { + "epoch": 0.05127497854603408, + "grad_norm": 2.4615997831554037, + "learning_rate": 
9.988140484378567e-06, + "loss": 0.7236, + "step": 1673 + }, + { + "epoch": 0.051305627068775284, + "grad_norm": 2.0447316266068043, + "learning_rate": 9.988106296042741e-06, + "loss": 0.7067, + "step": 1674 + }, + { + "epoch": 0.05133627559151649, + "grad_norm": 2.047848087296416, + "learning_rate": 9.988072058557829e-06, + "loss": 0.7532, + "step": 1675 + }, + { + "epoch": 0.051366924114257695, + "grad_norm": 2.3586480316707283, + "learning_rate": 9.988037771924167e-06, + "loss": 0.781, + "step": 1676 + }, + { + "epoch": 0.051397572636998894, + "grad_norm": 2.2751231149504623, + "learning_rate": 9.98800343614209e-06, + "loss": 0.8404, + "step": 1677 + }, + { + "epoch": 0.0514282211597401, + "grad_norm": 0.9617417328846181, + "learning_rate": 9.987969051211942e-06, + "loss": 0.5372, + "step": 1678 + }, + { + "epoch": 0.051458869682481305, + "grad_norm": 2.1873990819083953, + "learning_rate": 9.98793461713406e-06, + "loss": 0.6999, + "step": 1679 + }, + { + "epoch": 0.05148951820522251, + "grad_norm": 2.3392412419555355, + "learning_rate": 9.987900133908777e-06, + "loss": 0.8196, + "step": 1680 + }, + { + "epoch": 0.05152016672796371, + "grad_norm": 2.258210083790024, + "learning_rate": 9.98786560153644e-06, + "loss": 0.7812, + "step": 1681 + }, + { + "epoch": 0.051550815250704915, + "grad_norm": 2.5184118584767328, + "learning_rate": 9.987831020017389e-06, + "loss": 0.8118, + "step": 1682 + }, + { + "epoch": 0.05158146377344612, + "grad_norm": 2.3821221166720603, + "learning_rate": 9.98779638935196e-06, + "loss": 0.8068, + "step": 1683 + }, + { + "epoch": 0.051612112296187326, + "grad_norm": 2.157760764230448, + "learning_rate": 9.987761709540497e-06, + "loss": 0.8455, + "step": 1684 + }, + { + "epoch": 0.051642760818928525, + "grad_norm": 2.0705524868862684, + "learning_rate": 9.987726980583343e-06, + "loss": 0.789, + "step": 1685 + }, + { + "epoch": 0.05167340934166973, + "grad_norm": 2.192715000462249, + "learning_rate": 9.98769220248084e-06, + "loss": 0.7805, + "step": 1686 + }, + { + "epoch": 0.051704057864410936, + "grad_norm": 2.1343504044578725, + "learning_rate": 9.987657375233329e-06, + "loss": 0.8226, + "step": 1687 + }, + { + "epoch": 0.05173470638715214, + "grad_norm": 2.5760686877135908, + "learning_rate": 9.987622498841151e-06, + "loss": 0.7794, + "step": 1688 + }, + { + "epoch": 0.05176535490989334, + "grad_norm": 1.9455153357589585, + "learning_rate": 9.987587573304655e-06, + "loss": 0.7582, + "step": 1689 + }, + { + "epoch": 0.051796003432634546, + "grad_norm": 2.196051478216042, + "learning_rate": 9.987552598624182e-06, + "loss": 0.8572, + "step": 1690 + }, + { + "epoch": 0.05182665195537575, + "grad_norm": 2.0087440594643287, + "learning_rate": 9.987517574800077e-06, + "loss": 0.7654, + "step": 1691 + }, + { + "epoch": 0.05185730047811696, + "grad_norm": 1.9111241303659623, + "learning_rate": 9.987482501832686e-06, + "loss": 0.7801, + "step": 1692 + }, + { + "epoch": 0.051887949000858156, + "grad_norm": 2.239183063840295, + "learning_rate": 9.987447379722353e-06, + "loss": 0.7781, + "step": 1693 + }, + { + "epoch": 0.05191859752359936, + "grad_norm": 2.12612564129964, + "learning_rate": 9.987412208469424e-06, + "loss": 0.79, + "step": 1694 + }, + { + "epoch": 0.05194924604634057, + "grad_norm": 2.320439879382839, + "learning_rate": 9.987376988074248e-06, + "loss": 0.7339, + "step": 1695 + }, + { + "epoch": 0.051979894569081773, + "grad_norm": 1.9048492142448357, + "learning_rate": 9.987341718537169e-06, + "loss": 0.7335, + "step": 1696 + }, + { + "epoch": 
0.05201054309182297, + "grad_norm": 2.1573021898571048, + "learning_rate": 9.987306399858537e-06, + "loss": 0.7735, + "step": 1697 + }, + { + "epoch": 0.05204119161456418, + "grad_norm": 2.2720040267010915, + "learning_rate": 9.987271032038698e-06, + "loss": 0.8715, + "step": 1698 + }, + { + "epoch": 0.052071840137305384, + "grad_norm": 2.0097556043725193, + "learning_rate": 9.987235615078003e-06, + "loss": 0.7599, + "step": 1699 + }, + { + "epoch": 0.05210248866004659, + "grad_norm": 3.1679820260016984, + "learning_rate": 9.987200148976798e-06, + "loss": 0.7336, + "step": 1700 + }, + { + "epoch": 0.05213313718278779, + "grad_norm": 2.3249559748002695, + "learning_rate": 9.987164633735436e-06, + "loss": 0.7538, + "step": 1701 + }, + { + "epoch": 0.052163785705528994, + "grad_norm": 2.638907367190402, + "learning_rate": 9.987129069354264e-06, + "loss": 0.706, + "step": 1702 + }, + { + "epoch": 0.0521944342282702, + "grad_norm": 1.209757492472639, + "learning_rate": 9.987093455833632e-06, + "loss": 0.541, + "step": 1703 + }, + { + "epoch": 0.0522250827510114, + "grad_norm": 1.0165893828046557, + "learning_rate": 9.987057793173893e-06, + "loss": 0.5432, + "step": 1704 + }, + { + "epoch": 0.052255731273752604, + "grad_norm": 2.3201732359809917, + "learning_rate": 9.987022081375397e-06, + "loss": 0.8571, + "step": 1705 + }, + { + "epoch": 0.05228637979649381, + "grad_norm": 2.280809625087376, + "learning_rate": 9.986986320438496e-06, + "loss": 0.7841, + "step": 1706 + }, + { + "epoch": 0.052317028319235015, + "grad_norm": 2.1815797408598017, + "learning_rate": 9.986950510363544e-06, + "loss": 0.7255, + "step": 1707 + }, + { + "epoch": 0.052347676841976214, + "grad_norm": 2.119142982554711, + "learning_rate": 9.986914651150894e-06, + "loss": 0.8343, + "step": 1708 + }, + { + "epoch": 0.05237832536471742, + "grad_norm": 2.0968938579953917, + "learning_rate": 9.986878742800895e-06, + "loss": 0.7148, + "step": 1709 + }, + { + "epoch": 0.052408973887458625, + "grad_norm": 1.6205493025716804, + "learning_rate": 9.986842785313906e-06, + "loss": 0.5394, + "step": 1710 + }, + { + "epoch": 0.05243962241019983, + "grad_norm": 1.238507120857092, + "learning_rate": 9.986806778690277e-06, + "loss": 0.528, + "step": 1711 + }, + { + "epoch": 0.05247027093294103, + "grad_norm": 2.147560304839575, + "learning_rate": 9.986770722930368e-06, + "loss": 0.7619, + "step": 1712 + }, + { + "epoch": 0.052500919455682235, + "grad_norm": 2.335992796844048, + "learning_rate": 9.986734618034529e-06, + "loss": 0.7488, + "step": 1713 + }, + { + "epoch": 0.05253156797842344, + "grad_norm": 2.1431134530599407, + "learning_rate": 9.986698464003117e-06, + "loss": 0.7758, + "step": 1714 + }, + { + "epoch": 0.052562216501164646, + "grad_norm": 1.9373571199339419, + "learning_rate": 9.986662260836491e-06, + "loss": 0.7341, + "step": 1715 + }, + { + "epoch": 0.052592865023905845, + "grad_norm": 1.6174366956292199, + "learning_rate": 9.986626008535005e-06, + "loss": 0.5644, + "step": 1716 + }, + { + "epoch": 0.05262351354664705, + "grad_norm": 2.437279245990581, + "learning_rate": 9.986589707099017e-06, + "loss": 0.7992, + "step": 1717 + }, + { + "epoch": 0.052654162069388256, + "grad_norm": 2.41231060141157, + "learning_rate": 9.986553356528885e-06, + "loss": 0.7596, + "step": 1718 + }, + { + "epoch": 0.05268481059212946, + "grad_norm": 1.9796487865215375, + "learning_rate": 9.986516956824967e-06, + "loss": 0.6205, + "step": 1719 + }, + { + "epoch": 0.05271545911487066, + "grad_norm": 1.0867781164739536, + "learning_rate": 
9.98648050798762e-06, + "loss": 0.5664, + "step": 1720 + }, + { + "epoch": 0.052746107637611866, + "grad_norm": 2.191493143115638, + "learning_rate": 9.986444010017207e-06, + "loss": 0.8068, + "step": 1721 + }, + { + "epoch": 0.05277675616035307, + "grad_norm": 0.9416542366229889, + "learning_rate": 9.986407462914086e-06, + "loss": 0.5216, + "step": 1722 + }, + { + "epoch": 0.05280740468309428, + "grad_norm": 0.9589313154232326, + "learning_rate": 9.986370866678614e-06, + "loss": 0.5317, + "step": 1723 + }, + { + "epoch": 0.05283805320583548, + "grad_norm": 2.0945086229942302, + "learning_rate": 9.986334221311155e-06, + "loss": 0.8377, + "step": 1724 + }, + { + "epoch": 0.05286870172857668, + "grad_norm": 2.021724438611422, + "learning_rate": 9.986297526812068e-06, + "loss": 0.8668, + "step": 1725 + }, + { + "epoch": 0.05289935025131789, + "grad_norm": 2.4249245074303913, + "learning_rate": 9.986260783181718e-06, + "loss": 0.8293, + "step": 1726 + }, + { + "epoch": 0.052929998774059094, + "grad_norm": 1.1303515506598083, + "learning_rate": 9.986223990420462e-06, + "loss": 0.5363, + "step": 1727 + }, + { + "epoch": 0.05296064729680029, + "grad_norm": 0.9964537276425889, + "learning_rate": 9.986187148528668e-06, + "loss": 0.4971, + "step": 1728 + }, + { + "epoch": 0.0529912958195415, + "grad_norm": 1.9128725862623237, + "learning_rate": 9.986150257506696e-06, + "loss": 0.7325, + "step": 1729 + }, + { + "epoch": 0.053021944342282704, + "grad_norm": 2.094666961830216, + "learning_rate": 9.986113317354907e-06, + "loss": 0.835, + "step": 1730 + }, + { + "epoch": 0.05305259286502391, + "grad_norm": 0.9405516129167963, + "learning_rate": 9.986076328073669e-06, + "loss": 0.5254, + "step": 1731 + }, + { + "epoch": 0.05308324138776511, + "grad_norm": 2.1664767052468155, + "learning_rate": 9.986039289663346e-06, + "loss": 0.8077, + "step": 1732 + }, + { + "epoch": 0.053113889910506314, + "grad_norm": 0.9594887766036231, + "learning_rate": 9.986002202124302e-06, + "loss": 0.4928, + "step": 1733 + }, + { + "epoch": 0.05314453843324752, + "grad_norm": 1.0019375112572841, + "learning_rate": 9.985965065456902e-06, + "loss": 0.5317, + "step": 1734 + }, + { + "epoch": 0.05317518695598872, + "grad_norm": 2.4213816334588985, + "learning_rate": 9.985927879661513e-06, + "loss": 0.7986, + "step": 1735 + }, + { + "epoch": 0.053205835478729924, + "grad_norm": 2.264941272583994, + "learning_rate": 9.9858906447385e-06, + "loss": 0.8029, + "step": 1736 + }, + { + "epoch": 0.05323648400147113, + "grad_norm": 0.9423888720166836, + "learning_rate": 9.985853360688232e-06, + "loss": 0.5306, + "step": 1737 + }, + { + "epoch": 0.053267132524212335, + "grad_norm": 2.048893492873479, + "learning_rate": 9.985816027511075e-06, + "loss": 0.7928, + "step": 1738 + }, + { + "epoch": 0.053297781046953534, + "grad_norm": 1.9135643453660038, + "learning_rate": 9.985778645207397e-06, + "loss": 0.7726, + "step": 1739 + }, + { + "epoch": 0.05332842956969474, + "grad_norm": 2.5824471656734462, + "learning_rate": 9.985741213777566e-06, + "loss": 0.8821, + "step": 1740 + }, + { + "epoch": 0.053359078092435945, + "grad_norm": 1.9691646513296786, + "learning_rate": 9.985703733221951e-06, + "loss": 0.7206, + "step": 1741 + }, + { + "epoch": 0.05338972661517715, + "grad_norm": 2.1374715634078205, + "learning_rate": 9.985666203540923e-06, + "loss": 0.7718, + "step": 1742 + }, + { + "epoch": 0.05342037513791835, + "grad_norm": 2.380227554893973, + "learning_rate": 9.985628624734847e-06, + "loss": 0.8, + "step": 1743 + }, + { + "epoch": 
0.053451023660659555, + "grad_norm": 2.156729887703542, + "learning_rate": 9.985590996804099e-06, + "loss": 0.8299, + "step": 1744 + }, + { + "epoch": 0.05348167218340076, + "grad_norm": 1.2252910319725205, + "learning_rate": 9.985553319749047e-06, + "loss": 0.5309, + "step": 1745 + }, + { + "epoch": 0.053512320706141966, + "grad_norm": 1.0525365253581744, + "learning_rate": 9.985515593570061e-06, + "loss": 0.5372, + "step": 1746 + }, + { + "epoch": 0.053542969228883165, + "grad_norm": 2.340990462293965, + "learning_rate": 9.985477818267517e-06, + "loss": 0.7604, + "step": 1747 + }, + { + "epoch": 0.05357361775162437, + "grad_norm": 2.3805889715665622, + "learning_rate": 9.985439993841783e-06, + "loss": 0.8291, + "step": 1748 + }, + { + "epoch": 0.053604266274365577, + "grad_norm": 2.194512288546915, + "learning_rate": 9.985402120293232e-06, + "loss": 0.7745, + "step": 1749 + }, + { + "epoch": 0.05363491479710678, + "grad_norm": 2.429115731993358, + "learning_rate": 9.98536419762224e-06, + "loss": 0.8288, + "step": 1750 + }, + { + "epoch": 0.05366556331984798, + "grad_norm": 1.2253884907519894, + "learning_rate": 9.985326225829179e-06, + "loss": 0.5013, + "step": 1751 + }, + { + "epoch": 0.05369621184258919, + "grad_norm": 2.619652052138028, + "learning_rate": 9.985288204914424e-06, + "loss": 0.7936, + "step": 1752 + }, + { + "epoch": 0.05372686036533039, + "grad_norm": 2.0888763535089705, + "learning_rate": 9.985250134878346e-06, + "loss": 0.822, + "step": 1753 + }, + { + "epoch": 0.0537575088880716, + "grad_norm": 2.541351378437032, + "learning_rate": 9.985212015721325e-06, + "loss": 0.791, + "step": 1754 + }, + { + "epoch": 0.0537881574108128, + "grad_norm": 2.1258164101373236, + "learning_rate": 9.985173847443733e-06, + "loss": 0.8569, + "step": 1755 + }, + { + "epoch": 0.053818805933554, + "grad_norm": 1.800321450570832, + "learning_rate": 9.985135630045948e-06, + "loss": 0.7688, + "step": 1756 + }, + { + "epoch": 0.05384945445629521, + "grad_norm": 2.0819980637082707, + "learning_rate": 9.985097363528347e-06, + "loss": 0.6871, + "step": 1757 + }, + { + "epoch": 0.053880102979036414, + "grad_norm": 2.0046236907642165, + "learning_rate": 9.985059047891305e-06, + "loss": 0.6266, + "step": 1758 + }, + { + "epoch": 0.05391075150177761, + "grad_norm": 2.322565878656705, + "learning_rate": 9.9850206831352e-06, + "loss": 0.8001, + "step": 1759 + }, + { + "epoch": 0.05394140002451882, + "grad_norm": 2.2929263574184557, + "learning_rate": 9.984982269260414e-06, + "loss": 0.7902, + "step": 1760 + }, + { + "epoch": 0.053972048547260024, + "grad_norm": 2.2767548873202106, + "learning_rate": 9.984943806267319e-06, + "loss": 0.7925, + "step": 1761 + }, + { + "epoch": 0.05400269707000123, + "grad_norm": 2.193547522009201, + "learning_rate": 9.9849052941563e-06, + "loss": 0.8025, + "step": 1762 + }, + { + "epoch": 0.05403334559274243, + "grad_norm": 2.016801128518426, + "learning_rate": 9.984866732927731e-06, + "loss": 0.7434, + "step": 1763 + }, + { + "epoch": 0.054063994115483634, + "grad_norm": 1.0626413065952682, + "learning_rate": 9.984828122581998e-06, + "loss": 0.5347, + "step": 1764 + }, + { + "epoch": 0.05409464263822484, + "grad_norm": 2.7988469224023516, + "learning_rate": 9.984789463119475e-06, + "loss": 0.7848, + "step": 1765 + }, + { + "epoch": 0.05412529116096604, + "grad_norm": 2.044741502074766, + "learning_rate": 9.984750754540547e-06, + "loss": 0.8079, + "step": 1766 + }, + { + "epoch": 0.054155939683707244, + "grad_norm": 2.203024944886088, + "learning_rate": 
9.984711996845596e-06, + "loss": 0.8894, + "step": 1767 + }, + { + "epoch": 0.05418658820644845, + "grad_norm": 0.9748598534463385, + "learning_rate": 9.984673190034998e-06, + "loss": 0.5456, + "step": 1768 + }, + { + "epoch": 0.054217236729189655, + "grad_norm": 2.2985467844084932, + "learning_rate": 9.984634334109143e-06, + "loss": 0.7257, + "step": 1769 + }, + { + "epoch": 0.054247885251930854, + "grad_norm": 2.2226639724004165, + "learning_rate": 9.984595429068411e-06, + "loss": 0.7205, + "step": 1770 + }, + { + "epoch": 0.05427853377467206, + "grad_norm": 0.998109792944901, + "learning_rate": 9.984556474913183e-06, + "loss": 0.5088, + "step": 1771 + }, + { + "epoch": 0.054309182297413265, + "grad_norm": 2.1682431524948282, + "learning_rate": 9.984517471643846e-06, + "loss": 0.7256, + "step": 1772 + }, + { + "epoch": 0.05433983082015447, + "grad_norm": 2.65241217655717, + "learning_rate": 9.984478419260782e-06, + "loss": 0.77, + "step": 1773 + }, + { + "epoch": 0.05437047934289567, + "grad_norm": 2.357042320123524, + "learning_rate": 9.984439317764378e-06, + "loss": 0.7917, + "step": 1774 + }, + { + "epoch": 0.054401127865636875, + "grad_norm": 0.9638505130345171, + "learning_rate": 9.984400167155017e-06, + "loss": 0.5378, + "step": 1775 + }, + { + "epoch": 0.05443177638837808, + "grad_norm": 2.2037799213466838, + "learning_rate": 9.984360967433086e-06, + "loss": 0.7528, + "step": 1776 + }, + { + "epoch": 0.054462424911119287, + "grad_norm": 2.14173914858239, + "learning_rate": 9.984321718598972e-06, + "loss": 0.8032, + "step": 1777 + }, + { + "epoch": 0.054493073433860485, + "grad_norm": 2.215972731763003, + "learning_rate": 9.98428242065306e-06, + "loss": 0.8097, + "step": 1778 + }, + { + "epoch": 0.05452372195660169, + "grad_norm": 2.410529975463277, + "learning_rate": 9.98424307359574e-06, + "loss": 0.7632, + "step": 1779 + }, + { + "epoch": 0.0545543704793429, + "grad_norm": 2.0495392792877025, + "learning_rate": 9.984203677427393e-06, + "loss": 0.7466, + "step": 1780 + }, + { + "epoch": 0.0545850190020841, + "grad_norm": 2.093307823388011, + "learning_rate": 9.984164232148415e-06, + "loss": 0.6682, + "step": 1781 + }, + { + "epoch": 0.0546156675248253, + "grad_norm": 2.4973440785999985, + "learning_rate": 9.984124737759192e-06, + "loss": 0.6969, + "step": 1782 + }, + { + "epoch": 0.05464631604756651, + "grad_norm": 2.289172445216251, + "learning_rate": 9.984085194260112e-06, + "loss": 0.7867, + "step": 1783 + }, + { + "epoch": 0.05467696457030771, + "grad_norm": 2.0098326728648934, + "learning_rate": 9.984045601651566e-06, + "loss": 0.8219, + "step": 1784 + }, + { + "epoch": 0.05470761309304892, + "grad_norm": 2.234207398888604, + "learning_rate": 9.984005959933942e-06, + "loss": 0.7978, + "step": 1785 + }, + { + "epoch": 0.05473826161579012, + "grad_norm": 2.212460051746992, + "learning_rate": 9.983966269107634e-06, + "loss": 0.7841, + "step": 1786 + }, + { + "epoch": 0.05476891013853132, + "grad_norm": 1.0403415261031075, + "learning_rate": 9.98392652917303e-06, + "loss": 0.5436, + "step": 1787 + }, + { + "epoch": 0.05479955866127253, + "grad_norm": 2.085164085483519, + "learning_rate": 9.983886740130521e-06, + "loss": 0.8134, + "step": 1788 + }, + { + "epoch": 0.054830207184013734, + "grad_norm": 2.5978243494131537, + "learning_rate": 9.983846901980505e-06, + "loss": 0.7363, + "step": 1789 + }, + { + "epoch": 0.05486085570675493, + "grad_norm": 2.085280177031837, + "learning_rate": 9.983807014723367e-06, + "loss": 0.7717, + "step": 1790 + }, + { + "epoch": 0.05489150422949614, 
+ "grad_norm": 2.097915599355097, + "learning_rate": 9.983767078359505e-06, + "loss": 0.8331, + "step": 1791 + }, + { + "epoch": 0.054922152752237344, + "grad_norm": 2.156486417898269, + "learning_rate": 9.983727092889309e-06, + "loss": 0.7619, + "step": 1792 + }, + { + "epoch": 0.05495280127497855, + "grad_norm": 2.304663572175765, + "learning_rate": 9.983687058313177e-06, + "loss": 0.6746, + "step": 1793 + }, + { + "epoch": 0.05498344979771975, + "grad_norm": 2.44008250041993, + "learning_rate": 9.9836469746315e-06, + "loss": 0.8495, + "step": 1794 + }, + { + "epoch": 0.055014098320460954, + "grad_norm": 2.0151423956739016, + "learning_rate": 9.983606841844672e-06, + "loss": 0.768, + "step": 1795 + }, + { + "epoch": 0.05504474684320216, + "grad_norm": 2.1520603963605494, + "learning_rate": 9.983566659953094e-06, + "loss": 0.744, + "step": 1796 + }, + { + "epoch": 0.05507539536594336, + "grad_norm": 1.0995297190458408, + "learning_rate": 9.983526428957157e-06, + "loss": 0.5276, + "step": 1797 + }, + { + "epoch": 0.055106043888684564, + "grad_norm": 2.1278382311743917, + "learning_rate": 9.98348614885726e-06, + "loss": 0.7947, + "step": 1798 + }, + { + "epoch": 0.05513669241142577, + "grad_norm": 2.3050703360233036, + "learning_rate": 9.983445819653798e-06, + "loss": 0.8548, + "step": 1799 + }, + { + "epoch": 0.055167340934166975, + "grad_norm": 2.66671171257881, + "learning_rate": 9.983405441347171e-06, + "loss": 0.7084, + "step": 1800 + }, + { + "epoch": 0.055197989456908174, + "grad_norm": 2.171458850719512, + "learning_rate": 9.983365013937774e-06, + "loss": 0.7909, + "step": 1801 + }, + { + "epoch": 0.05522863797964938, + "grad_norm": 2.16892589061846, + "learning_rate": 9.983324537426007e-06, + "loss": 0.7864, + "step": 1802 + }, + { + "epoch": 0.055259286502390585, + "grad_norm": 2.077696195501957, + "learning_rate": 9.983284011812267e-06, + "loss": 0.7147, + "step": 1803 + }, + { + "epoch": 0.05528993502513179, + "grad_norm": 2.195674436465187, + "learning_rate": 9.983243437096955e-06, + "loss": 0.7613, + "step": 1804 + }, + { + "epoch": 0.05532058354787299, + "grad_norm": 2.1471957628661924, + "learning_rate": 9.983202813280472e-06, + "loss": 0.8064, + "step": 1805 + }, + { + "epoch": 0.055351232070614195, + "grad_norm": 2.2568570726756954, + "learning_rate": 9.983162140363214e-06, + "loss": 0.6167, + "step": 1806 + }, + { + "epoch": 0.0553818805933554, + "grad_norm": 2.194477472803308, + "learning_rate": 9.983121418345587e-06, + "loss": 0.7154, + "step": 1807 + }, + { + "epoch": 0.05541252911609661, + "grad_norm": 1.0601689040472042, + "learning_rate": 9.983080647227987e-06, + "loss": 0.5194, + "step": 1808 + }, + { + "epoch": 0.055443177638837805, + "grad_norm": 2.180452678363023, + "learning_rate": 9.98303982701082e-06, + "loss": 0.8373, + "step": 1809 + }, + { + "epoch": 0.05547382616157901, + "grad_norm": 2.5952292492801066, + "learning_rate": 9.982998957694487e-06, + "loss": 0.7622, + "step": 1810 + }, + { + "epoch": 0.05550447468432022, + "grad_norm": 0.9073910656626829, + "learning_rate": 9.98295803927939e-06, + "loss": 0.5575, + "step": 1811 + }, + { + "epoch": 0.05553512320706142, + "grad_norm": 2.1229427670774323, + "learning_rate": 9.982917071765932e-06, + "loss": 0.8795, + "step": 1812 + }, + { + "epoch": 0.05556577172980262, + "grad_norm": 2.064046020571693, + "learning_rate": 9.982876055154518e-06, + "loss": 0.8498, + "step": 1813 + }, + { + "epoch": 0.05559642025254383, + "grad_norm": 0.966879235125804, + "learning_rate": 9.98283498944555e-06, + "loss": 0.5389, + 
"step": 1814 + }, + { + "epoch": 0.05562706877528503, + "grad_norm": 2.6197225907571524, + "learning_rate": 9.982793874639436e-06, + "loss": 0.8016, + "step": 1815 + }, + { + "epoch": 0.05565771729802624, + "grad_norm": 2.474689797662183, + "learning_rate": 9.982752710736577e-06, + "loss": 0.8975, + "step": 1816 + }, + { + "epoch": 0.05568836582076744, + "grad_norm": 2.1646660237565825, + "learning_rate": 9.982711497737382e-06, + "loss": 0.7613, + "step": 1817 + }, + { + "epoch": 0.05571901434350864, + "grad_norm": 0.9065723245288918, + "learning_rate": 9.982670235642255e-06, + "loss": 0.5019, + "step": 1818 + }, + { + "epoch": 0.05574966286624985, + "grad_norm": 2.284938270553212, + "learning_rate": 9.982628924451603e-06, + "loss": 0.7394, + "step": 1819 + }, + { + "epoch": 0.055780311388991054, + "grad_norm": 2.1423879797722756, + "learning_rate": 9.982587564165835e-06, + "loss": 0.7957, + "step": 1820 + }, + { + "epoch": 0.05581095991173225, + "grad_norm": 0.9570770320087169, + "learning_rate": 9.982546154785355e-06, + "loss": 0.5356, + "step": 1821 + }, + { + "epoch": 0.05584160843447346, + "grad_norm": 2.6566745321293523, + "learning_rate": 9.982504696310574e-06, + "loss": 0.7868, + "step": 1822 + }, + { + "epoch": 0.055872256957214664, + "grad_norm": 2.3646069039960183, + "learning_rate": 9.982463188741897e-06, + "loss": 0.7655, + "step": 1823 + }, + { + "epoch": 0.05590290547995587, + "grad_norm": 0.9387525427329726, + "learning_rate": 9.982421632079738e-06, + "loss": 0.5351, + "step": 1824 + }, + { + "epoch": 0.05593355400269707, + "grad_norm": 2.2452975820685594, + "learning_rate": 9.982380026324505e-06, + "loss": 0.7874, + "step": 1825 + }, + { + "epoch": 0.055964202525438274, + "grad_norm": 0.9206123730241805, + "learning_rate": 9.982338371476604e-06, + "loss": 0.5485, + "step": 1826 + }, + { + "epoch": 0.05599485104817948, + "grad_norm": 1.9222028879724846, + "learning_rate": 9.982296667536449e-06, + "loss": 0.7851, + "step": 1827 + }, + { + "epoch": 0.05602549957092068, + "grad_norm": 2.0647086144757902, + "learning_rate": 9.98225491450445e-06, + "loss": 0.7382, + "step": 1828 + }, + { + "epoch": 0.056056148093661884, + "grad_norm": 2.164707842224681, + "learning_rate": 9.98221311238102e-06, + "loss": 0.8248, + "step": 1829 + }, + { + "epoch": 0.05608679661640309, + "grad_norm": 2.9271821344426527, + "learning_rate": 9.982171261166568e-06, + "loss": 0.7316, + "step": 1830 + }, + { + "epoch": 0.056117445139144295, + "grad_norm": 2.2573693197025504, + "learning_rate": 9.982129360861507e-06, + "loss": 0.8605, + "step": 1831 + }, + { + "epoch": 0.056148093661885494, + "grad_norm": 2.241913443846562, + "learning_rate": 9.982087411466253e-06, + "loss": 0.8007, + "step": 1832 + }, + { + "epoch": 0.0561787421846267, + "grad_norm": 2.126656274600751, + "learning_rate": 9.982045412981217e-06, + "loss": 0.8861, + "step": 1833 + }, + { + "epoch": 0.056209390707367905, + "grad_norm": 2.155124454525486, + "learning_rate": 9.982003365406812e-06, + "loss": 0.7309, + "step": 1834 + }, + { + "epoch": 0.05624003923010911, + "grad_norm": 2.0690148574371623, + "learning_rate": 9.981961268743453e-06, + "loss": 0.7277, + "step": 1835 + }, + { + "epoch": 0.05627068775285031, + "grad_norm": 1.9182496322793967, + "learning_rate": 9.981919122991554e-06, + "loss": 0.6944, + "step": 1836 + }, + { + "epoch": 0.056301336275591515, + "grad_norm": 2.2939328122338485, + "learning_rate": 9.981876928151532e-06, + "loss": 0.9118, + "step": 1837 + }, + { + "epoch": 0.05633198479833272, + "grad_norm": 
2.27303742416203, + "learning_rate": 9.9818346842238e-06, + "loss": 0.8857, + "step": 1838 + }, + { + "epoch": 0.05636263332107393, + "grad_norm": 2.2708189996553982, + "learning_rate": 9.98179239120878e-06, + "loss": 0.7928, + "step": 1839 + }, + { + "epoch": 0.056393281843815125, + "grad_norm": 1.9678258277838205, + "learning_rate": 9.981750049106882e-06, + "loss": 0.7445, + "step": 1840 + }, + { + "epoch": 0.05642393036655633, + "grad_norm": 2.3439487781029817, + "learning_rate": 9.981707657918529e-06, + "loss": 0.7807, + "step": 1841 + }, + { + "epoch": 0.05645457888929754, + "grad_norm": 2.1484134689489647, + "learning_rate": 9.981665217644134e-06, + "loss": 0.764, + "step": 1842 + }, + { + "epoch": 0.05648522741203874, + "grad_norm": 2.084359049381905, + "learning_rate": 9.981622728284117e-06, + "loss": 0.7396, + "step": 1843 + }, + { + "epoch": 0.05651587593477994, + "grad_norm": 2.5110886543490842, + "learning_rate": 9.981580189838896e-06, + "loss": 0.7946, + "step": 1844 + }, + { + "epoch": 0.05654652445752115, + "grad_norm": 2.2041313913734024, + "learning_rate": 9.981537602308892e-06, + "loss": 0.9313, + "step": 1845 + }, + { + "epoch": 0.05657717298026235, + "grad_norm": 2.4909367398899813, + "learning_rate": 9.981494965694522e-06, + "loss": 0.7679, + "step": 1846 + }, + { + "epoch": 0.05660782150300356, + "grad_norm": 2.353085329998533, + "learning_rate": 9.981452279996208e-06, + "loss": 0.8558, + "step": 1847 + }, + { + "epoch": 0.05663847002574476, + "grad_norm": 2.0441766231276004, + "learning_rate": 9.981409545214371e-06, + "loss": 0.7994, + "step": 1848 + }, + { + "epoch": 0.05666911854848596, + "grad_norm": 2.210481913595893, + "learning_rate": 9.981366761349431e-06, + "loss": 0.8669, + "step": 1849 + }, + { + "epoch": 0.05669976707122717, + "grad_norm": 2.388321045995909, + "learning_rate": 9.981323928401809e-06, + "loss": 0.6814, + "step": 1850 + }, + { + "epoch": 0.056730415593968374, + "grad_norm": 2.2725363632415183, + "learning_rate": 9.981281046371928e-06, + "loss": 0.8031, + "step": 1851 + }, + { + "epoch": 0.05676106411670957, + "grad_norm": 2.174530945812015, + "learning_rate": 9.981238115260212e-06, + "loss": 0.7705, + "step": 1852 + }, + { + "epoch": 0.05679171263945078, + "grad_norm": 2.167415919240571, + "learning_rate": 9.981195135067081e-06, + "loss": 0.8684, + "step": 1853 + }, + { + "epoch": 0.056822361162191984, + "grad_norm": 2.200782228017217, + "learning_rate": 9.981152105792959e-06, + "loss": 0.7244, + "step": 1854 + }, + { + "epoch": 0.05685300968493319, + "grad_norm": 2.521037942324619, + "learning_rate": 9.981109027438273e-06, + "loss": 0.8036, + "step": 1855 + }, + { + "epoch": 0.05688365820767439, + "grad_norm": 2.099286464554581, + "learning_rate": 9.981065900003444e-06, + "loss": 0.7527, + "step": 1856 + }, + { + "epoch": 0.056914306730415594, + "grad_norm": 2.3814309908702196, + "learning_rate": 9.981022723488897e-06, + "loss": 0.8363, + "step": 1857 + }, + { + "epoch": 0.0569449552531568, + "grad_norm": 2.043385494963043, + "learning_rate": 9.980979497895061e-06, + "loss": 0.7816, + "step": 1858 + }, + { + "epoch": 0.056975603775898, + "grad_norm": 1.9637899089890583, + "learning_rate": 9.980936223222358e-06, + "loss": 0.6476, + "step": 1859 + }, + { + "epoch": 0.057006252298639204, + "grad_norm": 2.078915290200768, + "learning_rate": 9.980892899471216e-06, + "loss": 0.7412, + "step": 1860 + }, + { + "epoch": 0.05703690082138041, + "grad_norm": 2.2179333856493417, + "learning_rate": 9.980849526642063e-06, + "loss": 0.7393, + "step": 1861 + 
}, + { + "epoch": 0.057067549344121615, + "grad_norm": 2.180820601137096, + "learning_rate": 9.980806104735325e-06, + "loss": 0.6696, + "step": 1862 + }, + { + "epoch": 0.057098197866862814, + "grad_norm": 2.129242169549953, + "learning_rate": 9.980762633751429e-06, + "loss": 0.7184, + "step": 1863 + }, + { + "epoch": 0.05712884638960402, + "grad_norm": 2.152140651835846, + "learning_rate": 9.980719113690805e-06, + "loss": 0.7577, + "step": 1864 + }, + { + "epoch": 0.057159494912345225, + "grad_norm": 1.0527734117414005, + "learning_rate": 9.980675544553881e-06, + "loss": 0.5228, + "step": 1865 + }, + { + "epoch": 0.05719014343508643, + "grad_norm": 2.0382157858634438, + "learning_rate": 9.980631926341086e-06, + "loss": 0.7296, + "step": 1866 + }, + { + "epoch": 0.05722079195782763, + "grad_norm": 2.0932922936835774, + "learning_rate": 9.980588259052853e-06, + "loss": 0.7889, + "step": 1867 + }, + { + "epoch": 0.057251440480568835, + "grad_norm": 2.3069749385299647, + "learning_rate": 9.980544542689606e-06, + "loss": 0.7777, + "step": 1868 + }, + { + "epoch": 0.05728208900331004, + "grad_norm": 1.9693740830675626, + "learning_rate": 9.98050077725178e-06, + "loss": 0.6602, + "step": 1869 + }, + { + "epoch": 0.05731273752605125, + "grad_norm": 1.8929865334286036, + "learning_rate": 9.980456962739808e-06, + "loss": 0.739, + "step": 1870 + }, + { + "epoch": 0.057343386048792445, + "grad_norm": 2.161153891348112, + "learning_rate": 9.980413099154116e-06, + "loss": 0.8819, + "step": 1871 + }, + { + "epoch": 0.05737403457153365, + "grad_norm": 2.9507127357101215, + "learning_rate": 9.98036918649514e-06, + "loss": 0.7698, + "step": 1872 + }, + { + "epoch": 0.05740468309427486, + "grad_norm": 2.3561365508880074, + "learning_rate": 9.980325224763315e-06, + "loss": 0.8715, + "step": 1873 + }, + { + "epoch": 0.05743533161701606, + "grad_norm": 2.198016607651309, + "learning_rate": 9.980281213959069e-06, + "loss": 0.8971, + "step": 1874 + }, + { + "epoch": 0.05746598013975726, + "grad_norm": 2.2512642725866976, + "learning_rate": 9.980237154082838e-06, + "loss": 0.8458, + "step": 1875 + }, + { + "epoch": 0.05749662866249847, + "grad_norm": 2.1987700119500846, + "learning_rate": 9.980193045135056e-06, + "loss": 0.8485, + "step": 1876 + }, + { + "epoch": 0.05752727718523967, + "grad_norm": 2.1195451258981275, + "learning_rate": 9.980148887116158e-06, + "loss": 0.808, + "step": 1877 + }, + { + "epoch": 0.05755792570798088, + "grad_norm": 2.2128921090045175, + "learning_rate": 9.980104680026579e-06, + "loss": 0.7796, + "step": 1878 + }, + { + "epoch": 0.05758857423072208, + "grad_norm": 2.1416732145145163, + "learning_rate": 9.980060423866756e-06, + "loss": 0.7894, + "step": 1879 + }, + { + "epoch": 0.05761922275346328, + "grad_norm": 1.87723349755078, + "learning_rate": 9.98001611863712e-06, + "loss": 0.6922, + "step": 1880 + }, + { + "epoch": 0.05764987127620449, + "grad_norm": 2.2039468829849294, + "learning_rate": 9.979971764338112e-06, + "loss": 0.7831, + "step": 1881 + }, + { + "epoch": 0.057680519798945694, + "grad_norm": 2.226952051451656, + "learning_rate": 9.97992736097017e-06, + "loss": 0.7702, + "step": 1882 + }, + { + "epoch": 0.05771116832168689, + "grad_norm": 2.355119227863165, + "learning_rate": 9.979882908533728e-06, + "loss": 0.893, + "step": 1883 + }, + { + "epoch": 0.0577418168444281, + "grad_norm": 2.3051273406597685, + "learning_rate": 9.979838407029226e-06, + "loss": 0.7865, + "step": 1884 + }, + { + "epoch": 0.057772465367169304, + "grad_norm": 2.102250414758317, + "learning_rate": 
9.9797938564571e-06, + "loss": 0.8675, + "step": 1885 + }, + { + "epoch": 0.05780311388991051, + "grad_norm": 2.3684612924498083, + "learning_rate": 9.979749256817794e-06, + "loss": 0.7477, + "step": 1886 + }, + { + "epoch": 0.05783376241265171, + "grad_norm": 2.126629855752258, + "learning_rate": 9.979704608111742e-06, + "loss": 0.7327, + "step": 1887 + }, + { + "epoch": 0.057864410935392914, + "grad_norm": 1.040264207708446, + "learning_rate": 9.97965991033939e-06, + "loss": 0.53, + "step": 1888 + }, + { + "epoch": 0.05789505945813412, + "grad_norm": 2.3035386776962303, + "learning_rate": 9.979615163501172e-06, + "loss": 0.8017, + "step": 1889 + }, + { + "epoch": 0.05792570798087532, + "grad_norm": 2.3130780326705045, + "learning_rate": 9.979570367597532e-06, + "loss": 0.723, + "step": 1890 + }, + { + "epoch": 0.057956356503616524, + "grad_norm": 2.39164867047449, + "learning_rate": 9.97952552262891e-06, + "loss": 0.81, + "step": 1891 + }, + { + "epoch": 0.05798700502635773, + "grad_norm": 0.9328590183789807, + "learning_rate": 9.97948062859575e-06, + "loss": 0.5215, + "step": 1892 + }, + { + "epoch": 0.058017653549098935, + "grad_norm": 2.000846710857981, + "learning_rate": 9.979435685498496e-06, + "loss": 0.6929, + "step": 1893 + }, + { + "epoch": 0.058048302071840134, + "grad_norm": 2.05935610248606, + "learning_rate": 9.979390693337585e-06, + "loss": 0.7522, + "step": 1894 + }, + { + "epoch": 0.05807895059458134, + "grad_norm": 2.0275085963867916, + "learning_rate": 9.979345652113464e-06, + "loss": 0.7332, + "step": 1895 + }, + { + "epoch": 0.058109599117322545, + "grad_norm": 2.1529447929797176, + "learning_rate": 9.979300561826576e-06, + "loss": 0.7352, + "step": 1896 + }, + { + "epoch": 0.05814024764006375, + "grad_norm": 1.91912532021819, + "learning_rate": 9.979255422477366e-06, + "loss": 0.744, + "step": 1897 + }, + { + "epoch": 0.05817089616280495, + "grad_norm": 2.225340012370184, + "learning_rate": 9.979210234066278e-06, + "loss": 0.924, + "step": 1898 + }, + { + "epoch": 0.058201544685546155, + "grad_norm": 1.1476346124121435, + "learning_rate": 9.979164996593757e-06, + "loss": 0.538, + "step": 1899 + }, + { + "epoch": 0.05823219320828736, + "grad_norm": 2.061005064970415, + "learning_rate": 9.979119710060252e-06, + "loss": 0.8074, + "step": 1900 + }, + { + "epoch": 0.05826284173102857, + "grad_norm": 2.210600741751947, + "learning_rate": 9.979074374466203e-06, + "loss": 0.8046, + "step": 1901 + }, + { + "epoch": 0.058293490253769766, + "grad_norm": 2.356030986386718, + "learning_rate": 9.979028989812064e-06, + "loss": 0.7438, + "step": 1902 + }, + { + "epoch": 0.05832413877651097, + "grad_norm": 2.3234815925181116, + "learning_rate": 9.978983556098274e-06, + "loss": 0.8454, + "step": 1903 + }, + { + "epoch": 0.05835478729925218, + "grad_norm": 1.2180285554578802, + "learning_rate": 9.978938073325288e-06, + "loss": 0.5602, + "step": 1904 + }, + { + "epoch": 0.05838543582199338, + "grad_norm": 2.1011121234373085, + "learning_rate": 9.97889254149355e-06, + "loss": 0.8249, + "step": 1905 + }, + { + "epoch": 0.05841608434473458, + "grad_norm": 1.911493065271801, + "learning_rate": 9.978846960603512e-06, + "loss": 0.7687, + "step": 1906 + }, + { + "epoch": 0.05844673286747579, + "grad_norm": 2.3872691533270873, + "learning_rate": 9.97880133065562e-06, + "loss": 0.7519, + "step": 1907 + }, + { + "epoch": 0.05847738139021699, + "grad_norm": 2.184230267168088, + "learning_rate": 9.978755651650322e-06, + "loss": 0.8791, + "step": 1908 + }, + { + "epoch": 0.0585080299129582, + 
"grad_norm": 1.955245636167583, + "learning_rate": 9.978709923588074e-06, + "loss": 0.7665, + "step": 1909 + }, + { + "epoch": 0.0585386784356994, + "grad_norm": 2.112832356322987, + "learning_rate": 9.978664146469323e-06, + "loss": 0.8126, + "step": 1910 + }, + { + "epoch": 0.0585693269584406, + "grad_norm": 2.087930103437995, + "learning_rate": 9.978618320294518e-06, + "loss": 0.773, + "step": 1911 + }, + { + "epoch": 0.05859997548118181, + "grad_norm": 2.420909915990041, + "learning_rate": 9.978572445064114e-06, + "loss": 0.7943, + "step": 1912 + }, + { + "epoch": 0.058630624003923014, + "grad_norm": 2.497554058111714, + "learning_rate": 9.978526520778564e-06, + "loss": 0.861, + "step": 1913 + }, + { + "epoch": 0.05866127252666421, + "grad_norm": 2.0460595061315168, + "learning_rate": 9.978480547438317e-06, + "loss": 0.7358, + "step": 1914 + }, + { + "epoch": 0.05869192104940542, + "grad_norm": 2.1981389648610024, + "learning_rate": 9.978434525043825e-06, + "loss": 0.7608, + "step": 1915 + }, + { + "epoch": 0.058722569572146624, + "grad_norm": 2.036052862184877, + "learning_rate": 9.978388453595547e-06, + "loss": 0.851, + "step": 1916 + }, + { + "epoch": 0.05875321809488783, + "grad_norm": 1.9492666537567593, + "learning_rate": 9.978342333093932e-06, + "loss": 0.6748, + "step": 1917 + }, + { + "epoch": 0.05878386661762903, + "grad_norm": 2.010123440838686, + "learning_rate": 9.978296163539436e-06, + "loss": 0.7627, + "step": 1918 + }, + { + "epoch": 0.058814515140370234, + "grad_norm": 2.5778714967232936, + "learning_rate": 9.978249944932515e-06, + "loss": 0.7767, + "step": 1919 + }, + { + "epoch": 0.05884516366311144, + "grad_norm": 1.1631946366788732, + "learning_rate": 9.978203677273623e-06, + "loss": 0.5496, + "step": 1920 + }, + { + "epoch": 0.058875812185852645, + "grad_norm": 2.2476649060778984, + "learning_rate": 9.97815736056322e-06, + "loss": 0.826, + "step": 1921 + }, + { + "epoch": 0.058906460708593844, + "grad_norm": 2.2169137470193037, + "learning_rate": 9.978110994801754e-06, + "loss": 0.8095, + "step": 1922 + }, + { + "epoch": 0.05893710923133505, + "grad_norm": 2.047255820874383, + "learning_rate": 9.978064579989688e-06, + "loss": 0.8386, + "step": 1923 + }, + { + "epoch": 0.058967757754076255, + "grad_norm": 2.007214889494969, + "learning_rate": 9.97801811612748e-06, + "loss": 0.8458, + "step": 1924 + }, + { + "epoch": 0.058998406276817454, + "grad_norm": 2.247814769795314, + "learning_rate": 9.977971603215583e-06, + "loss": 0.7283, + "step": 1925 + }, + { + "epoch": 0.05902905479955866, + "grad_norm": 2.3852586820892583, + "learning_rate": 9.97792504125446e-06, + "loss": 0.8759, + "step": 1926 + }, + { + "epoch": 0.059059703322299865, + "grad_norm": 2.239708916754809, + "learning_rate": 9.977878430244566e-06, + "loss": 0.8246, + "step": 1927 + }, + { + "epoch": 0.05909035184504107, + "grad_norm": 1.2150014645517213, + "learning_rate": 9.977831770186364e-06, + "loss": 0.5519, + "step": 1928 + }, + { + "epoch": 0.05912100036778227, + "grad_norm": 0.9585743212984494, + "learning_rate": 9.977785061080312e-06, + "loss": 0.5082, + "step": 1929 + }, + { + "epoch": 0.059151648890523476, + "grad_norm": 0.9150215007361415, + "learning_rate": 9.97773830292687e-06, + "loss": 0.5308, + "step": 1930 + }, + { + "epoch": 0.05918229741326468, + "grad_norm": 2.231329172435063, + "learning_rate": 9.977691495726498e-06, + "loss": 0.8567, + "step": 1931 + }, + { + "epoch": 0.05921294593600589, + "grad_norm": 2.218860122879323, + "learning_rate": 9.977644639479658e-06, + "loss": 0.8188, + 
"step": 1932 + }, + { + "epoch": 0.059243594458747086, + "grad_norm": 2.456189543943653, + "learning_rate": 9.977597734186813e-06, + "loss": 0.9036, + "step": 1933 + }, + { + "epoch": 0.05927424298148829, + "grad_norm": 2.4235301522633987, + "learning_rate": 9.977550779848422e-06, + "loss": 0.9153, + "step": 1934 + }, + { + "epoch": 0.0593048915042295, + "grad_norm": 1.9489336690846524, + "learning_rate": 9.977503776464952e-06, + "loss": 0.7621, + "step": 1935 + }, + { + "epoch": 0.0593355400269707, + "grad_norm": 1.9828054881494837, + "learning_rate": 9.977456724036862e-06, + "loss": 0.7633, + "step": 1936 + }, + { + "epoch": 0.0593661885497119, + "grad_norm": 1.932716505172929, + "learning_rate": 9.977409622564619e-06, + "loss": 0.7921, + "step": 1937 + }, + { + "epoch": 0.05939683707245311, + "grad_norm": 2.2134951083157595, + "learning_rate": 9.977362472048685e-06, + "loss": 0.7817, + "step": 1938 + }, + { + "epoch": 0.05942748559519431, + "grad_norm": 2.261029832640137, + "learning_rate": 9.977315272489523e-06, + "loss": 0.8799, + "step": 1939 + }, + { + "epoch": 0.05945813411793552, + "grad_norm": 2.1125899012899167, + "learning_rate": 9.9772680238876e-06, + "loss": 0.5525, + "step": 1940 + }, + { + "epoch": 0.05948878264067672, + "grad_norm": 2.2406645782818297, + "learning_rate": 9.977220726243384e-06, + "loss": 0.8131, + "step": 1941 + }, + { + "epoch": 0.05951943116341792, + "grad_norm": 1.9369865950664453, + "learning_rate": 9.977173379557338e-06, + "loss": 0.7815, + "step": 1942 + }, + { + "epoch": 0.05955007968615913, + "grad_norm": 1.997385965124388, + "learning_rate": 9.97712598382993e-06, + "loss": 0.7106, + "step": 1943 + }, + { + "epoch": 0.059580728208900334, + "grad_norm": 2.1223098201520227, + "learning_rate": 9.977078539061625e-06, + "loss": 0.7542, + "step": 1944 + }, + { + "epoch": 0.05961137673164153, + "grad_norm": 2.0269813209545595, + "learning_rate": 9.977031045252892e-06, + "loss": 0.7515, + "step": 1945 + }, + { + "epoch": 0.05964202525438274, + "grad_norm": 2.2585764837467015, + "learning_rate": 9.976983502404199e-06, + "loss": 0.8692, + "step": 1946 + }, + { + "epoch": 0.059672673777123944, + "grad_norm": 2.076685310168226, + "learning_rate": 9.976935910516015e-06, + "loss": 0.7222, + "step": 1947 + }, + { + "epoch": 0.05970332229986515, + "grad_norm": 2.0517963698568544, + "learning_rate": 9.976888269588806e-06, + "loss": 0.7064, + "step": 1948 + }, + { + "epoch": 0.05973397082260635, + "grad_norm": 2.1783713571123773, + "learning_rate": 9.976840579623045e-06, + "loss": 0.8688, + "step": 1949 + }, + { + "epoch": 0.059764619345347554, + "grad_norm": 1.981713043240793, + "learning_rate": 9.9767928406192e-06, + "loss": 0.7474, + "step": 1950 + }, + { + "epoch": 0.05979526786808876, + "grad_norm": 1.8857387120031441, + "learning_rate": 9.976745052577741e-06, + "loss": 0.7668, + "step": 1951 + }, + { + "epoch": 0.059825916390829965, + "grad_norm": 2.1169989875284103, + "learning_rate": 9.97669721549914e-06, + "loss": 0.5748, + "step": 1952 + }, + { + "epoch": 0.059856564913571164, + "grad_norm": 2.1296441038946763, + "learning_rate": 9.97664932938387e-06, + "loss": 0.7332, + "step": 1953 + }, + { + "epoch": 0.05988721343631237, + "grad_norm": 2.1251565378415824, + "learning_rate": 9.9766013942324e-06, + "loss": 0.803, + "step": 1954 + }, + { + "epoch": 0.059917861959053575, + "grad_norm": 2.42350787445781, + "learning_rate": 9.9765534100452e-06, + "loss": 0.75, + "step": 1955 + }, + { + "epoch": 0.059948510481794774, + "grad_norm": 1.0098582488733845, + 
"learning_rate": 9.97650537682275e-06, + "loss": 0.5364, + "step": 1956 + }, + { + "epoch": 0.05997915900453598, + "grad_norm": 2.245707312815582, + "learning_rate": 9.976457294565515e-06, + "loss": 0.8404, + "step": 1957 + }, + { + "epoch": 0.060009807527277186, + "grad_norm": 1.0747523278992153, + "learning_rate": 9.976409163273977e-06, + "loss": 0.5282, + "step": 1958 + }, + { + "epoch": 0.06004045605001839, + "grad_norm": 2.168856562621754, + "learning_rate": 9.976360982948605e-06, + "loss": 0.7933, + "step": 1959 + }, + { + "epoch": 0.06007110457275959, + "grad_norm": 2.4407541883244153, + "learning_rate": 9.976312753589874e-06, + "loss": 0.8623, + "step": 1960 + }, + { + "epoch": 0.060101753095500796, + "grad_norm": 1.1490975660374343, + "learning_rate": 9.976264475198261e-06, + "loss": 0.5389, + "step": 1961 + }, + { + "epoch": 0.060132401618242, + "grad_norm": 1.875089586757832, + "learning_rate": 9.976216147774242e-06, + "loss": 0.8015, + "step": 1962 + }, + { + "epoch": 0.06016305014098321, + "grad_norm": 0.9431089340876715, + "learning_rate": 9.97616777131829e-06, + "loss": 0.5514, + "step": 1963 + }, + { + "epoch": 0.060193698663724406, + "grad_norm": 2.057865774677342, + "learning_rate": 9.976119345830885e-06, + "loss": 0.8338, + "step": 1964 + }, + { + "epoch": 0.06022434718646561, + "grad_norm": 2.042173093513659, + "learning_rate": 9.976070871312502e-06, + "loss": 0.7544, + "step": 1965 + }, + { + "epoch": 0.06025499570920682, + "grad_norm": 2.452882263024842, + "learning_rate": 9.976022347763621e-06, + "loss": 0.8625, + "step": 1966 + }, + { + "epoch": 0.06028564423194802, + "grad_norm": 2.1447400594755632, + "learning_rate": 9.975973775184718e-06, + "loss": 0.818, + "step": 1967 + }, + { + "epoch": 0.06031629275468922, + "grad_norm": 2.3095269466945405, + "learning_rate": 9.975925153576271e-06, + "loss": 0.9016, + "step": 1968 + }, + { + "epoch": 0.06034694127743043, + "grad_norm": 2.213654649288472, + "learning_rate": 9.97587648293876e-06, + "loss": 0.7511, + "step": 1969 + }, + { + "epoch": 0.06037758980017163, + "grad_norm": 2.3583294444323917, + "learning_rate": 9.975827763272667e-06, + "loss": 0.8416, + "step": 1970 + }, + { + "epoch": 0.06040823832291284, + "grad_norm": 2.3319732686770953, + "learning_rate": 9.975778994578469e-06, + "loss": 0.8162, + "step": 1971 + }, + { + "epoch": 0.06043888684565404, + "grad_norm": 2.279358694708987, + "learning_rate": 9.975730176856648e-06, + "loss": 0.721, + "step": 1972 + }, + { + "epoch": 0.06046953536839524, + "grad_norm": 2.1413089629564745, + "learning_rate": 9.975681310107683e-06, + "loss": 0.7509, + "step": 1973 + }, + { + "epoch": 0.06050018389113645, + "grad_norm": 1.504631225787842, + "learning_rate": 9.975632394332057e-06, + "loss": 0.5419, + "step": 1974 + }, + { + "epoch": 0.060530832413877654, + "grad_norm": 2.1594285437049496, + "learning_rate": 9.975583429530255e-06, + "loss": 0.8169, + "step": 1975 + }, + { + "epoch": 0.06056148093661885, + "grad_norm": 2.204446252169093, + "learning_rate": 9.975534415702753e-06, + "loss": 0.8063, + "step": 1976 + }, + { + "epoch": 0.06059212945936006, + "grad_norm": 2.1905882328915505, + "learning_rate": 9.97548535285004e-06, + "loss": 0.8741, + "step": 1977 + }, + { + "epoch": 0.060622777982101264, + "grad_norm": 2.3501792902239353, + "learning_rate": 9.975436240972594e-06, + "loss": 0.7987, + "step": 1978 + }, + { + "epoch": 0.06065342650484247, + "grad_norm": 2.232349076885057, + "learning_rate": 9.975387080070904e-06, + "loss": 0.7254, + "step": 1979 + }, + { + "epoch": 
0.06068407502758367, + "grad_norm": 0.9706607964413632, + "learning_rate": 9.975337870145451e-06, + "loss": 0.5367, + "step": 1980 + }, + { + "epoch": 0.060714723550324874, + "grad_norm": 2.198941817045538, + "learning_rate": 9.975288611196721e-06, + "loss": 0.8036, + "step": 1981 + }, + { + "epoch": 0.06074537207306608, + "grad_norm": 2.1456784352543936, + "learning_rate": 9.975239303225199e-06, + "loss": 0.8279, + "step": 1982 + }, + { + "epoch": 0.060776020595807285, + "grad_norm": 0.9915247517452496, + "learning_rate": 9.975189946231372e-06, + "loss": 0.5311, + "step": 1983 + }, + { + "epoch": 0.060806669118548484, + "grad_norm": 2.187647554391678, + "learning_rate": 9.975140540215725e-06, + "loss": 0.7601, + "step": 1984 + }, + { + "epoch": 0.06083731764128969, + "grad_norm": 2.188721138621731, + "learning_rate": 9.975091085178745e-06, + "loss": 0.8489, + "step": 1985 + }, + { + "epoch": 0.060867966164030896, + "grad_norm": 2.1999279208935354, + "learning_rate": 9.975041581120922e-06, + "loss": 0.7398, + "step": 1986 + }, + { + "epoch": 0.060898614686772094, + "grad_norm": 2.1847594758570135, + "learning_rate": 9.974992028042738e-06, + "loss": 0.7942, + "step": 1987 + }, + { + "epoch": 0.0609292632095133, + "grad_norm": 0.9398887879313389, + "learning_rate": 9.974942425944687e-06, + "loss": 0.5279, + "step": 1988 + }, + { + "epoch": 0.060959911732254506, + "grad_norm": 2.418252248649921, + "learning_rate": 9.974892774827254e-06, + "loss": 0.7577, + "step": 1989 + }, + { + "epoch": 0.06099056025499571, + "grad_norm": 2.062353725852378, + "learning_rate": 9.974843074690929e-06, + "loss": 0.8711, + "step": 1990 + }, + { + "epoch": 0.06102120877773691, + "grad_norm": 2.0485357221725664, + "learning_rate": 9.974793325536206e-06, + "loss": 0.7639, + "step": 1991 + }, + { + "epoch": 0.061051857300478116, + "grad_norm": 2.093144680733953, + "learning_rate": 9.974743527363569e-06, + "loss": 0.8778, + "step": 1992 + }, + { + "epoch": 0.06108250582321932, + "grad_norm": 2.2436712123982705, + "learning_rate": 9.97469368017351e-06, + "loss": 0.9099, + "step": 1993 + }, + { + "epoch": 0.06111315434596053, + "grad_norm": 2.3456973347632335, + "learning_rate": 9.974643783966522e-06, + "loss": 0.7401, + "step": 1994 + }, + { + "epoch": 0.061143802868701726, + "grad_norm": 2.064030462692603, + "learning_rate": 9.974593838743097e-06, + "loss": 0.7789, + "step": 1995 + }, + { + "epoch": 0.06117445139144293, + "grad_norm": 2.426426453877442, + "learning_rate": 9.974543844503726e-06, + "loss": 0.7885, + "step": 1996 + }, + { + "epoch": 0.06120509991418414, + "grad_norm": 2.1491819893797715, + "learning_rate": 9.9744938012489e-06, + "loss": 0.7827, + "step": 1997 + }, + { + "epoch": 0.06123574843692534, + "grad_norm": 2.031215948059911, + "learning_rate": 9.974443708979116e-06, + "loss": 0.7136, + "step": 1998 + }, + { + "epoch": 0.06126639695966654, + "grad_norm": 1.066167246081794, + "learning_rate": 9.974393567694864e-06, + "loss": 0.5546, + "step": 1999 + }, + { + "epoch": 0.06129704548240775, + "grad_norm": 2.219519140802758, + "learning_rate": 9.97434337739664e-06, + "loss": 0.8362, + "step": 2000 + }, + { + "epoch": 0.06132769400514895, + "grad_norm": 2.797114755529844, + "learning_rate": 9.974293138084939e-06, + "loss": 0.7519, + "step": 2001 + }, + { + "epoch": 0.06135834252789016, + "grad_norm": 2.323220601134036, + "learning_rate": 9.974242849760253e-06, + "loss": 0.7186, + "step": 2002 + }, + { + "epoch": 0.06138899105063136, + "grad_norm": 1.9879881839387583, + "learning_rate": 
9.97419251242308e-06, + "loss": 0.7242, + "step": 2003 + }, + { + "epoch": 0.06141963957337256, + "grad_norm": 2.215658169692654, + "learning_rate": 9.974142126073915e-06, + "loss": 0.7585, + "step": 2004 + }, + { + "epoch": 0.06145028809611377, + "grad_norm": 2.2173122218148142, + "learning_rate": 9.974091690713256e-06, + "loss": 0.8237, + "step": 2005 + }, + { + "epoch": 0.061480936618854974, + "grad_norm": 2.043869206547236, + "learning_rate": 9.974041206341599e-06, + "loss": 0.7775, + "step": 2006 + }, + { + "epoch": 0.06151158514159617, + "grad_norm": 0.9384817859659448, + "learning_rate": 9.97399067295944e-06, + "loss": 0.5265, + "step": 2007 + }, + { + "epoch": 0.06154223366433738, + "grad_norm": 2.0722726023090154, + "learning_rate": 9.97394009056728e-06, + "loss": 0.8015, + "step": 2008 + }, + { + "epoch": 0.061572882187078584, + "grad_norm": 2.483481432848903, + "learning_rate": 9.973889459165615e-06, + "loss": 0.7958, + "step": 2009 + }, + { + "epoch": 0.06160353070981979, + "grad_norm": 2.202414667352168, + "learning_rate": 9.973838778754944e-06, + "loss": 0.7597, + "step": 2010 + }, + { + "epoch": 0.06163417923256099, + "grad_norm": 2.191341162700266, + "learning_rate": 9.973788049335768e-06, + "loss": 0.8808, + "step": 2011 + }, + { + "epoch": 0.061664827755302194, + "grad_norm": 2.1951685055516297, + "learning_rate": 9.973737270908584e-06, + "loss": 0.8287, + "step": 2012 + }, + { + "epoch": 0.0616954762780434, + "grad_norm": 2.1648654833177887, + "learning_rate": 9.973686443473895e-06, + "loss": 0.7125, + "step": 2013 + }, + { + "epoch": 0.061726124800784606, + "grad_norm": 1.935442968941545, + "learning_rate": 9.973635567032201e-06, + "loss": 0.6988, + "step": 2014 + }, + { + "epoch": 0.061756773323525804, + "grad_norm": 1.003605928328952, + "learning_rate": 9.973584641584005e-06, + "loss": 0.5361, + "step": 2015 + }, + { + "epoch": 0.06178742184626701, + "grad_norm": 0.9775443677453762, + "learning_rate": 9.973533667129804e-06, + "loss": 0.5277, + "step": 2016 + }, + { + "epoch": 0.061818070369008216, + "grad_norm": 2.044809058237664, + "learning_rate": 9.973482643670106e-06, + "loss": 0.7977, + "step": 2017 + }, + { + "epoch": 0.061848718891749414, + "grad_norm": 2.17891925499273, + "learning_rate": 9.973431571205408e-06, + "loss": 0.7306, + "step": 2018 + }, + { + "epoch": 0.06187936741449062, + "grad_norm": 2.0590470946869472, + "learning_rate": 9.973380449736218e-06, + "loss": 0.8407, + "step": 2019 + }, + { + "epoch": 0.061910015937231826, + "grad_norm": 2.05762958043724, + "learning_rate": 9.973329279263038e-06, + "loss": 0.7554, + "step": 2020 + }, + { + "epoch": 0.06194066445997303, + "grad_norm": 2.1561478336810542, + "learning_rate": 9.97327805978637e-06, + "loss": 0.8197, + "step": 2021 + }, + { + "epoch": 0.06197131298271423, + "grad_norm": 2.0171074483690354, + "learning_rate": 9.973226791306723e-06, + "loss": 0.6965, + "step": 2022 + }, + { + "epoch": 0.062001961505455436, + "grad_norm": 2.747444439280233, + "learning_rate": 9.9731754738246e-06, + "loss": 0.9501, + "step": 2023 + }, + { + "epoch": 0.06203261002819664, + "grad_norm": 2.639251044104261, + "learning_rate": 9.973124107340506e-06, + "loss": 0.7791, + "step": 2024 + }, + { + "epoch": 0.06206325855093785, + "grad_norm": 1.9039415900768146, + "learning_rate": 9.973072691854949e-06, + "loss": 0.7751, + "step": 2025 + }, + { + "epoch": 0.062093907073679046, + "grad_norm": 2.094116672334243, + "learning_rate": 9.97302122736843e-06, + "loss": 0.8419, + "step": 2026 + }, + { + "epoch": 
0.06212455559642025, + "grad_norm": 2.400587979137635, + "learning_rate": 9.972969713881466e-06, + "loss": 0.8446, + "step": 2027 + }, + { + "epoch": 0.06215520411916146, + "grad_norm": 1.9180443499812394, + "learning_rate": 9.972918151394556e-06, + "loss": 0.7303, + "step": 2028 + }, + { + "epoch": 0.06218585264190266, + "grad_norm": 2.1668262670735667, + "learning_rate": 9.972866539908212e-06, + "loss": 0.8103, + "step": 2029 + }, + { + "epoch": 0.06221650116464386, + "grad_norm": 1.4628965303221813, + "learning_rate": 9.97281487942294e-06, + "loss": 0.5473, + "step": 2030 + }, + { + "epoch": 0.06224714968738507, + "grad_norm": 1.863164648982354, + "learning_rate": 9.972763169939252e-06, + "loss": 0.7393, + "step": 2031 + }, + { + "epoch": 0.06227779821012627, + "grad_norm": 2.0520059650541427, + "learning_rate": 9.972711411457657e-06, + "loss": 0.7681, + "step": 2032 + }, + { + "epoch": 0.06230844673286748, + "grad_norm": 2.111205291465274, + "learning_rate": 9.972659603978664e-06, + "loss": 0.7241, + "step": 2033 + }, + { + "epoch": 0.06233909525560868, + "grad_norm": 1.055545612014101, + "learning_rate": 9.972607747502782e-06, + "loss": 0.5477, + "step": 2034 + }, + { + "epoch": 0.06236974377834988, + "grad_norm": 1.9828693661713261, + "learning_rate": 9.972555842030525e-06, + "loss": 0.7957, + "step": 2035 + }, + { + "epoch": 0.06240039230109109, + "grad_norm": 2.1878920162564928, + "learning_rate": 9.972503887562403e-06, + "loss": 0.6662, + "step": 2036 + }, + { + "epoch": 0.062431040823832294, + "grad_norm": 2.423218717253532, + "learning_rate": 9.972451884098927e-06, + "loss": 0.8484, + "step": 2037 + }, + { + "epoch": 0.06246168934657349, + "grad_norm": 1.0368261479534426, + "learning_rate": 9.97239983164061e-06, + "loss": 0.5321, + "step": 2038 + }, + { + "epoch": 0.0624923378693147, + "grad_norm": 1.864479562835654, + "learning_rate": 9.972347730187967e-06, + "loss": 0.8047, + "step": 2039 + }, + { + "epoch": 0.0625229863920559, + "grad_norm": 2.1442246655387955, + "learning_rate": 9.972295579741508e-06, + "loss": 0.7303, + "step": 2040 + }, + { + "epoch": 0.06255363491479711, + "grad_norm": 1.8585813936181628, + "learning_rate": 9.972243380301749e-06, + "loss": 0.7692, + "step": 2041 + }, + { + "epoch": 0.06258428343753832, + "grad_norm": 2.175793897414943, + "learning_rate": 9.972191131869204e-06, + "loss": 0.8831, + "step": 2042 + }, + { + "epoch": 0.06261493196027952, + "grad_norm": 2.0551907415164528, + "learning_rate": 9.972138834444387e-06, + "loss": 0.7767, + "step": 2043 + }, + { + "epoch": 0.06264558048302071, + "grad_norm": 2.2905181354263426, + "learning_rate": 9.972086488027815e-06, + "loss": 0.7963, + "step": 2044 + }, + { + "epoch": 0.06267622900576192, + "grad_norm": 2.3602048689108477, + "learning_rate": 9.97203409262e-06, + "loss": 0.7591, + "step": 2045 + }, + { + "epoch": 0.06270687752850312, + "grad_norm": 2.3749023564821283, + "learning_rate": 9.971981648221463e-06, + "loss": 0.72, + "step": 2046 + }, + { + "epoch": 0.06273752605124433, + "grad_norm": 2.234566812614006, + "learning_rate": 9.97192915483272e-06, + "loss": 0.8082, + "step": 2047 + }, + { + "epoch": 0.06276817457398554, + "grad_norm": 2.203293717995487, + "learning_rate": 9.971876612454285e-06, + "loss": 0.8159, + "step": 2048 + }, + { + "epoch": 0.06279882309672674, + "grad_norm": 1.2669334089074398, + "learning_rate": 9.971824021086677e-06, + "loss": 0.5377, + "step": 2049 + }, + { + "epoch": 0.06282947161946795, + "grad_norm": 2.230991559285099, + "learning_rate": 9.971771380730418e-06, + 
"loss": 0.817, + "step": 2050 + }, + { + "epoch": 0.06286012014220914, + "grad_norm": 2.153672417532638, + "learning_rate": 9.97171869138602e-06, + "loss": 0.7175, + "step": 2051 + }, + { + "epoch": 0.06289076866495034, + "grad_norm": 1.9747445964638857, + "learning_rate": 9.971665953054007e-06, + "loss": 0.8351, + "step": 2052 + }, + { + "epoch": 0.06292141718769155, + "grad_norm": 2.120288357625989, + "learning_rate": 9.971613165734897e-06, + "loss": 0.7828, + "step": 2053 + }, + { + "epoch": 0.06295206571043276, + "grad_norm": 2.0467486910135384, + "learning_rate": 9.971560329429211e-06, + "loss": 0.7138, + "step": 2054 + }, + { + "epoch": 0.06298271423317396, + "grad_norm": 2.0860603634995916, + "learning_rate": 9.971507444137469e-06, + "loss": 0.7622, + "step": 2055 + }, + { + "epoch": 0.06301336275591517, + "grad_norm": 1.970908481824912, + "learning_rate": 9.971454509860192e-06, + "loss": 0.7894, + "step": 2056 + }, + { + "epoch": 0.06304401127865637, + "grad_norm": 2.351028947935282, + "learning_rate": 9.971401526597902e-06, + "loss": 0.7863, + "step": 2057 + }, + { + "epoch": 0.06307465980139758, + "grad_norm": 2.3476567850094128, + "learning_rate": 9.97134849435112e-06, + "loss": 0.7491, + "step": 2058 + }, + { + "epoch": 0.06310530832413877, + "grad_norm": 1.2789193219211434, + "learning_rate": 9.97129541312037e-06, + "loss": 0.5409, + "step": 2059 + }, + { + "epoch": 0.06313595684687998, + "grad_norm": 2.1444822550913636, + "learning_rate": 9.971242282906174e-06, + "loss": 0.7199, + "step": 2060 + }, + { + "epoch": 0.06316660536962118, + "grad_norm": 2.1675940718720232, + "learning_rate": 9.971189103709056e-06, + "loss": 0.795, + "step": 2061 + }, + { + "epoch": 0.06319725389236239, + "grad_norm": 2.0359234767886223, + "learning_rate": 9.97113587552954e-06, + "loss": 0.8698, + "step": 2062 + }, + { + "epoch": 0.06322790241510359, + "grad_norm": 1.8650027029537368, + "learning_rate": 9.97108259836815e-06, + "loss": 0.8209, + "step": 2063 + }, + { + "epoch": 0.0632585509378448, + "grad_norm": 1.9974046332205118, + "learning_rate": 9.971029272225411e-06, + "loss": 0.7714, + "step": 2064 + }, + { + "epoch": 0.063289199460586, + "grad_norm": 2.0750579412268766, + "learning_rate": 9.970975897101849e-06, + "loss": 0.812, + "step": 2065 + }, + { + "epoch": 0.06331984798332721, + "grad_norm": 2.0978151301161634, + "learning_rate": 9.97092247299799e-06, + "loss": 0.8284, + "step": 2066 + }, + { + "epoch": 0.0633504965060684, + "grad_norm": 2.1473816265494694, + "learning_rate": 9.97086899991436e-06, + "loss": 0.8042, + "step": 2067 + }, + { + "epoch": 0.06338114502880961, + "grad_norm": 2.1866361602612057, + "learning_rate": 9.970815477851485e-06, + "loss": 0.8169, + "step": 2068 + }, + { + "epoch": 0.06341179355155081, + "grad_norm": 2.682441778691228, + "learning_rate": 9.970761906809893e-06, + "loss": 0.7176, + "step": 2069 + }, + { + "epoch": 0.06344244207429202, + "grad_norm": 2.171405778735718, + "learning_rate": 9.970708286790114e-06, + "loss": 0.7473, + "step": 2070 + }, + { + "epoch": 0.06347309059703322, + "grad_norm": 1.206786605535976, + "learning_rate": 9.970654617792672e-06, + "loss": 0.5348, + "step": 2071 + }, + { + "epoch": 0.06350373911977443, + "grad_norm": 1.0005110449034555, + "learning_rate": 9.9706008998181e-06, + "loss": 0.5366, + "step": 2072 + }, + { + "epoch": 0.06353438764251564, + "grad_norm": 2.2136384948817844, + "learning_rate": 9.970547132866925e-06, + "loss": 0.7987, + "step": 2073 + }, + { + "epoch": 0.06356503616525684, + "grad_norm": 
0.9846022918193877, + "learning_rate": 9.970493316939678e-06, + "loss": 0.5177, + "step": 2074 + }, + { + "epoch": 0.06359568468799803, + "grad_norm": 2.2918406736872, + "learning_rate": 9.970439452036888e-06, + "loss": 0.8659, + "step": 2075 + }, + { + "epoch": 0.06362633321073924, + "grad_norm": 1.7885602044347395, + "learning_rate": 9.970385538159086e-06, + "loss": 0.7207, + "step": 2076 + }, + { + "epoch": 0.06365698173348044, + "grad_norm": 2.1043429975281813, + "learning_rate": 9.970331575306804e-06, + "loss": 0.8042, + "step": 2077 + }, + { + "epoch": 0.06368763025622165, + "grad_norm": 2.1269443710634177, + "learning_rate": 9.970277563480573e-06, + "loss": 0.7283, + "step": 2078 + }, + { + "epoch": 0.06371827877896286, + "grad_norm": 2.094042444978855, + "learning_rate": 9.970223502680926e-06, + "loss": 0.8354, + "step": 2079 + }, + { + "epoch": 0.06374892730170406, + "grad_norm": 2.2691856040213727, + "learning_rate": 9.970169392908396e-06, + "loss": 0.8973, + "step": 2080 + }, + { + "epoch": 0.06377957582444527, + "grad_norm": 2.146786875749665, + "learning_rate": 9.970115234163513e-06, + "loss": 0.7608, + "step": 2081 + }, + { + "epoch": 0.06381022434718646, + "grad_norm": 1.33817470027106, + "learning_rate": 9.970061026446813e-06, + "loss": 0.5371, + "step": 2082 + }, + { + "epoch": 0.06384087286992766, + "grad_norm": 2.113295349030361, + "learning_rate": 9.970006769758832e-06, + "loss": 0.7241, + "step": 2083 + }, + { + "epoch": 0.06387152139266887, + "grad_norm": 2.189394459460152, + "learning_rate": 9.969952464100102e-06, + "loss": 0.8261, + "step": 2084 + }, + { + "epoch": 0.06390216991541008, + "grad_norm": 2.096821085285166, + "learning_rate": 9.969898109471159e-06, + "loss": 0.6992, + "step": 2085 + }, + { + "epoch": 0.06393281843815128, + "grad_norm": 0.983307030508446, + "learning_rate": 9.969843705872537e-06, + "loss": 0.5524, + "step": 2086 + }, + { + "epoch": 0.06396346696089249, + "grad_norm": 2.0282459388447807, + "learning_rate": 9.969789253304775e-06, + "loss": 0.8415, + "step": 2087 + }, + { + "epoch": 0.06399411548363369, + "grad_norm": 2.2293020046284115, + "learning_rate": 9.969734751768407e-06, + "loss": 0.8148, + "step": 2088 + }, + { + "epoch": 0.0640247640063749, + "grad_norm": 2.1807114615735137, + "learning_rate": 9.969680201263972e-06, + "loss": 0.7978, + "step": 2089 + }, + { + "epoch": 0.06405541252911609, + "grad_norm": 2.246099194161644, + "learning_rate": 9.969625601792005e-06, + "loss": 0.738, + "step": 2090 + }, + { + "epoch": 0.0640860610518573, + "grad_norm": 2.0631276755339405, + "learning_rate": 9.969570953353044e-06, + "loss": 0.7432, + "step": 2091 + }, + { + "epoch": 0.0641167095745985, + "grad_norm": 2.314551191410228, + "learning_rate": 9.969516255947633e-06, + "loss": 0.7931, + "step": 2092 + }, + { + "epoch": 0.06414735809733971, + "grad_norm": 2.023814050497669, + "learning_rate": 9.969461509576303e-06, + "loss": 0.6928, + "step": 2093 + }, + { + "epoch": 0.06417800662008091, + "grad_norm": 0.975612991370605, + "learning_rate": 9.9694067142396e-06, + "loss": 0.5236, + "step": 2094 + }, + { + "epoch": 0.06420865514282212, + "grad_norm": 1.9482713458557115, + "learning_rate": 9.96935186993806e-06, + "loss": 0.8752, + "step": 2095 + }, + { + "epoch": 0.06423930366556332, + "grad_norm": 2.043165740362296, + "learning_rate": 9.969296976672224e-06, + "loss": 0.8623, + "step": 2096 + }, + { + "epoch": 0.06426995218830453, + "grad_norm": 2.595054972460468, + "learning_rate": 9.969242034442634e-06, + "loss": 0.8543, + "step": 2097 + }, + { + 
"epoch": 0.06430060071104572, + "grad_norm": 2.023617374028626, + "learning_rate": 9.96918704324983e-06, + "loss": 0.7684, + "step": 2098 + }, + { + "epoch": 0.06433124923378693, + "grad_norm": 2.0782316341818587, + "learning_rate": 9.969132003094357e-06, + "loss": 0.7714, + "step": 2099 + }, + { + "epoch": 0.06436189775652813, + "grad_norm": 2.1398860858768343, + "learning_rate": 9.969076913976755e-06, + "loss": 0.8764, + "step": 2100 + }, + { + "epoch": 0.06439254627926934, + "grad_norm": 2.0924601057602437, + "learning_rate": 9.969021775897563e-06, + "loss": 0.7774, + "step": 2101 + }, + { + "epoch": 0.06442319480201054, + "grad_norm": 2.0135083202828676, + "learning_rate": 9.968966588857331e-06, + "loss": 0.8245, + "step": 2102 + }, + { + "epoch": 0.06445384332475175, + "grad_norm": 2.1455932850832298, + "learning_rate": 9.968911352856598e-06, + "loss": 0.7763, + "step": 2103 + }, + { + "epoch": 0.06448449184749296, + "grad_norm": 2.296649605643846, + "learning_rate": 9.968856067895913e-06, + "loss": 0.7419, + "step": 2104 + }, + { + "epoch": 0.06451514037023416, + "grad_norm": 1.9180768075295855, + "learning_rate": 9.968800733975816e-06, + "loss": 0.7121, + "step": 2105 + }, + { + "epoch": 0.06454578889297535, + "grad_norm": 2.1598450292397615, + "learning_rate": 9.968745351096854e-06, + "loss": 0.7761, + "step": 2106 + }, + { + "epoch": 0.06457643741571656, + "grad_norm": 1.9737258952132486, + "learning_rate": 9.968689919259572e-06, + "loss": 0.7588, + "step": 2107 + }, + { + "epoch": 0.06460708593845776, + "grad_norm": 2.263063970854475, + "learning_rate": 9.968634438464517e-06, + "loss": 0.7745, + "step": 2108 + }, + { + "epoch": 0.06463773446119897, + "grad_norm": 2.0907945255674663, + "learning_rate": 9.968578908712236e-06, + "loss": 0.7192, + "step": 2109 + }, + { + "epoch": 0.06466838298394018, + "grad_norm": 2.013874502684691, + "learning_rate": 9.968523330003276e-06, + "loss": 0.8259, + "step": 2110 + }, + { + "epoch": 0.06469903150668138, + "grad_norm": 1.9465671037758647, + "learning_rate": 9.968467702338186e-06, + "loss": 0.7602, + "step": 2111 + }, + { + "epoch": 0.06472968002942259, + "grad_norm": 2.183053837980145, + "learning_rate": 9.968412025717511e-06, + "loss": 0.8279, + "step": 2112 + }, + { + "epoch": 0.06476032855216378, + "grad_norm": 1.9894163772572004, + "learning_rate": 9.968356300141802e-06, + "loss": 0.7742, + "step": 2113 + }, + { + "epoch": 0.06479097707490498, + "grad_norm": 1.0665853376295422, + "learning_rate": 9.968300525611605e-06, + "loss": 0.5235, + "step": 2114 + }, + { + "epoch": 0.06482162559764619, + "grad_norm": 1.9246734933022092, + "learning_rate": 9.968244702127473e-06, + "loss": 0.8614, + "step": 2115 + }, + { + "epoch": 0.0648522741203874, + "grad_norm": 2.1688156503734657, + "learning_rate": 9.968188829689955e-06, + "loss": 0.8233, + "step": 2116 + }, + { + "epoch": 0.0648829226431286, + "grad_norm": 0.8763840515998065, + "learning_rate": 9.968132908299602e-06, + "loss": 0.4822, + "step": 2117 + }, + { + "epoch": 0.06491357116586981, + "grad_norm": 2.019337317627924, + "learning_rate": 9.968076937956962e-06, + "loss": 0.7458, + "step": 2118 + }, + { + "epoch": 0.06494421968861101, + "grad_norm": 2.006076773812726, + "learning_rate": 9.968020918662591e-06, + "loss": 0.7624, + "step": 2119 + }, + { + "epoch": 0.06497486821135222, + "grad_norm": 2.186142943173811, + "learning_rate": 9.967964850417039e-06, + "loss": 0.8353, + "step": 2120 + }, + { + "epoch": 0.06500551673409341, + "grad_norm": 1.8901383576565194, + "learning_rate": 
9.967908733220854e-06, + "loss": 0.7913, + "step": 2121 + }, + { + "epoch": 0.06503616525683462, + "grad_norm": 2.283952769820935, + "learning_rate": 9.967852567074598e-06, + "loss": 0.8044, + "step": 2122 + }, + { + "epoch": 0.06506681377957582, + "grad_norm": 2.0111622241916955, + "learning_rate": 9.967796351978817e-06, + "loss": 0.818, + "step": 2123 + }, + { + "epoch": 0.06509746230231703, + "grad_norm": 2.2003744665371707, + "learning_rate": 9.967740087934069e-06, + "loss": 0.8497, + "step": 2124 + }, + { + "epoch": 0.06512811082505823, + "grad_norm": 1.1281888041021124, + "learning_rate": 9.967683774940905e-06, + "loss": 0.5113, + "step": 2125 + }, + { + "epoch": 0.06515875934779944, + "grad_norm": 2.1173133824619903, + "learning_rate": 9.967627412999883e-06, + "loss": 0.7661, + "step": 2126 + }, + { + "epoch": 0.06518940787054064, + "grad_norm": 1.9972725894448196, + "learning_rate": 9.967571002111558e-06, + "loss": 0.7477, + "step": 2127 + }, + { + "epoch": 0.06522005639328185, + "grad_norm": 2.108709650343693, + "learning_rate": 9.967514542276484e-06, + "loss": 0.757, + "step": 2128 + }, + { + "epoch": 0.06525070491602304, + "grad_norm": 0.9321693755694129, + "learning_rate": 9.967458033495219e-06, + "loss": 0.5353, + "step": 2129 + }, + { + "epoch": 0.06528135343876425, + "grad_norm": 2.3836923400380825, + "learning_rate": 9.967401475768316e-06, + "loss": 0.8741, + "step": 2130 + }, + { + "epoch": 0.06531200196150545, + "grad_norm": 2.2126358048161348, + "learning_rate": 9.967344869096338e-06, + "loss": 0.7301, + "step": 2131 + }, + { + "epoch": 0.06534265048424666, + "grad_norm": 2.103803106155121, + "learning_rate": 9.96728821347984e-06, + "loss": 0.8903, + "step": 2132 + }, + { + "epoch": 0.06537329900698786, + "grad_norm": 2.3551240204044395, + "learning_rate": 9.96723150891938e-06, + "loss": 0.6542, + "step": 2133 + }, + { + "epoch": 0.06540394752972907, + "grad_norm": 2.1938712037236967, + "learning_rate": 9.967174755415516e-06, + "loss": 0.8673, + "step": 2134 + }, + { + "epoch": 0.06543459605247028, + "grad_norm": 2.081366678419273, + "learning_rate": 9.96711795296881e-06, + "loss": 0.8446, + "step": 2135 + }, + { + "epoch": 0.06546524457521148, + "grad_norm": 1.9792243244272119, + "learning_rate": 9.967061101579818e-06, + "loss": 0.7746, + "step": 2136 + }, + { + "epoch": 0.06549589309795267, + "grad_norm": 2.103405999211665, + "learning_rate": 9.967004201249105e-06, + "loss": 0.7664, + "step": 2137 + }, + { + "epoch": 0.06552654162069388, + "grad_norm": 2.4899757070075887, + "learning_rate": 9.966947251977226e-06, + "loss": 0.7625, + "step": 2138 + }, + { + "epoch": 0.06555719014343508, + "grad_norm": 2.1202105381408436, + "learning_rate": 9.966890253764746e-06, + "loss": 0.7688, + "step": 2139 + }, + { + "epoch": 0.06558783866617629, + "grad_norm": 1.89867709299372, + "learning_rate": 9.966833206612225e-06, + "loss": 0.7678, + "step": 2140 + }, + { + "epoch": 0.0656184871889175, + "grad_norm": 2.1848520907538886, + "learning_rate": 9.966776110520224e-06, + "loss": 0.7785, + "step": 2141 + }, + { + "epoch": 0.0656491357116587, + "grad_norm": 2.098025078853529, + "learning_rate": 9.96671896548931e-06, + "loss": 0.8328, + "step": 2142 + }, + { + "epoch": 0.06567978423439991, + "grad_norm": 2.007601127865126, + "learning_rate": 9.966661771520042e-06, + "loss": 0.8349, + "step": 2143 + }, + { + "epoch": 0.0657104327571411, + "grad_norm": 2.044808825554177, + "learning_rate": 9.966604528612986e-06, + "loss": 0.7358, + "step": 2144 + }, + { + "epoch": 0.0657410812798823, + 
"grad_norm": 1.8598107359942524, + "learning_rate": 9.966547236768703e-06, + "loss": 0.7508, + "step": 2145 + }, + { + "epoch": 0.06577172980262351, + "grad_norm": 2.0557068269963064, + "learning_rate": 9.96648989598776e-06, + "loss": 0.8378, + "step": 2146 + }, + { + "epoch": 0.06580237832536472, + "grad_norm": 2.271084784178762, + "learning_rate": 9.966432506270723e-06, + "loss": 0.8084, + "step": 2147 + }, + { + "epoch": 0.06583302684810592, + "grad_norm": 1.964374004653288, + "learning_rate": 9.966375067618152e-06, + "loss": 0.7987, + "step": 2148 + }, + { + "epoch": 0.06586367537084713, + "grad_norm": 2.4313752994034834, + "learning_rate": 9.96631758003062e-06, + "loss": 0.8199, + "step": 2149 + }, + { + "epoch": 0.06589432389358833, + "grad_norm": 2.1622493966032517, + "learning_rate": 9.966260043508688e-06, + "loss": 0.8115, + "step": 2150 + }, + { + "epoch": 0.06592497241632954, + "grad_norm": 1.9868125355512196, + "learning_rate": 9.966202458052927e-06, + "loss": 0.8526, + "step": 2151 + }, + { + "epoch": 0.06595562093907073, + "grad_norm": 2.0640701998437505, + "learning_rate": 9.966144823663903e-06, + "loss": 0.7902, + "step": 2152 + }, + { + "epoch": 0.06598626946181194, + "grad_norm": 1.1968513502688158, + "learning_rate": 9.966087140342182e-06, + "loss": 0.5393, + "step": 2153 + }, + { + "epoch": 0.06601691798455314, + "grad_norm": 2.421201180272247, + "learning_rate": 9.966029408088333e-06, + "loss": 0.835, + "step": 2154 + }, + { + "epoch": 0.06604756650729435, + "grad_norm": 2.0967431386138897, + "learning_rate": 9.965971626902928e-06, + "loss": 0.6672, + "step": 2155 + }, + { + "epoch": 0.06607821503003555, + "grad_norm": 2.3320211344739468, + "learning_rate": 9.965913796786532e-06, + "loss": 0.8802, + "step": 2156 + }, + { + "epoch": 0.06610886355277676, + "grad_norm": 2.094658365673452, + "learning_rate": 9.965855917739718e-06, + "loss": 0.8902, + "step": 2157 + }, + { + "epoch": 0.06613951207551796, + "grad_norm": 0.9830070493747071, + "learning_rate": 9.965797989763053e-06, + "loss": 0.5074, + "step": 2158 + }, + { + "epoch": 0.06617016059825917, + "grad_norm": 2.0147241959818043, + "learning_rate": 9.965740012857113e-06, + "loss": 0.7685, + "step": 2159 + }, + { + "epoch": 0.06620080912100036, + "grad_norm": 2.0468522774264595, + "learning_rate": 9.965681987022463e-06, + "loss": 0.78, + "step": 2160 + }, + { + "epoch": 0.06623145764374157, + "grad_norm": 2.080727431703376, + "learning_rate": 9.96562391225968e-06, + "loss": 0.795, + "step": 2161 + }, + { + "epoch": 0.06626210616648277, + "grad_norm": 2.2541985790710566, + "learning_rate": 9.965565788569333e-06, + "loss": 0.7191, + "step": 2162 + }, + { + "epoch": 0.06629275468922398, + "grad_norm": 1.9846537291227921, + "learning_rate": 9.965507615951997e-06, + "loss": 0.84, + "step": 2163 + }, + { + "epoch": 0.06632340321196518, + "grad_norm": 2.4495402687177545, + "learning_rate": 9.965449394408243e-06, + "loss": 0.8865, + "step": 2164 + }, + { + "epoch": 0.06635405173470639, + "grad_norm": 2.2116034314909827, + "learning_rate": 9.965391123938645e-06, + "loss": 0.8055, + "step": 2165 + }, + { + "epoch": 0.0663847002574476, + "grad_norm": 1.0851129448292867, + "learning_rate": 9.96533280454378e-06, + "loss": 0.5296, + "step": 2166 + }, + { + "epoch": 0.0664153487801888, + "grad_norm": 2.2528595477746665, + "learning_rate": 9.965274436224217e-06, + "loss": 0.8141, + "step": 2167 + }, + { + "epoch": 0.06644599730293, + "grad_norm": 2.2223346411464577, + "learning_rate": 9.965216018980537e-06, + "loss": 0.7688, + 
"step": 2168 + }, + { + "epoch": 0.0664766458256712, + "grad_norm": 2.379975756503146, + "learning_rate": 9.965157552813313e-06, + "loss": 0.801, + "step": 2169 + }, + { + "epoch": 0.0665072943484124, + "grad_norm": 1.9877300064413226, + "learning_rate": 9.96509903772312e-06, + "loss": 0.8012, + "step": 2170 + }, + { + "epoch": 0.06653794287115361, + "grad_norm": 1.9607862005108565, + "learning_rate": 9.96504047371054e-06, + "loss": 0.7899, + "step": 2171 + }, + { + "epoch": 0.06656859139389482, + "grad_norm": 0.9990388204966087, + "learning_rate": 9.96498186077614e-06, + "loss": 0.5242, + "step": 2172 + }, + { + "epoch": 0.06659923991663602, + "grad_norm": 2.1659737701518633, + "learning_rate": 9.964923198920507e-06, + "loss": 0.7851, + "step": 2173 + }, + { + "epoch": 0.06662988843937723, + "grad_norm": 2.035379607767426, + "learning_rate": 9.964864488144215e-06, + "loss": 0.7819, + "step": 2174 + }, + { + "epoch": 0.06666053696211842, + "grad_norm": 2.037625499523693, + "learning_rate": 9.964805728447842e-06, + "loss": 0.7815, + "step": 2175 + }, + { + "epoch": 0.06669118548485962, + "grad_norm": 2.316002450605538, + "learning_rate": 9.964746919831969e-06, + "loss": 0.7168, + "step": 2176 + }, + { + "epoch": 0.06672183400760083, + "grad_norm": 2.2507434940689306, + "learning_rate": 9.964688062297173e-06, + "loss": 0.7938, + "step": 2177 + }, + { + "epoch": 0.06675248253034204, + "grad_norm": 2.100984483480584, + "learning_rate": 9.964629155844034e-06, + "loss": 0.7947, + "step": 2178 + }, + { + "epoch": 0.06678313105308324, + "grad_norm": 1.902341322755823, + "learning_rate": 9.964570200473136e-06, + "loss": 0.8098, + "step": 2179 + }, + { + "epoch": 0.06681377957582445, + "grad_norm": 1.7980057264256, + "learning_rate": 9.964511196185058e-06, + "loss": 0.6298, + "step": 2180 + }, + { + "epoch": 0.06684442809856565, + "grad_norm": 1.1829040753543272, + "learning_rate": 9.964452142980379e-06, + "loss": 0.546, + "step": 2181 + }, + { + "epoch": 0.06687507662130686, + "grad_norm": 2.032482654360311, + "learning_rate": 9.964393040859683e-06, + "loss": 0.8095, + "step": 2182 + }, + { + "epoch": 0.06690572514404805, + "grad_norm": 1.9451687226408776, + "learning_rate": 9.964333889823555e-06, + "loss": 0.687, + "step": 2183 + }, + { + "epoch": 0.06693637366678926, + "grad_norm": 2.0208977104092076, + "learning_rate": 9.964274689872571e-06, + "loss": 0.721, + "step": 2184 + }, + { + "epoch": 0.06696702218953046, + "grad_norm": 1.9188952970600646, + "learning_rate": 9.96421544100732e-06, + "loss": 0.7435, + "step": 2185 + }, + { + "epoch": 0.06699767071227167, + "grad_norm": 2.0820174325374095, + "learning_rate": 9.964156143228386e-06, + "loss": 0.7623, + "step": 2186 + }, + { + "epoch": 0.06702831923501287, + "grad_norm": 1.9171615323939455, + "learning_rate": 9.964096796536349e-06, + "loss": 0.8264, + "step": 2187 + }, + { + "epoch": 0.06705896775775408, + "grad_norm": 2.0596004568367228, + "learning_rate": 9.964037400931798e-06, + "loss": 0.8959, + "step": 2188 + }, + { + "epoch": 0.06708961628049528, + "grad_norm": 2.2531996917131476, + "learning_rate": 9.963977956415315e-06, + "loss": 0.8154, + "step": 2189 + }, + { + "epoch": 0.06712026480323649, + "grad_norm": 1.9187939971203034, + "learning_rate": 9.963918462987488e-06, + "loss": 0.7388, + "step": 2190 + }, + { + "epoch": 0.06715091332597768, + "grad_norm": 1.726025261124731, + "learning_rate": 9.9638589206489e-06, + "loss": 0.6565, + "step": 2191 + }, + { + "epoch": 0.06718156184871889, + "grad_norm": 2.3690641046137015, + 
"learning_rate": 9.963799329400142e-06, + "loss": 0.6963, + "step": 2192 + }, + { + "epoch": 0.0672122103714601, + "grad_norm": 1.8444013738588496, + "learning_rate": 9.9637396892418e-06, + "loss": 0.7816, + "step": 2193 + }, + { + "epoch": 0.0672428588942013, + "grad_norm": 2.110519170680331, + "learning_rate": 9.963680000174458e-06, + "loss": 0.7446, + "step": 2194 + }, + { + "epoch": 0.0672735074169425, + "grad_norm": 1.9842619970088375, + "learning_rate": 9.96362026219871e-06, + "loss": 0.727, + "step": 2195 + }, + { + "epoch": 0.06730415593968371, + "grad_norm": 2.1153026627747153, + "learning_rate": 9.96356047531514e-06, + "loss": 0.7701, + "step": 2196 + }, + { + "epoch": 0.06733480446242492, + "grad_norm": 1.9539012440373302, + "learning_rate": 9.96350063952434e-06, + "loss": 0.7595, + "step": 2197 + }, + { + "epoch": 0.06736545298516612, + "grad_norm": 2.013744379744089, + "learning_rate": 9.963440754826897e-06, + "loss": 0.7252, + "step": 2198 + }, + { + "epoch": 0.06739610150790731, + "grad_norm": 2.330238198039784, + "learning_rate": 9.9633808212234e-06, + "loss": 0.8312, + "step": 2199 + }, + { + "epoch": 0.06742675003064852, + "grad_norm": 1.7300227536515687, + "learning_rate": 9.963320838714445e-06, + "loss": 0.5436, + "step": 2200 + }, + { + "epoch": 0.06745739855338972, + "grad_norm": 1.9353692053153488, + "learning_rate": 9.96326080730062e-06, + "loss": 0.8492, + "step": 2201 + }, + { + "epoch": 0.06748804707613093, + "grad_norm": 1.927794536193327, + "learning_rate": 9.963200726982515e-06, + "loss": 0.8063, + "step": 2202 + }, + { + "epoch": 0.06751869559887214, + "grad_norm": 0.9874130908523474, + "learning_rate": 9.963140597760723e-06, + "loss": 0.5279, + "step": 2203 + }, + { + "epoch": 0.06754934412161334, + "grad_norm": 2.4205650565527717, + "learning_rate": 9.963080419635838e-06, + "loss": 0.7716, + "step": 2204 + }, + { + "epoch": 0.06757999264435455, + "grad_norm": 2.0072256647599485, + "learning_rate": 9.963020192608452e-06, + "loss": 0.776, + "step": 2205 + }, + { + "epoch": 0.06761064116709574, + "grad_norm": 2.226562111877214, + "learning_rate": 9.962959916679158e-06, + "loss": 0.8651, + "step": 2206 + }, + { + "epoch": 0.06764128968983694, + "grad_norm": 1.9983765105705928, + "learning_rate": 9.962899591848549e-06, + "loss": 0.7769, + "step": 2207 + }, + { + "epoch": 0.06767193821257815, + "grad_norm": 2.5650750075665, + "learning_rate": 9.962839218117222e-06, + "loss": 0.8, + "step": 2208 + }, + { + "epoch": 0.06770258673531936, + "grad_norm": 2.2523718352211044, + "learning_rate": 9.962778795485768e-06, + "loss": 0.72, + "step": 2209 + }, + { + "epoch": 0.06773323525806056, + "grad_norm": 2.1319206521492164, + "learning_rate": 9.962718323954787e-06, + "loss": 0.6884, + "step": 2210 + }, + { + "epoch": 0.06776388378080177, + "grad_norm": 1.5393376285490057, + "learning_rate": 9.96265780352487e-06, + "loss": 0.529, + "step": 2211 + }, + { + "epoch": 0.06779453230354297, + "grad_norm": 2.144240920787987, + "learning_rate": 9.962597234196621e-06, + "loss": 0.7214, + "step": 2212 + }, + { + "epoch": 0.06782518082628418, + "grad_norm": 1.0653868037529317, + "learning_rate": 9.962536615970626e-06, + "loss": 0.5173, + "step": 2213 + }, + { + "epoch": 0.06785582934902537, + "grad_norm": 2.529703575529376, + "learning_rate": 9.962475948847492e-06, + "loss": 0.835, + "step": 2214 + }, + { + "epoch": 0.06788647787176658, + "grad_norm": 2.530920872253848, + "learning_rate": 9.962415232827811e-06, + "loss": 0.7281, + "step": 2215 + }, + { + "epoch": 0.06791712639450778, 
+ "grad_norm": 2.2357292994161972, + "learning_rate": 9.962354467912183e-06, + "loss": 0.6943, + "step": 2216 + }, + { + "epoch": 0.06794777491724899, + "grad_norm": 2.240005488155496, + "learning_rate": 9.962293654101207e-06, + "loss": 0.8399, + "step": 2217 + }, + { + "epoch": 0.0679784234399902, + "grad_norm": 2.111450627861137, + "learning_rate": 9.962232791395483e-06, + "loss": 0.8586, + "step": 2218 + }, + { + "epoch": 0.0680090719627314, + "grad_norm": 2.539758313959378, + "learning_rate": 9.962171879795607e-06, + "loss": 0.7742, + "step": 2219 + }, + { + "epoch": 0.0680397204854726, + "grad_norm": 1.7455274455994347, + "learning_rate": 9.962110919302184e-06, + "loss": 0.5173, + "step": 2220 + }, + { + "epoch": 0.06807036900821381, + "grad_norm": 2.1825217971222743, + "learning_rate": 9.962049909915812e-06, + "loss": 0.8258, + "step": 2221 + }, + { + "epoch": 0.068101017530955, + "grad_norm": 2.3728366985893397, + "learning_rate": 9.961988851637094e-06, + "loss": 0.7138, + "step": 2222 + }, + { + "epoch": 0.06813166605369621, + "grad_norm": 2.2525720355465637, + "learning_rate": 9.961927744466628e-06, + "loss": 0.8432, + "step": 2223 + }, + { + "epoch": 0.06816231457643741, + "grad_norm": 2.2614464379828823, + "learning_rate": 9.96186658840502e-06, + "loss": 0.8263, + "step": 2224 + }, + { + "epoch": 0.06819296309917862, + "grad_norm": 1.9095359935812997, + "learning_rate": 9.96180538345287e-06, + "loss": 0.7632, + "step": 2225 + }, + { + "epoch": 0.06822361162191982, + "grad_norm": 2.1415237124381172, + "learning_rate": 9.961744129610781e-06, + "loss": 0.7972, + "step": 2226 + }, + { + "epoch": 0.06825426014466103, + "grad_norm": 1.1390680027420395, + "learning_rate": 9.961682826879359e-06, + "loss": 0.5388, + "step": 2227 + }, + { + "epoch": 0.06828490866740224, + "grad_norm": 2.2793037231933493, + "learning_rate": 9.961621475259208e-06, + "loss": 0.733, + "step": 2228 + }, + { + "epoch": 0.06831555719014344, + "grad_norm": 2.236780976885697, + "learning_rate": 9.961560074750929e-06, + "loss": 0.7858, + "step": 2229 + }, + { + "epoch": 0.06834620571288463, + "grad_norm": 2.127100387764417, + "learning_rate": 9.96149862535513e-06, + "loss": 0.7114, + "step": 2230 + }, + { + "epoch": 0.06837685423562584, + "grad_norm": 2.235587093406675, + "learning_rate": 9.961437127072415e-06, + "loss": 0.7605, + "step": 2231 + }, + { + "epoch": 0.06840750275836704, + "grad_norm": 2.2999758163737702, + "learning_rate": 9.961375579903392e-06, + "loss": 0.814, + "step": 2232 + }, + { + "epoch": 0.06843815128110825, + "grad_norm": 2.2874812946920793, + "learning_rate": 9.961313983848665e-06, + "loss": 0.7675, + "step": 2233 + }, + { + "epoch": 0.06846879980384946, + "grad_norm": 1.9842297260534474, + "learning_rate": 9.96125233890884e-06, + "loss": 0.7789, + "step": 2234 + }, + { + "epoch": 0.06849944832659066, + "grad_norm": 2.200797772771613, + "learning_rate": 9.961190645084529e-06, + "loss": 0.7679, + "step": 2235 + }, + { + "epoch": 0.06853009684933187, + "grad_norm": 2.0686038551102444, + "learning_rate": 9.961128902376335e-06, + "loss": 0.7344, + "step": 2236 + }, + { + "epoch": 0.06856074537207306, + "grad_norm": 3.9381461279701524, + "learning_rate": 9.96106711078487e-06, + "loss": 0.8906, + "step": 2237 + }, + { + "epoch": 0.06859139389481426, + "grad_norm": 1.7575409156762989, + "learning_rate": 9.961005270310742e-06, + "loss": 0.6814, + "step": 2238 + }, + { + "epoch": 0.06862204241755547, + "grad_norm": 2.2981082103273804, + "learning_rate": 9.96094338095456e-06, + "loss": 0.7402, + 
"step": 2239 + }, + { + "epoch": 0.06865269094029668, + "grad_norm": 2.109675772869491, + "learning_rate": 9.960881442716931e-06, + "loss": 0.8654, + "step": 2240 + }, + { + "epoch": 0.06868333946303788, + "grad_norm": 2.089347543776716, + "learning_rate": 9.96081945559847e-06, + "loss": 0.7832, + "step": 2241 + }, + { + "epoch": 0.06871398798577909, + "grad_norm": 2.122791794612436, + "learning_rate": 9.960757419599785e-06, + "loss": 0.8012, + "step": 2242 + }, + { + "epoch": 0.0687446365085203, + "grad_norm": 2.3408780391481656, + "learning_rate": 9.960695334721489e-06, + "loss": 0.7147, + "step": 2243 + }, + { + "epoch": 0.0687752850312615, + "grad_norm": 2.1414144518233096, + "learning_rate": 9.960633200964192e-06, + "loss": 0.7923, + "step": 2244 + }, + { + "epoch": 0.06880593355400269, + "grad_norm": 2.025301655839275, + "learning_rate": 9.960571018328505e-06, + "loss": 0.803, + "step": 2245 + }, + { + "epoch": 0.0688365820767439, + "grad_norm": 2.012102201741091, + "learning_rate": 9.960508786815045e-06, + "loss": 0.7159, + "step": 2246 + }, + { + "epoch": 0.0688672305994851, + "grad_norm": 1.937147346864909, + "learning_rate": 9.96044650642442e-06, + "loss": 0.8162, + "step": 2247 + }, + { + "epoch": 0.06889787912222631, + "grad_norm": 1.9953158464492042, + "learning_rate": 9.96038417715725e-06, + "loss": 0.747, + "step": 2248 + }, + { + "epoch": 0.06892852764496751, + "grad_norm": 2.12837025166506, + "learning_rate": 9.960321799014142e-06, + "loss": 0.7859, + "step": 2249 + }, + { + "epoch": 0.06895917616770872, + "grad_norm": 2.159668545485188, + "learning_rate": 9.960259371995715e-06, + "loss": 0.7257, + "step": 2250 + }, + { + "epoch": 0.06898982469044992, + "grad_norm": 2.4127701179407035, + "learning_rate": 9.960196896102585e-06, + "loss": 0.7779, + "step": 2251 + }, + { + "epoch": 0.06902047321319113, + "grad_norm": 1.410171261029431, + "learning_rate": 9.960134371335364e-06, + "loss": 0.5516, + "step": 2252 + }, + { + "epoch": 0.06905112173593232, + "grad_norm": 2.301345209981521, + "learning_rate": 9.960071797694671e-06, + "loss": 0.7542, + "step": 2253 + }, + { + "epoch": 0.06908177025867353, + "grad_norm": 2.279861608136347, + "learning_rate": 9.960009175181122e-06, + "loss": 0.8157, + "step": 2254 + }, + { + "epoch": 0.06911241878141473, + "grad_norm": 1.9899700246862795, + "learning_rate": 9.959946503795333e-06, + "loss": 0.6722, + "step": 2255 + }, + { + "epoch": 0.06914306730415594, + "grad_norm": 2.1432719250662817, + "learning_rate": 9.959883783537922e-06, + "loss": 0.7443, + "step": 2256 + }, + { + "epoch": 0.06917371582689714, + "grad_norm": 1.220000324072857, + "learning_rate": 9.959821014409506e-06, + "loss": 0.5315, + "step": 2257 + }, + { + "epoch": 0.06920436434963835, + "grad_norm": 2.136524355626567, + "learning_rate": 9.959758196410705e-06, + "loss": 0.7464, + "step": 2258 + }, + { + "epoch": 0.06923501287237956, + "grad_norm": 1.926801000050369, + "learning_rate": 9.959695329542138e-06, + "loss": 0.8048, + "step": 2259 + }, + { + "epoch": 0.06926566139512076, + "grad_norm": 2.2339970240509603, + "learning_rate": 9.959632413804424e-06, + "loss": 0.8672, + "step": 2260 + }, + { + "epoch": 0.06929630991786195, + "grad_norm": 2.30043659206228, + "learning_rate": 9.959569449198183e-06, + "loss": 0.8633, + "step": 2261 + }, + { + "epoch": 0.06932695844060316, + "grad_norm": 2.0891099381468217, + "learning_rate": 9.959506435724036e-06, + "loss": 0.7645, + "step": 2262 + }, + { + "epoch": 0.06935760696334436, + "grad_norm": 2.1507611072298767, + "learning_rate": 
9.959443373382602e-06, + "loss": 0.7655, + "step": 2263 + }, + { + "epoch": 0.06938825548608557, + "grad_norm": 2.1889686325531983, + "learning_rate": 9.959380262174502e-06, + "loss": 0.808, + "step": 2264 + }, + { + "epoch": 0.06941890400882678, + "grad_norm": 2.129697684809936, + "learning_rate": 9.959317102100362e-06, + "loss": 0.7479, + "step": 2265 + }, + { + "epoch": 0.06944955253156798, + "grad_norm": 2.3585225259210847, + "learning_rate": 9.9592538931608e-06, + "loss": 0.7833, + "step": 2266 + }, + { + "epoch": 0.06948020105430919, + "grad_norm": 2.0061671112099395, + "learning_rate": 9.959190635356441e-06, + "loss": 0.8749, + "step": 2267 + }, + { + "epoch": 0.06951084957705038, + "grad_norm": 1.971412482041802, + "learning_rate": 9.959127328687908e-06, + "loss": 0.7458, + "step": 2268 + }, + { + "epoch": 0.06954149809979158, + "grad_norm": 2.315981718256524, + "learning_rate": 9.959063973155824e-06, + "loss": 0.8122, + "step": 2269 + }, + { + "epoch": 0.06957214662253279, + "grad_norm": 2.0945932986398614, + "learning_rate": 9.959000568760815e-06, + "loss": 0.7472, + "step": 2270 + }, + { + "epoch": 0.069602795145274, + "grad_norm": 2.016658760924455, + "learning_rate": 9.958937115503505e-06, + "loss": 0.8201, + "step": 2271 + }, + { + "epoch": 0.0696334436680152, + "grad_norm": 1.1349457608207056, + "learning_rate": 9.958873613384516e-06, + "loss": 0.5316, + "step": 2272 + }, + { + "epoch": 0.06966409219075641, + "grad_norm": 1.9411666908337206, + "learning_rate": 9.958810062404479e-06, + "loss": 0.7576, + "step": 2273 + }, + { + "epoch": 0.06969474071349761, + "grad_norm": 1.7566906057567735, + "learning_rate": 9.958746462564017e-06, + "loss": 0.7375, + "step": 2274 + }, + { + "epoch": 0.06972538923623882, + "grad_norm": 1.980634315404257, + "learning_rate": 9.958682813863758e-06, + "loss": 0.7497, + "step": 2275 + }, + { + "epoch": 0.06975603775898001, + "grad_norm": 2.0957189401415954, + "learning_rate": 9.958619116304327e-06, + "loss": 0.8038, + "step": 2276 + }, + { + "epoch": 0.06978668628172122, + "grad_norm": 3.6028781662981313, + "learning_rate": 9.958555369886354e-06, + "loss": 0.7109, + "step": 2277 + }, + { + "epoch": 0.06981733480446242, + "grad_norm": 2.3183718327495155, + "learning_rate": 9.958491574610467e-06, + "loss": 0.8936, + "step": 2278 + }, + { + "epoch": 0.06984798332720363, + "grad_norm": 3.558992980172275, + "learning_rate": 9.958427730477292e-06, + "loss": 0.704, + "step": 2279 + }, + { + "epoch": 0.06987863184994483, + "grad_norm": 1.773075922891207, + "learning_rate": 9.958363837487462e-06, + "loss": 0.73, + "step": 2280 + }, + { + "epoch": 0.06990928037268604, + "grad_norm": 1.1251303050554349, + "learning_rate": 9.958299895641603e-06, + "loss": 0.5277, + "step": 2281 + }, + { + "epoch": 0.06993992889542724, + "grad_norm": 2.0084229602634296, + "learning_rate": 9.958235904940346e-06, + "loss": 0.8704, + "step": 2282 + }, + { + "epoch": 0.06997057741816845, + "grad_norm": 0.9305158334400182, + "learning_rate": 9.958171865384322e-06, + "loss": 0.53, + "step": 2283 + }, + { + "epoch": 0.07000122594090964, + "grad_norm": 2.116327639015216, + "learning_rate": 9.958107776974164e-06, + "loss": 0.8644, + "step": 2284 + }, + { + "epoch": 0.07003187446365085, + "grad_norm": 1.0993500859313505, + "learning_rate": 9.958043639710501e-06, + "loss": 0.5251, + "step": 2285 + }, + { + "epoch": 0.07006252298639205, + "grad_norm": 1.9445683281201769, + "learning_rate": 9.957979453593964e-06, + "loss": 0.6999, + "step": 2286 + }, + { + "epoch": 0.07009317150913326, + 
"grad_norm": 2.2734606426813126, + "learning_rate": 9.957915218625188e-06, + "loss": 0.9442, + "step": 2287 + }, + { + "epoch": 0.07012382003187446, + "grad_norm": 2.0900671089800915, + "learning_rate": 9.957850934804805e-06, + "loss": 0.7945, + "step": 2288 + }, + { + "epoch": 0.07015446855461567, + "grad_norm": 2.0836648811688208, + "learning_rate": 9.957786602133448e-06, + "loss": 0.6713, + "step": 2289 + }, + { + "epoch": 0.07018511707735688, + "grad_norm": 1.88867304493332, + "learning_rate": 9.95772222061175e-06, + "loss": 0.7712, + "step": 2290 + }, + { + "epoch": 0.07021576560009808, + "grad_norm": 1.2502033956281147, + "learning_rate": 9.957657790240347e-06, + "loss": 0.5339, + "step": 2291 + }, + { + "epoch": 0.07024641412283927, + "grad_norm": 2.1242277129954887, + "learning_rate": 9.957593311019875e-06, + "loss": 0.7955, + "step": 2292 + }, + { + "epoch": 0.07027706264558048, + "grad_norm": 1.8733258140929518, + "learning_rate": 9.957528782950965e-06, + "loss": 0.6389, + "step": 2293 + }, + { + "epoch": 0.07030771116832168, + "grad_norm": 2.2053643787839077, + "learning_rate": 9.957464206034258e-06, + "loss": 0.8507, + "step": 2294 + }, + { + "epoch": 0.07033835969106289, + "grad_norm": 1.9756747402991037, + "learning_rate": 9.957399580270386e-06, + "loss": 0.7365, + "step": 2295 + }, + { + "epoch": 0.0703690082138041, + "grad_norm": 2.105506429757199, + "learning_rate": 9.957334905659987e-06, + "loss": 0.8492, + "step": 2296 + }, + { + "epoch": 0.0703996567365453, + "grad_norm": 2.161648797505632, + "learning_rate": 9.9572701822037e-06, + "loss": 0.7824, + "step": 2297 + }, + { + "epoch": 0.07043030525928651, + "grad_norm": 2.1997741460810136, + "learning_rate": 9.957205409902163e-06, + "loss": 0.8099, + "step": 2298 + }, + { + "epoch": 0.07046095378202771, + "grad_norm": 1.9041341739660858, + "learning_rate": 9.95714058875601e-06, + "loss": 0.7398, + "step": 2299 + }, + { + "epoch": 0.0704916023047689, + "grad_norm": 1.1579663901710928, + "learning_rate": 9.957075718765882e-06, + "loss": 0.5374, + "step": 2300 + }, + { + "epoch": 0.07052225082751011, + "grad_norm": 2.122681627512686, + "learning_rate": 9.95701079993242e-06, + "loss": 0.8533, + "step": 2301 + }, + { + "epoch": 0.07055289935025132, + "grad_norm": 2.247602678547781, + "learning_rate": 9.956945832256264e-06, + "loss": 0.7632, + "step": 2302 + }, + { + "epoch": 0.07058354787299252, + "grad_norm": 1.0040932122104247, + "learning_rate": 9.956880815738051e-06, + "loss": 0.5255, + "step": 2303 + }, + { + "epoch": 0.07061419639573373, + "grad_norm": 2.0486265132336934, + "learning_rate": 9.956815750378423e-06, + "loss": 0.7718, + "step": 2304 + }, + { + "epoch": 0.07064484491847493, + "grad_norm": 2.212818869553762, + "learning_rate": 9.956750636178021e-06, + "loss": 0.8079, + "step": 2305 + }, + { + "epoch": 0.07067549344121614, + "grad_norm": 2.2056746599809545, + "learning_rate": 9.956685473137486e-06, + "loss": 0.8227, + "step": 2306 + }, + { + "epoch": 0.07070614196395733, + "grad_norm": 2.184958372037437, + "learning_rate": 9.956620261257462e-06, + "loss": 0.8093, + "step": 2307 + }, + { + "epoch": 0.07073679048669854, + "grad_norm": 1.0251598790100613, + "learning_rate": 9.95655500053859e-06, + "loss": 0.5329, + "step": 2308 + }, + { + "epoch": 0.07076743900943974, + "grad_norm": 2.0762823403000987, + "learning_rate": 9.956489690981513e-06, + "loss": 0.8089, + "step": 2309 + }, + { + "epoch": 0.07079808753218095, + "grad_norm": 2.0237324744245844, + "learning_rate": 9.956424332586876e-06, + "loss": 0.8065, + 
"step": 2310 + }, + { + "epoch": 0.07082873605492215, + "grad_norm": 2.0174862889472, + "learning_rate": 9.956358925355321e-06, + "loss": 0.7073, + "step": 2311 + }, + { + "epoch": 0.07085938457766336, + "grad_norm": 2.1165186825939415, + "learning_rate": 9.956293469287494e-06, + "loss": 0.8557, + "step": 2312 + }, + { + "epoch": 0.07089003310040456, + "grad_norm": 0.9239901399183491, + "learning_rate": 9.956227964384038e-06, + "loss": 0.5212, + "step": 2313 + }, + { + "epoch": 0.07092068162314577, + "grad_norm": 0.9844516899825204, + "learning_rate": 9.9561624106456e-06, + "loss": 0.5372, + "step": 2314 + }, + { + "epoch": 0.07095133014588696, + "grad_norm": 2.43111042007713, + "learning_rate": 9.956096808072827e-06, + "loss": 0.7436, + "step": 2315 + }, + { + "epoch": 0.07098197866862817, + "grad_norm": 2.8007240673552434, + "learning_rate": 9.956031156666364e-06, + "loss": 0.7616, + "step": 2316 + }, + { + "epoch": 0.07101262719136937, + "grad_norm": 0.8621651657339893, + "learning_rate": 9.955965456426856e-06, + "loss": 0.5208, + "step": 2317 + }, + { + "epoch": 0.07104327571411058, + "grad_norm": 2.0359959395934695, + "learning_rate": 9.955899707354954e-06, + "loss": 0.9252, + "step": 2318 + }, + { + "epoch": 0.07107392423685178, + "grad_norm": 2.323913280957989, + "learning_rate": 9.955833909451304e-06, + "loss": 0.9017, + "step": 2319 + }, + { + "epoch": 0.07110457275959299, + "grad_norm": 2.081039609552139, + "learning_rate": 9.955768062716553e-06, + "loss": 0.8027, + "step": 2320 + }, + { + "epoch": 0.0711352212823342, + "grad_norm": 2.198253971449805, + "learning_rate": 9.955702167151355e-06, + "loss": 0.7978, + "step": 2321 + }, + { + "epoch": 0.0711658698050754, + "grad_norm": 2.292421528859222, + "learning_rate": 9.955636222756353e-06, + "loss": 0.8421, + "step": 2322 + }, + { + "epoch": 0.0711965183278166, + "grad_norm": 1.9150821868603327, + "learning_rate": 9.955570229532198e-06, + "loss": 0.7252, + "step": 2323 + }, + { + "epoch": 0.0712271668505578, + "grad_norm": 1.9117849292917966, + "learning_rate": 9.955504187479542e-06, + "loss": 0.7022, + "step": 2324 + }, + { + "epoch": 0.071257815373299, + "grad_norm": 2.0564686996029535, + "learning_rate": 9.955438096599038e-06, + "loss": 0.7961, + "step": 2325 + }, + { + "epoch": 0.07128846389604021, + "grad_norm": 2.1537348271735004, + "learning_rate": 9.955371956891334e-06, + "loss": 0.7216, + "step": 2326 + }, + { + "epoch": 0.07131911241878142, + "grad_norm": 2.2418362883656844, + "learning_rate": 9.95530576835708e-06, + "loss": 0.818, + "step": 2327 + }, + { + "epoch": 0.07134976094152262, + "grad_norm": 2.2426250538090624, + "learning_rate": 9.955239530996932e-06, + "loss": 0.7842, + "step": 2328 + }, + { + "epoch": 0.07138040946426383, + "grad_norm": 2.043711310497986, + "learning_rate": 9.95517324481154e-06, + "loss": 0.8246, + "step": 2329 + }, + { + "epoch": 0.07141105798700503, + "grad_norm": 1.8715842089675616, + "learning_rate": 9.95510690980156e-06, + "loss": 0.6835, + "step": 2330 + }, + { + "epoch": 0.07144170650974623, + "grad_norm": 1.2530717107207856, + "learning_rate": 9.955040525967643e-06, + "loss": 0.5113, + "step": 2331 + }, + { + "epoch": 0.07147235503248743, + "grad_norm": 1.997130846871725, + "learning_rate": 9.954974093310443e-06, + "loss": 0.7639, + "step": 2332 + }, + { + "epoch": 0.07150300355522864, + "grad_norm": 2.1259047516889007, + "learning_rate": 9.954907611830615e-06, + "loss": 0.8995, + "step": 2333 + }, + { + "epoch": 0.07153365207796984, + "grad_norm": 2.256633582298184, + 
"learning_rate": 9.954841081528817e-06, + "loss": 0.8027, + "step": 2334 + }, + { + "epoch": 0.07156430060071105, + "grad_norm": 2.143700428365341, + "learning_rate": 9.954774502405699e-06, + "loss": 0.8998, + "step": 2335 + }, + { + "epoch": 0.07159494912345225, + "grad_norm": 2.2708748976899424, + "learning_rate": 9.954707874461921e-06, + "loss": 0.7861, + "step": 2336 + }, + { + "epoch": 0.07162559764619346, + "grad_norm": 2.107543554087462, + "learning_rate": 9.95464119769814e-06, + "loss": 0.7424, + "step": 2337 + }, + { + "epoch": 0.07165624616893465, + "grad_norm": 2.0202001603035664, + "learning_rate": 9.954574472115011e-06, + "loss": 0.7813, + "step": 2338 + }, + { + "epoch": 0.07168689469167586, + "grad_norm": 1.994890850270316, + "learning_rate": 9.954507697713192e-06, + "loss": 0.7989, + "step": 2339 + }, + { + "epoch": 0.07171754321441706, + "grad_norm": 1.9942301190882383, + "learning_rate": 9.95444087449334e-06, + "loss": 0.7688, + "step": 2340 + }, + { + "epoch": 0.07174819173715827, + "grad_norm": 1.840454829414237, + "learning_rate": 9.954374002456116e-06, + "loss": 0.7598, + "step": 2341 + }, + { + "epoch": 0.07177884025989947, + "grad_norm": 2.0859220888487635, + "learning_rate": 9.954307081602176e-06, + "loss": 0.711, + "step": 2342 + }, + { + "epoch": 0.07180948878264068, + "grad_norm": 2.234223985251637, + "learning_rate": 9.954240111932182e-06, + "loss": 0.8012, + "step": 2343 + }, + { + "epoch": 0.07184013730538188, + "grad_norm": 2.163057883919565, + "learning_rate": 9.954173093446792e-06, + "loss": 0.8664, + "step": 2344 + }, + { + "epoch": 0.07187078582812309, + "grad_norm": 1.9610925495235778, + "learning_rate": 9.954106026146667e-06, + "loss": 0.6918, + "step": 2345 + }, + { + "epoch": 0.07190143435086428, + "grad_norm": 2.0506703576129537, + "learning_rate": 9.954038910032468e-06, + "loss": 0.7797, + "step": 2346 + }, + { + "epoch": 0.07193208287360549, + "grad_norm": 2.3287840826210537, + "learning_rate": 9.953971745104855e-06, + "loss": 0.816, + "step": 2347 + }, + { + "epoch": 0.0719627313963467, + "grad_norm": 2.236976134186717, + "learning_rate": 9.95390453136449e-06, + "loss": 0.8008, + "step": 2348 + }, + { + "epoch": 0.0719933799190879, + "grad_norm": 2.074841759483753, + "learning_rate": 9.953837268812039e-06, + "loss": 0.642, + "step": 2349 + }, + { + "epoch": 0.0720240284418291, + "grad_norm": 1.9174105289703707, + "learning_rate": 9.95376995744816e-06, + "loss": 0.7384, + "step": 2350 + }, + { + "epoch": 0.07205467696457031, + "grad_norm": 1.9840227093516738, + "learning_rate": 9.95370259727352e-06, + "loss": 0.8143, + "step": 2351 + }, + { + "epoch": 0.07208532548731152, + "grad_norm": 2.1195520179640033, + "learning_rate": 9.95363518828878e-06, + "loss": 0.7685, + "step": 2352 + }, + { + "epoch": 0.07211597401005272, + "grad_norm": 1.3243825497937183, + "learning_rate": 9.953567730494604e-06, + "loss": 0.5333, + "step": 2353 + }, + { + "epoch": 0.07214662253279391, + "grad_norm": 1.802680791522805, + "learning_rate": 9.953500223891657e-06, + "loss": 0.6822, + "step": 2354 + }, + { + "epoch": 0.07217727105553512, + "grad_norm": 0.9865670520705674, + "learning_rate": 9.953432668480607e-06, + "loss": 0.508, + "step": 2355 + }, + { + "epoch": 0.07220791957827633, + "grad_norm": 2.0364961056053508, + "learning_rate": 9.953365064262117e-06, + "loss": 0.8026, + "step": 2356 + }, + { + "epoch": 0.07223856810101753, + "grad_norm": 2.304593169297076, + "learning_rate": 9.953297411236853e-06, + "loss": 0.7622, + "step": 2357 + }, + { + "epoch": 
0.07226921662375874, + "grad_norm": 2.113923099326523, + "learning_rate": 9.953229709405483e-06, + "loss": 0.7865, + "step": 2358 + }, + { + "epoch": 0.07229986514649994, + "grad_norm": 1.9040093398457312, + "learning_rate": 9.953161958768673e-06, + "loss": 0.6946, + "step": 2359 + }, + { + "epoch": 0.07233051366924115, + "grad_norm": 1.5844849276883763, + "learning_rate": 9.95309415932709e-06, + "loss": 0.5493, + "step": 2360 + }, + { + "epoch": 0.07236116219198235, + "grad_norm": 1.9246178676523136, + "learning_rate": 9.953026311081404e-06, + "loss": 0.746, + "step": 2361 + }, + { + "epoch": 0.07239181071472355, + "grad_norm": 2.1129987734135804, + "learning_rate": 9.952958414032283e-06, + "loss": 0.7985, + "step": 2362 + }, + { + "epoch": 0.07242245923746475, + "grad_norm": 1.828587294656579, + "learning_rate": 9.952890468180396e-06, + "loss": 0.7873, + "step": 2363 + }, + { + "epoch": 0.07245310776020596, + "grad_norm": 2.122151298885558, + "learning_rate": 9.95282247352641e-06, + "loss": 0.8465, + "step": 2364 + }, + { + "epoch": 0.07248375628294716, + "grad_norm": 1.8422994071413745, + "learning_rate": 9.952754430070997e-06, + "loss": 0.6145, + "step": 2365 + }, + { + "epoch": 0.07251440480568837, + "grad_norm": 1.816051859453394, + "learning_rate": 9.952686337814827e-06, + "loss": 0.8394, + "step": 2366 + }, + { + "epoch": 0.07254505332842957, + "grad_norm": 1.9276603122909988, + "learning_rate": 9.952618196758574e-06, + "loss": 0.7234, + "step": 2367 + }, + { + "epoch": 0.07257570185117078, + "grad_norm": 2.2896530056026108, + "learning_rate": 9.952550006902905e-06, + "loss": 0.6613, + "step": 2368 + }, + { + "epoch": 0.07260635037391197, + "grad_norm": 2.085789402417831, + "learning_rate": 9.952481768248495e-06, + "loss": 0.8405, + "step": 2369 + }, + { + "epoch": 0.07263699889665318, + "grad_norm": 1.851515271266915, + "learning_rate": 9.952413480796013e-06, + "loss": 0.7459, + "step": 2370 + }, + { + "epoch": 0.07266764741939438, + "grad_norm": 2.0629016880702338, + "learning_rate": 9.952345144546135e-06, + "loss": 0.7892, + "step": 2371 + }, + { + "epoch": 0.07269829594213559, + "grad_norm": 2.3085819001223102, + "learning_rate": 9.952276759499531e-06, + "loss": 0.8055, + "step": 2372 + }, + { + "epoch": 0.0727289444648768, + "grad_norm": 2.15327267602673, + "learning_rate": 9.95220832565688e-06, + "loss": 0.7889, + "step": 2373 + }, + { + "epoch": 0.072759592987618, + "grad_norm": 1.1580089364260648, + "learning_rate": 9.95213984301885e-06, + "loss": 0.5266, + "step": 2374 + }, + { + "epoch": 0.0727902415103592, + "grad_norm": 1.0968870104213087, + "learning_rate": 9.952071311586123e-06, + "loss": 0.5288, + "step": 2375 + }, + { + "epoch": 0.07282089003310041, + "grad_norm": 2.0530390886668335, + "learning_rate": 9.952002731359368e-06, + "loss": 0.7287, + "step": 2376 + }, + { + "epoch": 0.0728515385558416, + "grad_norm": 2.0433877804860083, + "learning_rate": 9.951934102339263e-06, + "loss": 0.7396, + "step": 2377 + }, + { + "epoch": 0.07288218707858281, + "grad_norm": 2.6663417137799206, + "learning_rate": 9.951865424526486e-06, + "loss": 0.8207, + "step": 2378 + }, + { + "epoch": 0.07291283560132401, + "grad_norm": 2.271280053445808, + "learning_rate": 9.95179669792171e-06, + "loss": 0.816, + "step": 2379 + }, + { + "epoch": 0.07294348412406522, + "grad_norm": 1.107793175664813, + "learning_rate": 9.951727922525615e-06, + "loss": 0.536, + "step": 2380 + }, + { + "epoch": 0.07297413264680642, + "grad_norm": 2.0299498079595604, + "learning_rate": 9.951659098338878e-06, + 
"loss": 0.7367, + "step": 2381 + }, + { + "epoch": 0.07300478116954763, + "grad_norm": 2.1917539521624594, + "learning_rate": 9.951590225362176e-06, + "loss": 0.8437, + "step": 2382 + }, + { + "epoch": 0.07303542969228884, + "grad_norm": 2.318702848845766, + "learning_rate": 9.95152130359619e-06, + "loss": 0.8504, + "step": 2383 + }, + { + "epoch": 0.07306607821503004, + "grad_norm": 2.393834930581342, + "learning_rate": 9.951452333041596e-06, + "loss": 0.7144, + "step": 2384 + }, + { + "epoch": 0.07309672673777123, + "grad_norm": 0.9865067315116456, + "learning_rate": 9.951383313699077e-06, + "loss": 0.5258, + "step": 2385 + }, + { + "epoch": 0.07312737526051244, + "grad_norm": 2.339742463952982, + "learning_rate": 9.951314245569311e-06, + "loss": 0.8598, + "step": 2386 + }, + { + "epoch": 0.07315802378325365, + "grad_norm": 2.0032337778192226, + "learning_rate": 9.951245128652978e-06, + "loss": 0.8121, + "step": 2387 + }, + { + "epoch": 0.07318867230599485, + "grad_norm": 2.1919879385112226, + "learning_rate": 9.95117596295076e-06, + "loss": 0.7158, + "step": 2388 + }, + { + "epoch": 0.07321932082873606, + "grad_norm": 2.0058179771200466, + "learning_rate": 9.951106748463339e-06, + "loss": 0.7654, + "step": 2389 + }, + { + "epoch": 0.07324996935147726, + "grad_norm": 1.9808913238388546, + "learning_rate": 9.951037485191395e-06, + "loss": 0.8043, + "step": 2390 + }, + { + "epoch": 0.07328061787421847, + "grad_norm": 0.9647237200640969, + "learning_rate": 9.950968173135614e-06, + "loss": 0.538, + "step": 2391 + }, + { + "epoch": 0.07331126639695967, + "grad_norm": 1.9200481643411278, + "learning_rate": 9.950898812296676e-06, + "loss": 0.7328, + "step": 2392 + }, + { + "epoch": 0.07334191491970087, + "grad_norm": 2.2581302388579125, + "learning_rate": 9.950829402675264e-06, + "loss": 0.8173, + "step": 2393 + }, + { + "epoch": 0.07337256344244207, + "grad_norm": 0.8660349141010847, + "learning_rate": 9.950759944272066e-06, + "loss": 0.529, + "step": 2394 + }, + { + "epoch": 0.07340321196518328, + "grad_norm": 1.7759415147027164, + "learning_rate": 9.950690437087763e-06, + "loss": 0.7549, + "step": 2395 + }, + { + "epoch": 0.07343386048792448, + "grad_norm": 2.1051595180598324, + "learning_rate": 9.950620881123039e-06, + "loss": 0.8733, + "step": 2396 + }, + { + "epoch": 0.07346450901066569, + "grad_norm": 2.0622141365735462, + "learning_rate": 9.950551276378579e-06, + "loss": 0.7172, + "step": 2397 + }, + { + "epoch": 0.0734951575334069, + "grad_norm": 2.2010468730194876, + "learning_rate": 9.950481622855073e-06, + "loss": 0.8318, + "step": 2398 + }, + { + "epoch": 0.0735258060561481, + "grad_norm": 2.3005499963861973, + "learning_rate": 9.950411920553205e-06, + "loss": 0.7938, + "step": 2399 + }, + { + "epoch": 0.07355645457888929, + "grad_norm": 1.9752658379271069, + "learning_rate": 9.950342169473661e-06, + "loss": 0.8062, + "step": 2400 + }, + { + "epoch": 0.0735871031016305, + "grad_norm": 2.1417987413544104, + "learning_rate": 9.950272369617132e-06, + "loss": 0.6836, + "step": 2401 + }, + { + "epoch": 0.0736177516243717, + "grad_norm": 0.9625795563493352, + "learning_rate": 9.9502025209843e-06, + "loss": 0.5209, + "step": 2402 + }, + { + "epoch": 0.07364840014711291, + "grad_norm": 1.9319837574834204, + "learning_rate": 9.950132623575855e-06, + "loss": 0.8127, + "step": 2403 + }, + { + "epoch": 0.07367904866985411, + "grad_norm": 2.28019346269713, + "learning_rate": 9.950062677392488e-06, + "loss": 0.7701, + "step": 2404 + }, + { + "epoch": 0.07370969719259532, + "grad_norm": 
1.8434735363991641, + "learning_rate": 9.949992682434887e-06, + "loss": 0.7403, + "step": 2405 + }, + { + "epoch": 0.07374034571533652, + "grad_norm": 0.9580831561003115, + "learning_rate": 9.949922638703742e-06, + "loss": 0.5125, + "step": 2406 + }, + { + "epoch": 0.07377099423807773, + "grad_norm": 2.1313367676844415, + "learning_rate": 9.949852546199741e-06, + "loss": 0.877, + "step": 2407 + }, + { + "epoch": 0.07380164276081892, + "grad_norm": 2.189804339716585, + "learning_rate": 9.949782404923579e-06, + "loss": 0.7919, + "step": 2408 + }, + { + "epoch": 0.07383229128356013, + "grad_norm": 2.324731172151757, + "learning_rate": 9.949712214875942e-06, + "loss": 0.833, + "step": 2409 + }, + { + "epoch": 0.07386293980630133, + "grad_norm": 2.0876640173487373, + "learning_rate": 9.949641976057525e-06, + "loss": 0.7685, + "step": 2410 + }, + { + "epoch": 0.07389358832904254, + "grad_norm": 2.138972642832821, + "learning_rate": 9.94957168846902e-06, + "loss": 0.9369, + "step": 2411 + }, + { + "epoch": 0.07392423685178375, + "grad_norm": 1.8524999880780226, + "learning_rate": 9.949501352111118e-06, + "loss": 0.8028, + "step": 2412 + }, + { + "epoch": 0.07395488537452495, + "grad_norm": 1.7814942450531153, + "learning_rate": 9.949430966984512e-06, + "loss": 0.7489, + "step": 2413 + }, + { + "epoch": 0.07398553389726616, + "grad_norm": 2.17751065747944, + "learning_rate": 9.949360533089898e-06, + "loss": 0.8953, + "step": 2414 + }, + { + "epoch": 0.07401618242000736, + "grad_norm": 2.020054124053781, + "learning_rate": 9.949290050427967e-06, + "loss": 0.8045, + "step": 2415 + }, + { + "epoch": 0.07404683094274855, + "grad_norm": 1.4071217303538606, + "learning_rate": 9.949219518999416e-06, + "loss": 0.5469, + "step": 2416 + }, + { + "epoch": 0.07407747946548976, + "grad_norm": 2.1932441271972807, + "learning_rate": 9.949148938804938e-06, + "loss": 0.7871, + "step": 2417 + }, + { + "epoch": 0.07410812798823097, + "grad_norm": 2.0369743835433725, + "learning_rate": 9.949078309845229e-06, + "loss": 0.8051, + "step": 2418 + }, + { + "epoch": 0.07413877651097217, + "grad_norm": 1.952965174437804, + "learning_rate": 9.949007632120986e-06, + "loss": 0.7259, + "step": 2419 + }, + { + "epoch": 0.07416942503371338, + "grad_norm": 2.124558225728616, + "learning_rate": 9.948936905632905e-06, + "loss": 0.7998, + "step": 2420 + }, + { + "epoch": 0.07420007355645458, + "grad_norm": 2.0292873048216316, + "learning_rate": 9.94886613038168e-06, + "loss": 0.7561, + "step": 2421 + }, + { + "epoch": 0.07423072207919579, + "grad_norm": 1.9087602798638492, + "learning_rate": 9.948795306368012e-06, + "loss": 0.6522, + "step": 2422 + }, + { + "epoch": 0.074261370601937, + "grad_norm": 2.2154691398617343, + "learning_rate": 9.948724433592599e-06, + "loss": 0.7118, + "step": 2423 + }, + { + "epoch": 0.07429201912467819, + "grad_norm": 1.9397081059552408, + "learning_rate": 9.948653512056136e-06, + "loss": 0.7612, + "step": 2424 + }, + { + "epoch": 0.07432266764741939, + "grad_norm": 1.8464531859905036, + "learning_rate": 9.948582541759324e-06, + "loss": 0.7493, + "step": 2425 + }, + { + "epoch": 0.0743533161701606, + "grad_norm": 1.5412597989509653, + "learning_rate": 9.948511522702864e-06, + "loss": 0.5435, + "step": 2426 + }, + { + "epoch": 0.0743839646929018, + "grad_norm": 2.4048770886051107, + "learning_rate": 9.94844045488745e-06, + "loss": 0.9019, + "step": 2427 + }, + { + "epoch": 0.07441461321564301, + "grad_norm": 1.8891503120925406, + "learning_rate": 9.94836933831379e-06, + "loss": 0.7482, + "step": 2428 + }, 
+ { + "epoch": 0.07444526173838421, + "grad_norm": 2.43126195267096, + "learning_rate": 9.94829817298258e-06, + "loss": 0.8009, + "step": 2429 + }, + { + "epoch": 0.07447591026112542, + "grad_norm": 2.219406957267496, + "learning_rate": 9.94822695889452e-06, + "loss": 0.9824, + "step": 2430 + }, + { + "epoch": 0.07450655878386661, + "grad_norm": 2.124437207032232, + "learning_rate": 9.948155696050316e-06, + "loss": 0.8258, + "step": 2431 + }, + { + "epoch": 0.07453720730660782, + "grad_norm": 2.386570933224071, + "learning_rate": 9.948084384450667e-06, + "loss": 0.8945, + "step": 2432 + }, + { + "epoch": 0.07456785582934902, + "grad_norm": 2.0733285369404704, + "learning_rate": 9.948013024096277e-06, + "loss": 0.879, + "step": 2433 + }, + { + "epoch": 0.07459850435209023, + "grad_norm": 1.9827299124982838, + "learning_rate": 9.947941614987848e-06, + "loss": 0.8229, + "step": 2434 + }, + { + "epoch": 0.07462915287483143, + "grad_norm": 2.3065877021447823, + "learning_rate": 9.947870157126085e-06, + "loss": 0.7648, + "step": 2435 + }, + { + "epoch": 0.07465980139757264, + "grad_norm": 2.2104104461815464, + "learning_rate": 9.94779865051169e-06, + "loss": 0.7612, + "step": 2436 + }, + { + "epoch": 0.07469044992031385, + "grad_norm": 1.2447217181226216, + "learning_rate": 9.947727095145371e-06, + "loss": 0.524, + "step": 2437 + }, + { + "epoch": 0.07472109844305505, + "grad_norm": 1.9128371833040507, + "learning_rate": 9.94765549102783e-06, + "loss": 0.7745, + "step": 2438 + }, + { + "epoch": 0.07475174696579624, + "grad_norm": 2.0951163971424918, + "learning_rate": 9.947583838159774e-06, + "loss": 0.7969, + "step": 2439 + }, + { + "epoch": 0.07478239548853745, + "grad_norm": 1.8937482078979568, + "learning_rate": 9.947512136541906e-06, + "loss": 0.7418, + "step": 2440 + }, + { + "epoch": 0.07481304401127865, + "grad_norm": 1.8025607079668318, + "learning_rate": 9.947440386174938e-06, + "loss": 0.7232, + "step": 2441 + }, + { + "epoch": 0.07484369253401986, + "grad_norm": 2.113830910552414, + "learning_rate": 9.947368587059574e-06, + "loss": 0.7817, + "step": 2442 + }, + { + "epoch": 0.07487434105676107, + "grad_norm": 2.597309306297494, + "learning_rate": 9.94729673919652e-06, + "loss": 0.9525, + "step": 2443 + }, + { + "epoch": 0.07490498957950227, + "grad_norm": 1.8238973049026954, + "learning_rate": 9.947224842586484e-06, + "loss": 0.688, + "step": 2444 + }, + { + "epoch": 0.07493563810224348, + "grad_norm": 1.9293514437442676, + "learning_rate": 9.947152897230179e-06, + "loss": 0.7567, + "step": 2445 + }, + { + "epoch": 0.07496628662498468, + "grad_norm": 2.0268955366794863, + "learning_rate": 9.94708090312831e-06, + "loss": 0.7881, + "step": 2446 + }, + { + "epoch": 0.07499693514772587, + "grad_norm": 2.1514440082339643, + "learning_rate": 9.947008860281586e-06, + "loss": 0.662, + "step": 2447 + }, + { + "epoch": 0.07502758367046708, + "grad_norm": 2.0220424706853253, + "learning_rate": 9.946936768690719e-06, + "loss": 0.6793, + "step": 2448 + }, + { + "epoch": 0.07505823219320829, + "grad_norm": 2.418173844251185, + "learning_rate": 9.946864628356418e-06, + "loss": 0.7132, + "step": 2449 + }, + { + "epoch": 0.07508888071594949, + "grad_norm": 2.166402727625687, + "learning_rate": 9.946792439279393e-06, + "loss": 0.7231, + "step": 2450 + }, + { + "epoch": 0.0751195292386907, + "grad_norm": 1.9391688374504192, + "learning_rate": 9.946720201460358e-06, + "loss": 0.7059, + "step": 2451 + }, + { + "epoch": 0.0751501777614319, + "grad_norm": 1.1213477336362034, + "learning_rate": 
9.946647914900023e-06, + "loss": 0.5088, + "step": 2452 + }, + { + "epoch": 0.07518082628417311, + "grad_norm": 2.10429622072135, + "learning_rate": 9.946575579599098e-06, + "loss": 0.8425, + "step": 2453 + }, + { + "epoch": 0.07521147480691431, + "grad_norm": 0.945466667897788, + "learning_rate": 9.946503195558302e-06, + "loss": 0.5077, + "step": 2454 + }, + { + "epoch": 0.0752421233296555, + "grad_norm": 2.0980044741099295, + "learning_rate": 9.94643076277834e-06, + "loss": 0.8593, + "step": 2455 + }, + { + "epoch": 0.07527277185239671, + "grad_norm": 2.0435121029779935, + "learning_rate": 9.946358281259933e-06, + "loss": 0.7731, + "step": 2456 + }, + { + "epoch": 0.07530342037513792, + "grad_norm": 2.024204347171377, + "learning_rate": 9.946285751003793e-06, + "loss": 0.7812, + "step": 2457 + }, + { + "epoch": 0.07533406889787912, + "grad_norm": 2.24886884277932, + "learning_rate": 9.946213172010633e-06, + "loss": 0.8007, + "step": 2458 + }, + { + "epoch": 0.07536471742062033, + "grad_norm": 2.3714306793549365, + "learning_rate": 9.946140544281168e-06, + "loss": 0.778, + "step": 2459 + }, + { + "epoch": 0.07539536594336153, + "grad_norm": 1.2474521211209728, + "learning_rate": 9.946067867816116e-06, + "loss": 0.5462, + "step": 2460 + }, + { + "epoch": 0.07542601446610274, + "grad_norm": 1.9709703146936286, + "learning_rate": 9.945995142616192e-06, + "loss": 0.788, + "step": 2461 + }, + { + "epoch": 0.07545666298884393, + "grad_norm": 0.9772789020232173, + "learning_rate": 9.945922368682111e-06, + "loss": 0.5082, + "step": 2462 + }, + { + "epoch": 0.07548731151158514, + "grad_norm": 1.8840522159465025, + "learning_rate": 9.945849546014591e-06, + "loss": 0.7057, + "step": 2463 + }, + { + "epoch": 0.07551796003432634, + "grad_norm": 2.0140654513140714, + "learning_rate": 9.945776674614353e-06, + "loss": 0.7592, + "step": 2464 + }, + { + "epoch": 0.07554860855706755, + "grad_norm": 0.9705242509695273, + "learning_rate": 9.94570375448211e-06, + "loss": 0.509, + "step": 2465 + }, + { + "epoch": 0.07557925707980875, + "grad_norm": 2.12649521164474, + "learning_rate": 9.945630785618583e-06, + "loss": 0.8113, + "step": 2466 + }, + { + "epoch": 0.07560990560254996, + "grad_norm": 2.1882550944957817, + "learning_rate": 9.94555776802449e-06, + "loss": 0.8094, + "step": 2467 + }, + { + "epoch": 0.07564055412529117, + "grad_norm": 1.1384629623669473, + "learning_rate": 9.94548470170055e-06, + "loss": 0.5453, + "step": 2468 + }, + { + "epoch": 0.07567120264803237, + "grad_norm": 2.1038741464515547, + "learning_rate": 9.945411586647486e-06, + "loss": 0.759, + "step": 2469 + }, + { + "epoch": 0.07570185117077356, + "grad_norm": 2.006687776493445, + "learning_rate": 9.945338422866015e-06, + "loss": 0.8351, + "step": 2470 + }, + { + "epoch": 0.07573249969351477, + "grad_norm": 2.1192717447328393, + "learning_rate": 9.945265210356858e-06, + "loss": 0.8439, + "step": 2471 + }, + { + "epoch": 0.07576314821625597, + "grad_norm": 1.906266596811565, + "learning_rate": 9.94519194912074e-06, + "loss": 0.7584, + "step": 2472 + }, + { + "epoch": 0.07579379673899718, + "grad_norm": 2.028393544072328, + "learning_rate": 9.94511863915838e-06, + "loss": 0.7329, + "step": 2473 + }, + { + "epoch": 0.07582444526173839, + "grad_norm": 2.1008767564621253, + "learning_rate": 9.9450452804705e-06, + "loss": 0.8347, + "step": 2474 + }, + { + "epoch": 0.07585509378447959, + "grad_norm": 1.935780727882793, + "learning_rate": 9.944971873057822e-06, + "loss": 0.8131, + "step": 2475 + }, + { + "epoch": 0.0758857423072208, + 
"grad_norm": 2.884611397534499, + "learning_rate": 9.944898416921073e-06, + "loss": 0.7837, + "step": 2476 + }, + { + "epoch": 0.075916390829962, + "grad_norm": 1.8486855706695913, + "learning_rate": 9.944824912060975e-06, + "loss": 0.7736, + "step": 2477 + }, + { + "epoch": 0.0759470393527032, + "grad_norm": 1.9713283835204514, + "learning_rate": 9.944751358478253e-06, + "loss": 0.7787, + "step": 2478 + }, + { + "epoch": 0.0759776878754444, + "grad_norm": 2.1081649724111933, + "learning_rate": 9.944677756173629e-06, + "loss": 0.7519, + "step": 2479 + }, + { + "epoch": 0.0760083363981856, + "grad_norm": 2.0099040321514923, + "learning_rate": 9.944604105147828e-06, + "loss": 0.7977, + "step": 2480 + }, + { + "epoch": 0.07603898492092681, + "grad_norm": 2.056858176796959, + "learning_rate": 9.94453040540158e-06, + "loss": 0.6969, + "step": 2481 + }, + { + "epoch": 0.07606963344366802, + "grad_norm": 1.3535756478739112, + "learning_rate": 9.944456656935609e-06, + "loss": 0.518, + "step": 2482 + }, + { + "epoch": 0.07610028196640922, + "grad_norm": 1.1545413050091684, + "learning_rate": 9.94438285975064e-06, + "loss": 0.5247, + "step": 2483 + }, + { + "epoch": 0.07613093048915043, + "grad_norm": 2.1163112358759393, + "learning_rate": 9.944309013847403e-06, + "loss": 0.8188, + "step": 2484 + }, + { + "epoch": 0.07616157901189163, + "grad_norm": 2.0222274534343647, + "learning_rate": 9.944235119226624e-06, + "loss": 0.7873, + "step": 2485 + }, + { + "epoch": 0.07619222753463283, + "grad_norm": 2.7286202539857465, + "learning_rate": 9.944161175889031e-06, + "loss": 0.7645, + "step": 2486 + }, + { + "epoch": 0.07622287605737403, + "grad_norm": 2.009773837154301, + "learning_rate": 9.944087183835353e-06, + "loss": 0.7479, + "step": 2487 + }, + { + "epoch": 0.07625352458011524, + "grad_norm": 2.3393415950390115, + "learning_rate": 9.944013143066318e-06, + "loss": 0.8115, + "step": 2488 + }, + { + "epoch": 0.07628417310285644, + "grad_norm": 1.8259822960116994, + "learning_rate": 9.943939053582659e-06, + "loss": 0.5438, + "step": 2489 + }, + { + "epoch": 0.07631482162559765, + "grad_norm": 1.5395345459106202, + "learning_rate": 9.943864915385102e-06, + "loss": 0.5408, + "step": 2490 + }, + { + "epoch": 0.07634547014833885, + "grad_norm": 2.124634743919251, + "learning_rate": 9.943790728474378e-06, + "loss": 0.7987, + "step": 2491 + }, + { + "epoch": 0.07637611867108006, + "grad_norm": 0.9868660787976484, + "learning_rate": 9.943716492851221e-06, + "loss": 0.545, + "step": 2492 + }, + { + "epoch": 0.07640676719382125, + "grad_norm": 2.1613573422883205, + "learning_rate": 9.94364220851636e-06, + "loss": 0.7416, + "step": 2493 + }, + { + "epoch": 0.07643741571656246, + "grad_norm": 2.1455107383246865, + "learning_rate": 9.943567875470526e-06, + "loss": 0.8122, + "step": 2494 + }, + { + "epoch": 0.07646806423930366, + "grad_norm": 2.300314333141909, + "learning_rate": 9.943493493714453e-06, + "loss": 0.8407, + "step": 2495 + }, + { + "epoch": 0.07649871276204487, + "grad_norm": 2.0778972946735705, + "learning_rate": 9.943419063248876e-06, + "loss": 0.5203, + "step": 2496 + }, + { + "epoch": 0.07652936128478607, + "grad_norm": 2.2623669972857527, + "learning_rate": 9.943344584074525e-06, + "loss": 0.8415, + "step": 2497 + }, + { + "epoch": 0.07656000980752728, + "grad_norm": 1.7786644621354666, + "learning_rate": 9.943270056192133e-06, + "loss": 0.7362, + "step": 2498 + }, + { + "epoch": 0.07659065833026849, + "grad_norm": 1.8530739799348115, + "learning_rate": 9.94319547960244e-06, + "loss": 0.6911, + 
"step": 2499 + }, + { + "epoch": 0.07662130685300969, + "grad_norm": 2.1768178599880974, + "learning_rate": 9.943120854306175e-06, + "loss": 0.6386, + "step": 2500 + }, + { + "epoch": 0.07665195537575088, + "grad_norm": 2.023852027506842, + "learning_rate": 9.943046180304075e-06, + "loss": 0.7697, + "step": 2501 + }, + { + "epoch": 0.07668260389849209, + "grad_norm": 1.09031393365183, + "learning_rate": 9.942971457596877e-06, + "loss": 0.5313, + "step": 2502 + }, + { + "epoch": 0.0767132524212333, + "grad_norm": 2.066412995009834, + "learning_rate": 9.942896686185317e-06, + "loss": 0.73, + "step": 2503 + }, + { + "epoch": 0.0767439009439745, + "grad_norm": 2.166527453036614, + "learning_rate": 9.942821866070131e-06, + "loss": 0.7614, + "step": 2504 + }, + { + "epoch": 0.0767745494667157, + "grad_norm": 2.253396779382133, + "learning_rate": 9.942746997252056e-06, + "loss": 0.757, + "step": 2505 + }, + { + "epoch": 0.07680519798945691, + "grad_norm": 1.0825589528730823, + "learning_rate": 9.94267207973183e-06, + "loss": 0.5233, + "step": 2506 + }, + { + "epoch": 0.07683584651219812, + "grad_norm": 2.07321931849457, + "learning_rate": 9.942597113510193e-06, + "loss": 0.7137, + "step": 2507 + }, + { + "epoch": 0.07686649503493932, + "grad_norm": 2.4612401485322, + "learning_rate": 9.94252209858788e-06, + "loss": 0.8127, + "step": 2508 + }, + { + "epoch": 0.07689714355768051, + "grad_norm": 2.0940000336636615, + "learning_rate": 9.942447034965634e-06, + "loss": 0.7655, + "step": 2509 + }, + { + "epoch": 0.07692779208042172, + "grad_norm": 2.1667714562354443, + "learning_rate": 9.942371922644191e-06, + "loss": 0.7105, + "step": 2510 + }, + { + "epoch": 0.07695844060316293, + "grad_norm": 2.3222254763635024, + "learning_rate": 9.942296761624293e-06, + "loss": 0.833, + "step": 2511 + }, + { + "epoch": 0.07698908912590413, + "grad_norm": 1.9075345516116033, + "learning_rate": 9.94222155190668e-06, + "loss": 0.6072, + "step": 2512 + }, + { + "epoch": 0.07701973764864534, + "grad_norm": 2.1799482508438786, + "learning_rate": 9.942146293492097e-06, + "loss": 0.743, + "step": 2513 + }, + { + "epoch": 0.07705038617138654, + "grad_norm": 3.2180970268347973, + "learning_rate": 9.942070986381279e-06, + "loss": 0.7245, + "step": 2514 + }, + { + "epoch": 0.07708103469412775, + "grad_norm": 2.14825275019501, + "learning_rate": 9.941995630574972e-06, + "loss": 0.7057, + "step": 2515 + }, + { + "epoch": 0.07711168321686895, + "grad_norm": 1.8938140784613668, + "learning_rate": 9.941920226073917e-06, + "loss": 0.8, + "step": 2516 + }, + { + "epoch": 0.07714233173961015, + "grad_norm": 1.830360346938496, + "learning_rate": 9.941844772878857e-06, + "loss": 0.7474, + "step": 2517 + }, + { + "epoch": 0.07717298026235135, + "grad_norm": 1.7738020077568564, + "learning_rate": 9.941769270990538e-06, + "loss": 0.7416, + "step": 2518 + }, + { + "epoch": 0.07720362878509256, + "grad_norm": 1.8312561147436017, + "learning_rate": 9.941693720409701e-06, + "loss": 0.7619, + "step": 2519 + }, + { + "epoch": 0.07723427730783376, + "grad_norm": 1.9844344825904607, + "learning_rate": 9.941618121137091e-06, + "loss": 0.826, + "step": 2520 + }, + { + "epoch": 0.07726492583057497, + "grad_norm": 2.177445618905062, + "learning_rate": 9.941542473173454e-06, + "loss": 0.7479, + "step": 2521 + }, + { + "epoch": 0.07729557435331617, + "grad_norm": 2.168174992915219, + "learning_rate": 9.941466776519535e-06, + "loss": 0.7833, + "step": 2522 + }, + { + "epoch": 0.07732622287605738, + "grad_norm": 2.1766856548823412, + "learning_rate": 
9.941391031176077e-06, + "loss": 0.843, + "step": 2523 + }, + { + "epoch": 0.07735687139879857, + "grad_norm": 2.004747541946515, + "learning_rate": 9.941315237143831e-06, + "loss": 0.7594, + "step": 2524 + }, + { + "epoch": 0.07738751992153978, + "grad_norm": 1.2796013893014515, + "learning_rate": 9.941239394423544e-06, + "loss": 0.5256, + "step": 2525 + }, + { + "epoch": 0.07741816844428098, + "grad_norm": 1.9486174256625346, + "learning_rate": 9.941163503015956e-06, + "loss": 0.7447, + "step": 2526 + }, + { + "epoch": 0.07744881696702219, + "grad_norm": 2.1457868621214784, + "learning_rate": 9.941087562921823e-06, + "loss": 0.7766, + "step": 2527 + }, + { + "epoch": 0.0774794654897634, + "grad_norm": 2.0344530140817083, + "learning_rate": 9.941011574141888e-06, + "loss": 0.8272, + "step": 2528 + }, + { + "epoch": 0.0775101140125046, + "grad_norm": 2.0091323461346016, + "learning_rate": 9.940935536676903e-06, + "loss": 0.6206, + "step": 2529 + }, + { + "epoch": 0.0775407625352458, + "grad_norm": 2.251220095218001, + "learning_rate": 9.940859450527615e-06, + "loss": 0.7569, + "step": 2530 + }, + { + "epoch": 0.07757141105798701, + "grad_norm": 1.1250010820286345, + "learning_rate": 9.940783315694778e-06, + "loss": 0.5174, + "step": 2531 + }, + { + "epoch": 0.0776020595807282, + "grad_norm": 1.0460033311579888, + "learning_rate": 9.940707132179134e-06, + "loss": 0.503, + "step": 2532 + }, + { + "epoch": 0.07763270810346941, + "grad_norm": 2.1939018914896797, + "learning_rate": 9.940630899981442e-06, + "loss": 0.6655, + "step": 2533 + }, + { + "epoch": 0.07766335662621061, + "grad_norm": 2.298825303022901, + "learning_rate": 9.940554619102448e-06, + "loss": 0.7867, + "step": 2534 + }, + { + "epoch": 0.07769400514895182, + "grad_norm": 2.1387828617046987, + "learning_rate": 9.940478289542906e-06, + "loss": 0.7103, + "step": 2535 + }, + { + "epoch": 0.07772465367169303, + "grad_norm": 2.0147981472418106, + "learning_rate": 9.940401911303568e-06, + "loss": 0.7536, + "step": 2536 + }, + { + "epoch": 0.07775530219443423, + "grad_norm": 2.3272818203993224, + "learning_rate": 9.940325484385185e-06, + "loss": 0.8691, + "step": 2537 + }, + { + "epoch": 0.07778595071717544, + "grad_norm": 2.1379520455168737, + "learning_rate": 9.94024900878851e-06, + "loss": 0.8241, + "step": 2538 + }, + { + "epoch": 0.07781659923991664, + "grad_norm": 2.1346762078971833, + "learning_rate": 9.9401724845143e-06, + "loss": 0.7754, + "step": 2539 + }, + { + "epoch": 0.07784724776265783, + "grad_norm": 2.030065689364255, + "learning_rate": 9.940095911563303e-06, + "loss": 0.8266, + "step": 2540 + }, + { + "epoch": 0.07787789628539904, + "grad_norm": 2.0966309439363733, + "learning_rate": 9.94001928993628e-06, + "loss": 0.8146, + "step": 2541 + }, + { + "epoch": 0.07790854480814025, + "grad_norm": 2.1793571578072357, + "learning_rate": 9.93994261963398e-06, + "loss": 0.7726, + "step": 2542 + }, + { + "epoch": 0.07793919333088145, + "grad_norm": 2.1478898111054954, + "learning_rate": 9.939865900657164e-06, + "loss": 0.747, + "step": 2543 + }, + { + "epoch": 0.07796984185362266, + "grad_norm": 2.161908541933987, + "learning_rate": 9.939789133006586e-06, + "loss": 0.7832, + "step": 2544 + }, + { + "epoch": 0.07800049037636386, + "grad_norm": 2.0360500114766977, + "learning_rate": 9.939712316682998e-06, + "loss": 0.7375, + "step": 2545 + }, + { + "epoch": 0.07803113889910507, + "grad_norm": 2.1732304460672793, + "learning_rate": 9.939635451687164e-06, + "loss": 0.8383, + "step": 2546 + }, + { + "epoch": 0.07806178742184627, + 
"grad_norm": 1.90691320399302, + "learning_rate": 9.939558538019838e-06, + "loss": 0.794, + "step": 2547 + }, + { + "epoch": 0.07809243594458747, + "grad_norm": 2.109639243899091, + "learning_rate": 9.939481575681777e-06, + "loss": 0.7439, + "step": 2548 + }, + { + "epoch": 0.07812308446732867, + "grad_norm": 1.745226856812606, + "learning_rate": 9.93940456467374e-06, + "loss": 0.7647, + "step": 2549 + }, + { + "epoch": 0.07815373299006988, + "grad_norm": 2.2282965622374755, + "learning_rate": 9.939327504996485e-06, + "loss": 0.7292, + "step": 2550 + }, + { + "epoch": 0.07818438151281108, + "grad_norm": 2.2904837586503, + "learning_rate": 9.939250396650775e-06, + "loss": 0.8609, + "step": 2551 + }, + { + "epoch": 0.07821503003555229, + "grad_norm": 2.2155342692928466, + "learning_rate": 9.939173239637365e-06, + "loss": 0.7903, + "step": 2552 + }, + { + "epoch": 0.0782456785582935, + "grad_norm": 2.100582229348818, + "learning_rate": 9.939096033957019e-06, + "loss": 0.7766, + "step": 2553 + }, + { + "epoch": 0.0782763270810347, + "grad_norm": 1.888542769169912, + "learning_rate": 9.939018779610494e-06, + "loss": 0.7398, + "step": 2554 + }, + { + "epoch": 0.07830697560377589, + "grad_norm": 2.050323055484214, + "learning_rate": 9.938941476598554e-06, + "loss": 0.711, + "step": 2555 + }, + { + "epoch": 0.0783376241265171, + "grad_norm": 1.8626247571765295, + "learning_rate": 9.938864124921961e-06, + "loss": 0.707, + "step": 2556 + }, + { + "epoch": 0.0783682726492583, + "grad_norm": 2.2899169897329004, + "learning_rate": 9.938786724581476e-06, + "loss": 0.7663, + "step": 2557 + }, + { + "epoch": 0.07839892117199951, + "grad_norm": 2.3046169357327098, + "learning_rate": 9.93870927557786e-06, + "loss": 0.763, + "step": 2558 + }, + { + "epoch": 0.07842956969474071, + "grad_norm": 2.0933369966051036, + "learning_rate": 9.93863177791188e-06, + "loss": 0.747, + "step": 2559 + }, + { + "epoch": 0.07846021821748192, + "grad_norm": 2.31918137785533, + "learning_rate": 9.938554231584296e-06, + "loss": 0.8604, + "step": 2560 + }, + { + "epoch": 0.07849086674022313, + "grad_norm": 2.125158031184007, + "learning_rate": 9.938476636595874e-06, + "loss": 0.8189, + "step": 2561 + }, + { + "epoch": 0.07852151526296433, + "grad_norm": 1.9451010158633402, + "learning_rate": 9.938398992947379e-06, + "loss": 0.7744, + "step": 2562 + }, + { + "epoch": 0.07855216378570552, + "grad_norm": 2.166808321550343, + "learning_rate": 9.938321300639574e-06, + "loss": 0.8271, + "step": 2563 + }, + { + "epoch": 0.07858281230844673, + "grad_norm": 2.176376158591177, + "learning_rate": 9.938243559673227e-06, + "loss": 0.8324, + "step": 2564 + }, + { + "epoch": 0.07861346083118793, + "grad_norm": 2.788663994102116, + "learning_rate": 9.938165770049101e-06, + "loss": 0.5617, + "step": 2565 + }, + { + "epoch": 0.07864410935392914, + "grad_norm": 2.103875192234296, + "learning_rate": 9.938087931767965e-06, + "loss": 0.7503, + "step": 2566 + }, + { + "epoch": 0.07867475787667035, + "grad_norm": 0.9765301103912973, + "learning_rate": 9.938010044830585e-06, + "loss": 0.5234, + "step": 2567 + }, + { + "epoch": 0.07870540639941155, + "grad_norm": 2.190577472584811, + "learning_rate": 9.937932109237729e-06, + "loss": 0.8036, + "step": 2568 + }, + { + "epoch": 0.07873605492215276, + "grad_norm": 1.4420305137674148, + "learning_rate": 9.937854124990163e-06, + "loss": 0.5339, + "step": 2569 + }, + { + "epoch": 0.07876670344489396, + "grad_norm": 2.2003917708839458, + "learning_rate": 9.937776092088659e-06, + "loss": 0.7495, + "step": 2570 + 
}, + { + "epoch": 0.07879735196763515, + "grad_norm": 1.9831153788979285, + "learning_rate": 9.937698010533982e-06, + "loss": 0.7157, + "step": 2571 + }, + { + "epoch": 0.07882800049037636, + "grad_norm": 2.311629142726717, + "learning_rate": 9.937619880326905e-06, + "loss": 0.8535, + "step": 2572 + }, + { + "epoch": 0.07885864901311757, + "grad_norm": 2.2787987080729986, + "learning_rate": 9.937541701468194e-06, + "loss": 0.8667, + "step": 2573 + }, + { + "epoch": 0.07888929753585877, + "grad_norm": 1.811000671819652, + "learning_rate": 9.937463473958624e-06, + "loss": 0.8188, + "step": 2574 + }, + { + "epoch": 0.07891994605859998, + "grad_norm": 1.3801825195064847, + "learning_rate": 9.93738519779896e-06, + "loss": 0.5323, + "step": 2575 + }, + { + "epoch": 0.07895059458134118, + "grad_norm": 2.1794108687546494, + "learning_rate": 9.937306872989977e-06, + "loss": 0.7684, + "step": 2576 + }, + { + "epoch": 0.07898124310408239, + "grad_norm": 2.403611169339792, + "learning_rate": 9.937228499532448e-06, + "loss": 0.8548, + "step": 2577 + }, + { + "epoch": 0.0790118916268236, + "grad_norm": 2.2087779855122402, + "learning_rate": 9.937150077427143e-06, + "loss": 0.83, + "step": 2578 + }, + { + "epoch": 0.07904254014956479, + "grad_norm": 2.1391945803906607, + "learning_rate": 9.937071606674834e-06, + "loss": 0.7663, + "step": 2579 + }, + { + "epoch": 0.07907318867230599, + "grad_norm": 2.0799386228872723, + "learning_rate": 9.936993087276294e-06, + "loss": 0.7587, + "step": 2580 + }, + { + "epoch": 0.0791038371950472, + "grad_norm": 2.037031490567396, + "learning_rate": 9.9369145192323e-06, + "loss": 0.7661, + "step": 2581 + }, + { + "epoch": 0.0791344857177884, + "grad_norm": 2.019856237979349, + "learning_rate": 9.936835902543624e-06, + "loss": 0.6924, + "step": 2582 + }, + { + "epoch": 0.07916513424052961, + "grad_norm": 1.0371816094717494, + "learning_rate": 9.93675723721104e-06, + "loss": 0.5148, + "step": 2583 + }, + { + "epoch": 0.07919578276327081, + "grad_norm": 2.187577715457417, + "learning_rate": 9.936678523235324e-06, + "loss": 0.7335, + "step": 2584 + }, + { + "epoch": 0.07922643128601202, + "grad_norm": 2.2465378543568963, + "learning_rate": 9.936599760617251e-06, + "loss": 0.8691, + "step": 2585 + }, + { + "epoch": 0.07925707980875321, + "grad_norm": 2.2359897724365094, + "learning_rate": 9.936520949357599e-06, + "loss": 0.766, + "step": 2586 + }, + { + "epoch": 0.07928772833149442, + "grad_norm": 0.9168083956045001, + "learning_rate": 9.936442089457142e-06, + "loss": 0.5273, + "step": 2587 + }, + { + "epoch": 0.07931837685423562, + "grad_norm": 2.147544622626064, + "learning_rate": 9.936363180916657e-06, + "loss": 0.7532, + "step": 2588 + }, + { + "epoch": 0.07934902537697683, + "grad_norm": 2.0416807194276556, + "learning_rate": 9.936284223736924e-06, + "loss": 0.7343, + "step": 2589 + }, + { + "epoch": 0.07937967389971803, + "grad_norm": 2.0182264326452675, + "learning_rate": 9.93620521791872e-06, + "loss": 0.6877, + "step": 2590 + }, + { + "epoch": 0.07941032242245924, + "grad_norm": 2.18679603598116, + "learning_rate": 9.936126163462821e-06, + "loss": 0.8142, + "step": 2591 + }, + { + "epoch": 0.07944097094520045, + "grad_norm": 2.106315843736519, + "learning_rate": 9.936047060370008e-06, + "loss": 0.8552, + "step": 2592 + }, + { + "epoch": 0.07947161946794165, + "grad_norm": 2.1488116859042807, + "learning_rate": 9.935967908641063e-06, + "loss": 0.6992, + "step": 2593 + }, + { + "epoch": 0.07950226799068284, + "grad_norm": 2.114070633283222, + "learning_rate": 
9.935888708276761e-06, + "loss": 0.7667, + "step": 2594 + }, + { + "epoch": 0.07953291651342405, + "grad_norm": 1.0180557307937597, + "learning_rate": 9.935809459277885e-06, + "loss": 0.5268, + "step": 2595 + }, + { + "epoch": 0.07956356503616525, + "grad_norm": 1.8667549270959345, + "learning_rate": 9.935730161645216e-06, + "loss": 0.7414, + "step": 2596 + }, + { + "epoch": 0.07959421355890646, + "grad_norm": 2.0843885404405884, + "learning_rate": 9.935650815379536e-06, + "loss": 0.8309, + "step": 2597 + }, + { + "epoch": 0.07962486208164767, + "grad_norm": 2.031838922173489, + "learning_rate": 9.935571420481625e-06, + "loss": 0.812, + "step": 2598 + }, + { + "epoch": 0.07965551060438887, + "grad_norm": 2.124027439853411, + "learning_rate": 9.935491976952265e-06, + "loss": 0.8297, + "step": 2599 + }, + { + "epoch": 0.07968615912713008, + "grad_norm": 1.8257753473502356, + "learning_rate": 9.935412484792239e-06, + "loss": 0.7276, + "step": 2600 + }, + { + "epoch": 0.07971680764987128, + "grad_norm": 1.924028728170998, + "learning_rate": 9.935332944002333e-06, + "loss": 0.7586, + "step": 2601 + }, + { + "epoch": 0.07974745617261247, + "grad_norm": 1.0461856966138636, + "learning_rate": 9.935253354583327e-06, + "loss": 0.5272, + "step": 2602 + }, + { + "epoch": 0.07977810469535368, + "grad_norm": 1.99881612067181, + "learning_rate": 9.935173716536008e-06, + "loss": 0.7212, + "step": 2603 + }, + { + "epoch": 0.07980875321809489, + "grad_norm": 2.2283936063898317, + "learning_rate": 9.93509402986116e-06, + "loss": 0.8235, + "step": 2604 + }, + { + "epoch": 0.07983940174083609, + "grad_norm": 2.2239395676265103, + "learning_rate": 9.935014294559567e-06, + "loss": 0.8449, + "step": 2605 + }, + { + "epoch": 0.0798700502635773, + "grad_norm": 2.2827843979036575, + "learning_rate": 9.934934510632017e-06, + "loss": 0.7598, + "step": 2606 + }, + { + "epoch": 0.0799006987863185, + "grad_norm": 2.006446476930678, + "learning_rate": 9.934854678079293e-06, + "loss": 0.7861, + "step": 2607 + }, + { + "epoch": 0.07993134730905971, + "grad_norm": 2.1928678949209544, + "learning_rate": 9.934774796902182e-06, + "loss": 0.7899, + "step": 2608 + }, + { + "epoch": 0.07996199583180091, + "grad_norm": 1.839823536379241, + "learning_rate": 9.934694867101475e-06, + "loss": 0.696, + "step": 2609 + }, + { + "epoch": 0.0799926443545421, + "grad_norm": 2.2542016581762403, + "learning_rate": 9.934614888677955e-06, + "loss": 0.7394, + "step": 2610 + }, + { + "epoch": 0.08002329287728331, + "grad_norm": 1.9045915921386953, + "learning_rate": 9.934534861632413e-06, + "loss": 0.7996, + "step": 2611 + }, + { + "epoch": 0.08005394140002452, + "grad_norm": 1.9065529046097507, + "learning_rate": 9.934454785965636e-06, + "loss": 0.8082, + "step": 2612 + }, + { + "epoch": 0.08008458992276572, + "grad_norm": 2.2721349521013323, + "learning_rate": 9.934374661678415e-06, + "loss": 0.774, + "step": 2613 + }, + { + "epoch": 0.08011523844550693, + "grad_norm": 1.9089382003669308, + "learning_rate": 9.934294488771537e-06, + "loss": 0.7629, + "step": 2614 + }, + { + "epoch": 0.08014588696824813, + "grad_norm": 1.069085022848832, + "learning_rate": 9.934214267245792e-06, + "loss": 0.5049, + "step": 2615 + }, + { + "epoch": 0.08017653549098934, + "grad_norm": 0.9669515884752163, + "learning_rate": 9.934133997101972e-06, + "loss": 0.5366, + "step": 2616 + }, + { + "epoch": 0.08020718401373053, + "grad_norm": 2.357819841967656, + "learning_rate": 9.934053678340867e-06, + "loss": 0.8923, + "step": 2617 + }, + { + "epoch": 0.08023783253647174, 
+ "grad_norm": 2.401765129219085, + "learning_rate": 9.933973310963269e-06, + "loss": 0.725, + "step": 2618 + }, + { + "epoch": 0.08026848105921294, + "grad_norm": 1.9715511235868584, + "learning_rate": 9.933892894969968e-06, + "loss": 0.6728, + "step": 2619 + }, + { + "epoch": 0.08029912958195415, + "grad_norm": 0.9839184626850886, + "learning_rate": 9.93381243036176e-06, + "loss": 0.5338, + "step": 2620 + }, + { + "epoch": 0.08032977810469535, + "grad_norm": 2.1689170320517173, + "learning_rate": 9.933731917139436e-06, + "loss": 0.8297, + "step": 2621 + }, + { + "epoch": 0.08036042662743656, + "grad_norm": 1.9980084390065578, + "learning_rate": 9.933651355303788e-06, + "loss": 0.7464, + "step": 2622 + }, + { + "epoch": 0.08039107515017777, + "grad_norm": 2.0828406606007492, + "learning_rate": 9.933570744855609e-06, + "loss": 0.734, + "step": 2623 + }, + { + "epoch": 0.08042172367291897, + "grad_norm": 2.035964382954254, + "learning_rate": 9.933490085795698e-06, + "loss": 0.7864, + "step": 2624 + }, + { + "epoch": 0.08045237219566016, + "grad_norm": 2.0418723961646803, + "learning_rate": 9.933409378124847e-06, + "loss": 0.8813, + "step": 2625 + }, + { + "epoch": 0.08048302071840137, + "grad_norm": 1.8278905549834152, + "learning_rate": 9.93332862184385e-06, + "loss": 0.7725, + "step": 2626 + }, + { + "epoch": 0.08051366924114257, + "grad_norm": 1.9241733404871277, + "learning_rate": 9.933247816953505e-06, + "loss": 0.8085, + "step": 2627 + }, + { + "epoch": 0.08054431776388378, + "grad_norm": 2.068445952228297, + "learning_rate": 9.933166963454604e-06, + "loss": 0.8738, + "step": 2628 + }, + { + "epoch": 0.08057496628662499, + "grad_norm": 2.301132557195125, + "learning_rate": 9.93308606134795e-06, + "loss": 0.8263, + "step": 2629 + }, + { + "epoch": 0.08060561480936619, + "grad_norm": 1.9621064719857866, + "learning_rate": 9.933005110634334e-06, + "loss": 0.7056, + "step": 2630 + }, + { + "epoch": 0.0806362633321074, + "grad_norm": 2.223031877667249, + "learning_rate": 9.93292411131456e-06, + "loss": 0.9015, + "step": 2631 + }, + { + "epoch": 0.0806669118548486, + "grad_norm": 2.258594794098356, + "learning_rate": 9.932843063389418e-06, + "loss": 0.8222, + "step": 2632 + }, + { + "epoch": 0.0806975603775898, + "grad_norm": 2.0417823344218426, + "learning_rate": 9.932761966859716e-06, + "loss": 0.7689, + "step": 2633 + }, + { + "epoch": 0.080728208900331, + "grad_norm": 2.1300510153308765, + "learning_rate": 9.932680821726246e-06, + "loss": 0.6587, + "step": 2634 + }, + { + "epoch": 0.0807588574230722, + "grad_norm": 1.9237699927895895, + "learning_rate": 9.93259962798981e-06, + "loss": 0.7909, + "step": 2635 + }, + { + "epoch": 0.08078950594581341, + "grad_norm": 2.050053023510852, + "learning_rate": 9.932518385651207e-06, + "loss": 0.895, + "step": 2636 + }, + { + "epoch": 0.08082015446855462, + "grad_norm": 3.9952859888658927, + "learning_rate": 9.932437094711238e-06, + "loss": 0.7414, + "step": 2637 + }, + { + "epoch": 0.08085080299129582, + "grad_norm": 1.991961550741085, + "learning_rate": 9.932355755170706e-06, + "loss": 0.7644, + "step": 2638 + }, + { + "epoch": 0.08088145151403703, + "grad_norm": 2.3592341507318784, + "learning_rate": 9.93227436703041e-06, + "loss": 0.6936, + "step": 2639 + }, + { + "epoch": 0.08091210003677823, + "grad_norm": 2.0062711404642646, + "learning_rate": 9.932192930291152e-06, + "loss": 0.7602, + "step": 2640 + }, + { + "epoch": 0.08094274855951943, + "grad_norm": 2.243147355842606, + "learning_rate": 9.932111444953735e-06, + "loss": 0.7479, + "step": 
2641 + }, + { + "epoch": 0.08097339708226063, + "grad_norm": 1.0916969280237623, + "learning_rate": 9.932029911018965e-06, + "loss": 0.5011, + "step": 2642 + }, + { + "epoch": 0.08100404560500184, + "grad_norm": 2.094093949376908, + "learning_rate": 9.931948328487639e-06, + "loss": 0.7276, + "step": 2643 + }, + { + "epoch": 0.08103469412774304, + "grad_norm": 2.1802407499350416, + "learning_rate": 9.931866697360566e-06, + "loss": 0.6982, + "step": 2644 + }, + { + "epoch": 0.08106534265048425, + "grad_norm": 2.023893917699585, + "learning_rate": 9.931785017638547e-06, + "loss": 0.8371, + "step": 2645 + }, + { + "epoch": 0.08109599117322545, + "grad_norm": 1.9962995688865126, + "learning_rate": 9.931703289322389e-06, + "loss": 0.766, + "step": 2646 + }, + { + "epoch": 0.08112663969596666, + "grad_norm": 2.041102653432929, + "learning_rate": 9.931621512412897e-06, + "loss": 0.7356, + "step": 2647 + }, + { + "epoch": 0.08115728821870785, + "grad_norm": 1.819214664267237, + "learning_rate": 9.931539686910877e-06, + "loss": 0.6833, + "step": 2648 + }, + { + "epoch": 0.08118793674144906, + "grad_norm": 2.1809213089941637, + "learning_rate": 9.931457812817135e-06, + "loss": 0.7648, + "step": 2649 + }, + { + "epoch": 0.08121858526419026, + "grad_norm": 2.0250108659952732, + "learning_rate": 9.931375890132475e-06, + "loss": 0.7682, + "step": 2650 + }, + { + "epoch": 0.08124923378693147, + "grad_norm": 1.9957946820813048, + "learning_rate": 9.93129391885771e-06, + "loss": 0.7788, + "step": 2651 + }, + { + "epoch": 0.08127988230967267, + "grad_norm": 2.1546047947104823, + "learning_rate": 9.93121189899364e-06, + "loss": 0.7635, + "step": 2652 + }, + { + "epoch": 0.08131053083241388, + "grad_norm": 0.9653337726776143, + "learning_rate": 9.931129830541082e-06, + "loss": 0.54, + "step": 2653 + }, + { + "epoch": 0.08134117935515509, + "grad_norm": 2.3429695538460447, + "learning_rate": 9.931047713500836e-06, + "loss": 0.8396, + "step": 2654 + }, + { + "epoch": 0.08137182787789629, + "grad_norm": 1.9589407064520505, + "learning_rate": 9.930965547873719e-06, + "loss": 0.7868, + "step": 2655 + }, + { + "epoch": 0.08140247640063748, + "grad_norm": 2.2595705882534323, + "learning_rate": 9.930883333660535e-06, + "loss": 0.7781, + "step": 2656 + }, + { + "epoch": 0.08143312492337869, + "grad_norm": 2.2845229311101956, + "learning_rate": 9.930801070862095e-06, + "loss": 0.8132, + "step": 2657 + }, + { + "epoch": 0.0814637734461199, + "grad_norm": 2.4357689847699997, + "learning_rate": 9.930718759479213e-06, + "loss": 0.6953, + "step": 2658 + }, + { + "epoch": 0.0814944219688611, + "grad_norm": 2.1688507623400652, + "learning_rate": 9.930636399512694e-06, + "loss": 0.7458, + "step": 2659 + }, + { + "epoch": 0.0815250704916023, + "grad_norm": 1.9581255447888435, + "learning_rate": 9.930553990963355e-06, + "loss": 0.8095, + "step": 2660 + }, + { + "epoch": 0.08155571901434351, + "grad_norm": 2.7088544412410602, + "learning_rate": 9.930471533832005e-06, + "loss": 0.8161, + "step": 2661 + }, + { + "epoch": 0.08158636753708472, + "grad_norm": 2.0000037823606873, + "learning_rate": 9.930389028119458e-06, + "loss": 0.82, + "step": 2662 + }, + { + "epoch": 0.08161701605982592, + "grad_norm": 2.0898535002012144, + "learning_rate": 9.930306473826528e-06, + "loss": 0.7601, + "step": 2663 + }, + { + "epoch": 0.08164766458256711, + "grad_norm": 2.040329955896837, + "learning_rate": 9.930223870954025e-06, + "loss": 0.6739, + "step": 2664 + }, + { + "epoch": 0.08167831310530832, + "grad_norm": 2.1972264361178007, + 
"learning_rate": 9.930141219502765e-06, + "loss": 0.7748, + "step": 2665 + }, + { + "epoch": 0.08170896162804953, + "grad_norm": 2.139138856620026, + "learning_rate": 9.930058519473562e-06, + "loss": 0.7579, + "step": 2666 + }, + { + "epoch": 0.08173961015079073, + "grad_norm": 1.765016123623868, + "learning_rate": 9.929975770867231e-06, + "loss": 0.8372, + "step": 2667 + }, + { + "epoch": 0.08177025867353194, + "grad_norm": 1.9538766749452612, + "learning_rate": 9.92989297368459e-06, + "loss": 0.722, + "step": 2668 + }, + { + "epoch": 0.08180090719627314, + "grad_norm": 1.9265619439713244, + "learning_rate": 9.92981012792645e-06, + "loss": 0.7931, + "step": 2669 + }, + { + "epoch": 0.08183155571901435, + "grad_norm": 2.1070003175380005, + "learning_rate": 9.929727233593628e-06, + "loss": 0.7889, + "step": 2670 + }, + { + "epoch": 0.08186220424175555, + "grad_norm": 2.2159299511155495, + "learning_rate": 9.929644290686945e-06, + "loss": 0.8527, + "step": 2671 + }, + { + "epoch": 0.08189285276449675, + "grad_norm": 2.1711587538201074, + "learning_rate": 9.929561299207213e-06, + "loss": 0.8594, + "step": 2672 + }, + { + "epoch": 0.08192350128723795, + "grad_norm": 0.946021742397934, + "learning_rate": 9.929478259155253e-06, + "loss": 0.5197, + "step": 2673 + }, + { + "epoch": 0.08195414980997916, + "grad_norm": 1.9399775228684495, + "learning_rate": 9.929395170531883e-06, + "loss": 0.6095, + "step": 2674 + }, + { + "epoch": 0.08198479833272036, + "grad_norm": 1.827118468392208, + "learning_rate": 9.929312033337922e-06, + "loss": 0.7473, + "step": 2675 + }, + { + "epoch": 0.08201544685546157, + "grad_norm": 2.2128249938303024, + "learning_rate": 9.929228847574186e-06, + "loss": 0.7377, + "step": 2676 + }, + { + "epoch": 0.08204609537820277, + "grad_norm": 2.0421937595756985, + "learning_rate": 9.929145613241497e-06, + "loss": 0.7468, + "step": 2677 + }, + { + "epoch": 0.08207674390094398, + "grad_norm": 1.9750412302344689, + "learning_rate": 9.929062330340678e-06, + "loss": 0.7513, + "step": 2678 + }, + { + "epoch": 0.08210739242368517, + "grad_norm": 1.9468377988511165, + "learning_rate": 9.928978998872543e-06, + "loss": 0.8253, + "step": 2679 + }, + { + "epoch": 0.08213804094642638, + "grad_norm": 2.3948610411582627, + "learning_rate": 9.92889561883792e-06, + "loss": 0.7849, + "step": 2680 + }, + { + "epoch": 0.08216868946916758, + "grad_norm": 0.9700834429882567, + "learning_rate": 9.928812190237627e-06, + "loss": 0.5113, + "step": 2681 + }, + { + "epoch": 0.08219933799190879, + "grad_norm": 2.4819788713890905, + "learning_rate": 9.928728713072485e-06, + "loss": 0.7308, + "step": 2682 + }, + { + "epoch": 0.08222998651465, + "grad_norm": 2.1376906176995107, + "learning_rate": 9.928645187343318e-06, + "loss": 0.726, + "step": 2683 + }, + { + "epoch": 0.0822606350373912, + "grad_norm": 2.043297350865831, + "learning_rate": 9.92856161305095e-06, + "loss": 0.7985, + "step": 2684 + }, + { + "epoch": 0.0822912835601324, + "grad_norm": 2.243795773612064, + "learning_rate": 9.928477990196202e-06, + "loss": 0.6816, + "step": 2685 + }, + { + "epoch": 0.08232193208287361, + "grad_norm": 2.024446809071369, + "learning_rate": 9.928394318779901e-06, + "loss": 0.8322, + "step": 2686 + }, + { + "epoch": 0.0823525806056148, + "grad_norm": 0.9903457498452554, + "learning_rate": 9.928310598802868e-06, + "loss": 0.5125, + "step": 2687 + }, + { + "epoch": 0.08238322912835601, + "grad_norm": 1.9963175500175137, + "learning_rate": 9.92822683026593e-06, + "loss": 0.6784, + "step": 2688 + }, + { + "epoch": 
0.08241387765109721, + "grad_norm": 2.077286574291309, + "learning_rate": 9.928143013169912e-06, + "loss": 0.7553, + "step": 2689 + }, + { + "epoch": 0.08244452617383842, + "grad_norm": 2.1417329533038574, + "learning_rate": 9.928059147515643e-06, + "loss": 0.8, + "step": 2690 + }, + { + "epoch": 0.08247517469657963, + "grad_norm": 1.8886306218841271, + "learning_rate": 9.927975233303943e-06, + "loss": 0.6654, + "step": 2691 + }, + { + "epoch": 0.08250582321932083, + "grad_norm": 2.147768198441069, + "learning_rate": 9.927891270535643e-06, + "loss": 0.7314, + "step": 2692 + }, + { + "epoch": 0.08253647174206204, + "grad_norm": 1.9054752595366884, + "learning_rate": 9.92780725921157e-06, + "loss": 0.692, + "step": 2693 + }, + { + "epoch": 0.08256712026480324, + "grad_norm": 2.3081428084371183, + "learning_rate": 9.927723199332551e-06, + "loss": 0.9531, + "step": 2694 + }, + { + "epoch": 0.08259776878754443, + "grad_norm": 2.2627684623907487, + "learning_rate": 9.927639090899415e-06, + "loss": 0.6746, + "step": 2695 + }, + { + "epoch": 0.08262841731028564, + "grad_norm": 1.83490780301932, + "learning_rate": 9.927554933912988e-06, + "loss": 0.785, + "step": 2696 + }, + { + "epoch": 0.08265906583302685, + "grad_norm": 2.1151988906287023, + "learning_rate": 9.927470728374105e-06, + "loss": 0.821, + "step": 2697 + }, + { + "epoch": 0.08268971435576805, + "grad_norm": 1.7993013637147561, + "learning_rate": 9.927386474283589e-06, + "loss": 0.6034, + "step": 2698 + }, + { + "epoch": 0.08272036287850926, + "grad_norm": 2.083375037821424, + "learning_rate": 9.927302171642275e-06, + "loss": 0.8317, + "step": 2699 + }, + { + "epoch": 0.08275101140125046, + "grad_norm": 2.14812827308437, + "learning_rate": 9.927217820450992e-06, + "loss": 0.7342, + "step": 2700 + }, + { + "epoch": 0.08278165992399167, + "grad_norm": 2.035772923944073, + "learning_rate": 9.92713342071057e-06, + "loss": 0.7323, + "step": 2701 + }, + { + "epoch": 0.08281230844673287, + "grad_norm": 2.347430087626188, + "learning_rate": 9.927048972421843e-06, + "loss": 0.7008, + "step": 2702 + }, + { + "epoch": 0.08284295696947407, + "grad_norm": 2.0012923141000982, + "learning_rate": 9.92696447558564e-06, + "loss": 0.7278, + "step": 2703 + }, + { + "epoch": 0.08287360549221527, + "grad_norm": 2.183570756749403, + "learning_rate": 9.926879930202798e-06, + "loss": 0.7851, + "step": 2704 + }, + { + "epoch": 0.08290425401495648, + "grad_norm": 1.9484025493035653, + "learning_rate": 9.926795336274146e-06, + "loss": 0.7313, + "step": 2705 + }, + { + "epoch": 0.08293490253769768, + "grad_norm": 1.0248378547134052, + "learning_rate": 9.926710693800518e-06, + "loss": 0.5219, + "step": 2706 + }, + { + "epoch": 0.08296555106043889, + "grad_norm": 2.002229902468972, + "learning_rate": 9.92662600278275e-06, + "loss": 0.7521, + "step": 2707 + }, + { + "epoch": 0.0829961995831801, + "grad_norm": 2.1153105030156896, + "learning_rate": 9.926541263221676e-06, + "loss": 0.8354, + "step": 2708 + }, + { + "epoch": 0.0830268481059213, + "grad_norm": 1.994100166828228, + "learning_rate": 9.926456475118131e-06, + "loss": 0.7543, + "step": 2709 + }, + { + "epoch": 0.08305749662866249, + "grad_norm": 0.957293138763307, + "learning_rate": 9.926371638472949e-06, + "loss": 0.5265, + "step": 2710 + }, + { + "epoch": 0.0830881451514037, + "grad_norm": 1.944753651996318, + "learning_rate": 9.926286753286966e-06, + "loss": 0.8413, + "step": 2711 + }, + { + "epoch": 0.0831187936741449, + "grad_norm": 2.118739396257106, + "learning_rate": 9.92620181956102e-06, + "loss": 
0.9347, + "step": 2712 + }, + { + "epoch": 0.08314944219688611, + "grad_norm": 0.9545202180601586, + "learning_rate": 9.926116837295948e-06, + "loss": 0.5313, + "step": 2713 + }, + { + "epoch": 0.08318009071962731, + "grad_norm": 1.997013036715125, + "learning_rate": 9.926031806492584e-06, + "loss": 0.7559, + "step": 2714 + }, + { + "epoch": 0.08321073924236852, + "grad_norm": 1.9193049315574184, + "learning_rate": 9.92594672715177e-06, + "loss": 0.7974, + "step": 2715 + }, + { + "epoch": 0.08324138776510973, + "grad_norm": 1.9299760119240283, + "learning_rate": 9.925861599274342e-06, + "loss": 0.7567, + "step": 2716 + }, + { + "epoch": 0.08327203628785093, + "grad_norm": 2.0124862569358455, + "learning_rate": 9.92577642286114e-06, + "loss": 0.7183, + "step": 2717 + }, + { + "epoch": 0.08330268481059212, + "grad_norm": 2.0810786675985535, + "learning_rate": 9.925691197913001e-06, + "loss": 0.8251, + "step": 2718 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 2.2528894819449516, + "learning_rate": 9.925605924430768e-06, + "loss": 0.8267, + "step": 2719 + }, + { + "epoch": 0.08336398185607453, + "grad_norm": 2.1494875925984767, + "learning_rate": 9.925520602415278e-06, + "loss": 0.8846, + "step": 2720 + }, + { + "epoch": 0.08339463037881574, + "grad_norm": 2.0074835076572652, + "learning_rate": 9.925435231867374e-06, + "loss": 0.7752, + "step": 2721 + }, + { + "epoch": 0.08342527890155695, + "grad_norm": 1.188088684695165, + "learning_rate": 9.925349812787897e-06, + "loss": 0.5281, + "step": 2722 + }, + { + "epoch": 0.08345592742429815, + "grad_norm": 1.9276448470183487, + "learning_rate": 9.925264345177687e-06, + "loss": 0.7177, + "step": 2723 + }, + { + "epoch": 0.08348657594703936, + "grad_norm": 2.1413251103192117, + "learning_rate": 9.925178829037588e-06, + "loss": 0.7859, + "step": 2724 + }, + { + "epoch": 0.08351722446978056, + "grad_norm": 2.4491100160445765, + "learning_rate": 9.925093264368441e-06, + "loss": 0.9074, + "step": 2725 + }, + { + "epoch": 0.08354787299252175, + "grad_norm": 1.9192015340379056, + "learning_rate": 9.925007651171091e-06, + "loss": 0.7473, + "step": 2726 + }, + { + "epoch": 0.08357852151526296, + "grad_norm": 2.097733156929076, + "learning_rate": 9.924921989446382e-06, + "loss": 0.832, + "step": 2727 + }, + { + "epoch": 0.08360917003800417, + "grad_norm": 2.0486124273728294, + "learning_rate": 9.924836279195153e-06, + "loss": 0.77, + "step": 2728 + }, + { + "epoch": 0.08363981856074537, + "grad_norm": 2.2337014709340113, + "learning_rate": 9.924750520418254e-06, + "loss": 0.7067, + "step": 2729 + }, + { + "epoch": 0.08367046708348658, + "grad_norm": 2.207780097592895, + "learning_rate": 9.924664713116528e-06, + "loss": 0.773, + "step": 2730 + }, + { + "epoch": 0.08370111560622778, + "grad_norm": 1.8683094047192397, + "learning_rate": 9.92457885729082e-06, + "loss": 0.8015, + "step": 2731 + }, + { + "epoch": 0.08373176412896899, + "grad_norm": 1.941709115891972, + "learning_rate": 9.924492952941977e-06, + "loss": 0.7653, + "step": 2732 + }, + { + "epoch": 0.0837624126517102, + "grad_norm": 2.190193418384482, + "learning_rate": 9.924407000070844e-06, + "loss": 0.7633, + "step": 2733 + }, + { + "epoch": 0.08379306117445139, + "grad_norm": 2.0476205163926484, + "learning_rate": 9.924320998678271e-06, + "loss": 0.772, + "step": 2734 + }, + { + "epoch": 0.08382370969719259, + "grad_norm": 2.6153375650690553, + "learning_rate": 9.924234948765101e-06, + "loss": 0.8117, + "step": 2735 + }, + { + "epoch": 0.0838543582199338, + "grad_norm": 
2.0024920246901576, + "learning_rate": 9.924148850332185e-06, + "loss": 0.7457, + "step": 2736 + }, + { + "epoch": 0.083885006742675, + "grad_norm": 1.8692939517643228, + "learning_rate": 9.92406270338037e-06, + "loss": 0.6931, + "step": 2737 + }, + { + "epoch": 0.08391565526541621, + "grad_norm": 2.014969772318779, + "learning_rate": 9.923976507910506e-06, + "loss": 0.8039, + "step": 2738 + }, + { + "epoch": 0.08394630378815741, + "grad_norm": 2.075020302485697, + "learning_rate": 9.923890263923443e-06, + "loss": 0.8775, + "step": 2739 + }, + { + "epoch": 0.08397695231089862, + "grad_norm": 2.2322615673794353, + "learning_rate": 9.923803971420027e-06, + "loss": 0.8261, + "step": 2740 + }, + { + "epoch": 0.08400760083363981, + "grad_norm": 2.040541391123931, + "learning_rate": 9.923717630401113e-06, + "loss": 0.8056, + "step": 2741 + }, + { + "epoch": 0.08403824935638102, + "grad_norm": 1.7140626235277618, + "learning_rate": 9.923631240867546e-06, + "loss": 0.7164, + "step": 2742 + }, + { + "epoch": 0.08406889787912222, + "grad_norm": 1.8448586815668266, + "learning_rate": 9.923544802820183e-06, + "loss": 0.6785, + "step": 2743 + }, + { + "epoch": 0.08409954640186343, + "grad_norm": 2.077553388298157, + "learning_rate": 9.923458316259872e-06, + "loss": 0.7819, + "step": 2744 + }, + { + "epoch": 0.08413019492460463, + "grad_norm": 2.137741980802605, + "learning_rate": 9.923371781187468e-06, + "loss": 0.7578, + "step": 2745 + }, + { + "epoch": 0.08416084344734584, + "grad_norm": 2.0637183769713956, + "learning_rate": 9.923285197603823e-06, + "loss": 0.7628, + "step": 2746 + }, + { + "epoch": 0.08419149197008705, + "grad_norm": 2.0008803421924286, + "learning_rate": 9.923198565509787e-06, + "loss": 0.7916, + "step": 2747 + }, + { + "epoch": 0.08422214049282825, + "grad_norm": 3.432094555275299, + "learning_rate": 9.923111884906216e-06, + "loss": 0.7469, + "step": 2748 + }, + { + "epoch": 0.08425278901556944, + "grad_norm": 2.2027503547809526, + "learning_rate": 9.923025155793965e-06, + "loss": 0.8059, + "step": 2749 + }, + { + "epoch": 0.08428343753831065, + "grad_norm": 1.9071661588274997, + "learning_rate": 9.922938378173887e-06, + "loss": 0.7496, + "step": 2750 + }, + { + "epoch": 0.08431408606105185, + "grad_norm": 2.1196374313843975, + "learning_rate": 9.922851552046837e-06, + "loss": 0.8323, + "step": 2751 + }, + { + "epoch": 0.08434473458379306, + "grad_norm": 2.287333915391311, + "learning_rate": 9.922764677413672e-06, + "loss": 0.7613, + "step": 2752 + }, + { + "epoch": 0.08437538310653427, + "grad_norm": 2.158343207595034, + "learning_rate": 9.922677754275248e-06, + "loss": 0.7573, + "step": 2753 + }, + { + "epoch": 0.08440603162927547, + "grad_norm": 2.31561648838972, + "learning_rate": 9.922590782632419e-06, + "loss": 0.8704, + "step": 2754 + }, + { + "epoch": 0.08443668015201668, + "grad_norm": 1.9088130753049506, + "learning_rate": 9.922503762486044e-06, + "loss": 0.8272, + "step": 2755 + }, + { + "epoch": 0.08446732867475788, + "grad_norm": 2.216923668357904, + "learning_rate": 9.92241669383698e-06, + "loss": 0.807, + "step": 2756 + }, + { + "epoch": 0.08449797719749907, + "grad_norm": 2.14709551239781, + "learning_rate": 9.922329576686084e-06, + "loss": 0.7609, + "step": 2757 + }, + { + "epoch": 0.08452862572024028, + "grad_norm": 2.2135466892303044, + "learning_rate": 9.922242411034216e-06, + "loss": 0.7439, + "step": 2758 + }, + { + "epoch": 0.08455927424298149, + "grad_norm": 1.7462872991461085, + "learning_rate": 9.922155196882234e-06, + "loss": 0.7314, + "step": 2759 + }, 
+ { + "epoch": 0.08458992276572269, + "grad_norm": 1.1629891457508263, + "learning_rate": 9.922067934230999e-06, + "loss": 0.5376, + "step": 2760 + }, + { + "epoch": 0.0846205712884639, + "grad_norm": 2.006020400700199, + "learning_rate": 9.921980623081366e-06, + "loss": 0.6975, + "step": 2761 + }, + { + "epoch": 0.0846512198112051, + "grad_norm": 1.8153580475909896, + "learning_rate": 9.9218932634342e-06, + "loss": 0.6997, + "step": 2762 + }, + { + "epoch": 0.08468186833394631, + "grad_norm": 1.8826183142717494, + "learning_rate": 9.921805855290362e-06, + "loss": 0.6794, + "step": 2763 + }, + { + "epoch": 0.08471251685668751, + "grad_norm": 1.8651210347453582, + "learning_rate": 9.92171839865071e-06, + "loss": 0.7024, + "step": 2764 + }, + { + "epoch": 0.0847431653794287, + "grad_norm": 2.0305734068417727, + "learning_rate": 9.921630893516108e-06, + "loss": 0.753, + "step": 2765 + }, + { + "epoch": 0.08477381390216991, + "grad_norm": 1.9568156123874307, + "learning_rate": 9.921543339887419e-06, + "loss": 0.7509, + "step": 2766 + }, + { + "epoch": 0.08480446242491112, + "grad_norm": 1.0154494948611328, + "learning_rate": 9.921455737765502e-06, + "loss": 0.5409, + "step": 2767 + }, + { + "epoch": 0.08483511094765232, + "grad_norm": 2.233116986633063, + "learning_rate": 9.921368087151222e-06, + "loss": 0.6884, + "step": 2768 + }, + { + "epoch": 0.08486575947039353, + "grad_norm": 2.2276675947506526, + "learning_rate": 9.921280388045444e-06, + "loss": 0.8746, + "step": 2769 + }, + { + "epoch": 0.08489640799313473, + "grad_norm": 1.9130941458752173, + "learning_rate": 9.92119264044903e-06, + "loss": 0.7472, + "step": 2770 + }, + { + "epoch": 0.08492705651587594, + "grad_norm": 2.1490052151715444, + "learning_rate": 9.921104844362849e-06, + "loss": 0.793, + "step": 2771 + }, + { + "epoch": 0.08495770503861713, + "grad_norm": 2.144562356683047, + "learning_rate": 9.921016999787761e-06, + "loss": 0.7797, + "step": 2772 + }, + { + "epoch": 0.08498835356135834, + "grad_norm": 2.072712436070739, + "learning_rate": 9.920929106724633e-06, + "loss": 0.7052, + "step": 2773 + }, + { + "epoch": 0.08501900208409954, + "grad_norm": 2.2028093459776796, + "learning_rate": 9.92084116517433e-06, + "loss": 0.8755, + "step": 2774 + }, + { + "epoch": 0.08504965060684075, + "grad_norm": 0.9803874169237852, + "learning_rate": 9.920753175137723e-06, + "loss": 0.5332, + "step": 2775 + }, + { + "epoch": 0.08508029912958195, + "grad_norm": 2.119179255792458, + "learning_rate": 9.920665136615675e-06, + "loss": 0.7684, + "step": 2776 + }, + { + "epoch": 0.08511094765232316, + "grad_norm": 1.8056766449545016, + "learning_rate": 9.920577049609054e-06, + "loss": 0.7032, + "step": 2777 + }, + { + "epoch": 0.08514159617506437, + "grad_norm": 0.8637155928681947, + "learning_rate": 9.920488914118727e-06, + "loss": 0.5199, + "step": 2778 + }, + { + "epoch": 0.08517224469780557, + "grad_norm": 2.205405936080973, + "learning_rate": 9.920400730145566e-06, + "loss": 0.7786, + "step": 2779 + }, + { + "epoch": 0.08520289322054676, + "grad_norm": 0.8901938560095461, + "learning_rate": 9.920312497690436e-06, + "loss": 0.5298, + "step": 2780 + }, + { + "epoch": 0.08523354174328797, + "grad_norm": 2.117339772048953, + "learning_rate": 9.92022421675421e-06, + "loss": 0.8161, + "step": 2781 + }, + { + "epoch": 0.08526419026602917, + "grad_norm": 1.965102640845952, + "learning_rate": 9.920135887337754e-06, + "loss": 0.8807, + "step": 2782 + }, + { + "epoch": 0.08529483878877038, + "grad_norm": 1.8508862540395732, + "learning_rate": 
9.92004750944194e-06, + "loss": 0.7359, + "step": 2783 + }, + { + "epoch": 0.08532548731151159, + "grad_norm": 1.9851518627279487, + "learning_rate": 9.919959083067641e-06, + "loss": 0.6464, + "step": 2784 + }, + { + "epoch": 0.08535613583425279, + "grad_norm": 0.8965453283956124, + "learning_rate": 9.919870608215726e-06, + "loss": 0.5099, + "step": 2785 + }, + { + "epoch": 0.085386784356994, + "grad_norm": 1.8538893773509266, + "learning_rate": 9.919782084887066e-06, + "loss": 0.7797, + "step": 2786 + }, + { + "epoch": 0.0854174328797352, + "grad_norm": 2.1269979505346663, + "learning_rate": 9.919693513082534e-06, + "loss": 0.8173, + "step": 2787 + }, + { + "epoch": 0.0854480814024764, + "grad_norm": 2.0300480643624623, + "learning_rate": 9.919604892803003e-06, + "loss": 0.7424, + "step": 2788 + }, + { + "epoch": 0.0854787299252176, + "grad_norm": 1.9366762021611592, + "learning_rate": 9.919516224049348e-06, + "loss": 0.7304, + "step": 2789 + }, + { + "epoch": 0.0855093784479588, + "grad_norm": 1.8564847842167818, + "learning_rate": 9.91942750682244e-06, + "loss": 0.7088, + "step": 2790 + }, + { + "epoch": 0.08554002697070001, + "grad_norm": 1.9958945138848723, + "learning_rate": 9.919338741123155e-06, + "loss": 0.6621, + "step": 2791 + }, + { + "epoch": 0.08557067549344122, + "grad_norm": 2.2490696362600273, + "learning_rate": 9.919249926952365e-06, + "loss": 0.7708, + "step": 2792 + }, + { + "epoch": 0.08560132401618242, + "grad_norm": 1.8122948864437145, + "learning_rate": 9.919161064310948e-06, + "loss": 0.7992, + "step": 2793 + }, + { + "epoch": 0.08563197253892363, + "grad_norm": 2.072339789315475, + "learning_rate": 9.919072153199778e-06, + "loss": 0.7765, + "step": 2794 + }, + { + "epoch": 0.08566262106166483, + "grad_norm": 1.0701218058652677, + "learning_rate": 9.91898319361973e-06, + "loss": 0.5221, + "step": 2795 + }, + { + "epoch": 0.08569326958440603, + "grad_norm": 1.9093408097506683, + "learning_rate": 9.918894185571684e-06, + "loss": 0.7663, + "step": 2796 + }, + { + "epoch": 0.08572391810714723, + "grad_norm": 1.7303197479772467, + "learning_rate": 9.918805129056514e-06, + "loss": 0.7675, + "step": 2797 + }, + { + "epoch": 0.08575456662988844, + "grad_norm": 2.0328303553946863, + "learning_rate": 9.9187160240751e-06, + "loss": 0.7939, + "step": 2798 + }, + { + "epoch": 0.08578521515262964, + "grad_norm": 1.9480008159992905, + "learning_rate": 9.918626870628317e-06, + "loss": 0.7056, + "step": 2799 + }, + { + "epoch": 0.08581586367537085, + "grad_norm": 1.8710461832947172, + "learning_rate": 9.918537668717045e-06, + "loss": 0.7747, + "step": 2800 + }, + { + "epoch": 0.08584651219811205, + "grad_norm": 1.9437641460997337, + "learning_rate": 9.918448418342164e-06, + "loss": 0.8291, + "step": 2801 + }, + { + "epoch": 0.08587716072085326, + "grad_norm": 1.9915250233439314, + "learning_rate": 9.918359119504552e-06, + "loss": 0.8349, + "step": 2802 + }, + { + "epoch": 0.08590780924359445, + "grad_norm": 2.1426013232700685, + "learning_rate": 9.918269772205089e-06, + "loss": 0.7124, + "step": 2803 + }, + { + "epoch": 0.08593845776633566, + "grad_norm": 2.1261012120825624, + "learning_rate": 9.918180376444655e-06, + "loss": 0.7489, + "step": 2804 + }, + { + "epoch": 0.08596910628907686, + "grad_norm": 1.9429376180166744, + "learning_rate": 9.918090932224131e-06, + "loss": 0.6202, + "step": 2805 + }, + { + "epoch": 0.08599975481181807, + "grad_norm": 2.117855517430449, + "learning_rate": 9.9180014395444e-06, + "loss": 0.8366, + "step": 2806 + }, + { + "epoch": 0.08603040333455927, 
+ "grad_norm": 1.845240015292876, + "learning_rate": 9.917911898406343e-06, + "loss": 0.7839, + "step": 2807 + }, + { + "epoch": 0.08606105185730048, + "grad_norm": 2.1682175476347494, + "learning_rate": 9.91782230881084e-06, + "loss": 0.7597, + "step": 2808 + }, + { + "epoch": 0.08609170038004169, + "grad_norm": 2.327857199602804, + "learning_rate": 9.917732670758776e-06, + "loss": 0.6906, + "step": 2809 + }, + { + "epoch": 0.08612234890278289, + "grad_norm": 2.1002146628530327, + "learning_rate": 9.917642984251034e-06, + "loss": 0.7516, + "step": 2810 + }, + { + "epoch": 0.08615299742552408, + "grad_norm": 2.3170323169153697, + "learning_rate": 9.9175532492885e-06, + "loss": 0.8391, + "step": 2811 + }, + { + "epoch": 0.08618364594826529, + "grad_norm": 1.1865599874969224, + "learning_rate": 9.917463465872051e-06, + "loss": 0.5243, + "step": 2812 + }, + { + "epoch": 0.0862142944710065, + "grad_norm": 1.9264069232973606, + "learning_rate": 9.91737363400258e-06, + "loss": 0.7552, + "step": 2813 + }, + { + "epoch": 0.0862449429937477, + "grad_norm": 1.8834524415368594, + "learning_rate": 9.917283753680966e-06, + "loss": 0.8211, + "step": 2814 + }, + { + "epoch": 0.0862755915164889, + "grad_norm": 0.9323671543326179, + "learning_rate": 9.917193824908097e-06, + "loss": 0.5378, + "step": 2815 + }, + { + "epoch": 0.08630624003923011, + "grad_norm": 1.9771381434887447, + "learning_rate": 9.91710384768486e-06, + "loss": 0.8271, + "step": 2816 + }, + { + "epoch": 0.08633688856197132, + "grad_norm": 1.9993794305312345, + "learning_rate": 9.91701382201214e-06, + "loss": 0.8479, + "step": 2817 + }, + { + "epoch": 0.08636753708471252, + "grad_norm": 2.0805328763980198, + "learning_rate": 9.916923747890825e-06, + "loss": 0.6801, + "step": 2818 + }, + { + "epoch": 0.08639818560745371, + "grad_norm": 1.9917716460625376, + "learning_rate": 9.916833625321804e-06, + "loss": 0.842, + "step": 2819 + }, + { + "epoch": 0.08642883413019492, + "grad_norm": 2.118796769684119, + "learning_rate": 9.91674345430596e-06, + "loss": 0.8499, + "step": 2820 + }, + { + "epoch": 0.08645948265293613, + "grad_norm": 2.2622989212996196, + "learning_rate": 9.916653234844188e-06, + "loss": 0.783, + "step": 2821 + }, + { + "epoch": 0.08649013117567733, + "grad_norm": 1.7289177033957353, + "learning_rate": 9.916562966937371e-06, + "loss": 0.7306, + "step": 2822 + }, + { + "epoch": 0.08652077969841854, + "grad_norm": 2.1357310562439884, + "learning_rate": 9.916472650586404e-06, + "loss": 0.713, + "step": 2823 + }, + { + "epoch": 0.08655142822115974, + "grad_norm": 1.5069078748728122, + "learning_rate": 9.916382285792172e-06, + "loss": 0.5351, + "step": 2824 + }, + { + "epoch": 0.08658207674390095, + "grad_norm": 1.2114369162341239, + "learning_rate": 9.916291872555568e-06, + "loss": 0.5523, + "step": 2825 + }, + { + "epoch": 0.08661272526664215, + "grad_norm": 1.9300664885396714, + "learning_rate": 9.916201410877481e-06, + "loss": 0.7412, + "step": 2826 + }, + { + "epoch": 0.08664337378938335, + "grad_norm": 2.156616208077294, + "learning_rate": 9.916110900758806e-06, + "loss": 0.7746, + "step": 2827 + }, + { + "epoch": 0.08667402231212455, + "grad_norm": 2.1482388124053173, + "learning_rate": 9.916020342200432e-06, + "loss": 0.8164, + "step": 2828 + }, + { + "epoch": 0.08670467083486576, + "grad_norm": 1.8587918641781775, + "learning_rate": 9.915929735203252e-06, + "loss": 0.6577, + "step": 2829 + }, + { + "epoch": 0.08673531935760696, + "grad_norm": 1.9129004184248062, + "learning_rate": 9.915839079768156e-06, + "loss": 0.7364, + 
"step": 2830 + }, + { + "epoch": 0.08676596788034817, + "grad_norm": 1.887620758025398, + "learning_rate": 9.915748375896041e-06, + "loss": 0.7813, + "step": 2831 + }, + { + "epoch": 0.08679661640308937, + "grad_norm": 1.9748703226235613, + "learning_rate": 9.9156576235878e-06, + "loss": 0.8062, + "step": 2832 + }, + { + "epoch": 0.08682726492583058, + "grad_norm": 1.8398942114196957, + "learning_rate": 9.915566822844326e-06, + "loss": 0.7561, + "step": 2833 + }, + { + "epoch": 0.08685791344857179, + "grad_norm": 2.5202876586753393, + "learning_rate": 9.915475973666516e-06, + "loss": 0.5579, + "step": 2834 + }, + { + "epoch": 0.08688856197131298, + "grad_norm": 2.1937201868788536, + "learning_rate": 9.915385076055262e-06, + "loss": 0.8184, + "step": 2835 + }, + { + "epoch": 0.08691921049405418, + "grad_norm": 2.5510772539046958, + "learning_rate": 9.915294130011461e-06, + "loss": 0.7483, + "step": 2836 + }, + { + "epoch": 0.08694985901679539, + "grad_norm": 2.3327852394131185, + "learning_rate": 9.915203135536011e-06, + "loss": 0.7549, + "step": 2837 + }, + { + "epoch": 0.0869805075395366, + "grad_norm": 2.1868755801900805, + "learning_rate": 9.915112092629806e-06, + "loss": 0.7461, + "step": 2838 + }, + { + "epoch": 0.0870111560622778, + "grad_norm": 2.2241405061038044, + "learning_rate": 9.915021001293743e-06, + "loss": 0.6821, + "step": 2839 + }, + { + "epoch": 0.087041804585019, + "grad_norm": 2.210451552625109, + "learning_rate": 9.914929861528722e-06, + "loss": 0.793, + "step": 2840 + }, + { + "epoch": 0.08707245310776021, + "grad_norm": 2.0257172398183663, + "learning_rate": 9.914838673335639e-06, + "loss": 0.7354, + "step": 2841 + }, + { + "epoch": 0.0871031016305014, + "grad_norm": 2.294219070356522, + "learning_rate": 9.914747436715394e-06, + "loss": 0.8216, + "step": 2842 + }, + { + "epoch": 0.08713375015324261, + "grad_norm": 2.110928279767287, + "learning_rate": 9.914656151668884e-06, + "loss": 0.8283, + "step": 2843 + }, + { + "epoch": 0.08716439867598381, + "grad_norm": 2.085429446062461, + "learning_rate": 9.914564818197008e-06, + "loss": 0.7515, + "step": 2844 + }, + { + "epoch": 0.08719504719872502, + "grad_norm": 2.5638782517425756, + "learning_rate": 9.914473436300668e-06, + "loss": 0.8064, + "step": 2845 + }, + { + "epoch": 0.08722569572146623, + "grad_norm": 2.1528600674030542, + "learning_rate": 9.914382005980766e-06, + "loss": 0.6968, + "step": 2846 + }, + { + "epoch": 0.08725634424420743, + "grad_norm": 2.129417509092398, + "learning_rate": 9.9142905272382e-06, + "loss": 0.7473, + "step": 2847 + }, + { + "epoch": 0.08728699276694864, + "grad_norm": 2.092657300024219, + "learning_rate": 9.914199000073871e-06, + "loss": 0.817, + "step": 2848 + }, + { + "epoch": 0.08731764128968984, + "grad_norm": 1.9888538811286924, + "learning_rate": 9.91410742448868e-06, + "loss": 0.5512, + "step": 2849 + }, + { + "epoch": 0.08734828981243103, + "grad_norm": 1.8617642335116646, + "learning_rate": 9.914015800483536e-06, + "loss": 0.7391, + "step": 2850 + }, + { + "epoch": 0.08737893833517224, + "grad_norm": 1.9433247028397045, + "learning_rate": 9.913924128059334e-06, + "loss": 0.6766, + "step": 2851 + }, + { + "epoch": 0.08740958685791345, + "grad_norm": 4.096813949910427, + "learning_rate": 9.91383240721698e-06, + "loss": 0.7933, + "step": 2852 + }, + { + "epoch": 0.08744023538065465, + "grad_norm": 2.0959789326737615, + "learning_rate": 9.91374063795738e-06, + "loss": 0.8039, + "step": 2853 + }, + { + "epoch": 0.08747088390339586, + "grad_norm": 2.4470140220927297, + 
"learning_rate": 9.913648820281435e-06, + "loss": 0.7235, + "step": 2854 + }, + { + "epoch": 0.08750153242613706, + "grad_norm": 1.1532677709384356, + "learning_rate": 9.913556954190051e-06, + "loss": 0.5377, + "step": 2855 + }, + { + "epoch": 0.08753218094887827, + "grad_norm": 1.8770022539800963, + "learning_rate": 9.913465039684134e-06, + "loss": 0.8043, + "step": 2856 + }, + { + "epoch": 0.08756282947161947, + "grad_norm": 1.8365524786004934, + "learning_rate": 9.913373076764587e-06, + "loss": 0.6646, + "step": 2857 + }, + { + "epoch": 0.08759347799436067, + "grad_norm": 2.1329778267103636, + "learning_rate": 9.913281065432318e-06, + "loss": 0.7744, + "step": 2858 + }, + { + "epoch": 0.08762412651710187, + "grad_norm": 2.0881869550217456, + "learning_rate": 9.913189005688235e-06, + "loss": 0.8447, + "step": 2859 + }, + { + "epoch": 0.08765477503984308, + "grad_norm": 2.1564853414418694, + "learning_rate": 9.913096897533244e-06, + "loss": 0.8696, + "step": 2860 + }, + { + "epoch": 0.08768542356258428, + "grad_norm": 2.2517471059167424, + "learning_rate": 9.913004740968251e-06, + "loss": 0.8151, + "step": 2861 + }, + { + "epoch": 0.08771607208532549, + "grad_norm": 2.027943471793319, + "learning_rate": 9.912912535994166e-06, + "loss": 0.777, + "step": 2862 + }, + { + "epoch": 0.0877467206080667, + "grad_norm": 2.444486172452427, + "learning_rate": 9.912820282611896e-06, + "loss": 0.7992, + "step": 2863 + }, + { + "epoch": 0.0877773691308079, + "grad_norm": 1.9828726954379376, + "learning_rate": 9.912727980822352e-06, + "loss": 0.8082, + "step": 2864 + }, + { + "epoch": 0.0878080176535491, + "grad_norm": 1.9887370362106622, + "learning_rate": 9.91263563062644e-06, + "loss": 0.8303, + "step": 2865 + }, + { + "epoch": 0.0878386661762903, + "grad_norm": 1.2223453248510276, + "learning_rate": 9.912543232025074e-06, + "loss": 0.5141, + "step": 2866 + }, + { + "epoch": 0.0878693146990315, + "grad_norm": 1.8984356655418029, + "learning_rate": 9.912450785019162e-06, + "loss": 0.7457, + "step": 2867 + }, + { + "epoch": 0.08789996322177271, + "grad_norm": 1.9823626732246962, + "learning_rate": 9.912358289609616e-06, + "loss": 0.7825, + "step": 2868 + }, + { + "epoch": 0.08793061174451391, + "grad_norm": 0.94575546012522, + "learning_rate": 9.912265745797347e-06, + "loss": 0.4961, + "step": 2869 + }, + { + "epoch": 0.08796126026725512, + "grad_norm": 1.9841375396378416, + "learning_rate": 9.912173153583266e-06, + "loss": 0.7848, + "step": 2870 + }, + { + "epoch": 0.08799190878999633, + "grad_norm": 1.9891731370255552, + "learning_rate": 9.912080512968286e-06, + "loss": 0.7947, + "step": 2871 + }, + { + "epoch": 0.08802255731273753, + "grad_norm": 1.8262005723409052, + "learning_rate": 9.91198782395332e-06, + "loss": 0.8045, + "step": 2872 + }, + { + "epoch": 0.08805320583547872, + "grad_norm": 0.9882362844135113, + "learning_rate": 9.911895086539281e-06, + "loss": 0.5209, + "step": 2873 + }, + { + "epoch": 0.08808385435821993, + "grad_norm": 1.955458933969871, + "learning_rate": 9.911802300727084e-06, + "loss": 0.7864, + "step": 2874 + }, + { + "epoch": 0.08811450288096113, + "grad_norm": 2.009053965669366, + "learning_rate": 9.911709466517641e-06, + "loss": 0.7911, + "step": 2875 + }, + { + "epoch": 0.08814515140370234, + "grad_norm": 2.036255083101178, + "learning_rate": 9.91161658391187e-06, + "loss": 0.7158, + "step": 2876 + }, + { + "epoch": 0.08817579992644355, + "grad_norm": 2.349161529043961, + "learning_rate": 9.911523652910681e-06, + "loss": 0.8049, + "step": 2877 + }, + { + "epoch": 
0.08820644844918475, + "grad_norm": 2.0382357063010215, + "learning_rate": 9.911430673514994e-06, + "loss": 0.7915, + "step": 2878 + }, + { + "epoch": 0.08823709697192596, + "grad_norm": 0.9159809771023902, + "learning_rate": 9.911337645725725e-06, + "loss": 0.5265, + "step": 2879 + }, + { + "epoch": 0.08826774549466716, + "grad_norm": 1.9818103422722864, + "learning_rate": 9.91124456954379e-06, + "loss": 0.7004, + "step": 2880 + }, + { + "epoch": 0.08829839401740835, + "grad_norm": 1.9410107926493472, + "learning_rate": 9.911151444970104e-06, + "loss": 0.8303, + "step": 2881 + }, + { + "epoch": 0.08832904254014956, + "grad_norm": 1.9021517583368106, + "learning_rate": 9.911058272005587e-06, + "loss": 0.6714, + "step": 2882 + }, + { + "epoch": 0.08835969106289077, + "grad_norm": 2.0102072738708956, + "learning_rate": 9.910965050651155e-06, + "loss": 0.8246, + "step": 2883 + }, + { + "epoch": 0.08839033958563197, + "grad_norm": 1.8540414080567469, + "learning_rate": 9.910871780907729e-06, + "loss": 0.772, + "step": 2884 + }, + { + "epoch": 0.08842098810837318, + "grad_norm": 1.8997254953767237, + "learning_rate": 9.910778462776227e-06, + "loss": 0.8002, + "step": 2885 + }, + { + "epoch": 0.08845163663111438, + "grad_norm": 1.873952239072599, + "learning_rate": 9.910685096257568e-06, + "loss": 0.6901, + "step": 2886 + }, + { + "epoch": 0.08848228515385559, + "grad_norm": 1.8658751588450884, + "learning_rate": 9.910591681352673e-06, + "loss": 0.7237, + "step": 2887 + }, + { + "epoch": 0.0885129336765968, + "grad_norm": 1.01946461146687, + "learning_rate": 9.910498218062461e-06, + "loss": 0.5236, + "step": 2888 + }, + { + "epoch": 0.08854358219933799, + "grad_norm": 2.1835457162529073, + "learning_rate": 9.910404706387853e-06, + "loss": 0.7284, + "step": 2889 + }, + { + "epoch": 0.08857423072207919, + "grad_norm": 1.9532211310141454, + "learning_rate": 9.910311146329772e-06, + "loss": 0.7747, + "step": 2890 + }, + { + "epoch": 0.0886048792448204, + "grad_norm": 2.0211868559209196, + "learning_rate": 9.910217537889139e-06, + "loss": 0.7528, + "step": 2891 + }, + { + "epoch": 0.0886355277675616, + "grad_norm": 1.865222362117812, + "learning_rate": 9.910123881066875e-06, + "loss": 0.6305, + "step": 2892 + }, + { + "epoch": 0.08866617629030281, + "grad_norm": 0.9098295397149683, + "learning_rate": 9.910030175863905e-06, + "loss": 0.5294, + "step": 2893 + }, + { + "epoch": 0.08869682481304401, + "grad_norm": 2.0562036597042095, + "learning_rate": 9.909936422281152e-06, + "loss": 0.7973, + "step": 2894 + }, + { + "epoch": 0.08872747333578522, + "grad_norm": 1.9608743670047886, + "learning_rate": 9.909842620319539e-06, + "loss": 0.8057, + "step": 2895 + }, + { + "epoch": 0.08875812185852643, + "grad_norm": 0.9372065121088602, + "learning_rate": 9.90974876997999e-06, + "loss": 0.5302, + "step": 2896 + }, + { + "epoch": 0.08878877038126762, + "grad_norm": 2.045061394056276, + "learning_rate": 9.90965487126343e-06, + "loss": 0.7589, + "step": 2897 + }, + { + "epoch": 0.08881941890400882, + "grad_norm": 1.8824008495028193, + "learning_rate": 9.909560924170784e-06, + "loss": 0.7578, + "step": 2898 + }, + { + "epoch": 0.08885006742675003, + "grad_norm": 2.1116835643554044, + "learning_rate": 9.90946692870298e-06, + "loss": 0.749, + "step": 2899 + }, + { + "epoch": 0.08888071594949123, + "grad_norm": 2.0209850449067104, + "learning_rate": 9.90937288486094e-06, + "loss": 0.7986, + "step": 2900 + }, + { + "epoch": 0.08891136447223244, + "grad_norm": 0.9842847336165884, + "learning_rate": 
9.909278792645594e-06, + "loss": 0.5214, + "step": 2901 + }, + { + "epoch": 0.08894201299497365, + "grad_norm": 1.0776668670616094, + "learning_rate": 9.909184652057866e-06, + "loss": 0.5473, + "step": 2902 + }, + { + "epoch": 0.08897266151771485, + "grad_norm": 2.665439336846594, + "learning_rate": 9.909090463098688e-06, + "loss": 0.7292, + "step": 2903 + }, + { + "epoch": 0.08900331004045604, + "grad_norm": 1.862324502287059, + "learning_rate": 9.908996225768985e-06, + "loss": 0.6705, + "step": 2904 + }, + { + "epoch": 0.08903395856319725, + "grad_norm": 0.9297149131548258, + "learning_rate": 9.908901940069686e-06, + "loss": 0.5239, + "step": 2905 + }, + { + "epoch": 0.08906460708593845, + "grad_norm": 2.229772718119466, + "learning_rate": 9.908807606001721e-06, + "loss": 0.8952, + "step": 2906 + }, + { + "epoch": 0.08909525560867966, + "grad_norm": 2.0818775582039817, + "learning_rate": 9.908713223566018e-06, + "loss": 0.8677, + "step": 2907 + }, + { + "epoch": 0.08912590413142087, + "grad_norm": 1.0406719897624879, + "learning_rate": 9.908618792763507e-06, + "loss": 0.5067, + "step": 2908 + }, + { + "epoch": 0.08915655265416207, + "grad_norm": 1.850991669300992, + "learning_rate": 9.90852431359512e-06, + "loss": 0.7096, + "step": 2909 + }, + { + "epoch": 0.08918720117690328, + "grad_norm": 2.100679323619573, + "learning_rate": 9.908429786061787e-06, + "loss": 0.7611, + "step": 2910 + }, + { + "epoch": 0.08921784969964448, + "grad_norm": 1.87974596180356, + "learning_rate": 9.908335210164438e-06, + "loss": 0.7131, + "step": 2911 + }, + { + "epoch": 0.08924849822238567, + "grad_norm": 1.9945212256282383, + "learning_rate": 9.908240585904008e-06, + "loss": 0.7514, + "step": 2912 + }, + { + "epoch": 0.08927914674512688, + "grad_norm": 1.8973035029125709, + "learning_rate": 9.908145913281426e-06, + "loss": 0.6969, + "step": 2913 + }, + { + "epoch": 0.08930979526786809, + "grad_norm": 2.0489122361958905, + "learning_rate": 9.908051192297628e-06, + "loss": 0.811, + "step": 2914 + }, + { + "epoch": 0.08934044379060929, + "grad_norm": 1.9303286703673397, + "learning_rate": 9.907956422953546e-06, + "loss": 0.721, + "step": 2915 + }, + { + "epoch": 0.0893710923133505, + "grad_norm": 1.9182256603242847, + "learning_rate": 9.907861605250114e-06, + "loss": 0.7437, + "step": 2916 + }, + { + "epoch": 0.0894017408360917, + "grad_norm": 1.914908034182179, + "learning_rate": 9.907766739188264e-06, + "loss": 0.8115, + "step": 2917 + }, + { + "epoch": 0.08943238935883291, + "grad_norm": 1.9158828968568766, + "learning_rate": 9.907671824768933e-06, + "loss": 0.8052, + "step": 2918 + }, + { + "epoch": 0.08946303788157411, + "grad_norm": 1.0378280903687027, + "learning_rate": 9.907576861993056e-06, + "loss": 0.5157, + "step": 2919 + }, + { + "epoch": 0.0894936864043153, + "grad_norm": 2.003290205150268, + "learning_rate": 9.90748185086157e-06, + "loss": 0.7279, + "step": 2920 + }, + { + "epoch": 0.08952433492705651, + "grad_norm": 2.141964164447751, + "learning_rate": 9.907386791375408e-06, + "loss": 0.8353, + "step": 2921 + }, + { + "epoch": 0.08955498344979772, + "grad_norm": 2.2499066311921094, + "learning_rate": 9.90729168353551e-06, + "loss": 0.8374, + "step": 2922 + }, + { + "epoch": 0.08958563197253892, + "grad_norm": 2.1426305392153386, + "learning_rate": 9.907196527342809e-06, + "loss": 0.7369, + "step": 2923 + }, + { + "epoch": 0.08961628049528013, + "grad_norm": 1.8714171767116958, + "learning_rate": 9.907101322798247e-06, + "loss": 0.7942, + "step": 2924 + }, + { + "epoch": 0.08964692901802133, + 
"grad_norm": 1.8531762816208448, + "learning_rate": 9.90700606990276e-06, + "loss": 0.7328, + "step": 2925 + }, + { + "epoch": 0.08967757754076254, + "grad_norm": 2.59543229773488, + "learning_rate": 9.906910768657286e-06, + "loss": 0.8091, + "step": 2926 + }, + { + "epoch": 0.08970822606350375, + "grad_norm": 2.134864044098138, + "learning_rate": 9.906815419062763e-06, + "loss": 0.8339, + "step": 2927 + }, + { + "epoch": 0.08973887458624494, + "grad_norm": 0.991415961756191, + "learning_rate": 9.906720021120136e-06, + "loss": 0.5125, + "step": 2928 + }, + { + "epoch": 0.08976952310898614, + "grad_norm": 2.1253611163810957, + "learning_rate": 9.90662457483034e-06, + "loss": 0.8055, + "step": 2929 + }, + { + "epoch": 0.08980017163172735, + "grad_norm": 1.7703652280275948, + "learning_rate": 9.906529080194315e-06, + "loss": 0.7505, + "step": 2930 + }, + { + "epoch": 0.08983082015446855, + "grad_norm": 1.6737703868703893, + "learning_rate": 9.906433537213006e-06, + "loss": 0.7381, + "step": 2931 + }, + { + "epoch": 0.08986146867720976, + "grad_norm": 1.825643182951097, + "learning_rate": 9.90633794588735e-06, + "loss": 0.7395, + "step": 2932 + }, + { + "epoch": 0.08989211719995097, + "grad_norm": 1.8526142558836871, + "learning_rate": 9.90624230621829e-06, + "loss": 0.7432, + "step": 2933 + }, + { + "epoch": 0.08992276572269217, + "grad_norm": 0.891150292893928, + "learning_rate": 9.906146618206772e-06, + "loss": 0.5036, + "step": 2934 + }, + { + "epoch": 0.08995341424543336, + "grad_norm": 0.9198376109055253, + "learning_rate": 9.906050881853735e-06, + "loss": 0.5108, + "step": 2935 + }, + { + "epoch": 0.08998406276817457, + "grad_norm": 2.1674358242975327, + "learning_rate": 9.905955097160122e-06, + "loss": 0.7217, + "step": 2936 + }, + { + "epoch": 0.09001471129091577, + "grad_norm": 1.8977329271145595, + "learning_rate": 9.90585926412688e-06, + "loss": 0.8347, + "step": 2937 + }, + { + "epoch": 0.09004535981365698, + "grad_norm": 0.9205005392765547, + "learning_rate": 9.90576338275495e-06, + "loss": 0.5155, + "step": 2938 + }, + { + "epoch": 0.09007600833639819, + "grad_norm": 1.7794589296148104, + "learning_rate": 9.90566745304528e-06, + "loss": 0.7623, + "step": 2939 + }, + { + "epoch": 0.09010665685913939, + "grad_norm": 2.038122386746521, + "learning_rate": 9.905571474998812e-06, + "loss": 0.7717, + "step": 2940 + }, + { + "epoch": 0.0901373053818806, + "grad_norm": 1.7522717529784557, + "learning_rate": 9.905475448616493e-06, + "loss": 0.7367, + "step": 2941 + }, + { + "epoch": 0.0901679539046218, + "grad_norm": 2.014206048753149, + "learning_rate": 9.90537937389927e-06, + "loss": 0.7937, + "step": 2942 + }, + { + "epoch": 0.090198602427363, + "grad_norm": 2.075321598480121, + "learning_rate": 9.905283250848089e-06, + "loss": 0.8471, + "step": 2943 + }, + { + "epoch": 0.0902292509501042, + "grad_norm": 3.822099402861195, + "learning_rate": 9.905187079463895e-06, + "loss": 0.7383, + "step": 2944 + }, + { + "epoch": 0.0902598994728454, + "grad_norm": 0.9260166158876769, + "learning_rate": 9.90509085974764e-06, + "loss": 0.5145, + "step": 2945 + }, + { + "epoch": 0.09029054799558661, + "grad_norm": 2.0006451004551367, + "learning_rate": 9.90499459170027e-06, + "loss": 0.7397, + "step": 2946 + }, + { + "epoch": 0.09032119651832782, + "grad_norm": 0.8754019312261665, + "learning_rate": 9.904898275322734e-06, + "loss": 0.5138, + "step": 2947 + }, + { + "epoch": 0.09035184504106902, + "grad_norm": 2.2469663051391255, + "learning_rate": 9.904801910615978e-06, + "loss": 0.6924, + "step": 2948 
+ }, + { + "epoch": 0.09038249356381023, + "grad_norm": 2.50139243277914, + "learning_rate": 9.904705497580954e-06, + "loss": 0.7636, + "step": 2949 + }, + { + "epoch": 0.09041314208655143, + "grad_norm": 2.1178248618903956, + "learning_rate": 9.904609036218613e-06, + "loss": 0.8459, + "step": 2950 + }, + { + "epoch": 0.09044379060929263, + "grad_norm": 2.058614617254843, + "learning_rate": 9.904512526529904e-06, + "loss": 0.8735, + "step": 2951 + }, + { + "epoch": 0.09047443913203383, + "grad_norm": 1.9400316968602513, + "learning_rate": 9.904415968515777e-06, + "loss": 0.6138, + "step": 2952 + }, + { + "epoch": 0.09050508765477504, + "grad_norm": 1.976848237097893, + "learning_rate": 9.904319362177186e-06, + "loss": 0.7192, + "step": 2953 + }, + { + "epoch": 0.09053573617751624, + "grad_norm": 2.2227198685398872, + "learning_rate": 9.90422270751508e-06, + "loss": 0.7256, + "step": 2954 + }, + { + "epoch": 0.09056638470025745, + "grad_norm": 2.0671852874241035, + "learning_rate": 9.904126004530415e-06, + "loss": 0.7668, + "step": 2955 + }, + { + "epoch": 0.09059703322299865, + "grad_norm": 2.094290385139489, + "learning_rate": 9.904029253224142e-06, + "loss": 0.7491, + "step": 2956 + }, + { + "epoch": 0.09062768174573986, + "grad_norm": 2.120106247833941, + "learning_rate": 9.903932453597212e-06, + "loss": 0.7691, + "step": 2957 + }, + { + "epoch": 0.09065833026848107, + "grad_norm": 2.352868824539404, + "learning_rate": 9.90383560565058e-06, + "loss": 0.7368, + "step": 2958 + }, + { + "epoch": 0.09068897879122226, + "grad_norm": 1.923810283829918, + "learning_rate": 9.903738709385203e-06, + "loss": 0.6941, + "step": 2959 + }, + { + "epoch": 0.09071962731396346, + "grad_norm": 2.0967394291025063, + "learning_rate": 9.903641764802033e-06, + "loss": 0.7393, + "step": 2960 + }, + { + "epoch": 0.09075027583670467, + "grad_norm": 1.110152452920369, + "learning_rate": 9.903544771902027e-06, + "loss": 0.5058, + "step": 2961 + }, + { + "epoch": 0.09078092435944587, + "grad_norm": 2.010657323402387, + "learning_rate": 9.903447730686139e-06, + "loss": 0.7774, + "step": 2962 + }, + { + "epoch": 0.09081157288218708, + "grad_norm": 2.0201186718157604, + "learning_rate": 9.903350641155325e-06, + "loss": 0.8047, + "step": 2963 + }, + { + "epoch": 0.09084222140492829, + "grad_norm": 0.9092581404126574, + "learning_rate": 9.903253503310544e-06, + "loss": 0.5376, + "step": 2964 + }, + { + "epoch": 0.09087286992766949, + "grad_norm": 2.1333269628889737, + "learning_rate": 9.90315631715275e-06, + "loss": 0.8767, + "step": 2965 + }, + { + "epoch": 0.09090351845041068, + "grad_norm": 0.9016797769891856, + "learning_rate": 9.903059082682906e-06, + "loss": 0.5054, + "step": 2966 + }, + { + "epoch": 0.09093416697315189, + "grad_norm": 1.9827941519728578, + "learning_rate": 9.902961799901964e-06, + "loss": 0.7353, + "step": 2967 + }, + { + "epoch": 0.0909648154958931, + "grad_norm": 2.1621176837490346, + "learning_rate": 9.902864468810884e-06, + "loss": 0.8014, + "step": 2968 + }, + { + "epoch": 0.0909954640186343, + "grad_norm": 1.873406710219088, + "learning_rate": 9.902767089410627e-06, + "loss": 0.7985, + "step": 2969 + }, + { + "epoch": 0.0910261125413755, + "grad_norm": 0.9069094138864114, + "learning_rate": 9.902669661702151e-06, + "loss": 0.5137, + "step": 2970 + }, + { + "epoch": 0.09105676106411671, + "grad_norm": 1.9760880279360868, + "learning_rate": 9.902572185686416e-06, + "loss": 0.8907, + "step": 2971 + }, + { + "epoch": 0.09108740958685792, + "grad_norm": 2.060128020015315, + "learning_rate": 
9.902474661364383e-06, + "loss": 0.7038, + "step": 2972 + }, + { + "epoch": 0.09111805810959912, + "grad_norm": 1.8666993274614836, + "learning_rate": 9.902377088737014e-06, + "loss": 0.7399, + "step": 2973 + }, + { + "epoch": 0.09114870663234032, + "grad_norm": 0.9510888458673106, + "learning_rate": 9.90227946780527e-06, + "loss": 0.5184, + "step": 2974 + }, + { + "epoch": 0.09117935515508152, + "grad_norm": 1.8099936427382226, + "learning_rate": 9.90218179857011e-06, + "loss": 0.7409, + "step": 2975 + }, + { + "epoch": 0.09121000367782273, + "grad_norm": 2.011166807129866, + "learning_rate": 9.902084081032499e-06, + "loss": 0.8063, + "step": 2976 + }, + { + "epoch": 0.09124065220056393, + "grad_norm": 1.9949168011773546, + "learning_rate": 9.901986315193399e-06, + "loss": 0.7379, + "step": 2977 + }, + { + "epoch": 0.09127130072330514, + "grad_norm": 2.144512973248358, + "learning_rate": 9.901888501053773e-06, + "loss": 0.7745, + "step": 2978 + }, + { + "epoch": 0.09130194924604634, + "grad_norm": 0.9126491600515183, + "learning_rate": 9.901790638614588e-06, + "loss": 0.5244, + "step": 2979 + }, + { + "epoch": 0.09133259776878755, + "grad_norm": 0.8840413160326305, + "learning_rate": 9.901692727876804e-06, + "loss": 0.5058, + "step": 2980 + }, + { + "epoch": 0.09136324629152875, + "grad_norm": 2.073430894262685, + "learning_rate": 9.901594768841386e-06, + "loss": 0.6115, + "step": 2981 + }, + { + "epoch": 0.09139389481426995, + "grad_norm": 1.829077839768486, + "learning_rate": 9.901496761509304e-06, + "loss": 0.7866, + "step": 2982 + }, + { + "epoch": 0.09142454333701115, + "grad_norm": 0.9246032474752768, + "learning_rate": 9.901398705881518e-06, + "loss": 0.528, + "step": 2983 + }, + { + "epoch": 0.09145519185975236, + "grad_norm": 2.0370978524302954, + "learning_rate": 9.901300601958997e-06, + "loss": 0.7731, + "step": 2984 + }, + { + "epoch": 0.09148584038249356, + "grad_norm": 1.9063728668772513, + "learning_rate": 9.901202449742706e-06, + "loss": 0.7686, + "step": 2985 + }, + { + "epoch": 0.09151648890523477, + "grad_norm": 2.0840171644338117, + "learning_rate": 9.901104249233614e-06, + "loss": 0.8157, + "step": 2986 + }, + { + "epoch": 0.09154713742797597, + "grad_norm": 1.973417714282018, + "learning_rate": 9.901006000432688e-06, + "loss": 0.7406, + "step": 2987 + }, + { + "epoch": 0.09157778595071718, + "grad_norm": 1.9506065717084464, + "learning_rate": 9.900907703340897e-06, + "loss": 0.7099, + "step": 2988 + }, + { + "epoch": 0.09160843447345839, + "grad_norm": 2.1383953137734655, + "learning_rate": 9.900809357959206e-06, + "loss": 0.7033, + "step": 2989 + }, + { + "epoch": 0.09163908299619958, + "grad_norm": 1.0224251282248615, + "learning_rate": 9.900710964288588e-06, + "loss": 0.523, + "step": 2990 + }, + { + "epoch": 0.09166973151894078, + "grad_norm": 1.8040411008390507, + "learning_rate": 9.900612522330012e-06, + "loss": 0.7042, + "step": 2991 + }, + { + "epoch": 0.09170038004168199, + "grad_norm": 1.856735447542042, + "learning_rate": 9.900514032084445e-06, + "loss": 0.7557, + "step": 2992 + }, + { + "epoch": 0.0917310285644232, + "grad_norm": 1.7680089410707587, + "learning_rate": 9.90041549355286e-06, + "loss": 0.8022, + "step": 2993 + }, + { + "epoch": 0.0917616770871644, + "grad_norm": 2.105603823036762, + "learning_rate": 9.900316906736227e-06, + "loss": 0.769, + "step": 2994 + }, + { + "epoch": 0.0917923256099056, + "grad_norm": 2.0853762697803275, + "learning_rate": 9.900218271635517e-06, + "loss": 0.7752, + "step": 2995 + }, + { + "epoch": 0.09182297413264681, 
+ "grad_norm": 1.9886762084870182, + "learning_rate": 9.900119588251706e-06, + "loss": 0.7479, + "step": 2996 + }, + { + "epoch": 0.091853622655388, + "grad_norm": 1.9069456173894421, + "learning_rate": 9.90002085658576e-06, + "loss": 0.7864, + "step": 2997 + }, + { + "epoch": 0.09188427117812921, + "grad_norm": 1.8556038914980573, + "learning_rate": 9.899922076638655e-06, + "loss": 0.7503, + "step": 2998 + }, + { + "epoch": 0.09191491970087041, + "grad_norm": 1.950633639797168, + "learning_rate": 9.899823248411364e-06, + "loss": 0.7378, + "step": 2999 + }, + { + "epoch": 0.09194556822361162, + "grad_norm": 1.8262543287561022, + "learning_rate": 9.899724371904862e-06, + "loss": 0.71, + "step": 3000 + }, + { + "epoch": 0.09197621674635283, + "grad_norm": 2.0523927321967492, + "learning_rate": 9.899625447120122e-06, + "loss": 0.7773, + "step": 3001 + }, + { + "epoch": 0.09200686526909403, + "grad_norm": 2.125326676669027, + "learning_rate": 9.899526474058118e-06, + "loss": 0.7972, + "step": 3002 + }, + { + "epoch": 0.09203751379183524, + "grad_norm": 0.9702465332780518, + "learning_rate": 9.899427452719826e-06, + "loss": 0.5186, + "step": 3003 + }, + { + "epoch": 0.09206816231457644, + "grad_norm": 2.350098362531575, + "learning_rate": 9.899328383106224e-06, + "loss": 0.7834, + "step": 3004 + }, + { + "epoch": 0.09209881083731764, + "grad_norm": 1.8618077438709997, + "learning_rate": 9.899229265218284e-06, + "loss": 0.8113, + "step": 3005 + }, + { + "epoch": 0.09212945936005884, + "grad_norm": 2.069782826442215, + "learning_rate": 9.899130099056983e-06, + "loss": 0.7664, + "step": 3006 + }, + { + "epoch": 0.09216010788280005, + "grad_norm": 1.9369975374593769, + "learning_rate": 9.899030884623302e-06, + "loss": 0.8463, + "step": 3007 + }, + { + "epoch": 0.09219075640554125, + "grad_norm": 1.947463006416339, + "learning_rate": 9.898931621918215e-06, + "loss": 0.7283, + "step": 3008 + }, + { + "epoch": 0.09222140492828246, + "grad_norm": 1.897455243748763, + "learning_rate": 9.898832310942702e-06, + "loss": 0.723, + "step": 3009 + }, + { + "epoch": 0.09225205345102366, + "grad_norm": 2.1482012001554023, + "learning_rate": 9.89873295169774e-06, + "loss": 0.7799, + "step": 3010 + }, + { + "epoch": 0.09228270197376487, + "grad_norm": 1.755712412491787, + "learning_rate": 9.89863354418431e-06, + "loss": 0.6366, + "step": 3011 + }, + { + "epoch": 0.09231335049650607, + "grad_norm": 1.9475302459807515, + "learning_rate": 9.89853408840339e-06, + "loss": 0.7284, + "step": 3012 + }, + { + "epoch": 0.09234399901924727, + "grad_norm": 1.969885484402543, + "learning_rate": 9.89843458435596e-06, + "loss": 0.8301, + "step": 3013 + }, + { + "epoch": 0.09237464754198847, + "grad_norm": 1.9713679645604654, + "learning_rate": 9.898335032043001e-06, + "loss": 0.8374, + "step": 3014 + }, + { + "epoch": 0.09240529606472968, + "grad_norm": 1.8377723086417188, + "learning_rate": 9.898235431465492e-06, + "loss": 0.743, + "step": 3015 + }, + { + "epoch": 0.09243594458747088, + "grad_norm": 1.852987917966043, + "learning_rate": 9.898135782624418e-06, + "loss": 0.6962, + "step": 3016 + }, + { + "epoch": 0.09246659311021209, + "grad_norm": 1.0150261263012186, + "learning_rate": 9.898036085520759e-06, + "loss": 0.5365, + "step": 3017 + }, + { + "epoch": 0.0924972416329533, + "grad_norm": 2.086328600955086, + "learning_rate": 9.897936340155496e-06, + "loss": 0.808, + "step": 3018 + }, + { + "epoch": 0.0925278901556945, + "grad_norm": 0.9389717959249938, + "learning_rate": 9.897836546529614e-06, + "loss": 0.5271, + "step": 
3019 + }, + { + "epoch": 0.0925585386784357, + "grad_norm": 0.8467040850317221, + "learning_rate": 9.897736704644093e-06, + "loss": 0.4897, + "step": 3020 + }, + { + "epoch": 0.0925891872011769, + "grad_norm": 2.0014875730578887, + "learning_rate": 9.897636814499923e-06, + "loss": 0.7398, + "step": 3021 + }, + { + "epoch": 0.0926198357239181, + "grad_norm": 1.8497617764043035, + "learning_rate": 9.897536876098081e-06, + "loss": 0.7176, + "step": 3022 + }, + { + "epoch": 0.09265048424665931, + "grad_norm": 0.9704261810261902, + "learning_rate": 9.897436889439558e-06, + "loss": 0.5365, + "step": 3023 + }, + { + "epoch": 0.09268113276940051, + "grad_norm": 0.9522709631084986, + "learning_rate": 9.897336854525334e-06, + "loss": 0.5264, + "step": 3024 + }, + { + "epoch": 0.09271178129214172, + "grad_norm": 2.3434626242495975, + "learning_rate": 9.897236771356397e-06, + "loss": 0.8587, + "step": 3025 + }, + { + "epoch": 0.09274242981488293, + "grad_norm": 1.8830160736142323, + "learning_rate": 9.897136639933734e-06, + "loss": 0.6929, + "step": 3026 + }, + { + "epoch": 0.09277307833762413, + "grad_norm": 2.2420881646689197, + "learning_rate": 9.89703646025833e-06, + "loss": 0.969, + "step": 3027 + }, + { + "epoch": 0.09280372686036532, + "grad_norm": 1.9449867685052686, + "learning_rate": 9.896936232331173e-06, + "loss": 0.6601, + "step": 3028 + }, + { + "epoch": 0.09283437538310653, + "grad_norm": 1.9270472513232544, + "learning_rate": 9.896835956153251e-06, + "loss": 0.704, + "step": 3029 + }, + { + "epoch": 0.09286502390584774, + "grad_norm": 1.861633103112164, + "learning_rate": 9.896735631725551e-06, + "loss": 0.7009, + "step": 3030 + }, + { + "epoch": 0.09289567242858894, + "grad_norm": 1.8969363212991495, + "learning_rate": 9.896635259049062e-06, + "loss": 0.6976, + "step": 3031 + }, + { + "epoch": 0.09292632095133015, + "grad_norm": 1.8024356091617366, + "learning_rate": 9.896534838124773e-06, + "loss": 0.7443, + "step": 3032 + }, + { + "epoch": 0.09295696947407135, + "grad_norm": 2.1662350399407395, + "learning_rate": 9.896434368953673e-06, + "loss": 0.7279, + "step": 3033 + }, + { + "epoch": 0.09298761799681256, + "grad_norm": 2.021870055976263, + "learning_rate": 9.896333851536753e-06, + "loss": 0.8532, + "step": 3034 + }, + { + "epoch": 0.09301826651955376, + "grad_norm": 1.9529552841239664, + "learning_rate": 9.896233285875003e-06, + "loss": 0.7839, + "step": 3035 + }, + { + "epoch": 0.09304891504229496, + "grad_norm": 1.9853581109600422, + "learning_rate": 9.896132671969412e-06, + "loss": 0.7379, + "step": 3036 + }, + { + "epoch": 0.09307956356503616, + "grad_norm": 1.9183420815340442, + "learning_rate": 9.896032009820975e-06, + "loss": 0.737, + "step": 3037 + }, + { + "epoch": 0.09311021208777737, + "grad_norm": 1.9625728573532395, + "learning_rate": 9.895931299430681e-06, + "loss": 0.7241, + "step": 3038 + }, + { + "epoch": 0.09314086061051857, + "grad_norm": 1.9934100589264947, + "learning_rate": 9.895830540799523e-06, + "loss": 0.7277, + "step": 3039 + }, + { + "epoch": 0.09317150913325978, + "grad_norm": 1.727014578910172, + "learning_rate": 9.895729733928494e-06, + "loss": 0.7058, + "step": 3040 + }, + { + "epoch": 0.09320215765600098, + "grad_norm": 2.5106432439268365, + "learning_rate": 9.895628878818588e-06, + "loss": 0.688, + "step": 3041 + }, + { + "epoch": 0.09323280617874219, + "grad_norm": 2.029478554818263, + "learning_rate": 9.895527975470799e-06, + "loss": 0.7834, + "step": 3042 + }, + { + "epoch": 0.0932634547014834, + "grad_norm": 2.0564191242991323, + 
"learning_rate": 9.895427023886118e-06, + "loss": 0.7741, + "step": 3043 + }, + { + "epoch": 0.09329410322422459, + "grad_norm": 2.4570054086912463, + "learning_rate": 9.895326024065542e-06, + "loss": 0.7287, + "step": 3044 + }, + { + "epoch": 0.09332475174696579, + "grad_norm": 2.5473811407175173, + "learning_rate": 9.895224976010067e-06, + "loss": 0.748, + "step": 3045 + }, + { + "epoch": 0.093355400269707, + "grad_norm": 2.0498239318943634, + "learning_rate": 9.895123879720688e-06, + "loss": 0.7376, + "step": 3046 + }, + { + "epoch": 0.0933860487924482, + "grad_norm": 2.0075399506018927, + "learning_rate": 9.8950227351984e-06, + "loss": 0.7376, + "step": 3047 + }, + { + "epoch": 0.09341669731518941, + "grad_norm": 1.896162057273456, + "learning_rate": 9.894921542444202e-06, + "loss": 0.6852, + "step": 3048 + }, + { + "epoch": 0.09344734583793061, + "grad_norm": 2.2331461971274593, + "learning_rate": 9.894820301459089e-06, + "loss": 0.7883, + "step": 3049 + }, + { + "epoch": 0.09347799436067182, + "grad_norm": 2.1301935894727038, + "learning_rate": 9.89471901224406e-06, + "loss": 0.8313, + "step": 3050 + }, + { + "epoch": 0.09350864288341303, + "grad_norm": 2.0308293748567725, + "learning_rate": 9.89461767480011e-06, + "loss": 0.7504, + "step": 3051 + }, + { + "epoch": 0.09353929140615422, + "grad_norm": 2.2142792969142118, + "learning_rate": 9.894516289128242e-06, + "loss": 0.7295, + "step": 3052 + }, + { + "epoch": 0.09356993992889542, + "grad_norm": 2.251992854968373, + "learning_rate": 9.894414855229453e-06, + "loss": 0.7547, + "step": 3053 + }, + { + "epoch": 0.09360058845163663, + "grad_norm": 1.6994240981410307, + "learning_rate": 9.89431337310474e-06, + "loss": 0.7362, + "step": 3054 + }, + { + "epoch": 0.09363123697437783, + "grad_norm": 2.0887714684008194, + "learning_rate": 9.894211842755107e-06, + "loss": 0.766, + "step": 3055 + }, + { + "epoch": 0.09366188549711904, + "grad_norm": 1.6037998534577527, + "learning_rate": 9.894110264181551e-06, + "loss": 0.545, + "step": 3056 + }, + { + "epoch": 0.09369253401986025, + "grad_norm": 1.9083373856858015, + "learning_rate": 9.894008637385075e-06, + "loss": 0.6871, + "step": 3057 + }, + { + "epoch": 0.09372318254260145, + "grad_norm": 0.9192843892741529, + "learning_rate": 9.89390696236668e-06, + "loss": 0.5018, + "step": 3058 + }, + { + "epoch": 0.09375383106534264, + "grad_norm": 2.4320183971177087, + "learning_rate": 9.893805239127366e-06, + "loss": 0.6762, + "step": 3059 + }, + { + "epoch": 0.09378447958808385, + "grad_norm": 2.3805530645323674, + "learning_rate": 9.893703467668139e-06, + "loss": 0.7946, + "step": 3060 + }, + { + "epoch": 0.09381512811082506, + "grad_norm": 2.107261029701774, + "learning_rate": 9.893601647989997e-06, + "loss": 0.7825, + "step": 3061 + }, + { + "epoch": 0.09384577663356626, + "grad_norm": 1.34307615809011, + "learning_rate": 9.893499780093948e-06, + "loss": 0.5278, + "step": 3062 + }, + { + "epoch": 0.09387642515630747, + "grad_norm": 1.899362857688762, + "learning_rate": 9.893397863980993e-06, + "loss": 0.7351, + "step": 3063 + }, + { + "epoch": 0.09390707367904867, + "grad_norm": 2.2416826012807474, + "learning_rate": 9.893295899652137e-06, + "loss": 0.9459, + "step": 3064 + }, + { + "epoch": 0.09393772220178988, + "grad_norm": 1.015750767767657, + "learning_rate": 9.893193887108385e-06, + "loss": 0.5237, + "step": 3065 + }, + { + "epoch": 0.09396837072453108, + "grad_norm": 1.965344979951572, + "learning_rate": 9.893091826350741e-06, + "loss": 0.8151, + "step": 3066 + }, + { + "epoch": 
0.09399901924727228, + "grad_norm": 2.280701029356865, + "learning_rate": 9.892989717380211e-06, + "loss": 0.8547, + "step": 3067 + }, + { + "epoch": 0.09402966777001348, + "grad_norm": 2.127059399383621, + "learning_rate": 9.892887560197802e-06, + "loss": 0.7657, + "step": 3068 + }, + { + "epoch": 0.09406031629275469, + "grad_norm": 1.004689569729217, + "learning_rate": 9.892785354804519e-06, + "loss": 0.5015, + "step": 3069 + }, + { + "epoch": 0.09409096481549589, + "grad_norm": 2.305732839802527, + "learning_rate": 9.89268310120137e-06, + "loss": 0.8174, + "step": 3070 + }, + { + "epoch": 0.0941216133382371, + "grad_norm": 2.063180806492626, + "learning_rate": 9.892580799389364e-06, + "loss": 0.8766, + "step": 3071 + }, + { + "epoch": 0.0941522618609783, + "grad_norm": 2.2237382419465326, + "learning_rate": 9.892478449369507e-06, + "loss": 0.8899, + "step": 3072 + }, + { + "epoch": 0.09418291038371951, + "grad_norm": 0.931775740022436, + "learning_rate": 9.892376051142807e-06, + "loss": 0.5292, + "step": 3073 + }, + { + "epoch": 0.09421355890646071, + "grad_norm": 2.032611807935302, + "learning_rate": 9.892273604710275e-06, + "loss": 0.8422, + "step": 3074 + }, + { + "epoch": 0.0942442074292019, + "grad_norm": 1.7826180569732668, + "learning_rate": 9.89217111007292e-06, + "loss": 0.6895, + "step": 3075 + }, + { + "epoch": 0.09427485595194311, + "grad_norm": 1.9187119923650222, + "learning_rate": 9.89206856723175e-06, + "loss": 0.8113, + "step": 3076 + }, + { + "epoch": 0.09430550447468432, + "grad_norm": 1.8216478992839453, + "learning_rate": 9.891965976187778e-06, + "loss": 0.8409, + "step": 3077 + }, + { + "epoch": 0.09433615299742552, + "grad_norm": 1.7978049427019784, + "learning_rate": 9.891863336942012e-06, + "loss": 0.7052, + "step": 3078 + }, + { + "epoch": 0.09436680152016673, + "grad_norm": 1.8450558510309216, + "learning_rate": 9.891760649495465e-06, + "loss": 0.7577, + "step": 3079 + }, + { + "epoch": 0.09439745004290793, + "grad_norm": 1.904999274334601, + "learning_rate": 9.89165791384915e-06, + "loss": 0.8058, + "step": 3080 + }, + { + "epoch": 0.09442809856564914, + "grad_norm": 1.9878184522393483, + "learning_rate": 9.891555130004078e-06, + "loss": 0.8118, + "step": 3081 + }, + { + "epoch": 0.09445874708839035, + "grad_norm": 2.057346883525178, + "learning_rate": 9.891452297961261e-06, + "loss": 0.7788, + "step": 3082 + }, + { + "epoch": 0.09448939561113154, + "grad_norm": 1.7762225719119498, + "learning_rate": 9.891349417721713e-06, + "loss": 0.7422, + "step": 3083 + }, + { + "epoch": 0.09452004413387274, + "grad_norm": 1.0228753007032234, + "learning_rate": 9.891246489286448e-06, + "loss": 0.5226, + "step": 3084 + }, + { + "epoch": 0.09455069265661395, + "grad_norm": 1.9519897559718322, + "learning_rate": 9.89114351265648e-06, + "loss": 0.8521, + "step": 3085 + }, + { + "epoch": 0.09458134117935516, + "grad_norm": 2.213732107119467, + "learning_rate": 9.891040487832824e-06, + "loss": 0.7693, + "step": 3086 + }, + { + "epoch": 0.09461198970209636, + "grad_norm": 1.9990540740805318, + "learning_rate": 9.890937414816493e-06, + "loss": 0.8033, + "step": 3087 + }, + { + "epoch": 0.09464263822483757, + "grad_norm": 1.804513231643838, + "learning_rate": 9.890834293608506e-06, + "loss": 0.733, + "step": 3088 + }, + { + "epoch": 0.09467328674757877, + "grad_norm": 2.0149275931463166, + "learning_rate": 9.890731124209875e-06, + "loss": 0.7566, + "step": 3089 + }, + { + "epoch": 0.09470393527031996, + "grad_norm": 1.9701806024468351, + "learning_rate": 9.890627906621622e-06, + 
"loss": 0.7915, + "step": 3090 + }, + { + "epoch": 0.09473458379306117, + "grad_norm": 1.9213739451871277, + "learning_rate": 9.890524640844759e-06, + "loss": 0.8123, + "step": 3091 + }, + { + "epoch": 0.09476523231580238, + "grad_norm": 1.9487427121272856, + "learning_rate": 9.890421326880306e-06, + "loss": 0.6846, + "step": 3092 + }, + { + "epoch": 0.09479588083854358, + "grad_norm": 1.8435513975579743, + "learning_rate": 9.89031796472928e-06, + "loss": 0.6791, + "step": 3093 + }, + { + "epoch": 0.09482652936128479, + "grad_norm": 1.9231023651801011, + "learning_rate": 9.8902145543927e-06, + "loss": 0.7728, + "step": 3094 + }, + { + "epoch": 0.09485717788402599, + "grad_norm": 1.305507074656505, + "learning_rate": 9.890111095871584e-06, + "loss": 0.5165, + "step": 3095 + }, + { + "epoch": 0.0948878264067672, + "grad_norm": 2.072991295687858, + "learning_rate": 9.890007589166954e-06, + "loss": 0.6796, + "step": 3096 + }, + { + "epoch": 0.0949184749295084, + "grad_norm": 1.8334515427323135, + "learning_rate": 9.889904034279827e-06, + "loss": 0.7676, + "step": 3097 + }, + { + "epoch": 0.0949491234522496, + "grad_norm": 2.3119762317203225, + "learning_rate": 9.889800431211224e-06, + "loss": 0.8066, + "step": 3098 + }, + { + "epoch": 0.0949797719749908, + "grad_norm": 1.8359676017658741, + "learning_rate": 9.889696779962167e-06, + "loss": 0.5975, + "step": 3099 + }, + { + "epoch": 0.095010420497732, + "grad_norm": 2.004630206500889, + "learning_rate": 9.889593080533675e-06, + "loss": 0.7866, + "step": 3100 + }, + { + "epoch": 0.09504106902047321, + "grad_norm": 1.0774832179924643, + "learning_rate": 9.889489332926773e-06, + "loss": 0.5289, + "step": 3101 + }, + { + "epoch": 0.09507171754321442, + "grad_norm": 2.191040227936995, + "learning_rate": 9.889385537142482e-06, + "loss": 0.7609, + "step": 3102 + }, + { + "epoch": 0.09510236606595562, + "grad_norm": 0.9079817635876531, + "learning_rate": 9.889281693181823e-06, + "loss": 0.4997, + "step": 3103 + }, + { + "epoch": 0.09513301458869683, + "grad_norm": 2.101644022077999, + "learning_rate": 9.889177801045821e-06, + "loss": 0.8222, + "step": 3104 + }, + { + "epoch": 0.09516366311143803, + "grad_norm": 2.1762898620928404, + "learning_rate": 9.889073860735499e-06, + "loss": 0.7182, + "step": 3105 + }, + { + "epoch": 0.09519431163417923, + "grad_norm": 1.855633024011805, + "learning_rate": 9.888969872251881e-06, + "loss": 0.7311, + "step": 3106 + }, + { + "epoch": 0.09522496015692043, + "grad_norm": 1.078879568707824, + "learning_rate": 9.888865835595994e-06, + "loss": 0.5044, + "step": 3107 + }, + { + "epoch": 0.09525560867966164, + "grad_norm": 2.0448343724535296, + "learning_rate": 9.888761750768858e-06, + "loss": 0.7595, + "step": 3108 + }, + { + "epoch": 0.09528625720240284, + "grad_norm": 2.1311234390608647, + "learning_rate": 9.888657617771503e-06, + "loss": 0.8242, + "step": 3109 + }, + { + "epoch": 0.09531690572514405, + "grad_norm": 0.8927257330655198, + "learning_rate": 9.888553436604954e-06, + "loss": 0.5167, + "step": 3110 + }, + { + "epoch": 0.09534755424788526, + "grad_norm": 0.8899331855328574, + "learning_rate": 9.888449207270237e-06, + "loss": 0.512, + "step": 3111 + }, + { + "epoch": 0.09537820277062646, + "grad_norm": 1.885540235188503, + "learning_rate": 9.888344929768378e-06, + "loss": 0.6832, + "step": 3112 + }, + { + "epoch": 0.09540885129336767, + "grad_norm": 2.264751816417359, + "learning_rate": 9.888240604100407e-06, + "loss": 0.8106, + "step": 3113 + }, + { + "epoch": 0.09543949981610886, + "grad_norm": 
2.0242456101203072, + "learning_rate": 9.888136230267351e-06, + "loss": 0.8261, + "step": 3114 + }, + { + "epoch": 0.09547014833885006, + "grad_norm": 2.161224918375079, + "learning_rate": 9.888031808270237e-06, + "loss": 0.635, + "step": 3115 + }, + { + "epoch": 0.09550079686159127, + "grad_norm": 1.7796970474503073, + "learning_rate": 9.887927338110095e-06, + "loss": 0.7119, + "step": 3116 + }, + { + "epoch": 0.09553144538433248, + "grad_norm": 1.2662247653614425, + "learning_rate": 9.887822819787955e-06, + "loss": 0.5176, + "step": 3117 + }, + { + "epoch": 0.09556209390707368, + "grad_norm": 2.135431423503074, + "learning_rate": 9.887718253304847e-06, + "loss": 0.6808, + "step": 3118 + }, + { + "epoch": 0.09559274242981489, + "grad_norm": 2.029203656123572, + "learning_rate": 9.8876136386618e-06, + "loss": 0.7552, + "step": 3119 + }, + { + "epoch": 0.09562339095255609, + "grad_norm": 1.8390378616231604, + "learning_rate": 9.887508975859843e-06, + "loss": 0.7299, + "step": 3120 + }, + { + "epoch": 0.09565403947529728, + "grad_norm": 0.960875332197341, + "learning_rate": 9.887404264900012e-06, + "loss": 0.4997, + "step": 3121 + }, + { + "epoch": 0.09568468799803849, + "grad_norm": 1.9828069127113512, + "learning_rate": 9.887299505783334e-06, + "loss": 0.7095, + "step": 3122 + }, + { + "epoch": 0.0957153365207797, + "grad_norm": 2.1965817510581265, + "learning_rate": 9.887194698510846e-06, + "loss": 0.7172, + "step": 3123 + }, + { + "epoch": 0.0957459850435209, + "grad_norm": 1.9049828001380131, + "learning_rate": 9.887089843083577e-06, + "loss": 0.7501, + "step": 3124 + }, + { + "epoch": 0.0957766335662621, + "grad_norm": 2.0739160120818, + "learning_rate": 9.886984939502562e-06, + "loss": 0.7655, + "step": 3125 + }, + { + "epoch": 0.09580728208900331, + "grad_norm": 1.7362355106912433, + "learning_rate": 9.886879987768833e-06, + "loss": 0.7289, + "step": 3126 + }, + { + "epoch": 0.09583793061174452, + "grad_norm": 1.8871283027268784, + "learning_rate": 9.886774987883426e-06, + "loss": 0.7786, + "step": 3127 + }, + { + "epoch": 0.09586857913448572, + "grad_norm": 2.2580630918740914, + "learning_rate": 9.886669939847373e-06, + "loss": 0.7668, + "step": 3128 + }, + { + "epoch": 0.09589922765722692, + "grad_norm": 2.07137866589592, + "learning_rate": 9.886564843661713e-06, + "loss": 0.7766, + "step": 3129 + }, + { + "epoch": 0.09592987617996812, + "grad_norm": 1.1760084331272502, + "learning_rate": 9.886459699327478e-06, + "loss": 0.5012, + "step": 3130 + }, + { + "epoch": 0.09596052470270933, + "grad_norm": 2.2474347051137893, + "learning_rate": 9.886354506845706e-06, + "loss": 0.7593, + "step": 3131 + }, + { + "epoch": 0.09599117322545053, + "grad_norm": 2.016696818197017, + "learning_rate": 9.886249266217432e-06, + "loss": 0.7475, + "step": 3132 + }, + { + "epoch": 0.09602182174819174, + "grad_norm": 1.8443705103947232, + "learning_rate": 9.886143977443694e-06, + "loss": 0.7756, + "step": 3133 + }, + { + "epoch": 0.09605247027093294, + "grad_norm": 1.969644942978252, + "learning_rate": 9.886038640525531e-06, + "loss": 0.7741, + "step": 3134 + }, + { + "epoch": 0.09608311879367415, + "grad_norm": 1.8956639124688641, + "learning_rate": 9.885933255463978e-06, + "loss": 0.7679, + "step": 3135 + }, + { + "epoch": 0.09611376731641535, + "grad_norm": 2.023725536939127, + "learning_rate": 9.885827822260073e-06, + "loss": 0.7748, + "step": 3136 + }, + { + "epoch": 0.09614441583915655, + "grad_norm": 2.1340901126688254, + "learning_rate": 9.885722340914857e-06, + "loss": 0.7719, + "step": 3137 + }, 
+ { + "epoch": 0.09617506436189775, + "grad_norm": 1.0941695559545437, + "learning_rate": 9.88561681142937e-06, + "loss": 0.5284, + "step": 3138 + }, + { + "epoch": 0.09620571288463896, + "grad_norm": 2.360018517790863, + "learning_rate": 9.88551123380465e-06, + "loss": 0.7866, + "step": 3139 + }, + { + "epoch": 0.09623636140738016, + "grad_norm": 2.001019546668572, + "learning_rate": 9.885405608041738e-06, + "loss": 0.8982, + "step": 3140 + }, + { + "epoch": 0.09626700993012137, + "grad_norm": 1.743282854477548, + "learning_rate": 9.885299934141674e-06, + "loss": 0.8399, + "step": 3141 + }, + { + "epoch": 0.09629765845286258, + "grad_norm": 0.9127530759771358, + "learning_rate": 9.885194212105498e-06, + "loss": 0.5022, + "step": 3142 + }, + { + "epoch": 0.09632830697560378, + "grad_norm": 1.883934417878318, + "learning_rate": 9.885088441934257e-06, + "loss": 0.7603, + "step": 3143 + }, + { + "epoch": 0.09635895549834499, + "grad_norm": 1.8772890553821155, + "learning_rate": 9.884982623628987e-06, + "loss": 0.7297, + "step": 3144 + }, + { + "epoch": 0.09638960402108618, + "grad_norm": 2.0639564985082473, + "learning_rate": 9.884876757190736e-06, + "loss": 0.7847, + "step": 3145 + }, + { + "epoch": 0.09642025254382738, + "grad_norm": 2.4254010058073936, + "learning_rate": 9.884770842620541e-06, + "loss": 0.8096, + "step": 3146 + }, + { + "epoch": 0.09645090106656859, + "grad_norm": 0.9544661373127116, + "learning_rate": 9.884664879919452e-06, + "loss": 0.5119, + "step": 3147 + }, + { + "epoch": 0.0964815495893098, + "grad_norm": 1.94302472192901, + "learning_rate": 9.88455886908851e-06, + "loss": 0.7738, + "step": 3148 + }, + { + "epoch": 0.096512198112051, + "grad_norm": 1.8986480881130983, + "learning_rate": 9.884452810128757e-06, + "loss": 0.5832, + "step": 3149 + }, + { + "epoch": 0.0965428466347922, + "grad_norm": 1.835629283485844, + "learning_rate": 9.884346703041243e-06, + "loss": 0.6869, + "step": 3150 + }, + { + "epoch": 0.09657349515753341, + "grad_norm": 1.9596889728030968, + "learning_rate": 9.88424054782701e-06, + "loss": 0.8015, + "step": 3151 + }, + { + "epoch": 0.0966041436802746, + "grad_norm": 1.9577383839908424, + "learning_rate": 9.884134344487106e-06, + "loss": 0.7906, + "step": 3152 + }, + { + "epoch": 0.09663479220301581, + "grad_norm": 1.9887874349626393, + "learning_rate": 9.884028093022577e-06, + "loss": 0.7341, + "step": 3153 + }, + { + "epoch": 0.09666544072575702, + "grad_norm": 1.8460487455767838, + "learning_rate": 9.88392179343447e-06, + "loss": 0.6699, + "step": 3154 + }, + { + "epoch": 0.09669608924849822, + "grad_norm": 1.970601467745264, + "learning_rate": 9.88381544572383e-06, + "loss": 0.7812, + "step": 3155 + }, + { + "epoch": 0.09672673777123943, + "grad_norm": 0.9996326784576466, + "learning_rate": 9.883709049891709e-06, + "loss": 0.5151, + "step": 3156 + }, + { + "epoch": 0.09675738629398063, + "grad_norm": 1.8210783678889526, + "learning_rate": 9.883602605939151e-06, + "loss": 0.7054, + "step": 3157 + }, + { + "epoch": 0.09678803481672184, + "grad_norm": 2.1411798110278752, + "learning_rate": 9.883496113867209e-06, + "loss": 0.8208, + "step": 3158 + }, + { + "epoch": 0.09681868333946304, + "grad_norm": 1.7605945795994768, + "learning_rate": 9.883389573676929e-06, + "loss": 0.8128, + "step": 3159 + }, + { + "epoch": 0.09684933186220424, + "grad_norm": 1.745836413964514, + "learning_rate": 9.883282985369362e-06, + "loss": 0.6826, + "step": 3160 + }, + { + "epoch": 0.09687998038494544, + "grad_norm": 1.8639985818750056, + "learning_rate": 
9.88317634894556e-06, + "loss": 0.7358, + "step": 3161 + }, + { + "epoch": 0.09691062890768665, + "grad_norm": 2.004590921901454, + "learning_rate": 9.883069664406571e-06, + "loss": 0.7167, + "step": 3162 + }, + { + "epoch": 0.09694127743042785, + "grad_norm": 0.8706534899914451, + "learning_rate": 9.882962931753446e-06, + "loss": 0.5057, + "step": 3163 + }, + { + "epoch": 0.09697192595316906, + "grad_norm": 1.9123380768522737, + "learning_rate": 9.88285615098724e-06, + "loss": 0.7448, + "step": 3164 + }, + { + "epoch": 0.09700257447591026, + "grad_norm": 1.719084803459663, + "learning_rate": 9.882749322109002e-06, + "loss": 0.794, + "step": 3165 + }, + { + "epoch": 0.09703322299865147, + "grad_norm": 1.9730991698784919, + "learning_rate": 9.882642445119784e-06, + "loss": 0.6757, + "step": 3166 + }, + { + "epoch": 0.09706387152139268, + "grad_norm": 2.021802651029125, + "learning_rate": 9.882535520020641e-06, + "loss": 0.9024, + "step": 3167 + }, + { + "epoch": 0.09709452004413387, + "grad_norm": 1.8791866763738625, + "learning_rate": 9.88242854681263e-06, + "loss": 0.787, + "step": 3168 + }, + { + "epoch": 0.09712516856687507, + "grad_norm": 1.946588997564343, + "learning_rate": 9.882321525496799e-06, + "loss": 0.7028, + "step": 3169 + }, + { + "epoch": 0.09715581708961628, + "grad_norm": 1.957075250550083, + "learning_rate": 9.882214456074204e-06, + "loss": 0.8063, + "step": 3170 + }, + { + "epoch": 0.09718646561235748, + "grad_norm": 1.0202594536853047, + "learning_rate": 9.882107338545902e-06, + "loss": 0.5317, + "step": 3171 + }, + { + "epoch": 0.09721711413509869, + "grad_norm": 1.879528320031204, + "learning_rate": 9.882000172912946e-06, + "loss": 0.7898, + "step": 3172 + }, + { + "epoch": 0.0972477626578399, + "grad_norm": 0.8580450193614779, + "learning_rate": 9.881892959176394e-06, + "loss": 0.5137, + "step": 3173 + }, + { + "epoch": 0.0972784111805811, + "grad_norm": 1.7688457361696521, + "learning_rate": 9.8817856973373e-06, + "loss": 0.769, + "step": 3174 + }, + { + "epoch": 0.0973090597033223, + "grad_norm": 1.913719303835709, + "learning_rate": 9.881678387396724e-06, + "loss": 0.7216, + "step": 3175 + }, + { + "epoch": 0.0973397082260635, + "grad_norm": 2.880182839379903, + "learning_rate": 9.881571029355724e-06, + "loss": 0.8087, + "step": 3176 + }, + { + "epoch": 0.0973703567488047, + "grad_norm": 1.8739205845765692, + "learning_rate": 9.881463623215352e-06, + "loss": 0.7341, + "step": 3177 + }, + { + "epoch": 0.09740100527154591, + "grad_norm": 2.0294702946810843, + "learning_rate": 9.88135616897667e-06, + "loss": 0.7623, + "step": 3178 + }, + { + "epoch": 0.09743165379428712, + "grad_norm": 2.242445423686008, + "learning_rate": 9.88124866664074e-06, + "loss": 0.8428, + "step": 3179 + }, + { + "epoch": 0.09746230231702832, + "grad_norm": 2.08721651838326, + "learning_rate": 9.881141116208614e-06, + "loss": 0.7868, + "step": 3180 + }, + { + "epoch": 0.09749295083976953, + "grad_norm": 1.7510215468263186, + "learning_rate": 9.88103351768136e-06, + "loss": 0.6951, + "step": 3181 + }, + { + "epoch": 0.09752359936251073, + "grad_norm": 1.9081783433671262, + "learning_rate": 9.88092587106003e-06, + "loss": 0.8246, + "step": 3182 + }, + { + "epoch": 0.09755424788525192, + "grad_norm": 2.180843408756449, + "learning_rate": 9.88081817634569e-06, + "loss": 0.7097, + "step": 3183 + }, + { + "epoch": 0.09758489640799313, + "grad_norm": 2.151772218546445, + "learning_rate": 9.8807104335394e-06, + "loss": 0.6847, + "step": 3184 + }, + { + "epoch": 0.09761554493073434, + "grad_norm": 
2.0825450609906375, + "learning_rate": 9.88060264264222e-06, + "loss": 0.7928, + "step": 3185 + }, + { + "epoch": 0.09764619345347554, + "grad_norm": 2.0964817187516074, + "learning_rate": 9.880494803655216e-06, + "loss": 0.7167, + "step": 3186 + }, + { + "epoch": 0.09767684197621675, + "grad_norm": 1.8974306783112393, + "learning_rate": 9.880386916579446e-06, + "loss": 0.7316, + "step": 3187 + }, + { + "epoch": 0.09770749049895795, + "grad_norm": 1.8018454447100123, + "learning_rate": 9.880278981415975e-06, + "loss": 0.7595, + "step": 3188 + }, + { + "epoch": 0.09773813902169916, + "grad_norm": 2.269916958650838, + "learning_rate": 9.880170998165868e-06, + "loss": 0.7981, + "step": 3189 + }, + { + "epoch": 0.09776878754444036, + "grad_norm": 2.034776139013544, + "learning_rate": 9.880062966830186e-06, + "loss": 0.847, + "step": 3190 + }, + { + "epoch": 0.09779943606718156, + "grad_norm": 2.0148172986083486, + "learning_rate": 9.879954887409996e-06, + "loss": 0.7871, + "step": 3191 + }, + { + "epoch": 0.09783008458992276, + "grad_norm": 2.0063430637094775, + "learning_rate": 9.879846759906361e-06, + "loss": 0.6791, + "step": 3192 + }, + { + "epoch": 0.09786073311266397, + "grad_norm": 1.5690474620393513, + "learning_rate": 9.87973858432035e-06, + "loss": 0.6968, + "step": 3193 + }, + { + "epoch": 0.09789138163540517, + "grad_norm": 1.896185144749186, + "learning_rate": 9.879630360653022e-06, + "loss": 0.7865, + "step": 3194 + }, + { + "epoch": 0.09792203015814638, + "grad_norm": 1.2319826568718453, + "learning_rate": 9.879522088905448e-06, + "loss": 0.5177, + "step": 3195 + }, + { + "epoch": 0.09795267868088758, + "grad_norm": 2.155809411328702, + "learning_rate": 9.879413769078697e-06, + "loss": 0.7917, + "step": 3196 + }, + { + "epoch": 0.09798332720362879, + "grad_norm": 2.1244073602513986, + "learning_rate": 9.879305401173832e-06, + "loss": 0.7365, + "step": 3197 + }, + { + "epoch": 0.09801397572637, + "grad_norm": 0.901133685798479, + "learning_rate": 9.879196985191923e-06, + "loss": 0.5313, + "step": 3198 + }, + { + "epoch": 0.09804462424911119, + "grad_norm": 2.1641980841787802, + "learning_rate": 9.87908852113404e-06, + "loss": 0.8165, + "step": 3199 + }, + { + "epoch": 0.09807527277185239, + "grad_norm": 1.9078519011508377, + "learning_rate": 9.878980009001245e-06, + "loss": 0.7285, + "step": 3200 + }, + { + "epoch": 0.0981059212945936, + "grad_norm": 1.9217851238125245, + "learning_rate": 9.878871448794615e-06, + "loss": 0.7739, + "step": 3201 + }, + { + "epoch": 0.0981365698173348, + "grad_norm": 1.0518819079195778, + "learning_rate": 9.878762840515215e-06, + "loss": 0.5086, + "step": 3202 + }, + { + "epoch": 0.09816721834007601, + "grad_norm": 2.0278005087145377, + "learning_rate": 9.878654184164116e-06, + "loss": 0.7691, + "step": 3203 + }, + { + "epoch": 0.09819786686281722, + "grad_norm": 0.9283745697957672, + "learning_rate": 9.87854547974239e-06, + "loss": 0.519, + "step": 3204 + }, + { + "epoch": 0.09822851538555842, + "grad_norm": 1.8760075800902942, + "learning_rate": 9.878436727251106e-06, + "loss": 0.7133, + "step": 3205 + }, + { + "epoch": 0.09825916390829963, + "grad_norm": 0.8624524541652753, + "learning_rate": 9.878327926691338e-06, + "loss": 0.5092, + "step": 3206 + }, + { + "epoch": 0.09828981243104082, + "grad_norm": 2.0746144309209407, + "learning_rate": 9.878219078064156e-06, + "loss": 0.7634, + "step": 3207 + }, + { + "epoch": 0.09832046095378202, + "grad_norm": 2.1299586293615276, + "learning_rate": 9.878110181370634e-06, + "loss": 0.707, + "step": 3208 + 
}, + { + "epoch": 0.09835110947652323, + "grad_norm": 2.052442168661215, + "learning_rate": 9.878001236611842e-06, + "loss": 0.7332, + "step": 3209 + }, + { + "epoch": 0.09838175799926444, + "grad_norm": 1.7587214488179168, + "learning_rate": 9.877892243788858e-06, + "loss": 0.7768, + "step": 3210 + }, + { + "epoch": 0.09841240652200564, + "grad_norm": 1.9466496664165014, + "learning_rate": 9.877783202902754e-06, + "loss": 0.7572, + "step": 3211 + }, + { + "epoch": 0.09844305504474685, + "grad_norm": 2.0916118445604974, + "learning_rate": 9.877674113954603e-06, + "loss": 0.7069, + "step": 3212 + }, + { + "epoch": 0.09847370356748805, + "grad_norm": 1.15069458655777, + "learning_rate": 9.877564976945482e-06, + "loss": 0.5156, + "step": 3213 + }, + { + "epoch": 0.09850435209022924, + "grad_norm": 2.006388524480147, + "learning_rate": 9.877455791876464e-06, + "loss": 0.7848, + "step": 3214 + }, + { + "epoch": 0.09853500061297045, + "grad_norm": 2.038119925949998, + "learning_rate": 9.877346558748626e-06, + "loss": 0.6634, + "step": 3215 + }, + { + "epoch": 0.09856564913571166, + "grad_norm": 1.9342967158314261, + "learning_rate": 9.877237277563046e-06, + "loss": 0.725, + "step": 3216 + }, + { + "epoch": 0.09859629765845286, + "grad_norm": 1.9653537254438755, + "learning_rate": 9.877127948320798e-06, + "loss": 0.7376, + "step": 3217 + }, + { + "epoch": 0.09862694618119407, + "grad_norm": 1.9659671125987035, + "learning_rate": 9.87701857102296e-06, + "loss": 0.8113, + "step": 3218 + }, + { + "epoch": 0.09865759470393527, + "grad_norm": 0.9374740099067715, + "learning_rate": 9.876909145670612e-06, + "loss": 0.5091, + "step": 3219 + }, + { + "epoch": 0.09868824322667648, + "grad_norm": 1.7781949543840276, + "learning_rate": 9.876799672264828e-06, + "loss": 0.7055, + "step": 3220 + }, + { + "epoch": 0.09871889174941768, + "grad_norm": 2.0423266896493364, + "learning_rate": 9.876690150806692e-06, + "loss": 0.9437, + "step": 3221 + }, + { + "epoch": 0.09874954027215888, + "grad_norm": 2.0004263402224134, + "learning_rate": 9.876580581297277e-06, + "loss": 0.7698, + "step": 3222 + }, + { + "epoch": 0.09878018879490008, + "grad_norm": 2.009042189179956, + "learning_rate": 9.876470963737667e-06, + "loss": 0.7774, + "step": 3223 + }, + { + "epoch": 0.09881083731764129, + "grad_norm": 1.8094690977907182, + "learning_rate": 9.876361298128942e-06, + "loss": 0.7168, + "step": 3224 + }, + { + "epoch": 0.09884148584038249, + "grad_norm": 1.0775869869758814, + "learning_rate": 9.876251584472181e-06, + "loss": 0.5085, + "step": 3225 + }, + { + "epoch": 0.0988721343631237, + "grad_norm": 1.0249273878587346, + "learning_rate": 9.876141822768464e-06, + "loss": 0.5244, + "step": 3226 + }, + { + "epoch": 0.0989027828858649, + "grad_norm": 1.8299871154790213, + "learning_rate": 9.876032013018875e-06, + "loss": 0.6791, + "step": 3227 + }, + { + "epoch": 0.09893343140860611, + "grad_norm": 1.9244729840576908, + "learning_rate": 9.875922155224495e-06, + "loss": 0.8648, + "step": 3228 + }, + { + "epoch": 0.09896407993134732, + "grad_norm": 2.1330491830475924, + "learning_rate": 9.875812249386407e-06, + "loss": 0.7902, + "step": 3229 + }, + { + "epoch": 0.09899472845408851, + "grad_norm": 2.2492504624816556, + "learning_rate": 9.875702295505694e-06, + "loss": 0.8205, + "step": 3230 + }, + { + "epoch": 0.09902537697682971, + "grad_norm": 1.9587598453939024, + "learning_rate": 9.875592293583438e-06, + "loss": 0.761, + "step": 3231 + }, + { + "epoch": 0.09905602549957092, + "grad_norm": 2.263616978296009, + "learning_rate": 
9.875482243620722e-06, + "loss": 0.7683, + "step": 3232 + }, + { + "epoch": 0.09908667402231212, + "grad_norm": 2.0315963841288265, + "learning_rate": 9.875372145618633e-06, + "loss": 0.6955, + "step": 3233 + }, + { + "epoch": 0.09911732254505333, + "grad_norm": 1.8582954571320818, + "learning_rate": 9.875261999578257e-06, + "loss": 0.7517, + "step": 3234 + }, + { + "epoch": 0.09914797106779454, + "grad_norm": 1.6153113574381734, + "learning_rate": 9.875151805500675e-06, + "loss": 0.5115, + "step": 3235 + }, + { + "epoch": 0.09917861959053574, + "grad_norm": 1.3375701805620093, + "learning_rate": 9.875041563386975e-06, + "loss": 0.5179, + "step": 3236 + }, + { + "epoch": 0.09920926811327695, + "grad_norm": 2.151514070330498, + "learning_rate": 9.874931273238244e-06, + "loss": 0.8259, + "step": 3237 + }, + { + "epoch": 0.09923991663601814, + "grad_norm": 2.005925588374603, + "learning_rate": 9.874820935055566e-06, + "loss": 0.7804, + "step": 3238 + }, + { + "epoch": 0.09927056515875934, + "grad_norm": 2.2280053048189914, + "learning_rate": 9.874710548840032e-06, + "loss": 0.7073, + "step": 3239 + }, + { + "epoch": 0.09930121368150055, + "grad_norm": 2.0641089223358176, + "learning_rate": 9.874600114592728e-06, + "loss": 0.8326, + "step": 3240 + }, + { + "epoch": 0.09933186220424176, + "grad_norm": 1.7671291535116802, + "learning_rate": 9.87448963231474e-06, + "loss": 0.8363, + "step": 3241 + }, + { + "epoch": 0.09936251072698296, + "grad_norm": 1.9251167787675554, + "learning_rate": 9.874379102007159e-06, + "loss": 0.7227, + "step": 3242 + }, + { + "epoch": 0.09939315924972417, + "grad_norm": 1.9566117854644487, + "learning_rate": 9.874268523671074e-06, + "loss": 0.7429, + "step": 3243 + }, + { + "epoch": 0.09942380777246537, + "grad_norm": 1.9422960535065812, + "learning_rate": 9.874157897307575e-06, + "loss": 0.7377, + "step": 3244 + }, + { + "epoch": 0.09945445629520656, + "grad_norm": 2.36596129949425, + "learning_rate": 9.87404722291775e-06, + "loss": 0.5547, + "step": 3245 + }, + { + "epoch": 0.09948510481794777, + "grad_norm": 2.381953260260112, + "learning_rate": 9.87393650050269e-06, + "loss": 0.7536, + "step": 3246 + }, + { + "epoch": 0.09951575334068898, + "grad_norm": 2.1931599278106555, + "learning_rate": 9.873825730063488e-06, + "loss": 0.7966, + "step": 3247 + }, + { + "epoch": 0.09954640186343018, + "grad_norm": 1.018274146684996, + "learning_rate": 9.873714911601234e-06, + "loss": 0.5152, + "step": 3248 + }, + { + "epoch": 0.09957705038617139, + "grad_norm": 1.839953225961578, + "learning_rate": 9.873604045117018e-06, + "loss": 0.692, + "step": 3249 + }, + { + "epoch": 0.09960769890891259, + "grad_norm": 2.08708066188575, + "learning_rate": 9.873493130611937e-06, + "loss": 0.7683, + "step": 3250 + }, + { + "epoch": 0.0996383474316538, + "grad_norm": 1.8561721977102414, + "learning_rate": 9.87338216808708e-06, + "loss": 0.8214, + "step": 3251 + }, + { + "epoch": 0.099668995954395, + "grad_norm": 2.237482391162869, + "learning_rate": 9.87327115754354e-06, + "loss": 0.8687, + "step": 3252 + }, + { + "epoch": 0.0996996444771362, + "grad_norm": 1.9466179651202504, + "learning_rate": 9.873160098982415e-06, + "loss": 0.8375, + "step": 3253 + }, + { + "epoch": 0.0997302929998774, + "grad_norm": 1.9675676150136092, + "learning_rate": 9.873048992404795e-06, + "loss": 0.7838, + "step": 3254 + }, + { + "epoch": 0.0997609415226186, + "grad_norm": 1.9212794873699468, + "learning_rate": 9.872937837811778e-06, + "loss": 0.6264, + "step": 3255 + }, + { + "epoch": 0.09979159004535981, + 
"grad_norm": 2.4765108726741483, + "learning_rate": 9.872826635204457e-06, + "loss": 0.7965, + "step": 3256 + }, + { + "epoch": 0.09982223856810102, + "grad_norm": 2.073278478589844, + "learning_rate": 9.872715384583928e-06, + "loss": 0.7432, + "step": 3257 + }, + { + "epoch": 0.09985288709084222, + "grad_norm": 2.485815008419135, + "learning_rate": 9.872604085951288e-06, + "loss": 0.909, + "step": 3258 + }, + { + "epoch": 0.09988353561358343, + "grad_norm": 1.9006286576993616, + "learning_rate": 9.872492739307633e-06, + "loss": 0.7789, + "step": 3259 + }, + { + "epoch": 0.09991418413632464, + "grad_norm": 2.148361198660419, + "learning_rate": 9.87238134465406e-06, + "loss": 0.7066, + "step": 3260 + }, + { + "epoch": 0.09994483265906583, + "grad_norm": 1.7593300740047142, + "learning_rate": 9.872269901991668e-06, + "loss": 0.7627, + "step": 3261 + }, + { + "epoch": 0.09997548118180703, + "grad_norm": 2.155733006019818, + "learning_rate": 9.872158411321552e-06, + "loss": 0.873, + "step": 3262 + }, + { + "epoch": 0.10000612970454824, + "grad_norm": 1.8733920795522887, + "learning_rate": 9.872046872644815e-06, + "loss": 0.7688, + "step": 3263 + }, + { + "epoch": 0.10003677822728944, + "grad_norm": 2.0476401620364846, + "learning_rate": 9.871935285962553e-06, + "loss": 0.7024, + "step": 3264 + }, + { + "epoch": 0.10006742675003065, + "grad_norm": 2.1200100733723692, + "learning_rate": 9.871823651275865e-06, + "loss": 0.7612, + "step": 3265 + }, + { + "epoch": 0.10009807527277186, + "grad_norm": 2.1657792618663945, + "learning_rate": 9.871711968585854e-06, + "loss": 0.7458, + "step": 3266 + }, + { + "epoch": 0.10012872379551306, + "grad_norm": 2.0303334175294676, + "learning_rate": 9.871600237893617e-06, + "loss": 0.8692, + "step": 3267 + }, + { + "epoch": 0.10015937231825427, + "grad_norm": 1.9129794393179627, + "learning_rate": 9.871488459200256e-06, + "loss": 0.6889, + "step": 3268 + }, + { + "epoch": 0.10019002084099546, + "grad_norm": 1.9043329749788893, + "learning_rate": 9.871376632506872e-06, + "loss": 0.7825, + "step": 3269 + }, + { + "epoch": 0.10022066936373666, + "grad_norm": 1.982639948488753, + "learning_rate": 9.87126475781457e-06, + "loss": 0.7288, + "step": 3270 + }, + { + "epoch": 0.10025131788647787, + "grad_norm": 2.2202283022302187, + "learning_rate": 9.871152835124448e-06, + "loss": 0.5604, + "step": 3271 + }, + { + "epoch": 0.10028196640921908, + "grad_norm": 1.9745534427125302, + "learning_rate": 9.871040864437613e-06, + "loss": 0.8308, + "step": 3272 + }, + { + "epoch": 0.10031261493196028, + "grad_norm": 2.4965000758145073, + "learning_rate": 9.870928845755165e-06, + "loss": 0.8974, + "step": 3273 + }, + { + "epoch": 0.10034326345470149, + "grad_norm": 2.061319010007779, + "learning_rate": 9.870816779078209e-06, + "loss": 0.7094, + "step": 3274 + }, + { + "epoch": 0.10037391197744269, + "grad_norm": 2.154273548526055, + "learning_rate": 9.870704664407849e-06, + "loss": 0.8112, + "step": 3275 + }, + { + "epoch": 0.10040456050018388, + "grad_norm": 2.0980997102763435, + "learning_rate": 9.870592501745189e-06, + "loss": 0.8532, + "step": 3276 + }, + { + "epoch": 0.10043520902292509, + "grad_norm": 1.8582525863970936, + "learning_rate": 9.870480291091336e-06, + "loss": 0.7667, + "step": 3277 + }, + { + "epoch": 0.1004658575456663, + "grad_norm": 1.2895449947610842, + "learning_rate": 9.870368032447393e-06, + "loss": 0.5396, + "step": 3278 + }, + { + "epoch": 0.1004965060684075, + "grad_norm": 1.8903999085205088, + "learning_rate": 9.870255725814468e-06, + "loss": 0.7487, + 
"step": 3279 + }, + { + "epoch": 0.1005271545911487, + "grad_norm": 2.4476305814779735, + "learning_rate": 9.870143371193668e-06, + "loss": 0.7843, + "step": 3280 + }, + { + "epoch": 0.10055780311388991, + "grad_norm": 1.9749681747092513, + "learning_rate": 9.8700309685861e-06, + "loss": 0.7796, + "step": 3281 + }, + { + "epoch": 0.10058845163663112, + "grad_norm": 2.3532050997378047, + "learning_rate": 9.86991851799287e-06, + "loss": 0.7091, + "step": 3282 + }, + { + "epoch": 0.10061910015937232, + "grad_norm": 1.0226076050991484, + "learning_rate": 9.869806019415086e-06, + "loss": 0.5011, + "step": 3283 + }, + { + "epoch": 0.10064974868211352, + "grad_norm": 1.8361525128355767, + "learning_rate": 9.869693472853858e-06, + "loss": 0.7136, + "step": 3284 + }, + { + "epoch": 0.10068039720485472, + "grad_norm": 2.1129954135872744, + "learning_rate": 9.869580878310294e-06, + "loss": 0.7474, + "step": 3285 + }, + { + "epoch": 0.10071104572759593, + "grad_norm": 2.029249903712124, + "learning_rate": 9.869468235785504e-06, + "loss": 0.7976, + "step": 3286 + }, + { + "epoch": 0.10074169425033713, + "grad_norm": 1.943745820663761, + "learning_rate": 9.869355545280596e-06, + "loss": 0.7509, + "step": 3287 + }, + { + "epoch": 0.10077234277307834, + "grad_norm": 1.677609612556868, + "learning_rate": 9.869242806796684e-06, + "loss": 0.6448, + "step": 3288 + }, + { + "epoch": 0.10080299129581954, + "grad_norm": 2.1251576686102225, + "learning_rate": 9.869130020334876e-06, + "loss": 0.7829, + "step": 3289 + }, + { + "epoch": 0.10083363981856075, + "grad_norm": 1.9981924363323105, + "learning_rate": 9.869017185896284e-06, + "loss": 0.7417, + "step": 3290 + }, + { + "epoch": 0.10086428834130196, + "grad_norm": 1.696036958822324, + "learning_rate": 9.86890430348202e-06, + "loss": 0.8902, + "step": 3291 + }, + { + "epoch": 0.10089493686404315, + "grad_norm": 1.909475919363897, + "learning_rate": 9.868791373093197e-06, + "loss": 0.7679, + "step": 3292 + }, + { + "epoch": 0.10092558538678435, + "grad_norm": 2.1826872843462017, + "learning_rate": 9.868678394730925e-06, + "loss": 0.76, + "step": 3293 + }, + { + "epoch": 0.10095623390952556, + "grad_norm": 2.1446784573691353, + "learning_rate": 9.868565368396321e-06, + "loss": 0.6375, + "step": 3294 + }, + { + "epoch": 0.10098688243226676, + "grad_norm": 1.9721750626928225, + "learning_rate": 9.868452294090496e-06, + "loss": 0.7277, + "step": 3295 + }, + { + "epoch": 0.10101753095500797, + "grad_norm": 2.0206127079673184, + "learning_rate": 9.868339171814565e-06, + "loss": 0.8368, + "step": 3296 + }, + { + "epoch": 0.10104817947774918, + "grad_norm": 1.7151611633186716, + "learning_rate": 9.868226001569643e-06, + "loss": 0.7622, + "step": 3297 + }, + { + "epoch": 0.10107882800049038, + "grad_norm": 1.9544837982624697, + "learning_rate": 9.868112783356843e-06, + "loss": 0.6962, + "step": 3298 + }, + { + "epoch": 0.10110947652323159, + "grad_norm": 2.047702220852074, + "learning_rate": 9.867999517177284e-06, + "loss": 0.8418, + "step": 3299 + }, + { + "epoch": 0.10114012504597278, + "grad_norm": 2.1423074024163427, + "learning_rate": 9.867886203032079e-06, + "loss": 0.7779, + "step": 3300 + }, + { + "epoch": 0.10117077356871398, + "grad_norm": 2.127523287193254, + "learning_rate": 9.867772840922346e-06, + "loss": 0.793, + "step": 3301 + }, + { + "epoch": 0.10120142209145519, + "grad_norm": 2.0753149544134137, + "learning_rate": 9.8676594308492e-06, + "loss": 0.8097, + "step": 3302 + }, + { + "epoch": 0.1012320706141964, + "grad_norm": 2.168458418442501, + 
"learning_rate": 9.867545972813763e-06, + "loss": 0.7917, + "step": 3303 + }, + { + "epoch": 0.1012627191369376, + "grad_norm": 2.035512955757485, + "learning_rate": 9.867432466817151e-06, + "loss": 0.8014, + "step": 3304 + }, + { + "epoch": 0.1012933676596788, + "grad_norm": 1.9088334710993347, + "learning_rate": 9.867318912860479e-06, + "loss": 0.6566, + "step": 3305 + }, + { + "epoch": 0.10132401618242001, + "grad_norm": 2.285234916054993, + "learning_rate": 9.867205310944868e-06, + "loss": 0.7716, + "step": 3306 + }, + { + "epoch": 0.1013546647051612, + "grad_norm": 2.314217686729217, + "learning_rate": 9.867091661071439e-06, + "loss": 0.8563, + "step": 3307 + }, + { + "epoch": 0.10138531322790241, + "grad_norm": 1.7159920579219774, + "learning_rate": 9.866977963241312e-06, + "loss": 0.7063, + "step": 3308 + }, + { + "epoch": 0.10141596175064362, + "grad_norm": 2.052521889143315, + "learning_rate": 9.866864217455603e-06, + "loss": 0.7328, + "step": 3309 + }, + { + "epoch": 0.10144661027338482, + "grad_norm": 1.9656363582150504, + "learning_rate": 9.866750423715437e-06, + "loss": 0.7762, + "step": 3310 + }, + { + "epoch": 0.10147725879612603, + "grad_norm": 2.0864739504126226, + "learning_rate": 9.866636582021934e-06, + "loss": 0.8276, + "step": 3311 + }, + { + "epoch": 0.10150790731886723, + "grad_norm": 1.9502741170786484, + "learning_rate": 9.866522692376215e-06, + "loss": 0.775, + "step": 3312 + }, + { + "epoch": 0.10153855584160844, + "grad_norm": 1.0238737071083501, + "learning_rate": 9.866408754779402e-06, + "loss": 0.5294, + "step": 3313 + }, + { + "epoch": 0.10156920436434964, + "grad_norm": 0.9294288968472466, + "learning_rate": 9.866294769232621e-06, + "loss": 0.5125, + "step": 3314 + }, + { + "epoch": 0.10159985288709084, + "grad_norm": 2.0859998740421637, + "learning_rate": 9.86618073573699e-06, + "loss": 0.8018, + "step": 3315 + }, + { + "epoch": 0.10163050140983204, + "grad_norm": 1.0587081351925636, + "learning_rate": 9.866066654293635e-06, + "loss": 0.531, + "step": 3316 + }, + { + "epoch": 0.10166114993257325, + "grad_norm": 1.7674176048978776, + "learning_rate": 9.865952524903682e-06, + "loss": 0.6435, + "step": 3317 + }, + { + "epoch": 0.10169179845531445, + "grad_norm": 0.869045350132348, + "learning_rate": 9.865838347568252e-06, + "loss": 0.5365, + "step": 3318 + }, + { + "epoch": 0.10172244697805566, + "grad_norm": 1.9780281596633964, + "learning_rate": 9.865724122288474e-06, + "loss": 0.8403, + "step": 3319 + }, + { + "epoch": 0.10175309550079686, + "grad_norm": 1.9046770451025408, + "learning_rate": 9.865609849065471e-06, + "loss": 0.778, + "step": 3320 + }, + { + "epoch": 0.10178374402353807, + "grad_norm": 2.316741996834157, + "learning_rate": 9.865495527900369e-06, + "loss": 0.8801, + "step": 3321 + }, + { + "epoch": 0.10181439254627928, + "grad_norm": 1.77023889417196, + "learning_rate": 9.865381158794293e-06, + "loss": 0.753, + "step": 3322 + }, + { + "epoch": 0.10184504106902047, + "grad_norm": 2.028515833875039, + "learning_rate": 9.865266741748372e-06, + "loss": 0.7805, + "step": 3323 + }, + { + "epoch": 0.10187568959176167, + "grad_norm": 1.9888453501065764, + "learning_rate": 9.865152276763735e-06, + "loss": 0.7482, + "step": 3324 + }, + { + "epoch": 0.10190633811450288, + "grad_norm": 1.9884453002949687, + "learning_rate": 9.865037763841505e-06, + "loss": 0.6361, + "step": 3325 + }, + { + "epoch": 0.10193698663724408, + "grad_norm": 1.2465582785383895, + "learning_rate": 9.864923202982815e-06, + "loss": 0.5212, + "step": 3326 + }, + { + "epoch": 
0.10196763515998529, + "grad_norm": 1.7189831875676653, + "learning_rate": 9.864808594188792e-06, + "loss": 0.7746, + "step": 3327 + }, + { + "epoch": 0.1019982836827265, + "grad_norm": 2.1702212944950072, + "learning_rate": 9.864693937460565e-06, + "loss": 0.741, + "step": 3328 + }, + { + "epoch": 0.1020289322054677, + "grad_norm": 2.0236660714091905, + "learning_rate": 9.864579232799263e-06, + "loss": 0.8418, + "step": 3329 + }, + { + "epoch": 0.1020595807282089, + "grad_norm": 1.7826780103267497, + "learning_rate": 9.864464480206017e-06, + "loss": 0.7443, + "step": 3330 + }, + { + "epoch": 0.1020902292509501, + "grad_norm": 2.048389088130823, + "learning_rate": 9.86434967968196e-06, + "loss": 0.8108, + "step": 3331 + }, + { + "epoch": 0.1021208777736913, + "grad_norm": 1.793435549234387, + "learning_rate": 9.864234831228218e-06, + "loss": 0.77, + "step": 3332 + }, + { + "epoch": 0.10215152629643251, + "grad_norm": 1.8697426120305034, + "learning_rate": 9.864119934845928e-06, + "loss": 0.7568, + "step": 3333 + }, + { + "epoch": 0.10218217481917372, + "grad_norm": 1.1247384604433528, + "learning_rate": 9.86400499053622e-06, + "loss": 0.5337, + "step": 3334 + }, + { + "epoch": 0.10221282334191492, + "grad_norm": 0.9664996484315944, + "learning_rate": 9.863889998300225e-06, + "loss": 0.5073, + "step": 3335 + }, + { + "epoch": 0.10224347186465613, + "grad_norm": 2.0986165254267846, + "learning_rate": 9.863774958139078e-06, + "loss": 0.759, + "step": 3336 + }, + { + "epoch": 0.10227412038739733, + "grad_norm": 1.8849468042765154, + "learning_rate": 9.863659870053912e-06, + "loss": 0.8399, + "step": 3337 + }, + { + "epoch": 0.10230476891013854, + "grad_norm": 2.0364647422751374, + "learning_rate": 9.86354473404586e-06, + "loss": 0.8049, + "step": 3338 + }, + { + "epoch": 0.10233541743287973, + "grad_norm": 2.084005392014446, + "learning_rate": 9.863429550116056e-06, + "loss": 0.785, + "step": 3339 + }, + { + "epoch": 0.10236606595562094, + "grad_norm": 1.9506389308905057, + "learning_rate": 9.86331431826564e-06, + "loss": 0.8099, + "step": 3340 + }, + { + "epoch": 0.10239671447836214, + "grad_norm": 1.9876772446061046, + "learning_rate": 9.863199038495741e-06, + "loss": 0.7491, + "step": 3341 + }, + { + "epoch": 0.10242736300110335, + "grad_norm": 1.7618386908108465, + "learning_rate": 9.8630837108075e-06, + "loss": 0.6849, + "step": 3342 + }, + { + "epoch": 0.10245801152384455, + "grad_norm": 1.334403973050647, + "learning_rate": 9.862968335202048e-06, + "loss": 0.5423, + "step": 3343 + }, + { + "epoch": 0.10248866004658576, + "grad_norm": 1.8877156589099788, + "learning_rate": 9.862852911680527e-06, + "loss": 0.8023, + "step": 3344 + }, + { + "epoch": 0.10251930856932696, + "grad_norm": 2.0550252049257307, + "learning_rate": 9.86273744024407e-06, + "loss": 0.7626, + "step": 3345 + }, + { + "epoch": 0.10254995709206816, + "grad_norm": 2.239664601669099, + "learning_rate": 9.862621920893817e-06, + "loss": 0.7011, + "step": 3346 + }, + { + "epoch": 0.10258060561480936, + "grad_norm": 1.8063846513397017, + "learning_rate": 9.862506353630908e-06, + "loss": 0.707, + "step": 3347 + }, + { + "epoch": 0.10261125413755057, + "grad_norm": 1.7754205051332632, + "learning_rate": 9.86239073845648e-06, + "loss": 0.733, + "step": 3348 + }, + { + "epoch": 0.10264190266029177, + "grad_norm": 1.752841479592276, + "learning_rate": 9.86227507537167e-06, + "loss": 0.76, + "step": 3349 + }, + { + "epoch": 0.10267255118303298, + "grad_norm": 0.949762443106377, + "learning_rate": 9.86215936437762e-06, + "loss": 
0.5163, + "step": 3350 + }, + { + "epoch": 0.10270319970577418, + "grad_norm": 2.2271368774351203, + "learning_rate": 9.86204360547547e-06, + "loss": 0.732, + "step": 3351 + }, + { + "epoch": 0.10273384822851539, + "grad_norm": 1.8794123866454127, + "learning_rate": 9.861927798666361e-06, + "loss": 0.8053, + "step": 3352 + }, + { + "epoch": 0.1027644967512566, + "grad_norm": 1.8605751804037651, + "learning_rate": 9.861811943951432e-06, + "loss": 0.7615, + "step": 3353 + }, + { + "epoch": 0.10279514527399779, + "grad_norm": 2.040164550036215, + "learning_rate": 9.861696041331828e-06, + "loss": 0.7372, + "step": 3354 + }, + { + "epoch": 0.10282579379673899, + "grad_norm": 2.5992955527778463, + "learning_rate": 9.861580090808687e-06, + "loss": 0.7815, + "step": 3355 + }, + { + "epoch": 0.1028564423194802, + "grad_norm": 2.15843160965356, + "learning_rate": 9.861464092383155e-06, + "loss": 0.8411, + "step": 3356 + }, + { + "epoch": 0.1028870908422214, + "grad_norm": 0.8965247873747452, + "learning_rate": 9.86134804605637e-06, + "loss": 0.5252, + "step": 3357 + }, + { + "epoch": 0.10291773936496261, + "grad_norm": 2.127408927768419, + "learning_rate": 9.861231951829484e-06, + "loss": 0.8161, + "step": 3358 + }, + { + "epoch": 0.10294838788770382, + "grad_norm": 1.7746405971719903, + "learning_rate": 9.861115809703633e-06, + "loss": 0.733, + "step": 3359 + }, + { + "epoch": 0.10297903641044502, + "grad_norm": 1.8976954157705956, + "learning_rate": 9.860999619679965e-06, + "loss": 0.6941, + "step": 3360 + }, + { + "epoch": 0.10300968493318623, + "grad_norm": 1.9372499014131155, + "learning_rate": 9.860883381759622e-06, + "loss": 0.8611, + "step": 3361 + }, + { + "epoch": 0.10304033345592742, + "grad_norm": 1.9216925648227183, + "learning_rate": 9.860767095943754e-06, + "loss": 0.7107, + "step": 3362 + }, + { + "epoch": 0.10307098197866862, + "grad_norm": 1.9247978866463764, + "learning_rate": 9.860650762233504e-06, + "loss": 0.7401, + "step": 3363 + }, + { + "epoch": 0.10310163050140983, + "grad_norm": 2.0120639994926153, + "learning_rate": 9.860534380630016e-06, + "loss": 0.8481, + "step": 3364 + }, + { + "epoch": 0.10313227902415104, + "grad_norm": 2.055111778332105, + "learning_rate": 9.860417951134441e-06, + "loss": 0.9191, + "step": 3365 + }, + { + "epoch": 0.10316292754689224, + "grad_norm": 0.9661033746155321, + "learning_rate": 9.860301473747923e-06, + "loss": 0.4827, + "step": 3366 + }, + { + "epoch": 0.10319357606963345, + "grad_norm": 1.9763417018184317, + "learning_rate": 9.860184948471613e-06, + "loss": 0.8029, + "step": 3367 + }, + { + "epoch": 0.10322422459237465, + "grad_norm": 1.7606451711765014, + "learning_rate": 9.860068375306655e-06, + "loss": 0.6759, + "step": 3368 + }, + { + "epoch": 0.10325487311511586, + "grad_norm": 1.8587996711665913, + "learning_rate": 9.859951754254203e-06, + "loss": 0.749, + "step": 3369 + }, + { + "epoch": 0.10328552163785705, + "grad_norm": 1.8020954369199185, + "learning_rate": 9.859835085315399e-06, + "loss": 0.8178, + "step": 3370 + }, + { + "epoch": 0.10331617016059826, + "grad_norm": 1.9291517126398061, + "learning_rate": 9.859718368491398e-06, + "loss": 0.8434, + "step": 3371 + }, + { + "epoch": 0.10334681868333946, + "grad_norm": 1.794283709415034, + "learning_rate": 9.85960160378335e-06, + "loss": 0.7331, + "step": 3372 + }, + { + "epoch": 0.10337746720608067, + "grad_norm": 1.976305253465015, + "learning_rate": 9.859484791192402e-06, + "loss": 0.8231, + "step": 3373 + }, + { + "epoch": 0.10340811572882187, + "grad_norm": 
1.9568111254858573, + "learning_rate": 9.859367930719708e-06, + "loss": 0.726, + "step": 3374 + }, + { + "epoch": 0.10343876425156308, + "grad_norm": 1.9662485005680252, + "learning_rate": 9.859251022366418e-06, + "loss": 0.8185, + "step": 3375 + }, + { + "epoch": 0.10346941277430428, + "grad_norm": 0.9669851871740194, + "learning_rate": 9.859134066133685e-06, + "loss": 0.5256, + "step": 3376 + }, + { + "epoch": 0.10350006129704548, + "grad_norm": 1.967094325560716, + "learning_rate": 9.85901706202266e-06, + "loss": 0.7563, + "step": 3377 + }, + { + "epoch": 0.10353070981978668, + "grad_norm": 1.951027562747449, + "learning_rate": 9.858900010034498e-06, + "loss": 0.7596, + "step": 3378 + }, + { + "epoch": 0.10356135834252789, + "grad_norm": 1.97136216305659, + "learning_rate": 9.858782910170348e-06, + "loss": 0.7748, + "step": 3379 + }, + { + "epoch": 0.10359200686526909, + "grad_norm": 1.8523293371693412, + "learning_rate": 9.85866576243137e-06, + "loss": 0.6748, + "step": 3380 + }, + { + "epoch": 0.1036226553880103, + "grad_norm": 1.8899013632198438, + "learning_rate": 9.858548566818712e-06, + "loss": 0.7613, + "step": 3381 + }, + { + "epoch": 0.1036533039107515, + "grad_norm": 1.6126234848173633, + "learning_rate": 9.858431323333535e-06, + "loss": 0.7234, + "step": 3382 + }, + { + "epoch": 0.10368395243349271, + "grad_norm": 2.0943469341012597, + "learning_rate": 9.85831403197699e-06, + "loss": 0.7862, + "step": 3383 + }, + { + "epoch": 0.10371460095623392, + "grad_norm": 1.9783226767083921, + "learning_rate": 9.858196692750233e-06, + "loss": 0.7186, + "step": 3384 + }, + { + "epoch": 0.10374524947897511, + "grad_norm": 2.1863105356768027, + "learning_rate": 9.858079305654421e-06, + "loss": 0.8247, + "step": 3385 + }, + { + "epoch": 0.10377589800171631, + "grad_norm": 1.5416822913850479, + "learning_rate": 9.857961870690712e-06, + "loss": 0.6566, + "step": 3386 + }, + { + "epoch": 0.10380654652445752, + "grad_norm": 2.0246202023490603, + "learning_rate": 9.85784438786026e-06, + "loss": 0.8187, + "step": 3387 + }, + { + "epoch": 0.10383719504719872, + "grad_norm": 1.7470817386904833, + "learning_rate": 9.857726857164227e-06, + "loss": 0.6718, + "step": 3388 + }, + { + "epoch": 0.10386784356993993, + "grad_norm": 1.8915248731316423, + "learning_rate": 9.857609278603766e-06, + "loss": 0.7491, + "step": 3389 + }, + { + "epoch": 0.10389849209268114, + "grad_norm": 1.8951994298049348, + "learning_rate": 9.857491652180038e-06, + "loss": 0.7428, + "step": 3390 + }, + { + "epoch": 0.10392914061542234, + "grad_norm": 1.9174888612575596, + "learning_rate": 9.857373977894202e-06, + "loss": 0.7153, + "step": 3391 + }, + { + "epoch": 0.10395978913816355, + "grad_norm": 1.9478080429944078, + "learning_rate": 9.857256255747418e-06, + "loss": 0.8533, + "step": 3392 + }, + { + "epoch": 0.10399043766090474, + "grad_norm": 1.7054965872170793, + "learning_rate": 9.857138485740845e-06, + "loss": 0.7147, + "step": 3393 + }, + { + "epoch": 0.10402108618364594, + "grad_norm": 1.951528642540221, + "learning_rate": 9.857020667875645e-06, + "loss": 0.7213, + "step": 3394 + }, + { + "epoch": 0.10405173470638715, + "grad_norm": 1.7715960367938648, + "learning_rate": 9.856902802152977e-06, + "loss": 0.811, + "step": 3395 + }, + { + "epoch": 0.10408238322912836, + "grad_norm": 1.7663989444942718, + "learning_rate": 9.856784888574e-06, + "loss": 0.7038, + "step": 3396 + }, + { + "epoch": 0.10411303175186956, + "grad_norm": 1.7441800861234025, + "learning_rate": 9.856666927139882e-06, + "loss": 0.7783, + "step": 3397 + 
}, + { + "epoch": 0.10414368027461077, + "grad_norm": 1.943100973957482, + "learning_rate": 9.856548917851782e-06, + "loss": 0.7042, + "step": 3398 + }, + { + "epoch": 0.10417432879735197, + "grad_norm": 1.7806277838897804, + "learning_rate": 9.856430860710862e-06, + "loss": 0.7364, + "step": 3399 + }, + { + "epoch": 0.10420497732009318, + "grad_norm": 1.8231775756091178, + "learning_rate": 9.856312755718286e-06, + "loss": 0.767, + "step": 3400 + }, + { + "epoch": 0.10423562584283437, + "grad_norm": 1.845130596851287, + "learning_rate": 9.85619460287522e-06, + "loss": 0.7244, + "step": 3401 + }, + { + "epoch": 0.10426627436557558, + "grad_norm": 1.8466730492514754, + "learning_rate": 9.856076402182824e-06, + "loss": 0.7215, + "step": 3402 + }, + { + "epoch": 0.10429692288831678, + "grad_norm": 2.069180900969566, + "learning_rate": 9.855958153642265e-06, + "loss": 0.7938, + "step": 3403 + }, + { + "epoch": 0.10432757141105799, + "grad_norm": 1.7616843571988896, + "learning_rate": 9.855839857254709e-06, + "loss": 0.6228, + "step": 3404 + }, + { + "epoch": 0.10435821993379919, + "grad_norm": 1.817916295715751, + "learning_rate": 9.855721513021319e-06, + "loss": 0.7292, + "step": 3405 + }, + { + "epoch": 0.1043888684565404, + "grad_norm": 2.149134016429619, + "learning_rate": 9.855603120943263e-06, + "loss": 0.7677, + "step": 3406 + }, + { + "epoch": 0.1044195169792816, + "grad_norm": 1.7823200503006205, + "learning_rate": 9.855484681021708e-06, + "loss": 0.8291, + "step": 3407 + }, + { + "epoch": 0.1044501655020228, + "grad_norm": 1.9915742772368141, + "learning_rate": 9.855366193257818e-06, + "loss": 0.8144, + "step": 3408 + }, + { + "epoch": 0.104480814024764, + "grad_norm": 2.2686934413561906, + "learning_rate": 9.855247657652764e-06, + "loss": 0.8958, + "step": 3409 + }, + { + "epoch": 0.10451146254750521, + "grad_norm": 1.9504772686285592, + "learning_rate": 9.855129074207714e-06, + "loss": 0.7124, + "step": 3410 + }, + { + "epoch": 0.10454211107024641, + "grad_norm": 0.9195438563876296, + "learning_rate": 9.855010442923832e-06, + "loss": 0.5116, + "step": 3411 + }, + { + "epoch": 0.10457275959298762, + "grad_norm": 2.1987103548423663, + "learning_rate": 9.854891763802292e-06, + "loss": 0.8782, + "step": 3412 + }, + { + "epoch": 0.10460340811572882, + "grad_norm": 2.1032521390605323, + "learning_rate": 9.85477303684426e-06, + "loss": 0.6942, + "step": 3413 + }, + { + "epoch": 0.10463405663847003, + "grad_norm": 0.8938263832214811, + "learning_rate": 9.85465426205091e-06, + "loss": 0.5199, + "step": 3414 + }, + { + "epoch": 0.10466470516121124, + "grad_norm": 1.9284637768679715, + "learning_rate": 9.854535439423404e-06, + "loss": 0.852, + "step": 3415 + }, + { + "epoch": 0.10469535368395243, + "grad_norm": 2.0394588249447896, + "learning_rate": 9.854416568962924e-06, + "loss": 0.7737, + "step": 3416 + }, + { + "epoch": 0.10472600220669363, + "grad_norm": 0.9022644134442472, + "learning_rate": 9.854297650670632e-06, + "loss": 0.5242, + "step": 3417 + }, + { + "epoch": 0.10475665072943484, + "grad_norm": 1.9727589950465614, + "learning_rate": 9.854178684547704e-06, + "loss": 0.7456, + "step": 3418 + }, + { + "epoch": 0.10478729925217604, + "grad_norm": 1.7546696629227836, + "learning_rate": 9.85405967059531e-06, + "loss": 0.8299, + "step": 3419 + }, + { + "epoch": 0.10481794777491725, + "grad_norm": 1.8388203292837142, + "learning_rate": 9.853940608814628e-06, + "loss": 0.8046, + "step": 3420 + }, + { + "epoch": 0.10484859629765846, + "grad_norm": 1.906513364628526, + "learning_rate": 
9.853821499206824e-06, + "loss": 0.7497, + "step": 3421 + }, + { + "epoch": 0.10487924482039966, + "grad_norm": 1.9178955842834906, + "learning_rate": 9.853702341773075e-06, + "loss": 0.7971, + "step": 3422 + }, + { + "epoch": 0.10490989334314087, + "grad_norm": 1.8995258725378348, + "learning_rate": 9.853583136514557e-06, + "loss": 0.7642, + "step": 3423 + }, + { + "epoch": 0.10494054186588206, + "grad_norm": 1.9572938069104808, + "learning_rate": 9.85346388343244e-06, + "loss": 0.7246, + "step": 3424 + }, + { + "epoch": 0.10497119038862326, + "grad_norm": 0.971491576045539, + "learning_rate": 9.853344582527903e-06, + "loss": 0.5077, + "step": 3425 + }, + { + "epoch": 0.10500183891136447, + "grad_norm": 2.0400971962376024, + "learning_rate": 9.85322523380212e-06, + "loss": 0.8013, + "step": 3426 + }, + { + "epoch": 0.10503248743410568, + "grad_norm": 1.7541364741140228, + "learning_rate": 9.853105837256267e-06, + "loss": 0.8695, + "step": 3427 + }, + { + "epoch": 0.10506313595684688, + "grad_norm": 2.032836682235508, + "learning_rate": 9.85298639289152e-06, + "loss": 0.8271, + "step": 3428 + }, + { + "epoch": 0.10509378447958809, + "grad_norm": 1.876576207374924, + "learning_rate": 9.852866900709058e-06, + "loss": 0.7188, + "step": 3429 + }, + { + "epoch": 0.10512443300232929, + "grad_norm": 2.0707135866323516, + "learning_rate": 9.852747360710055e-06, + "loss": 0.7747, + "step": 3430 + }, + { + "epoch": 0.1051550815250705, + "grad_norm": 1.7031550377170925, + "learning_rate": 9.85262777289569e-06, + "loss": 0.7583, + "step": 3431 + }, + { + "epoch": 0.10518573004781169, + "grad_norm": 2.0478577704723535, + "learning_rate": 9.852508137267143e-06, + "loss": 0.7526, + "step": 3432 + }, + { + "epoch": 0.1052163785705529, + "grad_norm": 1.850219576902033, + "learning_rate": 9.852388453825592e-06, + "loss": 0.7673, + "step": 3433 + }, + { + "epoch": 0.1052470270932941, + "grad_norm": 1.8288105224958613, + "learning_rate": 9.852268722572216e-06, + "loss": 0.7235, + "step": 3434 + }, + { + "epoch": 0.10527767561603531, + "grad_norm": 1.0943549381203692, + "learning_rate": 9.852148943508195e-06, + "loss": 0.5162, + "step": 3435 + }, + { + "epoch": 0.10530832413877651, + "grad_norm": 2.1287883423194622, + "learning_rate": 9.852029116634708e-06, + "loss": 0.763, + "step": 3436 + }, + { + "epoch": 0.10533897266151772, + "grad_norm": 2.084992860152075, + "learning_rate": 9.851909241952938e-06, + "loss": 0.7562, + "step": 3437 + }, + { + "epoch": 0.10536962118425892, + "grad_norm": 0.8729156347121957, + "learning_rate": 9.851789319464064e-06, + "loss": 0.5213, + "step": 3438 + }, + { + "epoch": 0.10540026970700012, + "grad_norm": 2.354787565214842, + "learning_rate": 9.851669349169269e-06, + "loss": 0.8182, + "step": 3439 + }, + { + "epoch": 0.10543091822974132, + "grad_norm": 1.9733395546914139, + "learning_rate": 9.851549331069734e-06, + "loss": 0.803, + "step": 3440 + }, + { + "epoch": 0.10546156675248253, + "grad_norm": 1.8952983693081684, + "learning_rate": 9.85142926516664e-06, + "loss": 0.7096, + "step": 3441 + }, + { + "epoch": 0.10549221527522373, + "grad_norm": 0.9546695178247402, + "learning_rate": 9.851309151461176e-06, + "loss": 0.5215, + "step": 3442 + }, + { + "epoch": 0.10552286379796494, + "grad_norm": 1.959058560639718, + "learning_rate": 9.85118898995452e-06, + "loss": 0.7837, + "step": 3443 + }, + { + "epoch": 0.10555351232070614, + "grad_norm": 1.885958498865122, + "learning_rate": 9.851068780647857e-06, + "loss": 0.799, + "step": 3444 + }, + { + "epoch": 0.10558416084344735, + 
"grad_norm": 1.7365840007929305, + "learning_rate": 9.850948523542373e-06, + "loss": 0.7031, + "step": 3445 + }, + { + "epoch": 0.10561480936618856, + "grad_norm": 1.8840408590769417, + "learning_rate": 9.850828218639252e-06, + "loss": 0.6794, + "step": 3446 + }, + { + "epoch": 0.10564545788892975, + "grad_norm": 0.9125811777371574, + "learning_rate": 9.85070786593968e-06, + "loss": 0.4937, + "step": 3447 + }, + { + "epoch": 0.10567610641167095, + "grad_norm": 1.9156843811575894, + "learning_rate": 9.850587465444841e-06, + "loss": 0.7944, + "step": 3448 + }, + { + "epoch": 0.10570675493441216, + "grad_norm": 1.9301497077104062, + "learning_rate": 9.850467017155922e-06, + "loss": 0.6034, + "step": 3449 + }, + { + "epoch": 0.10573740345715336, + "grad_norm": 1.6813582253270296, + "learning_rate": 9.850346521074112e-06, + "loss": 0.69, + "step": 3450 + }, + { + "epoch": 0.10576805197989457, + "grad_norm": 1.929648718523143, + "learning_rate": 9.850225977200596e-06, + "loss": 0.8481, + "step": 3451 + }, + { + "epoch": 0.10579870050263578, + "grad_norm": 2.0335329910731885, + "learning_rate": 9.850105385536564e-06, + "loss": 0.7747, + "step": 3452 + }, + { + "epoch": 0.10582934902537698, + "grad_norm": 2.150751013027864, + "learning_rate": 9.849984746083202e-06, + "loss": 0.7533, + "step": 3453 + }, + { + "epoch": 0.10585999754811819, + "grad_norm": 2.0002935222484215, + "learning_rate": 9.849864058841699e-06, + "loss": 0.8071, + "step": 3454 + }, + { + "epoch": 0.10589064607085938, + "grad_norm": 1.9807250174425348, + "learning_rate": 9.849743323813243e-06, + "loss": 0.7472, + "step": 3455 + }, + { + "epoch": 0.10592129459360058, + "grad_norm": 1.8749731046675853, + "learning_rate": 9.849622540999027e-06, + "loss": 0.7543, + "step": 3456 + }, + { + "epoch": 0.10595194311634179, + "grad_norm": 1.0215085226886347, + "learning_rate": 9.849501710400238e-06, + "loss": 0.511, + "step": 3457 + }, + { + "epoch": 0.105982591639083, + "grad_norm": 2.3995366251130648, + "learning_rate": 9.84938083201807e-06, + "loss": 0.8441, + "step": 3458 + }, + { + "epoch": 0.1060132401618242, + "grad_norm": 2.1547902635329055, + "learning_rate": 9.84925990585371e-06, + "loss": 0.8197, + "step": 3459 + }, + { + "epoch": 0.10604388868456541, + "grad_norm": 2.276678789596579, + "learning_rate": 9.849138931908352e-06, + "loss": 0.7576, + "step": 3460 + }, + { + "epoch": 0.10607453720730661, + "grad_norm": 1.8722300840836583, + "learning_rate": 9.849017910183187e-06, + "loss": 0.7652, + "step": 3461 + }, + { + "epoch": 0.10610518573004782, + "grad_norm": 2.0026060604189375, + "learning_rate": 9.848896840679408e-06, + "loss": 0.6869, + "step": 3462 + }, + { + "epoch": 0.10613583425278901, + "grad_norm": 1.9008937081813915, + "learning_rate": 9.848775723398207e-06, + "loss": 0.8644, + "step": 3463 + }, + { + "epoch": 0.10616648277553022, + "grad_norm": 1.797626390991786, + "learning_rate": 9.84865455834078e-06, + "loss": 0.7392, + "step": 3464 + }, + { + "epoch": 0.10619713129827142, + "grad_norm": 1.841066465713901, + "learning_rate": 9.848533345508318e-06, + "loss": 0.7445, + "step": 3465 + }, + { + "epoch": 0.10622777982101263, + "grad_norm": 2.66661881858276, + "learning_rate": 9.848412084902017e-06, + "loss": 0.9385, + "step": 3466 + }, + { + "epoch": 0.10625842834375383, + "grad_norm": 0.9443273191258454, + "learning_rate": 9.848290776523071e-06, + "loss": 0.5011, + "step": 3467 + }, + { + "epoch": 0.10628907686649504, + "grad_norm": 1.7461835680378532, + "learning_rate": 9.848169420372675e-06, + "loss": 0.7667, + 
"step": 3468 + }, + { + "epoch": 0.10631972538923624, + "grad_norm": 1.9726695664471732, + "learning_rate": 9.848048016452025e-06, + "loss": 0.7407, + "step": 3469 + }, + { + "epoch": 0.10635037391197744, + "grad_norm": 1.6345912277439651, + "learning_rate": 9.847926564762318e-06, + "loss": 0.6027, + "step": 3470 + }, + { + "epoch": 0.10638102243471864, + "grad_norm": 1.7589837545735427, + "learning_rate": 9.84780506530475e-06, + "loss": 0.6685, + "step": 3471 + }, + { + "epoch": 0.10641167095745985, + "grad_norm": 2.1257637918142014, + "learning_rate": 9.84768351808052e-06, + "loss": 0.8253, + "step": 3472 + }, + { + "epoch": 0.10644231948020105, + "grad_norm": 1.9678743812558626, + "learning_rate": 9.847561923090823e-06, + "loss": 0.8214, + "step": 3473 + }, + { + "epoch": 0.10647296800294226, + "grad_norm": 2.3594635857606856, + "learning_rate": 9.847440280336856e-06, + "loss": 0.8665, + "step": 3474 + }, + { + "epoch": 0.10650361652568346, + "grad_norm": 2.1116057099883707, + "learning_rate": 9.847318589819821e-06, + "loss": 0.7657, + "step": 3475 + }, + { + "epoch": 0.10653426504842467, + "grad_norm": 1.9797082415985445, + "learning_rate": 9.847196851540916e-06, + "loss": 0.7443, + "step": 3476 + }, + { + "epoch": 0.10656491357116588, + "grad_norm": 1.8322297876459641, + "learning_rate": 9.84707506550134e-06, + "loss": 0.7379, + "step": 3477 + }, + { + "epoch": 0.10659556209390707, + "grad_norm": 2.0488398205846146, + "learning_rate": 9.846953231702294e-06, + "loss": 0.806, + "step": 3478 + }, + { + "epoch": 0.10662621061664827, + "grad_norm": 1.763243889151968, + "learning_rate": 9.846831350144977e-06, + "loss": 0.808, + "step": 3479 + }, + { + "epoch": 0.10665685913938948, + "grad_norm": 2.388555909219983, + "learning_rate": 9.84670942083059e-06, + "loss": 0.8321, + "step": 3480 + }, + { + "epoch": 0.10668750766213068, + "grad_norm": 1.9358414851476886, + "learning_rate": 9.846587443760337e-06, + "loss": 0.8751, + "step": 3481 + }, + { + "epoch": 0.10671815618487189, + "grad_norm": 2.0191139355321504, + "learning_rate": 9.846465418935415e-06, + "loss": 0.7018, + "step": 3482 + }, + { + "epoch": 0.1067488047076131, + "grad_norm": 1.980583269869422, + "learning_rate": 9.84634334635703e-06, + "loss": 0.8109, + "step": 3483 + }, + { + "epoch": 0.1067794532303543, + "grad_norm": 1.8995612674891607, + "learning_rate": 9.846221226026386e-06, + "loss": 0.8122, + "step": 3484 + }, + { + "epoch": 0.10681010175309551, + "grad_norm": 1.886385085832981, + "learning_rate": 9.846099057944683e-06, + "loss": 0.7484, + "step": 3485 + }, + { + "epoch": 0.1068407502758367, + "grad_norm": 2.001841442043668, + "learning_rate": 9.845976842113125e-06, + "loss": 0.7073, + "step": 3486 + }, + { + "epoch": 0.1068713987985779, + "grad_norm": 1.9293388805333684, + "learning_rate": 9.845854578532918e-06, + "loss": 0.6934, + "step": 3487 + }, + { + "epoch": 0.10690204732131911, + "grad_norm": 1.6475458449704203, + "learning_rate": 9.845732267205266e-06, + "loss": 0.7595, + "step": 3488 + }, + { + "epoch": 0.10693269584406032, + "grad_norm": 1.9066042714976086, + "learning_rate": 9.845609908131374e-06, + "loss": 0.7447, + "step": 3489 + }, + { + "epoch": 0.10696334436680152, + "grad_norm": 1.9136830580504547, + "learning_rate": 9.845487501312449e-06, + "loss": 0.7721, + "step": 3490 + }, + { + "epoch": 0.10699399288954273, + "grad_norm": 2.1988839479312077, + "learning_rate": 9.845365046749695e-06, + "loss": 0.8332, + "step": 3491 + }, + { + "epoch": 0.10702464141228393, + "grad_norm": 1.9719822506576345, + 
"learning_rate": 9.84524254444432e-06, + "loss": 0.7543, + "step": 3492 + }, + { + "epoch": 0.10705528993502514, + "grad_norm": 2.0344324033137373, + "learning_rate": 9.845119994397529e-06, + "loss": 0.829, + "step": 3493 + }, + { + "epoch": 0.10708593845776633, + "grad_norm": 2.013386073183352, + "learning_rate": 9.844997396610535e-06, + "loss": 0.8434, + "step": 3494 + }, + { + "epoch": 0.10711658698050754, + "grad_norm": 2.047869469281476, + "learning_rate": 9.844874751084536e-06, + "loss": 0.6862, + "step": 3495 + }, + { + "epoch": 0.10714723550324874, + "grad_norm": 1.953750013724405, + "learning_rate": 9.84475205782075e-06, + "loss": 0.789, + "step": 3496 + }, + { + "epoch": 0.10717788402598995, + "grad_norm": 1.9300999332824784, + "learning_rate": 9.844629316820382e-06, + "loss": 0.8264, + "step": 3497 + }, + { + "epoch": 0.10720853254873115, + "grad_norm": 1.974098450021844, + "learning_rate": 9.844506528084643e-06, + "loss": 0.8953, + "step": 3498 + }, + { + "epoch": 0.10723918107147236, + "grad_norm": 2.0091371675421823, + "learning_rate": 9.84438369161474e-06, + "loss": 0.7056, + "step": 3499 + }, + { + "epoch": 0.10726982959421356, + "grad_norm": 2.0652921848904064, + "learning_rate": 9.844260807411886e-06, + "loss": 0.7919, + "step": 3500 + }, + { + "epoch": 0.10730047811695476, + "grad_norm": 1.9216555309559173, + "learning_rate": 9.844137875477288e-06, + "loss": 0.7884, + "step": 3501 + }, + { + "epoch": 0.10733112663969596, + "grad_norm": 2.175468917599524, + "learning_rate": 9.844014895812163e-06, + "loss": 0.7308, + "step": 3502 + }, + { + "epoch": 0.10736177516243717, + "grad_norm": 2.088778088265246, + "learning_rate": 9.843891868417718e-06, + "loss": 0.7243, + "step": 3503 + }, + { + "epoch": 0.10739242368517837, + "grad_norm": 1.9315879930098343, + "learning_rate": 9.84376879329517e-06, + "loss": 0.7134, + "step": 3504 + }, + { + "epoch": 0.10742307220791958, + "grad_norm": 1.0674948016365813, + "learning_rate": 9.843645670445726e-06, + "loss": 0.5261, + "step": 3505 + }, + { + "epoch": 0.10745372073066078, + "grad_norm": 1.8391531971537969, + "learning_rate": 9.843522499870602e-06, + "loss": 0.8351, + "step": 3506 + }, + { + "epoch": 0.10748436925340199, + "grad_norm": 2.11411081761559, + "learning_rate": 9.843399281571013e-06, + "loss": 0.7853, + "step": 3507 + }, + { + "epoch": 0.1075150177761432, + "grad_norm": 1.8527900978436702, + "learning_rate": 9.843276015548171e-06, + "loss": 0.6815, + "step": 3508 + }, + { + "epoch": 0.10754566629888439, + "grad_norm": 2.239179012032572, + "learning_rate": 9.843152701803292e-06, + "loss": 0.7019, + "step": 3509 + }, + { + "epoch": 0.1075763148216256, + "grad_norm": 2.2361545822909927, + "learning_rate": 9.843029340337589e-06, + "loss": 0.7014, + "step": 3510 + }, + { + "epoch": 0.1076069633443668, + "grad_norm": 1.8951584146623983, + "learning_rate": 9.84290593115228e-06, + "loss": 0.7908, + "step": 3511 + }, + { + "epoch": 0.107637611867108, + "grad_norm": 1.7693167504323868, + "learning_rate": 9.842782474248578e-06, + "loss": 0.7408, + "step": 3512 + }, + { + "epoch": 0.10766826038984921, + "grad_norm": 1.9605083111782962, + "learning_rate": 9.842658969627702e-06, + "loss": 0.7175, + "step": 3513 + }, + { + "epoch": 0.10769890891259042, + "grad_norm": 2.1968622843200882, + "learning_rate": 9.842535417290868e-06, + "loss": 0.7745, + "step": 3514 + }, + { + "epoch": 0.10772955743533162, + "grad_norm": 1.9014558769711003, + "learning_rate": 9.842411817239293e-06, + "loss": 0.743, + "step": 3515 + }, + { + "epoch": 
0.10776020595807283, + "grad_norm": 2.1888007014999284, + "learning_rate": 9.842288169474197e-06, + "loss": 0.8442, + "step": 3516 + }, + { + "epoch": 0.10779085448081402, + "grad_norm": 2.026768862605879, + "learning_rate": 9.842164473996797e-06, + "loss": 0.8479, + "step": 3517 + }, + { + "epoch": 0.10782150300355522, + "grad_norm": 2.194102337599011, + "learning_rate": 9.842040730808308e-06, + "loss": 0.7499, + "step": 3518 + }, + { + "epoch": 0.10785215152629643, + "grad_norm": 2.068504703906561, + "learning_rate": 9.841916939909956e-06, + "loss": 0.7179, + "step": 3519 + }, + { + "epoch": 0.10788280004903764, + "grad_norm": 1.784889937002916, + "learning_rate": 9.841793101302957e-06, + "loss": 0.7649, + "step": 3520 + }, + { + "epoch": 0.10791344857177884, + "grad_norm": 1.794775598301373, + "learning_rate": 9.841669214988532e-06, + "loss": 0.7224, + "step": 3521 + }, + { + "epoch": 0.10794409709452005, + "grad_norm": 1.7392090621413825, + "learning_rate": 9.8415452809679e-06, + "loss": 0.7873, + "step": 3522 + }, + { + "epoch": 0.10797474561726125, + "grad_norm": 1.9352945299673272, + "learning_rate": 9.841421299242284e-06, + "loss": 0.7174, + "step": 3523 + }, + { + "epoch": 0.10800539414000246, + "grad_norm": 1.9079236516411968, + "learning_rate": 9.841297269812906e-06, + "loss": 0.8018, + "step": 3524 + }, + { + "epoch": 0.10803604266274365, + "grad_norm": 1.9700777994072227, + "learning_rate": 9.841173192680987e-06, + "loss": 0.7189, + "step": 3525 + }, + { + "epoch": 0.10806669118548486, + "grad_norm": 1.7601099254223052, + "learning_rate": 9.84104906784775e-06, + "loss": 0.7208, + "step": 3526 + }, + { + "epoch": 0.10809733970822606, + "grad_norm": 1.861012884857957, + "learning_rate": 9.840924895314418e-06, + "loss": 0.7582, + "step": 3527 + }, + { + "epoch": 0.10812798823096727, + "grad_norm": 2.065824192278561, + "learning_rate": 9.840800675082214e-06, + "loss": 0.9042, + "step": 3528 + }, + { + "epoch": 0.10815863675370847, + "grad_norm": 1.8164141764516684, + "learning_rate": 9.840676407152363e-06, + "loss": 0.7434, + "step": 3529 + }, + { + "epoch": 0.10818928527644968, + "grad_norm": 2.1958780352975986, + "learning_rate": 9.840552091526088e-06, + "loss": 0.7683, + "step": 3530 + }, + { + "epoch": 0.10821993379919088, + "grad_norm": 1.9752733091212074, + "learning_rate": 9.840427728204615e-06, + "loss": 0.8445, + "step": 3531 + }, + { + "epoch": 0.10825058232193208, + "grad_norm": 1.6263225432292943, + "learning_rate": 9.84030331718917e-06, + "loss": 0.6487, + "step": 3532 + }, + { + "epoch": 0.10828123084467328, + "grad_norm": 2.129336850980548, + "learning_rate": 9.840178858480976e-06, + "loss": 0.7367, + "step": 3533 + }, + { + "epoch": 0.10831187936741449, + "grad_norm": 2.271575048600106, + "learning_rate": 9.840054352081262e-06, + "loss": 0.8852, + "step": 3534 + }, + { + "epoch": 0.1083425278901557, + "grad_norm": 1.7627417472039368, + "learning_rate": 9.839929797991256e-06, + "loss": 0.764, + "step": 3535 + }, + { + "epoch": 0.1083731764128969, + "grad_norm": 1.9930803522538303, + "learning_rate": 9.839805196212183e-06, + "loss": 0.7654, + "step": 3536 + }, + { + "epoch": 0.1084038249356381, + "grad_norm": 1.9184579045792873, + "learning_rate": 9.839680546745268e-06, + "loss": 0.875, + "step": 3537 + }, + { + "epoch": 0.10843447345837931, + "grad_norm": 1.6986116025330609, + "learning_rate": 9.839555849591744e-06, + "loss": 0.716, + "step": 3538 + }, + { + "epoch": 0.10846512198112052, + "grad_norm": 1.9222198143960825, + "learning_rate": 9.83943110475284e-06, + 
"loss": 0.7765, + "step": 3539 + }, + { + "epoch": 0.10849577050386171, + "grad_norm": 1.859070585864703, + "learning_rate": 9.839306312229779e-06, + "loss": 0.7878, + "step": 3540 + }, + { + "epoch": 0.10852641902660291, + "grad_norm": 2.034075588767715, + "learning_rate": 9.839181472023798e-06, + "loss": 0.6829, + "step": 3541 + }, + { + "epoch": 0.10855706754934412, + "grad_norm": 2.1172625069942126, + "learning_rate": 9.839056584136123e-06, + "loss": 0.6978, + "step": 3542 + }, + { + "epoch": 0.10858771607208532, + "grad_norm": 1.961777187445112, + "learning_rate": 9.838931648567986e-06, + "loss": 0.7924, + "step": 3543 + }, + { + "epoch": 0.10861836459482653, + "grad_norm": 2.0296717340651447, + "learning_rate": 9.838806665320615e-06, + "loss": 0.8429, + "step": 3544 + }, + { + "epoch": 0.10864901311756774, + "grad_norm": 1.9105599980588657, + "learning_rate": 9.838681634395245e-06, + "loss": 0.6868, + "step": 3545 + }, + { + "epoch": 0.10867966164030894, + "grad_norm": 2.1347550051551654, + "learning_rate": 9.838556555793108e-06, + "loss": 0.6395, + "step": 3546 + }, + { + "epoch": 0.10871031016305015, + "grad_norm": 2.114683295881984, + "learning_rate": 9.838431429515434e-06, + "loss": 0.7225, + "step": 3547 + }, + { + "epoch": 0.10874095868579134, + "grad_norm": 2.1556271468136288, + "learning_rate": 9.838306255563459e-06, + "loss": 0.8295, + "step": 3548 + }, + { + "epoch": 0.10877160720853254, + "grad_norm": 1.9890654576111741, + "learning_rate": 9.838181033938413e-06, + "loss": 0.7116, + "step": 3549 + }, + { + "epoch": 0.10880225573127375, + "grad_norm": 2.1773562158045863, + "learning_rate": 9.838055764641533e-06, + "loss": 0.7734, + "step": 3550 + }, + { + "epoch": 0.10883290425401496, + "grad_norm": 1.3204364829832096, + "learning_rate": 9.83793044767405e-06, + "loss": 0.5196, + "step": 3551 + }, + { + "epoch": 0.10886355277675616, + "grad_norm": 1.8663152830774417, + "learning_rate": 9.837805083037199e-06, + "loss": 0.7823, + "step": 3552 + }, + { + "epoch": 0.10889420129949737, + "grad_norm": 1.8842343130410473, + "learning_rate": 9.83767967073222e-06, + "loss": 0.7647, + "step": 3553 + }, + { + "epoch": 0.10892484982223857, + "grad_norm": 0.8648107193723986, + "learning_rate": 9.837554210760344e-06, + "loss": 0.527, + "step": 3554 + }, + { + "epoch": 0.10895549834497978, + "grad_norm": 1.8985725432129588, + "learning_rate": 9.837428703122807e-06, + "loss": 0.7539, + "step": 3555 + }, + { + "epoch": 0.10898614686772097, + "grad_norm": 1.8896555373878599, + "learning_rate": 9.837303147820849e-06, + "loss": 0.7489, + "step": 3556 + }, + { + "epoch": 0.10901679539046218, + "grad_norm": 2.000106399513636, + "learning_rate": 9.837177544855705e-06, + "loss": 0.7004, + "step": 3557 + }, + { + "epoch": 0.10904744391320338, + "grad_norm": 1.8969074048333752, + "learning_rate": 9.837051894228614e-06, + "loss": 0.8274, + "step": 3558 + }, + { + "epoch": 0.10907809243594459, + "grad_norm": 1.2611669667442054, + "learning_rate": 9.836926195940811e-06, + "loss": 0.5408, + "step": 3559 + }, + { + "epoch": 0.1091087409586858, + "grad_norm": 1.8230227610670315, + "learning_rate": 9.836800449993538e-06, + "loss": 0.746, + "step": 3560 + }, + { + "epoch": 0.109139389481427, + "grad_norm": 2.147881085583073, + "learning_rate": 9.836674656388032e-06, + "loss": 0.7875, + "step": 3561 + }, + { + "epoch": 0.1091700380041682, + "grad_norm": 0.9329479770983513, + "learning_rate": 9.836548815125536e-06, + "loss": 0.5365, + "step": 3562 + }, + { + "epoch": 0.1092006865269094, + "grad_norm": 
1.846265633877356, + "learning_rate": 9.836422926207283e-06, + "loss": 0.5972, + "step": 3563 + }, + { + "epoch": 0.1092313350496506, + "grad_norm": 2.0880970813206026, + "learning_rate": 9.83629698963452e-06, + "loss": 0.7246, + "step": 3564 + }, + { + "epoch": 0.10926198357239181, + "grad_norm": 1.9755429748478295, + "learning_rate": 9.836171005408483e-06, + "loss": 0.6478, + "step": 3565 + }, + { + "epoch": 0.10929263209513301, + "grad_norm": 1.9774078872687606, + "learning_rate": 9.836044973530417e-06, + "loss": 0.731, + "step": 3566 + }, + { + "epoch": 0.10932328061787422, + "grad_norm": 2.031323949532182, + "learning_rate": 9.835918894001564e-06, + "loss": 0.7863, + "step": 3567 + }, + { + "epoch": 0.10935392914061542, + "grad_norm": 1.1720043004897012, + "learning_rate": 9.835792766823162e-06, + "loss": 0.53, + "step": 3568 + }, + { + "epoch": 0.10938457766335663, + "grad_norm": 2.1386905860627645, + "learning_rate": 9.835666591996458e-06, + "loss": 0.7697, + "step": 3569 + }, + { + "epoch": 0.10941522618609784, + "grad_norm": 1.6921085764265809, + "learning_rate": 9.835540369522694e-06, + "loss": 0.7507, + "step": 3570 + }, + { + "epoch": 0.10944587470883903, + "grad_norm": 1.8166177206669303, + "learning_rate": 9.835414099403113e-06, + "loss": 0.7452, + "step": 3571 + }, + { + "epoch": 0.10947652323158023, + "grad_norm": 1.778948183717474, + "learning_rate": 9.83528778163896e-06, + "loss": 0.757, + "step": 3572 + }, + { + "epoch": 0.10950717175432144, + "grad_norm": 2.0860666760823166, + "learning_rate": 9.83516141623148e-06, + "loss": 0.7755, + "step": 3573 + }, + { + "epoch": 0.10953782027706264, + "grad_norm": 0.9372621007896001, + "learning_rate": 9.835035003181917e-06, + "loss": 0.538, + "step": 3574 + }, + { + "epoch": 0.10956846879980385, + "grad_norm": 1.979362195395861, + "learning_rate": 9.834908542491517e-06, + "loss": 0.7155, + "step": 3575 + }, + { + "epoch": 0.10959911732254506, + "grad_norm": 2.043661000463172, + "learning_rate": 9.834782034161525e-06, + "loss": 0.7978, + "step": 3576 + }, + { + "epoch": 0.10962976584528626, + "grad_norm": 1.9859083654004157, + "learning_rate": 9.834655478193188e-06, + "loss": 0.7361, + "step": 3577 + }, + { + "epoch": 0.10966041436802747, + "grad_norm": 1.8207033619109483, + "learning_rate": 9.834528874587756e-06, + "loss": 0.7149, + "step": 3578 + }, + { + "epoch": 0.10969106289076866, + "grad_norm": 2.011749385708135, + "learning_rate": 9.834402223346475e-06, + "loss": 0.7601, + "step": 3579 + }, + { + "epoch": 0.10972171141350986, + "grad_norm": 2.4785689218375055, + "learning_rate": 9.834275524470588e-06, + "loss": 0.9429, + "step": 3580 + }, + { + "epoch": 0.10975235993625107, + "grad_norm": 0.9360366272553071, + "learning_rate": 9.83414877796135e-06, + "loss": 0.5183, + "step": 3581 + }, + { + "epoch": 0.10978300845899228, + "grad_norm": 1.0028370780670568, + "learning_rate": 9.834021983820007e-06, + "loss": 0.5411, + "step": 3582 + }, + { + "epoch": 0.10981365698173348, + "grad_norm": 1.977717476252743, + "learning_rate": 9.833895142047809e-06, + "loss": 0.7611, + "step": 3583 + }, + { + "epoch": 0.10984430550447469, + "grad_norm": 2.087292681066615, + "learning_rate": 9.833768252646003e-06, + "loss": 0.8325, + "step": 3584 + }, + { + "epoch": 0.1098749540272159, + "grad_norm": 2.1542026030124095, + "learning_rate": 9.833641315615844e-06, + "loss": 0.7683, + "step": 3585 + }, + { + "epoch": 0.1099056025499571, + "grad_norm": 1.8942899496213799, + "learning_rate": 9.83351433095858e-06, + "loss": 0.7394, + "step": 3586 + }, + 
{ + "epoch": 0.10993625107269829, + "grad_norm": 1.832175375363711, + "learning_rate": 9.833387298675461e-06, + "loss": 0.709, + "step": 3587 + }, + { + "epoch": 0.1099668995954395, + "grad_norm": 1.9427710674242282, + "learning_rate": 9.833260218767741e-06, + "loss": 0.7149, + "step": 3588 + }, + { + "epoch": 0.1099975481181807, + "grad_norm": 2.1449715103081743, + "learning_rate": 9.833133091236673e-06, + "loss": 0.7549, + "step": 3589 + }, + { + "epoch": 0.11002819664092191, + "grad_norm": 1.8332162224060884, + "learning_rate": 9.833005916083506e-06, + "loss": 0.7438, + "step": 3590 + }, + { + "epoch": 0.11005884516366311, + "grad_norm": 2.038573089713471, + "learning_rate": 9.832878693309495e-06, + "loss": 0.7948, + "step": 3591 + }, + { + "epoch": 0.11008949368640432, + "grad_norm": 1.9159744459901185, + "learning_rate": 9.832751422915896e-06, + "loss": 0.7178, + "step": 3592 + }, + { + "epoch": 0.11012014220914552, + "grad_norm": 2.54432596721499, + "learning_rate": 9.83262410490396e-06, + "loss": 0.8398, + "step": 3593 + }, + { + "epoch": 0.11015079073188672, + "grad_norm": 1.869249037104161, + "learning_rate": 9.832496739274942e-06, + "loss": 0.6916, + "step": 3594 + }, + { + "epoch": 0.11018143925462792, + "grad_norm": 1.8930749539278358, + "learning_rate": 9.832369326030096e-06, + "loss": 0.7048, + "step": 3595 + }, + { + "epoch": 0.11021208777736913, + "grad_norm": 1.7708018490227393, + "learning_rate": 9.83224186517068e-06, + "loss": 0.7014, + "step": 3596 + }, + { + "epoch": 0.11024273630011033, + "grad_norm": 2.0367353377156006, + "learning_rate": 9.832114356697948e-06, + "loss": 0.6915, + "step": 3597 + }, + { + "epoch": 0.11027338482285154, + "grad_norm": 1.483313490890426, + "learning_rate": 9.831986800613157e-06, + "loss": 0.5334, + "step": 3598 + }, + { + "epoch": 0.11030403334559274, + "grad_norm": 1.8954996133778115, + "learning_rate": 9.831859196917563e-06, + "loss": 0.7596, + "step": 3599 + }, + { + "epoch": 0.11033468186833395, + "grad_norm": 1.8775097338604614, + "learning_rate": 9.831731545612423e-06, + "loss": 0.6842, + "step": 3600 + }, + { + "epoch": 0.11036533039107516, + "grad_norm": 1.9169192086922093, + "learning_rate": 9.831603846698998e-06, + "loss": 0.8186, + "step": 3601 + }, + { + "epoch": 0.11039597891381635, + "grad_norm": 1.9055971017110995, + "learning_rate": 9.831476100178543e-06, + "loss": 0.7578, + "step": 3602 + }, + { + "epoch": 0.11042662743655755, + "grad_norm": 2.0982789415909306, + "learning_rate": 9.83134830605232e-06, + "loss": 0.7568, + "step": 3603 + }, + { + "epoch": 0.11045727595929876, + "grad_norm": 2.014038356047331, + "learning_rate": 9.831220464321584e-06, + "loss": 0.7176, + "step": 3604 + }, + { + "epoch": 0.11048792448203996, + "grad_norm": 1.6464943747702472, + "learning_rate": 9.831092574987596e-06, + "loss": 0.6801, + "step": 3605 + }, + { + "epoch": 0.11051857300478117, + "grad_norm": 2.0349364968965244, + "learning_rate": 9.830964638051618e-06, + "loss": 0.7256, + "step": 3606 + }, + { + "epoch": 0.11054922152752238, + "grad_norm": 1.760575420791142, + "learning_rate": 9.830836653514909e-06, + "loss": 0.7097, + "step": 3607 + }, + { + "epoch": 0.11057987005026358, + "grad_norm": 1.7560422938284508, + "learning_rate": 9.830708621378731e-06, + "loss": 0.7549, + "step": 3608 + }, + { + "epoch": 0.11061051857300479, + "grad_norm": 1.7721609518512156, + "learning_rate": 9.830580541644343e-06, + "loss": 0.8169, + "step": 3609 + }, + { + "epoch": 0.11064116709574598, + "grad_norm": 1.3103012017125695, + "learning_rate": 
9.830452414313012e-06, + "loss": 0.5525, + "step": 3610 + }, + { + "epoch": 0.11067181561848718, + "grad_norm": 1.8742880282124994, + "learning_rate": 9.830324239385996e-06, + "loss": 0.7716, + "step": 3611 + }, + { + "epoch": 0.11070246414122839, + "grad_norm": 1.8898968059670085, + "learning_rate": 9.830196016864558e-06, + "loss": 0.8571, + "step": 3612 + }, + { + "epoch": 0.1107331126639696, + "grad_norm": 2.1064977655612416, + "learning_rate": 9.830067746749964e-06, + "loss": 0.7956, + "step": 3613 + }, + { + "epoch": 0.1107637611867108, + "grad_norm": 2.079058110412958, + "learning_rate": 9.829939429043478e-06, + "loss": 0.7799, + "step": 3614 + }, + { + "epoch": 0.11079440970945201, + "grad_norm": 1.8933878608008137, + "learning_rate": 9.82981106374636e-06, + "loss": 0.7337, + "step": 3615 + }, + { + "epoch": 0.11082505823219321, + "grad_norm": 2.0624392245880907, + "learning_rate": 9.82968265085988e-06, + "loss": 0.7847, + "step": 3616 + }, + { + "epoch": 0.11085570675493442, + "grad_norm": 2.136167101671275, + "learning_rate": 9.8295541903853e-06, + "loss": 0.7771, + "step": 3617 + }, + { + "epoch": 0.11088635527767561, + "grad_norm": 2.0007030795818674, + "learning_rate": 9.829425682323889e-06, + "loss": 0.7958, + "step": 3618 + }, + { + "epoch": 0.11091700380041682, + "grad_norm": 1.709018707980955, + "learning_rate": 9.829297126676909e-06, + "loss": 0.7007, + "step": 3619 + }, + { + "epoch": 0.11094765232315802, + "grad_norm": 1.8331451832983106, + "learning_rate": 9.82916852344563e-06, + "loss": 0.694, + "step": 3620 + }, + { + "epoch": 0.11097830084589923, + "grad_norm": 1.8691667599276773, + "learning_rate": 9.829039872631317e-06, + "loss": 0.7626, + "step": 3621 + }, + { + "epoch": 0.11100894936864043, + "grad_norm": 0.9778227084024581, + "learning_rate": 9.828911174235238e-06, + "loss": 0.5135, + "step": 3622 + }, + { + "epoch": 0.11103959789138164, + "grad_norm": 1.7597029644994544, + "learning_rate": 9.828782428258663e-06, + "loss": 0.8027, + "step": 3623 + }, + { + "epoch": 0.11107024641412284, + "grad_norm": 2.154708343517572, + "learning_rate": 9.828653634702858e-06, + "loss": 0.7772, + "step": 3624 + }, + { + "epoch": 0.11110089493686404, + "grad_norm": 1.8667860623610841, + "learning_rate": 9.828524793569095e-06, + "loss": 0.7609, + "step": 3625 + }, + { + "epoch": 0.11113154345960524, + "grad_norm": 1.960683828713328, + "learning_rate": 9.82839590485864e-06, + "loss": 0.7218, + "step": 3626 + }, + { + "epoch": 0.11116219198234645, + "grad_norm": 2.0079107964434626, + "learning_rate": 9.828266968572765e-06, + "loss": 0.7938, + "step": 3627 + }, + { + "epoch": 0.11119284050508765, + "grad_norm": 2.1750011600095083, + "learning_rate": 9.828137984712741e-06, + "loss": 0.7307, + "step": 3628 + }, + { + "epoch": 0.11122348902782886, + "grad_norm": 1.8835895304504142, + "learning_rate": 9.828008953279839e-06, + "loss": 0.6675, + "step": 3629 + }, + { + "epoch": 0.11125413755057006, + "grad_norm": 1.8626846228534426, + "learning_rate": 9.827879874275328e-06, + "loss": 0.8185, + "step": 3630 + }, + { + "epoch": 0.11128478607331127, + "grad_norm": 2.052893091289707, + "learning_rate": 9.827750747700481e-06, + "loss": 0.7966, + "step": 3631 + }, + { + "epoch": 0.11131543459605248, + "grad_norm": 0.9724001401700975, + "learning_rate": 9.827621573556573e-06, + "loss": 0.5564, + "step": 3632 + }, + { + "epoch": 0.11134608311879367, + "grad_norm": 1.9551084207794032, + "learning_rate": 9.827492351844872e-06, + "loss": 0.7961, + "step": 3633 + }, + { + "epoch": 0.11137673164153487, 
+ "grad_norm": 0.8795016544577685, + "learning_rate": 9.827363082566655e-06, + "loss": 0.5119, + "step": 3634 + }, + { + "epoch": 0.11140738016427608, + "grad_norm": 1.6217675129355866, + "learning_rate": 9.827233765723193e-06, + "loss": 0.7281, + "step": 3635 + }, + { + "epoch": 0.11143802868701728, + "grad_norm": 1.915160346885111, + "learning_rate": 9.827104401315764e-06, + "loss": 0.7206, + "step": 3636 + }, + { + "epoch": 0.11146867720975849, + "grad_norm": 2.044590300673938, + "learning_rate": 9.82697498934564e-06, + "loss": 0.7061, + "step": 3637 + }, + { + "epoch": 0.1114993257324997, + "grad_norm": 1.816608680404952, + "learning_rate": 9.826845529814093e-06, + "loss": 0.7641, + "step": 3638 + }, + { + "epoch": 0.1115299742552409, + "grad_norm": 2.1140613021610513, + "learning_rate": 9.826716022722405e-06, + "loss": 0.7351, + "step": 3639 + }, + { + "epoch": 0.11156062277798211, + "grad_norm": 2.0312117560019285, + "learning_rate": 9.826586468071848e-06, + "loss": 0.8043, + "step": 3640 + }, + { + "epoch": 0.1115912713007233, + "grad_norm": 1.7284006332310986, + "learning_rate": 9.826456865863699e-06, + "loss": 0.8198, + "step": 3641 + }, + { + "epoch": 0.1116219198234645, + "grad_norm": 1.8960056377382297, + "learning_rate": 9.826327216099237e-06, + "loss": 0.7043, + "step": 3642 + }, + { + "epoch": 0.11165256834620571, + "grad_norm": 2.2154404272948605, + "learning_rate": 9.826197518779738e-06, + "loss": 0.9002, + "step": 3643 + }, + { + "epoch": 0.11168321686894692, + "grad_norm": 2.2565413968162624, + "learning_rate": 9.826067773906479e-06, + "loss": 0.8629, + "step": 3644 + }, + { + "epoch": 0.11171386539168812, + "grad_norm": 1.9586233432635087, + "learning_rate": 9.82593798148074e-06, + "loss": 0.7695, + "step": 3645 + }, + { + "epoch": 0.11174451391442933, + "grad_norm": 1.8516132437702248, + "learning_rate": 9.825808141503798e-06, + "loss": 0.7544, + "step": 3646 + }, + { + "epoch": 0.11177516243717053, + "grad_norm": 2.1112032511971357, + "learning_rate": 9.825678253976935e-06, + "loss": 0.7482, + "step": 3647 + }, + { + "epoch": 0.11180581095991174, + "grad_norm": 1.8011206600516239, + "learning_rate": 9.82554831890143e-06, + "loss": 0.7202, + "step": 3648 + }, + { + "epoch": 0.11183645948265293, + "grad_norm": 1.9344237641934832, + "learning_rate": 9.825418336278563e-06, + "loss": 0.7048, + "step": 3649 + }, + { + "epoch": 0.11186710800539414, + "grad_norm": 1.911194214022531, + "learning_rate": 9.825288306109612e-06, + "loss": 0.7204, + "step": 3650 + }, + { + "epoch": 0.11189775652813534, + "grad_norm": 2.1641381566688738, + "learning_rate": 9.825158228395863e-06, + "loss": 0.8468, + "step": 3651 + }, + { + "epoch": 0.11192840505087655, + "grad_norm": 2.1545149024733345, + "learning_rate": 9.825028103138596e-06, + "loss": 0.8022, + "step": 3652 + }, + { + "epoch": 0.11195905357361775, + "grad_norm": 2.1590752564990043, + "learning_rate": 9.82489793033909e-06, + "loss": 0.7205, + "step": 3653 + }, + { + "epoch": 0.11198970209635896, + "grad_norm": 2.112471377796669, + "learning_rate": 9.824767709998632e-06, + "loss": 0.654, + "step": 3654 + }, + { + "epoch": 0.11202035061910016, + "grad_norm": 1.9815354861618384, + "learning_rate": 9.824637442118503e-06, + "loss": 0.751, + "step": 3655 + }, + { + "epoch": 0.11205099914184136, + "grad_norm": 1.8870941582473197, + "learning_rate": 9.824507126699986e-06, + "loss": 0.6825, + "step": 3656 + }, + { + "epoch": 0.11208164766458256, + "grad_norm": 1.9613357570402972, + "learning_rate": 9.824376763744367e-06, + "loss": 0.7459, + 
"step": 3657 + }, + { + "epoch": 0.11211229618732377, + "grad_norm": 2.1199409105638205, + "learning_rate": 9.824246353252928e-06, + "loss": 0.7806, + "step": 3658 + }, + { + "epoch": 0.11214294471006497, + "grad_norm": 2.051246385117295, + "learning_rate": 9.824115895226956e-06, + "loss": 0.717, + "step": 3659 + }, + { + "epoch": 0.11217359323280618, + "grad_norm": 1.3414303423663991, + "learning_rate": 9.823985389667736e-06, + "loss": 0.5321, + "step": 3660 + }, + { + "epoch": 0.11220424175554738, + "grad_norm": 2.192841684402074, + "learning_rate": 9.823854836576554e-06, + "loss": 0.7457, + "step": 3661 + }, + { + "epoch": 0.11223489027828859, + "grad_norm": 1.8276190481338674, + "learning_rate": 9.823724235954696e-06, + "loss": 0.7996, + "step": 3662 + }, + { + "epoch": 0.1122655388010298, + "grad_norm": 1.8278847365462105, + "learning_rate": 9.823593587803448e-06, + "loss": 0.7896, + "step": 3663 + }, + { + "epoch": 0.11229618732377099, + "grad_norm": 1.9041858129438338, + "learning_rate": 9.823462892124098e-06, + "loss": 0.8189, + "step": 3664 + }, + { + "epoch": 0.1123268358465122, + "grad_norm": 1.8139264946369593, + "learning_rate": 9.823332148917937e-06, + "loss": 0.7533, + "step": 3665 + }, + { + "epoch": 0.1123574843692534, + "grad_norm": 1.7974066669788964, + "learning_rate": 9.823201358186248e-06, + "loss": 0.731, + "step": 3666 + }, + { + "epoch": 0.1123881328919946, + "grad_norm": 1.815726735353267, + "learning_rate": 9.823070519930321e-06, + "loss": 0.7874, + "step": 3667 + }, + { + "epoch": 0.11241878141473581, + "grad_norm": 1.869688384544121, + "learning_rate": 9.822939634151447e-06, + "loss": 0.7733, + "step": 3668 + }, + { + "epoch": 0.11244942993747702, + "grad_norm": 2.1554937976461512, + "learning_rate": 9.822808700850914e-06, + "loss": 0.711, + "step": 3669 + }, + { + "epoch": 0.11248007846021822, + "grad_norm": 1.9903980003114987, + "learning_rate": 9.822677720030015e-06, + "loss": 0.6575, + "step": 3670 + }, + { + "epoch": 0.11251072698295943, + "grad_norm": 1.4092377661790751, + "learning_rate": 9.822546691690038e-06, + "loss": 0.5153, + "step": 3671 + }, + { + "epoch": 0.11254137550570062, + "grad_norm": 1.719259732230371, + "learning_rate": 9.822415615832272e-06, + "loss": 0.709, + "step": 3672 + }, + { + "epoch": 0.11257202402844182, + "grad_norm": 2.480804606357399, + "learning_rate": 9.822284492458014e-06, + "loss": 0.7786, + "step": 3673 + }, + { + "epoch": 0.11260267255118303, + "grad_norm": 1.8280733816862578, + "learning_rate": 9.822153321568552e-06, + "loss": 0.7661, + "step": 3674 + }, + { + "epoch": 0.11263332107392424, + "grad_norm": 2.0843992877176687, + "learning_rate": 9.822022103165178e-06, + "loss": 0.8043, + "step": 3675 + }, + { + "epoch": 0.11266396959666544, + "grad_norm": 1.984177848762459, + "learning_rate": 9.821890837249189e-06, + "loss": 0.7992, + "step": 3676 + }, + { + "epoch": 0.11269461811940665, + "grad_norm": 1.742127522115925, + "learning_rate": 9.821759523821874e-06, + "loss": 0.7141, + "step": 3677 + }, + { + "epoch": 0.11272526664214785, + "grad_norm": 1.8173532054359318, + "learning_rate": 9.821628162884532e-06, + "loss": 0.6949, + "step": 3678 + }, + { + "epoch": 0.11275591516488906, + "grad_norm": 1.9600088784485488, + "learning_rate": 9.82149675443845e-06, + "loss": 0.7929, + "step": 3679 + }, + { + "epoch": 0.11278656368763025, + "grad_norm": 1.0886634599276426, + "learning_rate": 9.821365298484929e-06, + "loss": 0.5142, + "step": 3680 + }, + { + "epoch": 0.11281721221037146, + "grad_norm": 1.7591578757623698, + 
"learning_rate": 9.82123379502526e-06, + "loss": 0.7572, + "step": 3681 + }, + { + "epoch": 0.11284786073311266, + "grad_norm": 1.9666148295853225, + "learning_rate": 9.821102244060743e-06, + "loss": 0.7821, + "step": 3682 + }, + { + "epoch": 0.11287850925585387, + "grad_norm": 1.7951463351374148, + "learning_rate": 9.82097064559267e-06, + "loss": 0.6832, + "step": 3683 + }, + { + "epoch": 0.11290915777859507, + "grad_norm": 1.8205767227500531, + "learning_rate": 9.820838999622341e-06, + "loss": 0.7392, + "step": 3684 + }, + { + "epoch": 0.11293980630133628, + "grad_norm": 2.114192087728003, + "learning_rate": 9.820707306151055e-06, + "loss": 0.8331, + "step": 3685 + }, + { + "epoch": 0.11297045482407748, + "grad_norm": 1.9248573833925755, + "learning_rate": 9.820575565180102e-06, + "loss": 0.7867, + "step": 3686 + }, + { + "epoch": 0.11300110334681868, + "grad_norm": 2.1583512611192397, + "learning_rate": 9.820443776710786e-06, + "loss": 0.8944, + "step": 3687 + }, + { + "epoch": 0.11303175186955988, + "grad_norm": 1.9174928736695869, + "learning_rate": 9.820311940744405e-06, + "loss": 0.7734, + "step": 3688 + }, + { + "epoch": 0.11306240039230109, + "grad_norm": 1.817762145989838, + "learning_rate": 9.820180057282256e-06, + "loss": 0.7108, + "step": 3689 + }, + { + "epoch": 0.1130930489150423, + "grad_norm": 1.9406802337759628, + "learning_rate": 9.82004812632564e-06, + "loss": 0.7428, + "step": 3690 + }, + { + "epoch": 0.1131236974377835, + "grad_norm": 1.788138649746012, + "learning_rate": 9.819916147875857e-06, + "loss": 0.8083, + "step": 3691 + }, + { + "epoch": 0.1131543459605247, + "grad_norm": 2.0589716091128585, + "learning_rate": 9.819784121934206e-06, + "loss": 0.9205, + "step": 3692 + }, + { + "epoch": 0.11318499448326591, + "grad_norm": 1.8350279838793235, + "learning_rate": 9.819652048501988e-06, + "loss": 0.7385, + "step": 3693 + }, + { + "epoch": 0.11321564300600712, + "grad_norm": 1.9255857796479403, + "learning_rate": 9.819519927580507e-06, + "loss": 0.7535, + "step": 3694 + }, + { + "epoch": 0.11324629152874831, + "grad_norm": 1.8501215524127381, + "learning_rate": 9.819387759171062e-06, + "loss": 0.7216, + "step": 3695 + }, + { + "epoch": 0.11327694005148951, + "grad_norm": 1.9380558440644036, + "learning_rate": 9.819255543274957e-06, + "loss": 0.7245, + "step": 3696 + }, + { + "epoch": 0.11330758857423072, + "grad_norm": 0.9840159921809034, + "learning_rate": 9.819123279893494e-06, + "loss": 0.5197, + "step": 3697 + }, + { + "epoch": 0.11333823709697192, + "grad_norm": 1.880425415033134, + "learning_rate": 9.818990969027977e-06, + "loss": 0.7645, + "step": 3698 + }, + { + "epoch": 0.11336888561971313, + "grad_norm": 1.913339117554567, + "learning_rate": 9.818858610679706e-06, + "loss": 0.7745, + "step": 3699 + }, + { + "epoch": 0.11339953414245434, + "grad_norm": 1.8170240230736314, + "learning_rate": 9.818726204849991e-06, + "loss": 0.8591, + "step": 3700 + }, + { + "epoch": 0.11343018266519554, + "grad_norm": 0.8567833406513337, + "learning_rate": 9.818593751540134e-06, + "loss": 0.5212, + "step": 3701 + }, + { + "epoch": 0.11346083118793675, + "grad_norm": 0.8565191011254415, + "learning_rate": 9.818461250751438e-06, + "loss": 0.5223, + "step": 3702 + }, + { + "epoch": 0.11349147971067794, + "grad_norm": 2.000666810381929, + "learning_rate": 9.818328702485212e-06, + "loss": 0.8643, + "step": 3703 + }, + { + "epoch": 0.11352212823341915, + "grad_norm": 0.8081151990338308, + "learning_rate": 9.81819610674276e-06, + "loss": 0.5228, + "step": 3704 + }, + { + "epoch": 
0.11355277675616035, + "grad_norm": 1.9216145936053692, + "learning_rate": 9.81806346352539e-06, + "loss": 0.8421, + "step": 3705 + }, + { + "epoch": 0.11358342527890156, + "grad_norm": 0.879650448369826, + "learning_rate": 9.817930772834406e-06, + "loss": 0.5082, + "step": 3706 + }, + { + "epoch": 0.11361407380164276, + "grad_norm": 1.8119138964610508, + "learning_rate": 9.817798034671117e-06, + "loss": 0.8231, + "step": 3707 + }, + { + "epoch": 0.11364472232438397, + "grad_norm": 2.071512758920977, + "learning_rate": 9.817665249036833e-06, + "loss": 0.8481, + "step": 3708 + }, + { + "epoch": 0.11367537084712517, + "grad_norm": 1.9940261766534664, + "learning_rate": 9.81753241593286e-06, + "loss": 0.6298, + "step": 3709 + }, + { + "epoch": 0.11370601936986638, + "grad_norm": 0.8601261856052457, + "learning_rate": 9.817399535360507e-06, + "loss": 0.4949, + "step": 3710 + }, + { + "epoch": 0.11373666789260757, + "grad_norm": 1.933719079983175, + "learning_rate": 9.817266607321085e-06, + "loss": 0.8596, + "step": 3711 + }, + { + "epoch": 0.11376731641534878, + "grad_norm": 0.9084155245185468, + "learning_rate": 9.817133631815902e-06, + "loss": 0.5194, + "step": 3712 + }, + { + "epoch": 0.11379796493808998, + "grad_norm": 0.8884352804692812, + "learning_rate": 9.817000608846269e-06, + "loss": 0.5165, + "step": 3713 + }, + { + "epoch": 0.11382861346083119, + "grad_norm": 0.865252513734952, + "learning_rate": 9.816867538413497e-06, + "loss": 0.4977, + "step": 3714 + }, + { + "epoch": 0.1138592619835724, + "grad_norm": 1.8619902862946007, + "learning_rate": 9.816734420518895e-06, + "loss": 0.7932, + "step": 3715 + }, + { + "epoch": 0.1138899105063136, + "grad_norm": 1.944463813498919, + "learning_rate": 9.816601255163777e-06, + "loss": 0.8555, + "step": 3716 + }, + { + "epoch": 0.1139205590290548, + "grad_norm": 1.9428689899393619, + "learning_rate": 9.816468042349456e-06, + "loss": 0.8377, + "step": 3717 + }, + { + "epoch": 0.113951207551796, + "grad_norm": 1.8402962779769099, + "learning_rate": 9.81633478207724e-06, + "loss": 0.7631, + "step": 3718 + }, + { + "epoch": 0.1139818560745372, + "grad_norm": 1.922598435594532, + "learning_rate": 9.816201474348448e-06, + "loss": 0.7255, + "step": 3719 + }, + { + "epoch": 0.11401250459727841, + "grad_norm": 1.8693601145226908, + "learning_rate": 9.81606811916439e-06, + "loss": 0.7199, + "step": 3720 + }, + { + "epoch": 0.11404315312001961, + "grad_norm": 1.8417455770836921, + "learning_rate": 9.815934716526378e-06, + "loss": 0.7353, + "step": 3721 + }, + { + "epoch": 0.11407380164276082, + "grad_norm": 1.8157553090896585, + "learning_rate": 9.815801266435731e-06, + "loss": 0.7902, + "step": 3722 + }, + { + "epoch": 0.11410445016550202, + "grad_norm": 1.9438941010465904, + "learning_rate": 9.815667768893763e-06, + "loss": 0.841, + "step": 3723 + }, + { + "epoch": 0.11413509868824323, + "grad_norm": 1.8760624605548688, + "learning_rate": 9.815534223901788e-06, + "loss": 0.7572, + "step": 3724 + }, + { + "epoch": 0.11416574721098444, + "grad_norm": 2.0435507872588046, + "learning_rate": 9.81540063146112e-06, + "loss": 0.7584, + "step": 3725 + }, + { + "epoch": 0.11419639573372563, + "grad_norm": 2.059346956802741, + "learning_rate": 9.81526699157308e-06, + "loss": 0.7211, + "step": 3726 + }, + { + "epoch": 0.11422704425646683, + "grad_norm": 1.7842103273716676, + "learning_rate": 9.815133304238982e-06, + "loss": 0.6085, + "step": 3727 + }, + { + "epoch": 0.11425769277920804, + "grad_norm": 2.251198179154489, + "learning_rate": 9.814999569460143e-06, + 
"loss": 0.7462, + "step": 3728 + }, + { + "epoch": 0.11428834130194925, + "grad_norm": 2.0144196973537856, + "learning_rate": 9.81486578723788e-06, + "loss": 0.7704, + "step": 3729 + }, + { + "epoch": 0.11431898982469045, + "grad_norm": 1.651485881954418, + "learning_rate": 9.814731957573514e-06, + "loss": 0.706, + "step": 3730 + }, + { + "epoch": 0.11434963834743166, + "grad_norm": 1.8257664089955838, + "learning_rate": 9.81459808046836e-06, + "loss": 0.76, + "step": 3731 + }, + { + "epoch": 0.11438028687017286, + "grad_norm": 1.7915027122232876, + "learning_rate": 9.814464155923741e-06, + "loss": 0.5401, + "step": 3732 + }, + { + "epoch": 0.11441093539291407, + "grad_norm": 1.7870722227859082, + "learning_rate": 9.814330183940976e-06, + "loss": 0.6989, + "step": 3733 + }, + { + "epoch": 0.11444158391565526, + "grad_norm": 1.9616024809658892, + "learning_rate": 9.814196164521384e-06, + "loss": 0.758, + "step": 3734 + }, + { + "epoch": 0.11447223243839647, + "grad_norm": 1.9749270483821875, + "learning_rate": 9.814062097666284e-06, + "loss": 0.7167, + "step": 3735 + }, + { + "epoch": 0.11450288096113767, + "grad_norm": 0.9251884030366325, + "learning_rate": 9.813927983376998e-06, + "loss": 0.495, + "step": 3736 + }, + { + "epoch": 0.11453352948387888, + "grad_norm": 2.045989203310227, + "learning_rate": 9.813793821654849e-06, + "loss": 0.8896, + "step": 3737 + }, + { + "epoch": 0.11456417800662008, + "grad_norm": 1.8744416980503567, + "learning_rate": 9.813659612501156e-06, + "loss": 0.7972, + "step": 3738 + }, + { + "epoch": 0.11459482652936129, + "grad_norm": 1.9438285383634692, + "learning_rate": 9.813525355917244e-06, + "loss": 0.6837, + "step": 3739 + }, + { + "epoch": 0.1146254750521025, + "grad_norm": 2.102968744781413, + "learning_rate": 9.813391051904436e-06, + "loss": 0.8419, + "step": 3740 + }, + { + "epoch": 0.1146561235748437, + "grad_norm": 1.9770826450384813, + "learning_rate": 9.813256700464054e-06, + "loss": 0.7025, + "step": 3741 + }, + { + "epoch": 0.11468677209758489, + "grad_norm": 2.029302891138796, + "learning_rate": 9.813122301597422e-06, + "loss": 0.8101, + "step": 3742 + }, + { + "epoch": 0.1147174206203261, + "grad_norm": 2.1614026113364573, + "learning_rate": 9.812987855305864e-06, + "loss": 0.8253, + "step": 3743 + }, + { + "epoch": 0.1147480691430673, + "grad_norm": 2.139651627493704, + "learning_rate": 9.812853361590707e-06, + "loss": 0.7742, + "step": 3744 + }, + { + "epoch": 0.11477871766580851, + "grad_norm": 1.8921191148405903, + "learning_rate": 9.812718820453273e-06, + "loss": 0.7066, + "step": 3745 + }, + { + "epoch": 0.11480936618854971, + "grad_norm": 2.314070941776921, + "learning_rate": 9.81258423189489e-06, + "loss": 0.7593, + "step": 3746 + }, + { + "epoch": 0.11484001471129092, + "grad_norm": 1.9965528594425601, + "learning_rate": 9.812449595916883e-06, + "loss": 0.7279, + "step": 3747 + }, + { + "epoch": 0.11487066323403212, + "grad_norm": 1.8635214017257726, + "learning_rate": 9.812314912520577e-06, + "loss": 0.7406, + "step": 3748 + }, + { + "epoch": 0.11490131175677332, + "grad_norm": 2.030672583448243, + "learning_rate": 9.812180181707303e-06, + "loss": 0.8597, + "step": 3749 + }, + { + "epoch": 0.11493196027951452, + "grad_norm": 2.011383315537991, + "learning_rate": 9.812045403478385e-06, + "loss": 0.7051, + "step": 3750 + }, + { + "epoch": 0.11496260880225573, + "grad_norm": 1.9793466837128657, + "learning_rate": 9.811910577835154e-06, + "loss": 0.7621, + "step": 3751 + }, + { + "epoch": 0.11499325732499693, + "grad_norm": 
1.493412150033542, + "learning_rate": 9.811775704778934e-06, + "loss": 0.5341, + "step": 3752 + }, + { + "epoch": 0.11502390584773814, + "grad_norm": 1.9139318774933343, + "learning_rate": 9.81164078431106e-06, + "loss": 0.6506, + "step": 3753 + }, + { + "epoch": 0.11505455437047934, + "grad_norm": 1.9863912086282778, + "learning_rate": 9.811505816432857e-06, + "loss": 0.8179, + "step": 3754 + }, + { + "epoch": 0.11508520289322055, + "grad_norm": 1.997848009196362, + "learning_rate": 9.811370801145656e-06, + "loss": 0.7101, + "step": 3755 + }, + { + "epoch": 0.11511585141596176, + "grad_norm": 2.0584525656466837, + "learning_rate": 9.811235738450787e-06, + "loss": 0.7696, + "step": 3756 + }, + { + "epoch": 0.11514649993870295, + "grad_norm": 1.773322252394143, + "learning_rate": 9.811100628349582e-06, + "loss": 0.8263, + "step": 3757 + }, + { + "epoch": 0.11517714846144415, + "grad_norm": 1.8728062594337926, + "learning_rate": 9.810965470843373e-06, + "loss": 0.7314, + "step": 3758 + }, + { + "epoch": 0.11520779698418536, + "grad_norm": 1.32820663509091, + "learning_rate": 9.810830265933488e-06, + "loss": 0.5359, + "step": 3759 + }, + { + "epoch": 0.11523844550692657, + "grad_norm": 1.8581243123426174, + "learning_rate": 9.810695013621261e-06, + "loss": 0.779, + "step": 3760 + }, + { + "epoch": 0.11526909402966777, + "grad_norm": 1.8243345010374756, + "learning_rate": 9.810559713908027e-06, + "loss": 0.7086, + "step": 3761 + }, + { + "epoch": 0.11529974255240898, + "grad_norm": 2.3416321312312527, + "learning_rate": 9.810424366795116e-06, + "loss": 0.8637, + "step": 3762 + }, + { + "epoch": 0.11533039107515018, + "grad_norm": 2.048262502924628, + "learning_rate": 9.810288972283864e-06, + "loss": 0.7943, + "step": 3763 + }, + { + "epoch": 0.11536103959789139, + "grad_norm": 1.7379995587093897, + "learning_rate": 9.810153530375604e-06, + "loss": 0.7623, + "step": 3764 + }, + { + "epoch": 0.11539168812063258, + "grad_norm": 1.953494596153021, + "learning_rate": 9.810018041071669e-06, + "loss": 0.7846, + "step": 3765 + }, + { + "epoch": 0.11542233664337379, + "grad_norm": 2.063468713466126, + "learning_rate": 9.809882504373397e-06, + "loss": 0.7375, + "step": 3766 + }, + { + "epoch": 0.11545298516611499, + "grad_norm": 0.8816610062944062, + "learning_rate": 9.809746920282121e-06, + "loss": 0.518, + "step": 3767 + }, + { + "epoch": 0.1154836336888562, + "grad_norm": 1.7150933644056758, + "learning_rate": 9.809611288799178e-06, + "loss": 0.7187, + "step": 3768 + }, + { + "epoch": 0.1155142822115974, + "grad_norm": 1.8360145378530874, + "learning_rate": 9.809475609925908e-06, + "loss": 0.7365, + "step": 3769 + }, + { + "epoch": 0.11554493073433861, + "grad_norm": 0.8618860850625815, + "learning_rate": 9.80933988366364e-06, + "loss": 0.5252, + "step": 3770 + }, + { + "epoch": 0.11557557925707981, + "grad_norm": 2.0319106708734966, + "learning_rate": 9.809204110013717e-06, + "loss": 0.7926, + "step": 3771 + }, + { + "epoch": 0.11560622777982102, + "grad_norm": 1.9912818646911232, + "learning_rate": 9.809068288977475e-06, + "loss": 0.7993, + "step": 3772 + }, + { + "epoch": 0.11563687630256221, + "grad_norm": 1.9666472179242462, + "learning_rate": 9.808932420556252e-06, + "loss": 0.7175, + "step": 3773 + }, + { + "epoch": 0.11566752482530342, + "grad_norm": 1.9854920634389124, + "learning_rate": 9.80879650475139e-06, + "loss": 0.783, + "step": 3774 + }, + { + "epoch": 0.11569817334804462, + "grad_norm": 0.8523527736131906, + "learning_rate": 9.808660541564224e-06, + "loss": 0.5182, + "step": 3775 + 
}, + { + "epoch": 0.11572882187078583, + "grad_norm": 0.8731763141364749, + "learning_rate": 9.808524530996095e-06, + "loss": 0.518, + "step": 3776 + }, + { + "epoch": 0.11575947039352703, + "grad_norm": 0.8330208000299373, + "learning_rate": 9.808388473048343e-06, + "loss": 0.5285, + "step": 3777 + }, + { + "epoch": 0.11579011891626824, + "grad_norm": 1.7859791218206003, + "learning_rate": 9.808252367722311e-06, + "loss": 0.7358, + "step": 3778 + }, + { + "epoch": 0.11582076743900944, + "grad_norm": 2.0312958875741276, + "learning_rate": 9.808116215019336e-06, + "loss": 0.7373, + "step": 3779 + }, + { + "epoch": 0.11585141596175064, + "grad_norm": 0.8162398413375672, + "learning_rate": 9.807980014940764e-06, + "loss": 0.5001, + "step": 3780 + }, + { + "epoch": 0.11588206448449184, + "grad_norm": 1.7237605408552374, + "learning_rate": 9.807843767487933e-06, + "loss": 0.7605, + "step": 3781 + }, + { + "epoch": 0.11591271300723305, + "grad_norm": 0.8315187878922815, + "learning_rate": 9.807707472662188e-06, + "loss": 0.5035, + "step": 3782 + }, + { + "epoch": 0.11594336152997425, + "grad_norm": 2.273306833481674, + "learning_rate": 9.80757113046487e-06, + "loss": 0.7759, + "step": 3783 + }, + { + "epoch": 0.11597401005271546, + "grad_norm": 0.8340881117833319, + "learning_rate": 9.807434740897325e-06, + "loss": 0.4976, + "step": 3784 + }, + { + "epoch": 0.11600465857545667, + "grad_norm": 2.0952601039063574, + "learning_rate": 9.807298303960895e-06, + "loss": 0.7387, + "step": 3785 + }, + { + "epoch": 0.11603530709819787, + "grad_norm": 1.7711522603893561, + "learning_rate": 9.807161819656925e-06, + "loss": 0.7513, + "step": 3786 + }, + { + "epoch": 0.11606595562093908, + "grad_norm": 1.865472840229448, + "learning_rate": 9.80702528798676e-06, + "loss": 0.7372, + "step": 3787 + }, + { + "epoch": 0.11609660414368027, + "grad_norm": 1.882568214122031, + "learning_rate": 9.806888708951743e-06, + "loss": 0.8177, + "step": 3788 + }, + { + "epoch": 0.11612725266642147, + "grad_norm": 1.9226433802088447, + "learning_rate": 9.806752082553223e-06, + "loss": 0.7137, + "step": 3789 + }, + { + "epoch": 0.11615790118916268, + "grad_norm": 1.8097671400531072, + "learning_rate": 9.806615408792545e-06, + "loss": 0.7255, + "step": 3790 + }, + { + "epoch": 0.11618854971190389, + "grad_norm": 1.9472058455945491, + "learning_rate": 9.806478687671055e-06, + "loss": 0.731, + "step": 3791 + }, + { + "epoch": 0.11621919823464509, + "grad_norm": 2.0249911227069077, + "learning_rate": 9.806341919190102e-06, + "loss": 0.7333, + "step": 3792 + }, + { + "epoch": 0.1162498467573863, + "grad_norm": 1.8686813229713204, + "learning_rate": 9.806205103351031e-06, + "loss": 0.7942, + "step": 3793 + }, + { + "epoch": 0.1162804952801275, + "grad_norm": 2.0832069775403554, + "learning_rate": 9.806068240155193e-06, + "loss": 0.7399, + "step": 3794 + }, + { + "epoch": 0.11631114380286871, + "grad_norm": 0.9537600509098805, + "learning_rate": 9.805931329603932e-06, + "loss": 0.5276, + "step": 3795 + }, + { + "epoch": 0.1163417923256099, + "grad_norm": 2.0412429273362345, + "learning_rate": 9.805794371698603e-06, + "loss": 0.8054, + "step": 3796 + }, + { + "epoch": 0.1163724408483511, + "grad_norm": 1.9580210209520539, + "learning_rate": 9.80565736644055e-06, + "loss": 0.8718, + "step": 3797 + }, + { + "epoch": 0.11640308937109231, + "grad_norm": 1.6703957321441234, + "learning_rate": 9.805520313831127e-06, + "loss": 0.7424, + "step": 3798 + }, + { + "epoch": 0.11643373789383352, + "grad_norm": 2.3458929987640658, + "learning_rate": 
9.805383213871683e-06, + "loss": 0.8638, + "step": 3799 + }, + { + "epoch": 0.11646438641657472, + "grad_norm": 0.922516920051, + "learning_rate": 9.805246066563569e-06, + "loss": 0.498, + "step": 3800 + }, + { + "epoch": 0.11649503493931593, + "grad_norm": 1.8654934826175822, + "learning_rate": 9.805108871908134e-06, + "loss": 0.76, + "step": 3801 + }, + { + "epoch": 0.11652568346205713, + "grad_norm": 1.7412811384995615, + "learning_rate": 9.804971629906733e-06, + "loss": 0.7254, + "step": 3802 + }, + { + "epoch": 0.11655633198479834, + "grad_norm": 1.9143158328076915, + "learning_rate": 9.804834340560717e-06, + "loss": 0.7947, + "step": 3803 + }, + { + "epoch": 0.11658698050753953, + "grad_norm": 0.890519580348268, + "learning_rate": 9.80469700387144e-06, + "loss": 0.5102, + "step": 3804 + }, + { + "epoch": 0.11661762903028074, + "grad_norm": 1.8294743357026237, + "learning_rate": 9.804559619840253e-06, + "loss": 0.7568, + "step": 3805 + }, + { + "epoch": 0.11664827755302194, + "grad_norm": 1.895750708321489, + "learning_rate": 9.80442218846851e-06, + "loss": 0.7292, + "step": 3806 + }, + { + "epoch": 0.11667892607576315, + "grad_norm": 1.9443311618269812, + "learning_rate": 9.804284709757567e-06, + "loss": 0.7198, + "step": 3807 + }, + { + "epoch": 0.11670957459850435, + "grad_norm": 0.8735579924609916, + "learning_rate": 9.804147183708776e-06, + "loss": 0.5038, + "step": 3808 + }, + { + "epoch": 0.11674022312124556, + "grad_norm": 2.3643450004514808, + "learning_rate": 9.804009610323496e-06, + "loss": 0.7444, + "step": 3809 + }, + { + "epoch": 0.11677087164398676, + "grad_norm": 2.054688829290974, + "learning_rate": 9.803871989603078e-06, + "loss": 0.7905, + "step": 3810 + }, + { + "epoch": 0.11680152016672796, + "grad_norm": 1.9089664503880681, + "learning_rate": 9.803734321548883e-06, + "loss": 0.767, + "step": 3811 + }, + { + "epoch": 0.11683216868946916, + "grad_norm": 0.8612547700048758, + "learning_rate": 9.803596606162262e-06, + "loss": 0.5052, + "step": 3812 + }, + { + "epoch": 0.11686281721221037, + "grad_norm": 1.7732023066400475, + "learning_rate": 9.803458843444576e-06, + "loss": 0.7597, + "step": 3813 + }, + { + "epoch": 0.11689346573495157, + "grad_norm": 1.9794736667229902, + "learning_rate": 9.80332103339718e-06, + "loss": 0.7026, + "step": 3814 + }, + { + "epoch": 0.11692411425769278, + "grad_norm": 1.681955115594938, + "learning_rate": 9.803183176021434e-06, + "loss": 0.6358, + "step": 3815 + }, + { + "epoch": 0.11695476278043399, + "grad_norm": 1.8878219249151842, + "learning_rate": 9.803045271318694e-06, + "loss": 0.7467, + "step": 3816 + }, + { + "epoch": 0.11698541130317519, + "grad_norm": 1.8282352615508428, + "learning_rate": 9.80290731929032e-06, + "loss": 0.7812, + "step": 3817 + }, + { + "epoch": 0.1170160598259164, + "grad_norm": 1.9063746072194345, + "learning_rate": 9.802769319937672e-06, + "loss": 0.8279, + "step": 3818 + }, + { + "epoch": 0.11704670834865759, + "grad_norm": 1.9460180288462554, + "learning_rate": 9.802631273262109e-06, + "loss": 0.7061, + "step": 3819 + }, + { + "epoch": 0.1170773568713988, + "grad_norm": 2.050871825080161, + "learning_rate": 9.802493179264991e-06, + "loss": 0.7774, + "step": 3820 + }, + { + "epoch": 0.11710800539414, + "grad_norm": 2.098026438958997, + "learning_rate": 9.802355037947679e-06, + "loss": 0.7871, + "step": 3821 + }, + { + "epoch": 0.1171386539168812, + "grad_norm": 1.7236889331188405, + "learning_rate": 9.802216849311535e-06, + "loss": 0.7029, + "step": 3822 + }, + { + "epoch": 0.11716930243962241, + 
"grad_norm": 1.8235977196934763, + "learning_rate": 9.802078613357916e-06, + "loss": 0.7234, + "step": 3823 + }, + { + "epoch": 0.11719995096236362, + "grad_norm": 0.9344633451569525, + "learning_rate": 9.801940330088192e-06, + "loss": 0.4983, + "step": 3824 + }, + { + "epoch": 0.11723059948510482, + "grad_norm": 1.8129620263087216, + "learning_rate": 9.801801999503719e-06, + "loss": 0.7712, + "step": 3825 + }, + { + "epoch": 0.11726124800784603, + "grad_norm": 1.6752370148382356, + "learning_rate": 9.801663621605864e-06, + "loss": 0.7097, + "step": 3826 + }, + { + "epoch": 0.11729189653058722, + "grad_norm": 1.8196327921362825, + "learning_rate": 9.801525196395987e-06, + "loss": 0.8087, + "step": 3827 + }, + { + "epoch": 0.11732254505332843, + "grad_norm": 1.8284914631764275, + "learning_rate": 9.801386723875455e-06, + "loss": 0.9023, + "step": 3828 + }, + { + "epoch": 0.11735319357606963, + "grad_norm": 2.1867634391354205, + "learning_rate": 9.80124820404563e-06, + "loss": 0.7865, + "step": 3829 + }, + { + "epoch": 0.11738384209881084, + "grad_norm": 0.9650604740967921, + "learning_rate": 9.801109636907881e-06, + "loss": 0.5354, + "step": 3830 + }, + { + "epoch": 0.11741449062155204, + "grad_norm": 2.5732326599298028, + "learning_rate": 9.800971022463568e-06, + "loss": 0.7415, + "step": 3831 + }, + { + "epoch": 0.11744513914429325, + "grad_norm": 1.7308670682487317, + "learning_rate": 9.800832360714058e-06, + "loss": 0.6827, + "step": 3832 + }, + { + "epoch": 0.11747578766703445, + "grad_norm": 0.9919615888649522, + "learning_rate": 9.80069365166072e-06, + "loss": 0.5102, + "step": 3833 + }, + { + "epoch": 0.11750643618977566, + "grad_norm": 1.858996495433463, + "learning_rate": 9.80055489530492e-06, + "loss": 0.8116, + "step": 3834 + }, + { + "epoch": 0.11753708471251685, + "grad_norm": 1.8106149161318876, + "learning_rate": 9.800416091648022e-06, + "loss": 0.7422, + "step": 3835 + }, + { + "epoch": 0.11756773323525806, + "grad_norm": 0.8536540649526568, + "learning_rate": 9.800277240691399e-06, + "loss": 0.5262, + "step": 3836 + }, + { + "epoch": 0.11759838175799926, + "grad_norm": 0.8354754311469161, + "learning_rate": 9.800138342436413e-06, + "loss": 0.5161, + "step": 3837 + }, + { + "epoch": 0.11762903028074047, + "grad_norm": 0.8398047724552523, + "learning_rate": 9.799999396884436e-06, + "loss": 0.5091, + "step": 3838 + }, + { + "epoch": 0.11765967880348167, + "grad_norm": 2.0040459274390434, + "learning_rate": 9.799860404036838e-06, + "loss": 0.795, + "step": 3839 + }, + { + "epoch": 0.11769032732622288, + "grad_norm": 0.8181714170927724, + "learning_rate": 9.799721363894988e-06, + "loss": 0.5138, + "step": 3840 + }, + { + "epoch": 0.11772097584896409, + "grad_norm": 1.668154646623028, + "learning_rate": 9.799582276460254e-06, + "loss": 0.7013, + "step": 3841 + }, + { + "epoch": 0.11775162437170529, + "grad_norm": 2.616748773827447, + "learning_rate": 9.799443141734009e-06, + "loss": 0.8319, + "step": 3842 + }, + { + "epoch": 0.11778227289444648, + "grad_norm": 2.393471699152147, + "learning_rate": 9.79930395971762e-06, + "loss": 0.763, + "step": 3843 + }, + { + "epoch": 0.11781292141718769, + "grad_norm": 1.911711226195945, + "learning_rate": 9.799164730412464e-06, + "loss": 0.7562, + "step": 3844 + }, + { + "epoch": 0.1178435699399289, + "grad_norm": 1.823363119207298, + "learning_rate": 9.79902545381991e-06, + "loss": 0.8212, + "step": 3845 + }, + { + "epoch": 0.1178742184626701, + "grad_norm": 1.8384187692622416, + "learning_rate": 9.79888612994133e-06, + "loss": 0.6663, + 
"step": 3846 + }, + { + "epoch": 0.1179048669854113, + "grad_norm": 1.9752577530162125, + "learning_rate": 9.798746758778097e-06, + "loss": 0.8846, + "step": 3847 + }, + { + "epoch": 0.11793551550815251, + "grad_norm": 2.0963918545102698, + "learning_rate": 9.798607340331583e-06, + "loss": 0.7716, + "step": 3848 + }, + { + "epoch": 0.11796616403089372, + "grad_norm": 1.0978041333833677, + "learning_rate": 9.798467874603164e-06, + "loss": 0.5165, + "step": 3849 + }, + { + "epoch": 0.11799681255363491, + "grad_norm": 0.9919531106331221, + "learning_rate": 9.798328361594214e-06, + "loss": 0.5059, + "step": 3850 + }, + { + "epoch": 0.11802746107637611, + "grad_norm": 1.9741670151255237, + "learning_rate": 9.798188801306105e-06, + "loss": 0.7231, + "step": 3851 + }, + { + "epoch": 0.11805810959911732, + "grad_norm": 1.9145308837909456, + "learning_rate": 9.798049193740215e-06, + "loss": 0.738, + "step": 3852 + }, + { + "epoch": 0.11808875812185853, + "grad_norm": 1.9353698742177252, + "learning_rate": 9.79790953889792e-06, + "loss": 0.8026, + "step": 3853 + }, + { + "epoch": 0.11811940664459973, + "grad_norm": 1.7836709811693507, + "learning_rate": 9.797769836780594e-06, + "loss": 0.7955, + "step": 3854 + }, + { + "epoch": 0.11815005516734094, + "grad_norm": 1.8518598808912976, + "learning_rate": 9.797630087389614e-06, + "loss": 0.6671, + "step": 3855 + }, + { + "epoch": 0.11818070369008214, + "grad_norm": 1.9769399886785606, + "learning_rate": 9.797490290726356e-06, + "loss": 0.7104, + "step": 3856 + }, + { + "epoch": 0.11821135221282335, + "grad_norm": 2.0750340294065635, + "learning_rate": 9.797350446792202e-06, + "loss": 0.8885, + "step": 3857 + }, + { + "epoch": 0.11824200073556454, + "grad_norm": 1.8897077178295012, + "learning_rate": 9.797210555588523e-06, + "loss": 0.7812, + "step": 3858 + }, + { + "epoch": 0.11827264925830575, + "grad_norm": 1.7347616446409677, + "learning_rate": 9.797070617116704e-06, + "loss": 0.7255, + "step": 3859 + }, + { + "epoch": 0.11830329778104695, + "grad_norm": 1.9590842939590662, + "learning_rate": 9.796930631378118e-06, + "loss": 0.7946, + "step": 3860 + }, + { + "epoch": 0.11833394630378816, + "grad_norm": 2.0795936788213956, + "learning_rate": 9.796790598374149e-06, + "loss": 0.8842, + "step": 3861 + }, + { + "epoch": 0.11836459482652936, + "grad_norm": 1.8141258530052218, + "learning_rate": 9.796650518106175e-06, + "loss": 0.7889, + "step": 3862 + }, + { + "epoch": 0.11839524334927057, + "grad_norm": 1.7778167232884605, + "learning_rate": 9.796510390575575e-06, + "loss": 0.8595, + "step": 3863 + }, + { + "epoch": 0.11842589187201177, + "grad_norm": 1.9279492673547622, + "learning_rate": 9.796370215783732e-06, + "loss": 0.7939, + "step": 3864 + }, + { + "epoch": 0.11845654039475298, + "grad_norm": 1.9904090928120948, + "learning_rate": 9.796229993732026e-06, + "loss": 0.7839, + "step": 3865 + }, + { + "epoch": 0.11848718891749417, + "grad_norm": 1.9955921370302643, + "learning_rate": 9.796089724421837e-06, + "loss": 0.8201, + "step": 3866 + }, + { + "epoch": 0.11851783744023538, + "grad_norm": 1.9264722895269155, + "learning_rate": 9.795949407854551e-06, + "loss": 0.7622, + "step": 3867 + }, + { + "epoch": 0.11854848596297658, + "grad_norm": 1.9482911904303026, + "learning_rate": 9.795809044031546e-06, + "loss": 0.8153, + "step": 3868 + }, + { + "epoch": 0.11857913448571779, + "grad_norm": 1.888988668878405, + "learning_rate": 9.795668632954209e-06, + "loss": 0.6786, + "step": 3869 + }, + { + "epoch": 0.118609783008459, + "grad_norm": 1.855108042329308, 
+ "learning_rate": 9.795528174623922e-06, + "loss": 0.6974, + "step": 3870 + }, + { + "epoch": 0.1186404315312002, + "grad_norm": 2.6534332463503896, + "learning_rate": 9.795387669042069e-06, + "loss": 0.8038, + "step": 3871 + }, + { + "epoch": 0.1186710800539414, + "grad_norm": 1.9274208248339366, + "learning_rate": 9.795247116210035e-06, + "loss": 0.7159, + "step": 3872 + }, + { + "epoch": 0.11870172857668261, + "grad_norm": 1.8647138618978674, + "learning_rate": 9.795106516129203e-06, + "loss": 0.8194, + "step": 3873 + }, + { + "epoch": 0.1187323770994238, + "grad_norm": 1.7914707846402174, + "learning_rate": 9.794965868800958e-06, + "loss": 0.7364, + "step": 3874 + }, + { + "epoch": 0.11876302562216501, + "grad_norm": 1.8642960762237157, + "learning_rate": 9.79482517422669e-06, + "loss": 0.7868, + "step": 3875 + }, + { + "epoch": 0.11879367414490621, + "grad_norm": 1.9697476545700374, + "learning_rate": 9.794684432407781e-06, + "loss": 0.7767, + "step": 3876 + }, + { + "epoch": 0.11882432266764742, + "grad_norm": 1.789531433345558, + "learning_rate": 9.794543643345622e-06, + "loss": 0.8307, + "step": 3877 + }, + { + "epoch": 0.11885497119038863, + "grad_norm": 1.7060158836795396, + "learning_rate": 9.794402807041596e-06, + "loss": 0.7939, + "step": 3878 + }, + { + "epoch": 0.11888561971312983, + "grad_norm": 1.9513524817726566, + "learning_rate": 9.794261923497092e-06, + "loss": 0.7791, + "step": 3879 + }, + { + "epoch": 0.11891626823587104, + "grad_norm": 2.1657437303029727, + "learning_rate": 9.7941209927135e-06, + "loss": 0.8023, + "step": 3880 + }, + { + "epoch": 0.11894691675861223, + "grad_norm": 2.2643535396471237, + "learning_rate": 9.793980014692207e-06, + "loss": 0.6654, + "step": 3881 + }, + { + "epoch": 0.11897756528135343, + "grad_norm": 1.4347719578696314, + "learning_rate": 9.793838989434602e-06, + "loss": 0.5246, + "step": 3882 + }, + { + "epoch": 0.11900821380409464, + "grad_norm": 1.9320010435449868, + "learning_rate": 9.793697916942074e-06, + "loss": 0.7774, + "step": 3883 + }, + { + "epoch": 0.11903886232683585, + "grad_norm": 1.7824604645604654, + "learning_rate": 9.793556797216016e-06, + "loss": 0.7312, + "step": 3884 + }, + { + "epoch": 0.11906951084957705, + "grad_norm": 1.7274702202457328, + "learning_rate": 9.793415630257815e-06, + "loss": 0.7289, + "step": 3885 + }, + { + "epoch": 0.11910015937231826, + "grad_norm": 2.0238738713745743, + "learning_rate": 9.793274416068862e-06, + "loss": 0.7333, + "step": 3886 + }, + { + "epoch": 0.11913080789505946, + "grad_norm": 1.8222022271946259, + "learning_rate": 9.793133154650552e-06, + "loss": 0.6984, + "step": 3887 + }, + { + "epoch": 0.11916145641780067, + "grad_norm": 1.9212000528514053, + "learning_rate": 9.792991846004274e-06, + "loss": 0.756, + "step": 3888 + }, + { + "epoch": 0.11919210494054186, + "grad_norm": 2.0065067066543265, + "learning_rate": 9.792850490131421e-06, + "loss": 0.716, + "step": 3889 + }, + { + "epoch": 0.11922275346328307, + "grad_norm": 1.1878581543924933, + "learning_rate": 9.792709087033386e-06, + "loss": 0.5234, + "step": 3890 + }, + { + "epoch": 0.11925340198602427, + "grad_norm": 1.9263557188005953, + "learning_rate": 9.792567636711561e-06, + "loss": 0.7042, + "step": 3891 + }, + { + "epoch": 0.11928405050876548, + "grad_norm": 1.8457511289272206, + "learning_rate": 9.792426139167341e-06, + "loss": 0.878, + "step": 3892 + }, + { + "epoch": 0.11931469903150668, + "grad_norm": 0.9218374289904986, + "learning_rate": 9.792284594402122e-06, + "loss": 0.5205, + "step": 3893 + }, + { + 
"epoch": 0.11934534755424789, + "grad_norm": 1.9252580801621884, + "learning_rate": 9.792143002417295e-06, + "loss": 0.7057, + "step": 3894 + }, + { + "epoch": 0.1193759960769891, + "grad_norm": 2.051068820543772, + "learning_rate": 9.792001363214257e-06, + "loss": 0.7423, + "step": 3895 + }, + { + "epoch": 0.1194066445997303, + "grad_norm": 0.9200537440425371, + "learning_rate": 9.791859676794403e-06, + "loss": 0.5053, + "step": 3896 + }, + { + "epoch": 0.11943729312247149, + "grad_norm": 1.8564203421706564, + "learning_rate": 9.791717943159131e-06, + "loss": 0.795, + "step": 3897 + }, + { + "epoch": 0.1194679416452127, + "grad_norm": 1.9249054478262562, + "learning_rate": 9.791576162309835e-06, + "loss": 0.7508, + "step": 3898 + }, + { + "epoch": 0.1194985901679539, + "grad_norm": 1.8974588151098368, + "learning_rate": 9.791434334247914e-06, + "loss": 0.7825, + "step": 3899 + }, + { + "epoch": 0.11952923869069511, + "grad_norm": 0.8901970268007301, + "learning_rate": 9.791292458974764e-06, + "loss": 0.5104, + "step": 3900 + }, + { + "epoch": 0.11955988721343631, + "grad_norm": 1.7054795060111076, + "learning_rate": 9.791150536491784e-06, + "loss": 0.6516, + "step": 3901 + }, + { + "epoch": 0.11959053573617752, + "grad_norm": 1.862077099453833, + "learning_rate": 9.79100856680037e-06, + "loss": 0.7586, + "step": 3902 + }, + { + "epoch": 0.11962118425891873, + "grad_norm": 2.083623704952197, + "learning_rate": 9.790866549901924e-06, + "loss": 0.7518, + "step": 3903 + }, + { + "epoch": 0.11965183278165993, + "grad_norm": 1.9641864229432904, + "learning_rate": 9.790724485797846e-06, + "loss": 0.8089, + "step": 3904 + }, + { + "epoch": 0.11968248130440112, + "grad_norm": 1.9645586692126367, + "learning_rate": 9.790582374489532e-06, + "loss": 0.7683, + "step": 3905 + }, + { + "epoch": 0.11971312982714233, + "grad_norm": 2.109234739342282, + "learning_rate": 9.790440215978383e-06, + "loss": 0.7406, + "step": 3906 + }, + { + "epoch": 0.11974377834988353, + "grad_norm": 1.9087490486610936, + "learning_rate": 9.790298010265803e-06, + "loss": 0.6865, + "step": 3907 + }, + { + "epoch": 0.11977442687262474, + "grad_norm": 0.9933002050855174, + "learning_rate": 9.79015575735319e-06, + "loss": 0.5001, + "step": 3908 + }, + { + "epoch": 0.11980507539536595, + "grad_norm": 1.8164187697402456, + "learning_rate": 9.790013457241948e-06, + "loss": 0.7745, + "step": 3909 + }, + { + "epoch": 0.11983572391810715, + "grad_norm": 1.9965517356864608, + "learning_rate": 9.789871109933477e-06, + "loss": 0.7957, + "step": 3910 + }, + { + "epoch": 0.11986637244084836, + "grad_norm": 1.9120268297377856, + "learning_rate": 9.78972871542918e-06, + "loss": 0.7714, + "step": 3911 + }, + { + "epoch": 0.11989702096358955, + "grad_norm": 2.05092879297758, + "learning_rate": 9.78958627373046e-06, + "loss": 0.7921, + "step": 3912 + }, + { + "epoch": 0.11992766948633075, + "grad_norm": 2.221842051924001, + "learning_rate": 9.789443784838722e-06, + "loss": 0.7986, + "step": 3913 + }, + { + "epoch": 0.11995831800907196, + "grad_norm": 0.8947080751367145, + "learning_rate": 9.789301248755368e-06, + "loss": 0.5167, + "step": 3914 + }, + { + "epoch": 0.11998896653181317, + "grad_norm": 1.7694232126191867, + "learning_rate": 9.789158665481804e-06, + "loss": 0.7763, + "step": 3915 + }, + { + "epoch": 0.12001961505455437, + "grad_norm": 1.6602438572243863, + "learning_rate": 9.789016035019435e-06, + "loss": 0.715, + "step": 3916 + }, + { + "epoch": 0.12005026357729558, + "grad_norm": 1.8417573062918349, + "learning_rate": 
9.788873357369665e-06, + "loss": 0.7368, + "step": 3917 + }, + { + "epoch": 0.12008091210003678, + "grad_norm": 2.2736995601659014, + "learning_rate": 9.7887306325339e-06, + "loss": 0.7817, + "step": 3918 + }, + { + "epoch": 0.12011156062277799, + "grad_norm": 2.032070703018031, + "learning_rate": 9.788587860513547e-06, + "loss": 0.6596, + "step": 3919 + }, + { + "epoch": 0.12014220914551918, + "grad_norm": 1.871091164032451, + "learning_rate": 9.788445041310013e-06, + "loss": 0.6818, + "step": 3920 + }, + { + "epoch": 0.12017285766826039, + "grad_norm": 1.917903958683791, + "learning_rate": 9.788302174924705e-06, + "loss": 0.8452, + "step": 3921 + }, + { + "epoch": 0.12020350619100159, + "grad_norm": 1.8047431909119485, + "learning_rate": 9.788159261359031e-06, + "loss": 0.7792, + "step": 3922 + }, + { + "epoch": 0.1202341547137428, + "grad_norm": 0.9338457211971057, + "learning_rate": 9.788016300614397e-06, + "loss": 0.5254, + "step": 3923 + }, + { + "epoch": 0.120264803236484, + "grad_norm": 0.8927252427717907, + "learning_rate": 9.787873292692216e-06, + "loss": 0.5082, + "step": 3924 + }, + { + "epoch": 0.12029545175922521, + "grad_norm": 0.8202109349512836, + "learning_rate": 9.78773023759389e-06, + "loss": 0.4896, + "step": 3925 + }, + { + "epoch": 0.12032610028196641, + "grad_norm": 2.0481891876737723, + "learning_rate": 9.787587135320837e-06, + "loss": 0.7703, + "step": 3926 + }, + { + "epoch": 0.12035674880470762, + "grad_norm": 1.940735978674249, + "learning_rate": 9.787443985874463e-06, + "loss": 0.771, + "step": 3927 + }, + { + "epoch": 0.12038739732744881, + "grad_norm": 1.9352589305253196, + "learning_rate": 9.787300789256176e-06, + "loss": 0.8211, + "step": 3928 + }, + { + "epoch": 0.12041804585019002, + "grad_norm": 1.8996709994532546, + "learning_rate": 9.787157545467392e-06, + "loss": 0.7777, + "step": 3929 + }, + { + "epoch": 0.12044869437293122, + "grad_norm": 1.8084701385001196, + "learning_rate": 9.787014254509517e-06, + "loss": 0.7881, + "step": 3930 + }, + { + "epoch": 0.12047934289567243, + "grad_norm": 1.8797729291956977, + "learning_rate": 9.786870916383969e-06, + "loss": 0.7392, + "step": 3931 + }, + { + "epoch": 0.12050999141841363, + "grad_norm": 1.6110108802496155, + "learning_rate": 9.786727531092154e-06, + "loss": 0.6906, + "step": 3932 + }, + { + "epoch": 0.12054063994115484, + "grad_norm": 1.7560620644258056, + "learning_rate": 9.78658409863549e-06, + "loss": 0.7445, + "step": 3933 + }, + { + "epoch": 0.12057128846389605, + "grad_norm": 1.8179792927475642, + "learning_rate": 9.786440619015387e-06, + "loss": 0.7725, + "step": 3934 + }, + { + "epoch": 0.12060193698663725, + "grad_norm": 2.5069290133681283, + "learning_rate": 9.78629709223326e-06, + "loss": 0.7608, + "step": 3935 + }, + { + "epoch": 0.12063258550937844, + "grad_norm": 1.1545882786971595, + "learning_rate": 9.786153518290524e-06, + "loss": 0.5236, + "step": 3936 + }, + { + "epoch": 0.12066323403211965, + "grad_norm": 1.7529411261106982, + "learning_rate": 9.78600989718859e-06, + "loss": 0.6871, + "step": 3937 + }, + { + "epoch": 0.12069388255486085, + "grad_norm": 2.1583624429744828, + "learning_rate": 9.78586622892888e-06, + "loss": 0.84, + "step": 3938 + }, + { + "epoch": 0.12072453107760206, + "grad_norm": 2.468055097766941, + "learning_rate": 9.785722513512803e-06, + "loss": 0.7883, + "step": 3939 + }, + { + "epoch": 0.12075517960034327, + "grad_norm": 2.0230721339277826, + "learning_rate": 9.785578750941779e-06, + "loss": 0.8173, + "step": 3940 + }, + { + "epoch": 0.12078582812308447, + 
"grad_norm": 1.881611057845351, + "learning_rate": 9.785434941217222e-06, + "loss": 0.7365, + "step": 3941 + }, + { + "epoch": 0.12081647664582568, + "grad_norm": 1.9728852351512325, + "learning_rate": 9.785291084340551e-06, + "loss": 0.8345, + "step": 3942 + }, + { + "epoch": 0.12084712516856687, + "grad_norm": 2.108136492305265, + "learning_rate": 9.785147180313181e-06, + "loss": 0.7553, + "step": 3943 + }, + { + "epoch": 0.12087777369130807, + "grad_norm": 0.9209703106352586, + "learning_rate": 9.785003229136534e-06, + "loss": 0.5087, + "step": 3944 + }, + { + "epoch": 0.12090842221404928, + "grad_norm": 2.1161976906065596, + "learning_rate": 9.784859230812024e-06, + "loss": 0.7681, + "step": 3945 + }, + { + "epoch": 0.12093907073679049, + "grad_norm": 1.7040083806759214, + "learning_rate": 9.784715185341072e-06, + "loss": 0.7084, + "step": 3946 + }, + { + "epoch": 0.12096971925953169, + "grad_norm": 1.6922289832657282, + "learning_rate": 9.784571092725097e-06, + "loss": 0.7442, + "step": 3947 + }, + { + "epoch": 0.1210003677822729, + "grad_norm": 2.172328839443821, + "learning_rate": 9.78442695296552e-06, + "loss": 0.7815, + "step": 3948 + }, + { + "epoch": 0.1210310163050141, + "grad_norm": 0.8527328405492085, + "learning_rate": 9.784282766063758e-06, + "loss": 0.4764, + "step": 3949 + }, + { + "epoch": 0.12106166482775531, + "grad_norm": 2.0163456608246095, + "learning_rate": 9.784138532021236e-06, + "loss": 0.7989, + "step": 3950 + }, + { + "epoch": 0.1210923133504965, + "grad_norm": 1.7109951420252174, + "learning_rate": 9.783994250839371e-06, + "loss": 0.67, + "step": 3951 + }, + { + "epoch": 0.1211229618732377, + "grad_norm": 0.8908728555158328, + "learning_rate": 9.783849922519589e-06, + "loss": 0.5215, + "step": 3952 + }, + { + "epoch": 0.12115361039597891, + "grad_norm": 1.911029978523564, + "learning_rate": 9.783705547063306e-06, + "loss": 0.8163, + "step": 3953 + }, + { + "epoch": 0.12118425891872012, + "grad_norm": 1.9060133719234424, + "learning_rate": 9.783561124471951e-06, + "loss": 0.7524, + "step": 3954 + }, + { + "epoch": 0.12121490744146132, + "grad_norm": 1.8098395574781434, + "learning_rate": 9.783416654746945e-06, + "loss": 0.6954, + "step": 3955 + }, + { + "epoch": 0.12124555596420253, + "grad_norm": 2.081397973091131, + "learning_rate": 9.78327213788971e-06, + "loss": 0.7788, + "step": 3956 + }, + { + "epoch": 0.12127620448694373, + "grad_norm": 1.7829205438582147, + "learning_rate": 9.78312757390167e-06, + "loss": 0.7716, + "step": 3957 + }, + { + "epoch": 0.12130685300968494, + "grad_norm": 1.8479551103348368, + "learning_rate": 9.782982962784252e-06, + "loss": 0.7337, + "step": 3958 + }, + { + "epoch": 0.12133750153242613, + "grad_norm": 2.187426181912062, + "learning_rate": 9.782838304538878e-06, + "loss": 0.8148, + "step": 3959 + }, + { + "epoch": 0.12136815005516734, + "grad_norm": 1.1260909599601068, + "learning_rate": 9.782693599166973e-06, + "loss": 0.528, + "step": 3960 + }, + { + "epoch": 0.12139879857790854, + "grad_norm": 2.0790460510798674, + "learning_rate": 9.782548846669966e-06, + "loss": 0.7513, + "step": 3961 + }, + { + "epoch": 0.12142944710064975, + "grad_norm": 1.9619627130507744, + "learning_rate": 9.782404047049281e-06, + "loss": 0.7577, + "step": 3962 + }, + { + "epoch": 0.12146009562339095, + "grad_norm": 2.0124561640302274, + "learning_rate": 9.782259200306345e-06, + "loss": 0.7146, + "step": 3963 + }, + { + "epoch": 0.12149074414613216, + "grad_norm": 2.3256677592416106, + "learning_rate": 9.782114306442586e-06, + "loss": 0.7403, + 
"step": 3964 + }, + { + "epoch": 0.12152139266887337, + "grad_norm": 1.7308345489105843, + "learning_rate": 9.78196936545943e-06, + "loss": 0.6525, + "step": 3965 + }, + { + "epoch": 0.12155204119161457, + "grad_norm": 1.734505053173351, + "learning_rate": 9.781824377358308e-06, + "loss": 0.7404, + "step": 3966 + }, + { + "epoch": 0.12158268971435576, + "grad_norm": 2.0096897466571346, + "learning_rate": 9.781679342140647e-06, + "loss": 0.7593, + "step": 3967 + }, + { + "epoch": 0.12161333823709697, + "grad_norm": 2.0081337715986285, + "learning_rate": 9.781534259807874e-06, + "loss": 0.7712, + "step": 3968 + }, + { + "epoch": 0.12164398675983817, + "grad_norm": 1.8609828623807834, + "learning_rate": 9.781389130361422e-06, + "loss": 0.7264, + "step": 3969 + }, + { + "epoch": 0.12167463528257938, + "grad_norm": 2.019480436225165, + "learning_rate": 9.781243953802719e-06, + "loss": 0.8323, + "step": 3970 + }, + { + "epoch": 0.12170528380532059, + "grad_norm": 1.3635191954641133, + "learning_rate": 9.781098730133196e-06, + "loss": 0.5272, + "step": 3971 + }, + { + "epoch": 0.12173593232806179, + "grad_norm": 1.0829703566634012, + "learning_rate": 9.780953459354285e-06, + "loss": 0.5374, + "step": 3972 + }, + { + "epoch": 0.121766580850803, + "grad_norm": 1.8326340329449362, + "learning_rate": 9.780808141467414e-06, + "loss": 0.6836, + "step": 3973 + }, + { + "epoch": 0.12179722937354419, + "grad_norm": 1.8619192789115784, + "learning_rate": 9.78066277647402e-06, + "loss": 0.7337, + "step": 3974 + }, + { + "epoch": 0.1218278778962854, + "grad_norm": 2.267811738289442, + "learning_rate": 9.780517364375531e-06, + "loss": 0.9009, + "step": 3975 + }, + { + "epoch": 0.1218585264190266, + "grad_norm": 1.491678643749368, + "learning_rate": 9.780371905173381e-06, + "loss": 0.5394, + "step": 3976 + }, + { + "epoch": 0.1218891749417678, + "grad_norm": 1.9623338123713694, + "learning_rate": 9.780226398869004e-06, + "loss": 0.7914, + "step": 3977 + }, + { + "epoch": 0.12191982346450901, + "grad_norm": 1.9132763383634876, + "learning_rate": 9.780080845463832e-06, + "loss": 0.857, + "step": 3978 + }, + { + "epoch": 0.12195047198725022, + "grad_norm": 2.1490485355330695, + "learning_rate": 9.779935244959303e-06, + "loss": 0.8079, + "step": 3979 + }, + { + "epoch": 0.12198112050999142, + "grad_norm": 1.7985076562759308, + "learning_rate": 9.779789597356848e-06, + "loss": 0.7496, + "step": 3980 + }, + { + "epoch": 0.12201176903273263, + "grad_norm": 1.9321751444269748, + "learning_rate": 9.779643902657902e-06, + "loss": 0.8531, + "step": 3981 + }, + { + "epoch": 0.12204241755547382, + "grad_norm": 1.7316224266530633, + "learning_rate": 9.779498160863903e-06, + "loss": 0.7646, + "step": 3982 + }, + { + "epoch": 0.12207306607821503, + "grad_norm": 1.7655244911793255, + "learning_rate": 9.779352371976284e-06, + "loss": 0.6843, + "step": 3983 + }, + { + "epoch": 0.12210371460095623, + "grad_norm": 0.9188301595227462, + "learning_rate": 9.779206535996487e-06, + "loss": 0.5227, + "step": 3984 + }, + { + "epoch": 0.12213436312369744, + "grad_norm": 2.4316908200003913, + "learning_rate": 9.77906065292594e-06, + "loss": 0.6633, + "step": 3985 + }, + { + "epoch": 0.12216501164643864, + "grad_norm": 2.1711361550157817, + "learning_rate": 9.778914722766089e-06, + "loss": 0.7381, + "step": 3986 + }, + { + "epoch": 0.12219566016917985, + "grad_norm": 1.9180977952833558, + "learning_rate": 9.778768745518367e-06, + "loss": 0.876, + "step": 3987 + }, + { + "epoch": 0.12222630869192105, + "grad_norm": 1.935085227818858, + 
"learning_rate": 9.778622721184216e-06, + "loss": 0.7891, + "step": 3988 + }, + { + "epoch": 0.12225695721466226, + "grad_norm": 1.9775532937357845, + "learning_rate": 9.778476649765071e-06, + "loss": 0.8173, + "step": 3989 + }, + { + "epoch": 0.12228760573740345, + "grad_norm": 1.9675779596522587, + "learning_rate": 9.778330531262373e-06, + "loss": 0.8053, + "step": 3990 + }, + { + "epoch": 0.12231825426014466, + "grad_norm": 1.7564154888670522, + "learning_rate": 9.778184365677561e-06, + "loss": 0.6353, + "step": 3991 + }, + { + "epoch": 0.12234890278288586, + "grad_norm": 0.965467559716903, + "learning_rate": 9.778038153012078e-06, + "loss": 0.5178, + "step": 3992 + }, + { + "epoch": 0.12237955130562707, + "grad_norm": 1.7429045991779435, + "learning_rate": 9.77789189326736e-06, + "loss": 0.7368, + "step": 3993 + }, + { + "epoch": 0.12241019982836827, + "grad_norm": 1.7428018709021957, + "learning_rate": 9.777745586444853e-06, + "loss": 0.7564, + "step": 3994 + }, + { + "epoch": 0.12244084835110948, + "grad_norm": 2.1050538608062754, + "learning_rate": 9.777599232545994e-06, + "loss": 0.839, + "step": 3995 + }, + { + "epoch": 0.12247149687385069, + "grad_norm": 1.9581609541851468, + "learning_rate": 9.777452831572229e-06, + "loss": 0.6994, + "step": 3996 + }, + { + "epoch": 0.12250214539659189, + "grad_norm": 2.0541389442883946, + "learning_rate": 9.777306383524999e-06, + "loss": 0.7921, + "step": 3997 + }, + { + "epoch": 0.12253279391933308, + "grad_norm": 1.8078280563017146, + "learning_rate": 9.777159888405746e-06, + "loss": 0.7254, + "step": 3998 + }, + { + "epoch": 0.12256344244207429, + "grad_norm": 2.0968954802259003, + "learning_rate": 9.777013346215915e-06, + "loss": 0.7368, + "step": 3999 + }, + { + "epoch": 0.1225940909648155, + "grad_norm": 1.8829968294227226, + "learning_rate": 9.776866756956948e-06, + "loss": 0.7256, + "step": 4000 + }, + { + "epoch": 0.1226247394875567, + "grad_norm": 1.6992076135172227, + "learning_rate": 9.776720120630293e-06, + "loss": 0.7001, + "step": 4001 + }, + { + "epoch": 0.1226553880102979, + "grad_norm": 2.0195854076648074, + "learning_rate": 9.776573437237391e-06, + "loss": 0.7223, + "step": 4002 + }, + { + "epoch": 0.12268603653303911, + "grad_norm": 1.993591967956816, + "learning_rate": 9.776426706779688e-06, + "loss": 0.8202, + "step": 4003 + }, + { + "epoch": 0.12271668505578032, + "grad_norm": 2.152757971021305, + "learning_rate": 9.776279929258632e-06, + "loss": 0.7362, + "step": 4004 + }, + { + "epoch": 0.12274733357852151, + "grad_norm": 1.9875419570641666, + "learning_rate": 9.776133104675667e-06, + "loss": 0.7729, + "step": 4005 + }, + { + "epoch": 0.12277798210126271, + "grad_norm": 1.9196287072367106, + "learning_rate": 9.775986233032241e-06, + "loss": 0.7426, + "step": 4006 + }, + { + "epoch": 0.12280863062400392, + "grad_norm": 1.8310620383826612, + "learning_rate": 9.7758393143298e-06, + "loss": 0.7656, + "step": 4007 + }, + { + "epoch": 0.12283927914674513, + "grad_norm": 1.0046543301046023, + "learning_rate": 9.775692348569792e-06, + "loss": 0.493, + "step": 4008 + }, + { + "epoch": 0.12286992766948633, + "grad_norm": 1.8191125001663875, + "learning_rate": 9.775545335753667e-06, + "loss": 0.8134, + "step": 4009 + }, + { + "epoch": 0.12290057619222754, + "grad_norm": 1.62894729182304, + "learning_rate": 9.77539827588287e-06, + "loss": 0.6989, + "step": 4010 + }, + { + "epoch": 0.12293122471496874, + "grad_norm": 1.9428801694628954, + "learning_rate": 9.775251168958853e-06, + "loss": 0.7122, + "step": 4011 + }, + { + "epoch": 
0.12296187323770995, + "grad_norm": 1.8723098484763487, + "learning_rate": 9.775104014983066e-06, + "loss": 0.7049, + "step": 4012 + }, + { + "epoch": 0.12299252176045114, + "grad_norm": 1.8599124781024214, + "learning_rate": 9.774956813956954e-06, + "loss": 0.7375, + "step": 4013 + }, + { + "epoch": 0.12302317028319235, + "grad_norm": 1.060433824126987, + "learning_rate": 9.774809565881973e-06, + "loss": 0.528, + "step": 4014 + }, + { + "epoch": 0.12305381880593355, + "grad_norm": 1.6471215006406288, + "learning_rate": 9.774662270759571e-06, + "loss": 0.76, + "step": 4015 + }, + { + "epoch": 0.12308446732867476, + "grad_norm": 2.255333115868993, + "learning_rate": 9.7745149285912e-06, + "loss": 0.8009, + "step": 4016 + }, + { + "epoch": 0.12311511585141596, + "grad_norm": 1.85207463151369, + "learning_rate": 9.774367539378313e-06, + "loss": 0.6971, + "step": 4017 + }, + { + "epoch": 0.12314576437415717, + "grad_norm": 2.139202657361986, + "learning_rate": 9.77422010312236e-06, + "loss": 0.7945, + "step": 4018 + }, + { + "epoch": 0.12317641289689837, + "grad_norm": 1.7658888893557316, + "learning_rate": 9.774072619824794e-06, + "loss": 0.734, + "step": 4019 + }, + { + "epoch": 0.12320706141963958, + "grad_norm": 1.9805725936284189, + "learning_rate": 9.773925089487069e-06, + "loss": 0.6777, + "step": 4020 + }, + { + "epoch": 0.12323770994238077, + "grad_norm": 2.097339492826017, + "learning_rate": 9.773777512110641e-06, + "loss": 0.7292, + "step": 4021 + }, + { + "epoch": 0.12326835846512198, + "grad_norm": 2.0479281422007656, + "learning_rate": 9.77362988769696e-06, + "loss": 0.7066, + "step": 4022 + }, + { + "epoch": 0.12329900698786318, + "grad_norm": 1.727068083401802, + "learning_rate": 9.773482216247482e-06, + "loss": 0.7851, + "step": 4023 + }, + { + "epoch": 0.12332965551060439, + "grad_norm": 1.9149756549241077, + "learning_rate": 9.773334497763663e-06, + "loss": 0.8197, + "step": 4024 + }, + { + "epoch": 0.1233603040333456, + "grad_norm": 0.9066421207728026, + "learning_rate": 9.773186732246957e-06, + "loss": 0.5103, + "step": 4025 + }, + { + "epoch": 0.1233909525560868, + "grad_norm": 1.9765321698175817, + "learning_rate": 9.773038919698821e-06, + "loss": 0.7019, + "step": 4026 + }, + { + "epoch": 0.123421601078828, + "grad_norm": 1.9559550544396924, + "learning_rate": 9.772891060120713e-06, + "loss": 0.6656, + "step": 4027 + }, + { + "epoch": 0.12345224960156921, + "grad_norm": 1.758700521214489, + "learning_rate": 9.772743153514088e-06, + "loss": 0.7164, + "step": 4028 + }, + { + "epoch": 0.1234828981243104, + "grad_norm": 0.8122173399393205, + "learning_rate": 9.772595199880402e-06, + "loss": 0.4707, + "step": 4029 + }, + { + "epoch": 0.12351354664705161, + "grad_norm": 1.9979244760642902, + "learning_rate": 9.772447199221114e-06, + "loss": 0.7216, + "step": 4030 + }, + { + "epoch": 0.12354419516979281, + "grad_norm": 1.9717210409885437, + "learning_rate": 9.772299151537684e-06, + "loss": 0.7492, + "step": 4031 + }, + { + "epoch": 0.12357484369253402, + "grad_norm": 1.937681325197825, + "learning_rate": 9.77215105683157e-06, + "loss": 0.7517, + "step": 4032 + }, + { + "epoch": 0.12360549221527523, + "grad_norm": 0.8618332261734779, + "learning_rate": 9.772002915104228e-06, + "loss": 0.4818, + "step": 4033 + }, + { + "epoch": 0.12363614073801643, + "grad_norm": 2.1086549860215738, + "learning_rate": 9.771854726357123e-06, + "loss": 0.7603, + "step": 4034 + }, + { + "epoch": 0.12366678926075764, + "grad_norm": 1.8002314655781981, + "learning_rate": 9.771706490591711e-06, + 
"loss": 0.831, + "step": 4035 + }, + { + "epoch": 0.12369743778349883, + "grad_norm": 1.7778485772448616, + "learning_rate": 9.771558207809455e-06, + "loss": 0.713, + "step": 4036 + }, + { + "epoch": 0.12372808630624003, + "grad_norm": 1.8771421329346885, + "learning_rate": 9.771409878011814e-06, + "loss": 0.7922, + "step": 4037 + }, + { + "epoch": 0.12375873482898124, + "grad_norm": 1.7219897062512775, + "learning_rate": 9.771261501200251e-06, + "loss": 0.627, + "step": 4038 + }, + { + "epoch": 0.12378938335172245, + "grad_norm": 1.8407978638367677, + "learning_rate": 9.771113077376229e-06, + "loss": 0.8138, + "step": 4039 + }, + { + "epoch": 0.12382003187446365, + "grad_norm": 1.9845268244905363, + "learning_rate": 9.770964606541208e-06, + "loss": 0.7784, + "step": 4040 + }, + { + "epoch": 0.12385068039720486, + "grad_norm": 1.9842380966222226, + "learning_rate": 9.770816088696652e-06, + "loss": 0.8735, + "step": 4041 + }, + { + "epoch": 0.12388132891994606, + "grad_norm": 1.6700040130159928, + "learning_rate": 9.770667523844024e-06, + "loss": 0.7232, + "step": 4042 + }, + { + "epoch": 0.12391197744268727, + "grad_norm": 1.7494771879862894, + "learning_rate": 9.77051891198479e-06, + "loss": 0.7952, + "step": 4043 + }, + { + "epoch": 0.12394262596542846, + "grad_norm": 2.0582193479846427, + "learning_rate": 9.770370253120411e-06, + "loss": 0.6535, + "step": 4044 + }, + { + "epoch": 0.12397327448816967, + "grad_norm": 2.012259012846718, + "learning_rate": 9.770221547252354e-06, + "loss": 0.7738, + "step": 4045 + }, + { + "epoch": 0.12400392301091087, + "grad_norm": 2.6739081742876145, + "learning_rate": 9.770072794382082e-06, + "loss": 0.8296, + "step": 4046 + }, + { + "epoch": 0.12403457153365208, + "grad_norm": 1.765892747207758, + "learning_rate": 9.769923994511064e-06, + "loss": 0.7208, + "step": 4047 + }, + { + "epoch": 0.12406522005639328, + "grad_norm": 1.6617874474353278, + "learning_rate": 9.769775147640762e-06, + "loss": 0.7377, + "step": 4048 + }, + { + "epoch": 0.12409586857913449, + "grad_norm": 2.316036159733407, + "learning_rate": 9.769626253772648e-06, + "loss": 0.7829, + "step": 4049 + }, + { + "epoch": 0.1241265171018757, + "grad_norm": 1.9865160888705902, + "learning_rate": 9.769477312908186e-06, + "loss": 0.7355, + "step": 4050 + }, + { + "epoch": 0.1241571656246169, + "grad_norm": 2.1085425289654434, + "learning_rate": 9.769328325048844e-06, + "loss": 0.7047, + "step": 4051 + }, + { + "epoch": 0.12418781414735809, + "grad_norm": 2.0355507976798632, + "learning_rate": 9.769179290196089e-06, + "loss": 0.7977, + "step": 4052 + }, + { + "epoch": 0.1242184626700993, + "grad_norm": 1.8237547971229258, + "learning_rate": 9.769030208351389e-06, + "loss": 0.7065, + "step": 4053 + }, + { + "epoch": 0.1242491111928405, + "grad_norm": 1.9216431127520825, + "learning_rate": 9.768881079516214e-06, + "loss": 0.8196, + "step": 4054 + }, + { + "epoch": 0.12427975971558171, + "grad_norm": 0.9673162840940205, + "learning_rate": 9.768731903692035e-06, + "loss": 0.5049, + "step": 4055 + }, + { + "epoch": 0.12431040823832291, + "grad_norm": 1.8708545183045635, + "learning_rate": 9.76858268088032e-06, + "loss": 0.7487, + "step": 4056 + }, + { + "epoch": 0.12434105676106412, + "grad_norm": 2.044135194350074, + "learning_rate": 9.76843341108254e-06, + "loss": 0.8344, + "step": 4057 + }, + { + "epoch": 0.12437170528380533, + "grad_norm": 2.217709248286808, + "learning_rate": 9.768284094300165e-06, + "loss": 0.7925, + "step": 4058 + }, + { + "epoch": 0.12440235380654653, + "grad_norm": 
1.9542245570765469, + "learning_rate": 9.768134730534667e-06, + "loss": 0.7272, + "step": 4059 + }, + { + "epoch": 0.12443300232928772, + "grad_norm": 1.829193009550636, + "learning_rate": 9.767985319787519e-06, + "loss": 0.7466, + "step": 4060 + }, + { + "epoch": 0.12446365085202893, + "grad_norm": 2.0194277147252984, + "learning_rate": 9.767835862060188e-06, + "loss": 0.7637, + "step": 4061 + }, + { + "epoch": 0.12449429937477013, + "grad_norm": 1.6695630257123428, + "learning_rate": 9.767686357354154e-06, + "loss": 0.8079, + "step": 4062 + }, + { + "epoch": 0.12452494789751134, + "grad_norm": 2.1998972767729534, + "learning_rate": 9.767536805670884e-06, + "loss": 0.8016, + "step": 4063 + }, + { + "epoch": 0.12455559642025255, + "grad_norm": 0.9464763781440771, + "learning_rate": 9.767387207011856e-06, + "loss": 0.5271, + "step": 4064 + }, + { + "epoch": 0.12458624494299375, + "grad_norm": 1.9684418612775954, + "learning_rate": 9.767237561378541e-06, + "loss": 0.7493, + "step": 4065 + }, + { + "epoch": 0.12461689346573496, + "grad_norm": 1.9575445515388634, + "learning_rate": 9.767087868772415e-06, + "loss": 0.697, + "step": 4066 + }, + { + "epoch": 0.12464754198847615, + "grad_norm": 2.356846035480588, + "learning_rate": 9.766938129194952e-06, + "loss": 0.8001, + "step": 4067 + }, + { + "epoch": 0.12467819051121735, + "grad_norm": 1.7536424929670622, + "learning_rate": 9.76678834264763e-06, + "loss": 0.8382, + "step": 4068 + }, + { + "epoch": 0.12470883903395856, + "grad_norm": 0.8940801028960911, + "learning_rate": 9.766638509131919e-06, + "loss": 0.5134, + "step": 4069 + }, + { + "epoch": 0.12473948755669977, + "grad_norm": 1.8364682971730653, + "learning_rate": 9.766488628649303e-06, + "loss": 0.7319, + "step": 4070 + }, + { + "epoch": 0.12477013607944097, + "grad_norm": 1.9033532048781003, + "learning_rate": 9.766338701201252e-06, + "loss": 0.7913, + "step": 4071 + }, + { + "epoch": 0.12480078460218218, + "grad_norm": 2.013594443622582, + "learning_rate": 9.766188726789248e-06, + "loss": 0.7892, + "step": 4072 + }, + { + "epoch": 0.12483143312492338, + "grad_norm": 1.929980948966591, + "learning_rate": 9.766038705414766e-06, + "loss": 0.7515, + "step": 4073 + }, + { + "epoch": 0.12486208164766459, + "grad_norm": 1.7828285601991771, + "learning_rate": 9.765888637079287e-06, + "loss": 0.6883, + "step": 4074 + }, + { + "epoch": 0.12489273017040578, + "grad_norm": 1.754232367905929, + "learning_rate": 9.765738521784285e-06, + "loss": 0.7315, + "step": 4075 + }, + { + "epoch": 0.12492337869314699, + "grad_norm": 1.8692756496423744, + "learning_rate": 9.765588359531243e-06, + "loss": 0.7025, + "step": 4076 + }, + { + "epoch": 0.12495402721588819, + "grad_norm": 1.9804987822482374, + "learning_rate": 9.76543815032164e-06, + "loss": 0.7794, + "step": 4077 + }, + { + "epoch": 0.1249846757386294, + "grad_norm": 1.7808439767389075, + "learning_rate": 9.765287894156957e-06, + "loss": 0.6636, + "step": 4078 + }, + { + "epoch": 0.1250153242613706, + "grad_norm": 1.835123519352648, + "learning_rate": 9.765137591038671e-06, + "loss": 0.66, + "step": 4079 + }, + { + "epoch": 0.1250459727841118, + "grad_norm": 1.891618406346714, + "learning_rate": 9.764987240968266e-06, + "loss": 0.7444, + "step": 4080 + }, + { + "epoch": 0.12507662130685301, + "grad_norm": 1.0522397726543828, + "learning_rate": 9.764836843947222e-06, + "loss": 0.5012, + "step": 4081 + }, + { + "epoch": 0.12510726982959422, + "grad_norm": 1.7963211368785483, + "learning_rate": 9.764686399977021e-06, + "loss": 0.8477, + "step": 4082 + 
}, + { + "epoch": 0.12513791835233543, + "grad_norm": 1.7071933804970616, + "learning_rate": 9.764535909059147e-06, + "loss": 0.8118, + "step": 4083 + }, + { + "epoch": 0.12516856687507663, + "grad_norm": 1.782325433502545, + "learning_rate": 9.764385371195082e-06, + "loss": 0.7912, + "step": 4084 + }, + { + "epoch": 0.12519921539781784, + "grad_norm": 1.88863232077035, + "learning_rate": 9.764234786386308e-06, + "loss": 0.75, + "step": 4085 + }, + { + "epoch": 0.12522986392055904, + "grad_norm": 1.8865812337035655, + "learning_rate": 9.764084154634311e-06, + "loss": 0.7356, + "step": 4086 + }, + { + "epoch": 0.12526051244330022, + "grad_norm": 2.213037199062368, + "learning_rate": 9.763933475940571e-06, + "loss": 0.7901, + "step": 4087 + }, + { + "epoch": 0.12529116096604143, + "grad_norm": 1.9561922080732654, + "learning_rate": 9.763782750306578e-06, + "loss": 0.8036, + "step": 4088 + }, + { + "epoch": 0.12532180948878263, + "grad_norm": 1.8596577728985701, + "learning_rate": 9.763631977733815e-06, + "loss": 0.736, + "step": 4089 + }, + { + "epoch": 0.12535245801152384, + "grad_norm": 1.816794750570996, + "learning_rate": 9.763481158223764e-06, + "loss": 0.7502, + "step": 4090 + }, + { + "epoch": 0.12538310653426504, + "grad_norm": 1.7675616573177617, + "learning_rate": 9.763330291777918e-06, + "loss": 0.6099, + "step": 4091 + }, + { + "epoch": 0.12541375505700625, + "grad_norm": 1.680417324196581, + "learning_rate": 9.763179378397759e-06, + "loss": 0.6721, + "step": 4092 + }, + { + "epoch": 0.12544440357974745, + "grad_norm": 1.795715124211618, + "learning_rate": 9.763028418084773e-06, + "loss": 0.7816, + "step": 4093 + }, + { + "epoch": 0.12547505210248866, + "grad_norm": 1.885880837190289, + "learning_rate": 9.76287741084045e-06, + "loss": 0.8179, + "step": 4094 + }, + { + "epoch": 0.12550570062522987, + "grad_norm": 2.035005883678667, + "learning_rate": 9.762726356666279e-06, + "loss": 0.8098, + "step": 4095 + }, + { + "epoch": 0.12553634914797107, + "grad_norm": 2.1293643222476666, + "learning_rate": 9.762575255563747e-06, + "loss": 0.7931, + "step": 4096 + }, + { + "epoch": 0.12556699767071228, + "grad_norm": 1.8620303830990386, + "learning_rate": 9.76242410753434e-06, + "loss": 0.7889, + "step": 4097 + }, + { + "epoch": 0.12559764619345348, + "grad_norm": 2.007224177321235, + "learning_rate": 9.762272912579551e-06, + "loss": 0.663, + "step": 4098 + }, + { + "epoch": 0.1256282947161947, + "grad_norm": 1.1971495346516854, + "learning_rate": 9.762121670700867e-06, + "loss": 0.5372, + "step": 4099 + }, + { + "epoch": 0.1256589432389359, + "grad_norm": 1.964657185142504, + "learning_rate": 9.761970381899782e-06, + "loss": 0.8028, + "step": 4100 + }, + { + "epoch": 0.1256895917616771, + "grad_norm": 2.0469598191967115, + "learning_rate": 9.761819046177782e-06, + "loss": 0.6998, + "step": 4101 + }, + { + "epoch": 0.12572024028441828, + "grad_norm": 2.169000400070458, + "learning_rate": 9.761667663536363e-06, + "loss": 0.8178, + "step": 4102 + }, + { + "epoch": 0.12575088880715948, + "grad_norm": 1.9663683610579652, + "learning_rate": 9.761516233977014e-06, + "loss": 0.7049, + "step": 4103 + }, + { + "epoch": 0.1257815373299007, + "grad_norm": 1.7998840962385514, + "learning_rate": 9.761364757501227e-06, + "loss": 0.7436, + "step": 4104 + }, + { + "epoch": 0.1258121858526419, + "grad_norm": 2.044815774388784, + "learning_rate": 9.761213234110494e-06, + "loss": 0.6831, + "step": 4105 + }, + { + "epoch": 0.1258428343753831, + "grad_norm": 1.94123924346665, + "learning_rate": 
9.761061663806308e-06, + "loss": 0.683, + "step": 4106 + }, + { + "epoch": 0.1258734828981243, + "grad_norm": 2.0048597628630676, + "learning_rate": 9.760910046590164e-06, + "loss": 0.7863, + "step": 4107 + }, + { + "epoch": 0.1259041314208655, + "grad_norm": 1.7803839313949708, + "learning_rate": 9.760758382463555e-06, + "loss": 0.7191, + "step": 4108 + }, + { + "epoch": 0.12593477994360672, + "grad_norm": 1.9260505755487507, + "learning_rate": 9.760606671427976e-06, + "loss": 0.7019, + "step": 4109 + }, + { + "epoch": 0.12596542846634792, + "grad_norm": 1.99567555351415, + "learning_rate": 9.760454913484923e-06, + "loss": 0.6904, + "step": 4110 + }, + { + "epoch": 0.12599607698908913, + "grad_norm": 1.4597745344758335, + "learning_rate": 9.760303108635887e-06, + "loss": 0.5247, + "step": 4111 + }, + { + "epoch": 0.12602672551183033, + "grad_norm": 1.0834631443515697, + "learning_rate": 9.760151256882368e-06, + "loss": 0.5084, + "step": 4112 + }, + { + "epoch": 0.12605737403457154, + "grad_norm": 1.8313300545578, + "learning_rate": 9.759999358225861e-06, + "loss": 0.8067, + "step": 4113 + }, + { + "epoch": 0.12608802255731275, + "grad_norm": 1.8475498875746441, + "learning_rate": 9.759847412667862e-06, + "loss": 0.7873, + "step": 4114 + }, + { + "epoch": 0.12611867108005395, + "grad_norm": 2.070410744879872, + "learning_rate": 9.759695420209867e-06, + "loss": 0.7845, + "step": 4115 + }, + { + "epoch": 0.12614931960279516, + "grad_norm": 2.4109166131841624, + "learning_rate": 9.759543380853379e-06, + "loss": 0.662, + "step": 4116 + }, + { + "epoch": 0.12617996812553636, + "grad_norm": 1.821076589099308, + "learning_rate": 9.759391294599889e-06, + "loss": 0.7286, + "step": 4117 + }, + { + "epoch": 0.12621061664827754, + "grad_norm": 2.0555897747301866, + "learning_rate": 9.7592391614509e-06, + "loss": 0.7191, + "step": 4118 + }, + { + "epoch": 0.12624126517101875, + "grad_norm": 1.9869055572379595, + "learning_rate": 9.759086981407909e-06, + "loss": 0.8261, + "step": 4119 + }, + { + "epoch": 0.12627191369375995, + "grad_norm": 2.0197555521265045, + "learning_rate": 9.758934754472418e-06, + "loss": 0.704, + "step": 4120 + }, + { + "epoch": 0.12630256221650116, + "grad_norm": 2.216145815091527, + "learning_rate": 9.758782480645923e-06, + "loss": 0.7672, + "step": 4121 + }, + { + "epoch": 0.12633321073924236, + "grad_norm": 1.9535525902021684, + "learning_rate": 9.758630159929928e-06, + "loss": 0.7213, + "step": 4122 + }, + { + "epoch": 0.12636385926198357, + "grad_norm": 1.8611265572818791, + "learning_rate": 9.758477792325932e-06, + "loss": 0.8029, + "step": 4123 + }, + { + "epoch": 0.12639450778472477, + "grad_norm": 1.9290418535193892, + "learning_rate": 9.758325377835437e-06, + "loss": 0.7248, + "step": 4124 + }, + { + "epoch": 0.12642515630746598, + "grad_norm": 2.324750513924422, + "learning_rate": 9.758172916459944e-06, + "loss": 0.8725, + "step": 4125 + }, + { + "epoch": 0.12645580483020719, + "grad_norm": 2.4850016135445387, + "learning_rate": 9.758020408200956e-06, + "loss": 0.5623, + "step": 4126 + }, + { + "epoch": 0.1264864533529484, + "grad_norm": 1.9491080618107037, + "learning_rate": 9.757867853059976e-06, + "loss": 0.6811, + "step": 4127 + }, + { + "epoch": 0.1265171018756896, + "grad_norm": 1.9899123386896436, + "learning_rate": 9.757715251038508e-06, + "loss": 0.7293, + "step": 4128 + }, + { + "epoch": 0.1265477503984308, + "grad_norm": 1.8852195815376236, + "learning_rate": 9.757562602138054e-06, + "loss": 0.7472, + "step": 4129 + }, + { + "epoch": 0.126578398921172, + 
"grad_norm": 1.8404413878276127, + "learning_rate": 9.757409906360116e-06, + "loss": 0.7235, + "step": 4130 + }, + { + "epoch": 0.12660904744391321, + "grad_norm": 1.8899256628638303, + "learning_rate": 9.757257163706203e-06, + "loss": 0.8466, + "step": 4131 + }, + { + "epoch": 0.12663969596665442, + "grad_norm": 2.109203739946818, + "learning_rate": 9.757104374177817e-06, + "loss": 0.7757, + "step": 4132 + }, + { + "epoch": 0.1266703444893956, + "grad_norm": 1.8506594729040282, + "learning_rate": 9.756951537776464e-06, + "loss": 0.7306, + "step": 4133 + }, + { + "epoch": 0.1267009930121368, + "grad_norm": 1.899324349972023, + "learning_rate": 9.756798654503652e-06, + "loss": 0.7414, + "step": 4134 + }, + { + "epoch": 0.126731641534878, + "grad_norm": 1.9234255730445213, + "learning_rate": 9.756645724360884e-06, + "loss": 0.6709, + "step": 4135 + }, + { + "epoch": 0.12676229005761921, + "grad_norm": 1.6720137033168974, + "learning_rate": 9.75649274734967e-06, + "loss": 0.7012, + "step": 4136 + }, + { + "epoch": 0.12679293858036042, + "grad_norm": 1.8169400023096633, + "learning_rate": 9.756339723471516e-06, + "loss": 0.8005, + "step": 4137 + }, + { + "epoch": 0.12682358710310163, + "grad_norm": 1.470342627711797, + "learning_rate": 9.75618665272793e-06, + "loss": 0.5504, + "step": 4138 + }, + { + "epoch": 0.12685423562584283, + "grad_norm": 1.9758341204030871, + "learning_rate": 9.75603353512042e-06, + "loss": 0.7679, + "step": 4139 + }, + { + "epoch": 0.12688488414858404, + "grad_norm": 2.2826157028384406, + "learning_rate": 9.755880370650492e-06, + "loss": 0.6831, + "step": 4140 + }, + { + "epoch": 0.12691553267132524, + "grad_norm": 1.8458910356281029, + "learning_rate": 9.755727159319661e-06, + "loss": 0.7813, + "step": 4141 + }, + { + "epoch": 0.12694618119406645, + "grad_norm": 1.9817921098211582, + "learning_rate": 9.755573901129431e-06, + "loss": 0.7181, + "step": 4142 + }, + { + "epoch": 0.12697682971680765, + "grad_norm": 1.8450518283343222, + "learning_rate": 9.755420596081316e-06, + "loss": 0.786, + "step": 4143 + }, + { + "epoch": 0.12700747823954886, + "grad_norm": 1.9468100026022637, + "learning_rate": 9.755267244176826e-06, + "loss": 0.8058, + "step": 4144 + }, + { + "epoch": 0.12703812676229007, + "grad_norm": 2.6846710452301825, + "learning_rate": 9.75511384541747e-06, + "loss": 0.8025, + "step": 4145 + }, + { + "epoch": 0.12706877528503127, + "grad_norm": 1.5807853652429626, + "learning_rate": 9.75496039980476e-06, + "loss": 0.7042, + "step": 4146 + }, + { + "epoch": 0.12709942380777248, + "grad_norm": 2.733682106131754, + "learning_rate": 9.75480690734021e-06, + "loss": 0.7589, + "step": 4147 + }, + { + "epoch": 0.12713007233051368, + "grad_norm": 1.8414533442470464, + "learning_rate": 9.75465336802533e-06, + "loss": 0.6647, + "step": 4148 + }, + { + "epoch": 0.12716072085325486, + "grad_norm": 1.9205723688179699, + "learning_rate": 9.754499781861634e-06, + "loss": 0.7278, + "step": 4149 + }, + { + "epoch": 0.12719136937599607, + "grad_norm": 1.82531644566895, + "learning_rate": 9.754346148850635e-06, + "loss": 0.7801, + "step": 4150 + }, + { + "epoch": 0.12722201789873727, + "grad_norm": 3.9581356088019817, + "learning_rate": 9.754192468993849e-06, + "loss": 0.7528, + "step": 4151 + }, + { + "epoch": 0.12725266642147848, + "grad_norm": 1.8782851107116356, + "learning_rate": 9.754038742292786e-06, + "loss": 0.7695, + "step": 4152 + }, + { + "epoch": 0.12728331494421968, + "grad_norm": 2.0124721331043482, + "learning_rate": 9.753884968748964e-06, + "loss": 0.8061, + 
"step": 4153 + }, + { + "epoch": 0.1273139634669609, + "grad_norm": 2.1637892812157515, + "learning_rate": 9.753731148363897e-06, + "loss": 0.7421, + "step": 4154 + }, + { + "epoch": 0.1273446119897021, + "grad_norm": 1.953106417277667, + "learning_rate": 9.7535772811391e-06, + "loss": 0.8363, + "step": 4155 + }, + { + "epoch": 0.1273752605124433, + "grad_norm": 1.7974536301555517, + "learning_rate": 9.753423367076088e-06, + "loss": 0.7083, + "step": 4156 + }, + { + "epoch": 0.1274059090351845, + "grad_norm": 1.904072816192806, + "learning_rate": 9.753269406176382e-06, + "loss": 0.8006, + "step": 4157 + }, + { + "epoch": 0.1274365575579257, + "grad_norm": 1.995871873302226, + "learning_rate": 9.753115398441496e-06, + "loss": 0.7491, + "step": 4158 + }, + { + "epoch": 0.12746720608066692, + "grad_norm": 1.883982231352526, + "learning_rate": 9.752961343872947e-06, + "loss": 0.7494, + "step": 4159 + }, + { + "epoch": 0.12749785460340812, + "grad_norm": 1.715339420978535, + "learning_rate": 9.752807242472255e-06, + "loss": 0.7385, + "step": 4160 + }, + { + "epoch": 0.12752850312614933, + "grad_norm": 1.7316095208566717, + "learning_rate": 9.752653094240935e-06, + "loss": 0.6732, + "step": 4161 + }, + { + "epoch": 0.12755915164889053, + "grad_norm": 1.1740354591590012, + "learning_rate": 9.752498899180512e-06, + "loss": 0.5284, + "step": 4162 + }, + { + "epoch": 0.12758980017163174, + "grad_norm": 1.8449882331582736, + "learning_rate": 9.752344657292497e-06, + "loss": 0.7102, + "step": 4163 + }, + { + "epoch": 0.12762044869437292, + "grad_norm": 0.9153786581222129, + "learning_rate": 9.752190368578415e-06, + "loss": 0.5188, + "step": 4164 + }, + { + "epoch": 0.12765109721711412, + "grad_norm": 1.9590798102102196, + "learning_rate": 9.752036033039788e-06, + "loss": 0.7693, + "step": 4165 + }, + { + "epoch": 0.12768174573985533, + "grad_norm": 0.8778040323118738, + "learning_rate": 9.751881650678132e-06, + "loss": 0.5078, + "step": 4166 + }, + { + "epoch": 0.12771239426259653, + "grad_norm": 2.076583884353312, + "learning_rate": 9.751727221494971e-06, + "loss": 0.7941, + "step": 4167 + }, + { + "epoch": 0.12774304278533774, + "grad_norm": 2.3025139742606267, + "learning_rate": 9.751572745491827e-06, + "loss": 0.8145, + "step": 4168 + }, + { + "epoch": 0.12777369130807895, + "grad_norm": 1.0289337624629065, + "learning_rate": 9.751418222670219e-06, + "loss": 0.5337, + "step": 4169 + }, + { + "epoch": 0.12780433983082015, + "grad_norm": 1.9841980171281777, + "learning_rate": 9.751263653031673e-06, + "loss": 0.8658, + "step": 4170 + }, + { + "epoch": 0.12783498835356136, + "grad_norm": 2.07607844320513, + "learning_rate": 9.751109036577709e-06, + "loss": 0.7516, + "step": 4171 + }, + { + "epoch": 0.12786563687630256, + "grad_norm": 2.1204547258015483, + "learning_rate": 9.750954373309854e-06, + "loss": 0.7925, + "step": 4172 + }, + { + "epoch": 0.12789628539904377, + "grad_norm": 1.8426074991213233, + "learning_rate": 9.750799663229627e-06, + "loss": 0.767, + "step": 4173 + }, + { + "epoch": 0.12792693392178497, + "grad_norm": 2.075884579374969, + "learning_rate": 9.750644906338559e-06, + "loss": 0.786, + "step": 4174 + }, + { + "epoch": 0.12795758244452618, + "grad_norm": 2.0826038243219442, + "learning_rate": 9.750490102638169e-06, + "loss": 0.7376, + "step": 4175 + }, + { + "epoch": 0.12798823096726739, + "grad_norm": 2.044517671461473, + "learning_rate": 9.750335252129985e-06, + "loss": 0.7064, + "step": 4176 + }, + { + "epoch": 0.1280188794900086, + "grad_norm": 1.7721234534369399, + 
"learning_rate": 9.750180354815531e-06, + "loss": 0.7282, + "step": 4177 + }, + { + "epoch": 0.1280495280127498, + "grad_norm": 1.736138345594517, + "learning_rate": 9.750025410696337e-06, + "loss": 0.777, + "step": 4178 + }, + { + "epoch": 0.128080176535491, + "grad_norm": 1.7366253086067862, + "learning_rate": 9.749870419773926e-06, + "loss": 0.7448, + "step": 4179 + }, + { + "epoch": 0.12811082505823218, + "grad_norm": 1.7949615607797078, + "learning_rate": 9.749715382049827e-06, + "loss": 0.6706, + "step": 4180 + }, + { + "epoch": 0.1281414735809734, + "grad_norm": 2.1642459039199973, + "learning_rate": 9.749560297525567e-06, + "loss": 0.7643, + "step": 4181 + }, + { + "epoch": 0.1281721221037146, + "grad_norm": 1.7470979065011887, + "learning_rate": 9.749405166202673e-06, + "loss": 0.7265, + "step": 4182 + }, + { + "epoch": 0.1282027706264558, + "grad_norm": 1.8659982782768716, + "learning_rate": 9.749249988082677e-06, + "loss": 0.7832, + "step": 4183 + }, + { + "epoch": 0.128233419149197, + "grad_norm": 1.0077753279480168, + "learning_rate": 9.749094763167104e-06, + "loss": 0.5028, + "step": 4184 + }, + { + "epoch": 0.1282640676719382, + "grad_norm": 1.626319085511487, + "learning_rate": 9.748939491457485e-06, + "loss": 0.6946, + "step": 4185 + }, + { + "epoch": 0.12829471619467941, + "grad_norm": 1.8285816514072017, + "learning_rate": 9.74878417295535e-06, + "loss": 0.6892, + "step": 4186 + }, + { + "epoch": 0.12832536471742062, + "grad_norm": 1.9676866903786934, + "learning_rate": 9.74862880766223e-06, + "loss": 0.8204, + "step": 4187 + }, + { + "epoch": 0.12835601324016183, + "grad_norm": 0.8430114979443267, + "learning_rate": 9.748473395579656e-06, + "loss": 0.5038, + "step": 4188 + }, + { + "epoch": 0.12838666176290303, + "grad_norm": 1.7181615809467377, + "learning_rate": 9.748317936709158e-06, + "loss": 0.7098, + "step": 4189 + }, + { + "epoch": 0.12841731028564424, + "grad_norm": 2.208567725787999, + "learning_rate": 9.748162431052269e-06, + "loss": 0.8129, + "step": 4190 + }, + { + "epoch": 0.12844795880838544, + "grad_norm": 1.9825345687532747, + "learning_rate": 9.74800687861052e-06, + "loss": 0.7929, + "step": 4191 + }, + { + "epoch": 0.12847860733112665, + "grad_norm": 0.8586253175764985, + "learning_rate": 9.747851279385445e-06, + "loss": 0.5165, + "step": 4192 + }, + { + "epoch": 0.12850925585386785, + "grad_norm": 1.7883260793208409, + "learning_rate": 9.747695633378576e-06, + "loss": 0.7553, + "step": 4193 + }, + { + "epoch": 0.12853990437660906, + "grad_norm": 2.1010117688283816, + "learning_rate": 9.747539940591449e-06, + "loss": 0.7358, + "step": 4194 + }, + { + "epoch": 0.12857055289935024, + "grad_norm": 1.8455471165416437, + "learning_rate": 9.747384201025593e-06, + "loss": 0.7309, + "step": 4195 + }, + { + "epoch": 0.12860120142209144, + "grad_norm": 1.6680756286365441, + "learning_rate": 9.747228414682547e-06, + "loss": 0.7294, + "step": 4196 + }, + { + "epoch": 0.12863184994483265, + "grad_norm": 1.6690536630095476, + "learning_rate": 9.747072581563845e-06, + "loss": 0.687, + "step": 4197 + }, + { + "epoch": 0.12866249846757385, + "grad_norm": 0.9151578794190413, + "learning_rate": 9.746916701671023e-06, + "loss": 0.5094, + "step": 4198 + }, + { + "epoch": 0.12869314699031506, + "grad_norm": 0.9078973089081683, + "learning_rate": 9.746760775005618e-06, + "loss": 0.5234, + "step": 4199 + }, + { + "epoch": 0.12872379551305627, + "grad_norm": 2.0262240513363374, + "learning_rate": 9.746604801569162e-06, + "loss": 0.7788, + "step": 4200 + }, + { + "epoch": 
0.12875444403579747, + "grad_norm": 1.9251148549869341, + "learning_rate": 9.746448781363194e-06, + "loss": 0.7295, + "step": 4201 + }, + { + "epoch": 0.12878509255853868, + "grad_norm": 1.8753648824842013, + "learning_rate": 9.746292714389251e-06, + "loss": 0.8024, + "step": 4202 + }, + { + "epoch": 0.12881574108127988, + "grad_norm": 1.9165797918888843, + "learning_rate": 9.746136600648874e-06, + "loss": 0.8347, + "step": 4203 + }, + { + "epoch": 0.1288463896040211, + "grad_norm": 2.1283377582383456, + "learning_rate": 9.745980440143598e-06, + "loss": 0.748, + "step": 4204 + }, + { + "epoch": 0.1288770381267623, + "grad_norm": 1.017631530392389, + "learning_rate": 9.745824232874963e-06, + "loss": 0.5141, + "step": 4205 + }, + { + "epoch": 0.1289076866495035, + "grad_norm": 1.9073968926786211, + "learning_rate": 9.745667978844506e-06, + "loss": 0.7208, + "step": 4206 + }, + { + "epoch": 0.1289383351722447, + "grad_norm": 1.9425748171485273, + "learning_rate": 9.745511678053769e-06, + "loss": 0.8476, + "step": 4207 + }, + { + "epoch": 0.1289689836949859, + "grad_norm": 2.5531552842820426, + "learning_rate": 9.74535533050429e-06, + "loss": 0.8045, + "step": 4208 + }, + { + "epoch": 0.12899963221772712, + "grad_norm": 0.8524393948004705, + "learning_rate": 9.745198936197613e-06, + "loss": 0.4783, + "step": 4209 + }, + { + "epoch": 0.12903028074046832, + "grad_norm": 0.8816481030659492, + "learning_rate": 9.745042495135275e-06, + "loss": 0.5126, + "step": 4210 + }, + { + "epoch": 0.1290609292632095, + "grad_norm": 2.031947350003892, + "learning_rate": 9.744886007318821e-06, + "loss": 0.8624, + "step": 4211 + }, + { + "epoch": 0.1290915777859507, + "grad_norm": 2.057737993348231, + "learning_rate": 9.74472947274979e-06, + "loss": 0.8657, + "step": 4212 + }, + { + "epoch": 0.1291222263086919, + "grad_norm": 2.0877379337387088, + "learning_rate": 9.744572891429725e-06, + "loss": 0.8846, + "step": 4213 + }, + { + "epoch": 0.12915287483143312, + "grad_norm": 2.525202396685265, + "learning_rate": 9.74441626336017e-06, + "loss": 0.7588, + "step": 4214 + }, + { + "epoch": 0.12918352335417432, + "grad_norm": 2.665759471368125, + "learning_rate": 9.744259588542666e-06, + "loss": 0.8283, + "step": 4215 + }, + { + "epoch": 0.12921417187691553, + "grad_norm": 2.0868755066370284, + "learning_rate": 9.744102866978761e-06, + "loss": 0.753, + "step": 4216 + }, + { + "epoch": 0.12924482039965673, + "grad_norm": 1.6663412290835704, + "learning_rate": 9.743946098669996e-06, + "loss": 0.6704, + "step": 4217 + }, + { + "epoch": 0.12927546892239794, + "grad_norm": 1.8845628284583138, + "learning_rate": 9.743789283617915e-06, + "loss": 0.7449, + "step": 4218 + }, + { + "epoch": 0.12930611744513915, + "grad_norm": 2.054978933180123, + "learning_rate": 9.743632421824065e-06, + "loss": 0.7715, + "step": 4219 + }, + { + "epoch": 0.12933676596788035, + "grad_norm": 1.9274766497962048, + "learning_rate": 9.743475513289993e-06, + "loss": 0.7243, + "step": 4220 + }, + { + "epoch": 0.12936741449062156, + "grad_norm": 1.0526700924858932, + "learning_rate": 9.743318558017242e-06, + "loss": 0.4923, + "step": 4221 + }, + { + "epoch": 0.12939806301336276, + "grad_norm": 1.8415829216134407, + "learning_rate": 9.743161556007358e-06, + "loss": 0.7533, + "step": 4222 + }, + { + "epoch": 0.12942871153610397, + "grad_norm": 1.8655542373684648, + "learning_rate": 9.74300450726189e-06, + "loss": 0.766, + "step": 4223 + }, + { + "epoch": 0.12945936005884517, + "grad_norm": 1.976126862494008, + "learning_rate": 9.742847411782385e-06, + 
"loss": 0.7312, + "step": 4224 + }, + { + "epoch": 0.12949000858158638, + "grad_norm": 1.791837110646353, + "learning_rate": 9.742690269570392e-06, + "loss": 0.7236, + "step": 4225 + }, + { + "epoch": 0.12952065710432756, + "grad_norm": 1.8100995790084788, + "learning_rate": 9.74253308062746e-06, + "loss": 0.7534, + "step": 4226 + }, + { + "epoch": 0.12955130562706876, + "grad_norm": 1.8034754154606778, + "learning_rate": 9.742375844955131e-06, + "loss": 0.7665, + "step": 4227 + }, + { + "epoch": 0.12958195414980997, + "grad_norm": 1.6598709568798133, + "learning_rate": 9.742218562554964e-06, + "loss": 0.7164, + "step": 4228 + }, + { + "epoch": 0.12961260267255117, + "grad_norm": 0.9217520984125834, + "learning_rate": 9.742061233428502e-06, + "loss": 0.5022, + "step": 4229 + }, + { + "epoch": 0.12964325119529238, + "grad_norm": 1.9354857222087931, + "learning_rate": 9.741903857577298e-06, + "loss": 0.7997, + "step": 4230 + }, + { + "epoch": 0.1296738997180336, + "grad_norm": 1.9402417380309183, + "learning_rate": 9.741746435002904e-06, + "loss": 0.7717, + "step": 4231 + }, + { + "epoch": 0.1297045482407748, + "grad_norm": 0.8346256028051151, + "learning_rate": 9.741588965706865e-06, + "loss": 0.4961, + "step": 4232 + }, + { + "epoch": 0.129735196763516, + "grad_norm": 1.6942983460384657, + "learning_rate": 9.74143144969074e-06, + "loss": 0.7534, + "step": 4233 + }, + { + "epoch": 0.1297658452862572, + "grad_norm": 1.9187347702121547, + "learning_rate": 9.741273886956077e-06, + "loss": 0.729, + "step": 4234 + }, + { + "epoch": 0.1297964938089984, + "grad_norm": 1.6929493232865136, + "learning_rate": 9.741116277504427e-06, + "loss": 0.6939, + "step": 4235 + }, + { + "epoch": 0.12982714233173961, + "grad_norm": 0.8680908866229128, + "learning_rate": 9.740958621337348e-06, + "loss": 0.5339, + "step": 4236 + }, + { + "epoch": 0.12985779085448082, + "grad_norm": 0.9280248803835275, + "learning_rate": 9.74080091845639e-06, + "loss": 0.5233, + "step": 4237 + }, + { + "epoch": 0.12988843937722203, + "grad_norm": 2.3244795352395218, + "learning_rate": 9.740643168863108e-06, + "loss": 0.8266, + "step": 4238 + }, + { + "epoch": 0.12991908789996323, + "grad_norm": 2.0708774396727945, + "learning_rate": 9.740485372559056e-06, + "loss": 0.814, + "step": 4239 + }, + { + "epoch": 0.12994973642270444, + "grad_norm": 1.8707025199608531, + "learning_rate": 9.740327529545787e-06, + "loss": 0.679, + "step": 4240 + }, + { + "epoch": 0.12998038494544564, + "grad_norm": 0.9353920382774733, + "learning_rate": 9.740169639824858e-06, + "loss": 0.5239, + "step": 4241 + }, + { + "epoch": 0.13001103346818682, + "grad_norm": 2.1347676213434097, + "learning_rate": 9.740011703397827e-06, + "loss": 0.716, + "step": 4242 + }, + { + "epoch": 0.13004168199092803, + "grad_norm": 2.0038107024271303, + "learning_rate": 9.739853720266247e-06, + "loss": 0.8316, + "step": 4243 + }, + { + "epoch": 0.13007233051366923, + "grad_norm": 2.1723081609069825, + "learning_rate": 9.739695690431674e-06, + "loss": 0.7665, + "step": 4244 + }, + { + "epoch": 0.13010297903641044, + "grad_norm": 2.0816123268297164, + "learning_rate": 9.739537613895668e-06, + "loss": 0.8078, + "step": 4245 + }, + { + "epoch": 0.13013362755915164, + "grad_norm": 1.8283764793552224, + "learning_rate": 9.739379490659786e-06, + "loss": 0.6996, + "step": 4246 + }, + { + "epoch": 0.13016427608189285, + "grad_norm": 2.0127921751372937, + "learning_rate": 9.739221320725585e-06, + "loss": 0.7954, + "step": 4247 + }, + { + "epoch": 0.13019492460463405, + "grad_norm": 
1.7384552828244122, + "learning_rate": 9.739063104094622e-06, + "loss": 0.7369, + "step": 4248 + }, + { + "epoch": 0.13022557312737526, + "grad_norm": 2.0544048440003597, + "learning_rate": 9.73890484076846e-06, + "loss": 0.7954, + "step": 4249 + }, + { + "epoch": 0.13025622165011647, + "grad_norm": 1.6885290890477809, + "learning_rate": 9.738746530748654e-06, + "loss": 0.7665, + "step": 4250 + }, + { + "epoch": 0.13028687017285767, + "grad_norm": 1.8994205481871898, + "learning_rate": 9.738588174036767e-06, + "loss": 0.6969, + "step": 4251 + }, + { + "epoch": 0.13031751869559888, + "grad_norm": 2.081166507047817, + "learning_rate": 9.738429770634359e-06, + "loss": 0.8171, + "step": 4252 + }, + { + "epoch": 0.13034816721834008, + "grad_norm": 2.0946130864393773, + "learning_rate": 9.738271320542989e-06, + "loss": 0.7483, + "step": 4253 + }, + { + "epoch": 0.1303788157410813, + "grad_norm": 1.7251600582252895, + "learning_rate": 9.738112823764219e-06, + "loss": 0.6583, + "step": 4254 + }, + { + "epoch": 0.1304094642638225, + "grad_norm": 1.8183488305424782, + "learning_rate": 9.737954280299612e-06, + "loss": 0.6909, + "step": 4255 + }, + { + "epoch": 0.1304401127865637, + "grad_norm": 1.78018238833152, + "learning_rate": 9.737795690150729e-06, + "loss": 0.6911, + "step": 4256 + }, + { + "epoch": 0.13047076130930488, + "grad_norm": 1.917547028728641, + "learning_rate": 9.737637053319133e-06, + "loss": 0.8138, + "step": 4257 + }, + { + "epoch": 0.13050140983204608, + "grad_norm": 1.9044496403314175, + "learning_rate": 9.737478369806387e-06, + "loss": 0.7394, + "step": 4258 + }, + { + "epoch": 0.1305320583547873, + "grad_norm": 1.8639277754390315, + "learning_rate": 9.737319639614053e-06, + "loss": 0.7868, + "step": 4259 + }, + { + "epoch": 0.1305627068775285, + "grad_norm": 2.2331169653450065, + "learning_rate": 9.737160862743697e-06, + "loss": 0.8021, + "step": 4260 + }, + { + "epoch": 0.1305933554002697, + "grad_norm": 1.8599978653896838, + "learning_rate": 9.737002039196884e-06, + "loss": 0.8736, + "step": 4261 + }, + { + "epoch": 0.1306240039230109, + "grad_norm": 1.8102083903887995, + "learning_rate": 9.73684316897518e-06, + "loss": 0.7883, + "step": 4262 + }, + { + "epoch": 0.1306546524457521, + "grad_norm": 1.8373755338230209, + "learning_rate": 9.736684252080145e-06, + "loss": 0.8103, + "step": 4263 + }, + { + "epoch": 0.13068530096849332, + "grad_norm": 1.7994461472813585, + "learning_rate": 9.736525288513348e-06, + "loss": 0.8352, + "step": 4264 + }, + { + "epoch": 0.13071594949123452, + "grad_norm": 1.9152036925996756, + "learning_rate": 9.736366278276355e-06, + "loss": 0.6876, + "step": 4265 + }, + { + "epoch": 0.13074659801397573, + "grad_norm": 1.8230299373074803, + "learning_rate": 9.736207221370735e-06, + "loss": 0.7948, + "step": 4266 + }, + { + "epoch": 0.13077724653671693, + "grad_norm": 0.957168982908417, + "learning_rate": 9.736048117798054e-06, + "loss": 0.5124, + "step": 4267 + }, + { + "epoch": 0.13080789505945814, + "grad_norm": 2.319161599499677, + "learning_rate": 9.735888967559877e-06, + "loss": 0.8604, + "step": 4268 + }, + { + "epoch": 0.13083854358219935, + "grad_norm": 1.9524025868035813, + "learning_rate": 9.735729770657775e-06, + "loss": 0.7474, + "step": 4269 + }, + { + "epoch": 0.13086919210494055, + "grad_norm": 0.8877787607876692, + "learning_rate": 9.735570527093316e-06, + "loss": 0.5075, + "step": 4270 + }, + { + "epoch": 0.13089984062768176, + "grad_norm": 1.859993229337721, + "learning_rate": 9.735411236868071e-06, + "loss": 0.7, + "step": 4271 + }, + 
{ + "epoch": 0.13093048915042296, + "grad_norm": 1.9221157009868217, + "learning_rate": 9.735251899983605e-06, + "loss": 0.6622, + "step": 4272 + }, + { + "epoch": 0.13096113767316414, + "grad_norm": 1.9481954833887394, + "learning_rate": 9.735092516441491e-06, + "loss": 0.8137, + "step": 4273 + }, + { + "epoch": 0.13099178619590535, + "grad_norm": 0.9485744463040388, + "learning_rate": 9.7349330862433e-06, + "loss": 0.4964, + "step": 4274 + }, + { + "epoch": 0.13102243471864655, + "grad_norm": 2.136477027844876, + "learning_rate": 9.7347736093906e-06, + "loss": 0.7814, + "step": 4275 + }, + { + "epoch": 0.13105308324138776, + "grad_norm": 1.8340848209531122, + "learning_rate": 9.734614085884967e-06, + "loss": 0.7364, + "step": 4276 + }, + { + "epoch": 0.13108373176412896, + "grad_norm": 2.2064513600268825, + "learning_rate": 9.734454515727967e-06, + "loss": 0.7709, + "step": 4277 + }, + { + "epoch": 0.13111438028687017, + "grad_norm": 1.8651839282639557, + "learning_rate": 9.734294898921175e-06, + "loss": 0.6637, + "step": 4278 + }, + { + "epoch": 0.13114502880961137, + "grad_norm": 2.1379662566029016, + "learning_rate": 9.734135235466167e-06, + "loss": 0.7937, + "step": 4279 + }, + { + "epoch": 0.13117567733235258, + "grad_norm": 28.905946873477347, + "learning_rate": 9.73397552536451e-06, + "loss": 0.7297, + "step": 4280 + }, + { + "epoch": 0.13120632585509379, + "grad_norm": 2.309230758946344, + "learning_rate": 9.733815768617784e-06, + "loss": 0.8057, + "step": 4281 + }, + { + "epoch": 0.131236974377835, + "grad_norm": 1.0523322309757919, + "learning_rate": 9.733655965227557e-06, + "loss": 0.5081, + "step": 4282 + }, + { + "epoch": 0.1312676229005762, + "grad_norm": 38.515172775793346, + "learning_rate": 9.733496115195408e-06, + "loss": 1.0114, + "step": 4283 + }, + { + "epoch": 0.1312982714233174, + "grad_norm": 2.143626448128499, + "learning_rate": 9.733336218522914e-06, + "loss": 0.771, + "step": 4284 + }, + { + "epoch": 0.1313289199460586, + "grad_norm": 2.2535873492701355, + "learning_rate": 9.733176275211643e-06, + "loss": 0.7983, + "step": 4285 + }, + { + "epoch": 0.13135956846879981, + "grad_norm": 2.2272954540954597, + "learning_rate": 9.733016285263175e-06, + "loss": 0.6542, + "step": 4286 + }, + { + "epoch": 0.13139021699154102, + "grad_norm": 2.1376354199180643, + "learning_rate": 9.732856248679088e-06, + "loss": 0.8031, + "step": 4287 + }, + { + "epoch": 0.1314208655142822, + "grad_norm": 2.275407706808748, + "learning_rate": 9.732696165460957e-06, + "loss": 0.6871, + "step": 4288 + }, + { + "epoch": 0.1314515140370234, + "grad_norm": 2.040187550126789, + "learning_rate": 9.73253603561036e-06, + "loss": 0.8951, + "step": 4289 + }, + { + "epoch": 0.1314821625597646, + "grad_norm": 2.1239740995029694, + "learning_rate": 9.732375859128876e-06, + "loss": 0.7192, + "step": 4290 + }, + { + "epoch": 0.13151281108250581, + "grad_norm": 4.999153262930207, + "learning_rate": 9.73221563601808e-06, + "loss": 0.7666, + "step": 4291 + }, + { + "epoch": 0.13154345960524702, + "grad_norm": 62.36625078452832, + "learning_rate": 9.732055366279552e-06, + "loss": 0.8158, + "step": 4292 + }, + { + "epoch": 0.13157410812798823, + "grad_norm": 10.090853879157493, + "learning_rate": 9.731895049914873e-06, + "loss": 1.0172, + "step": 4293 + }, + { + "epoch": 0.13160475665072943, + "grad_norm": 6.734872914287785, + "learning_rate": 9.73173468692562e-06, + "loss": 0.917, + "step": 4294 + }, + { + "epoch": 0.13163540517347064, + "grad_norm": 4.048774429662168, + "learning_rate": 
9.731574277313377e-06, + "loss": 0.8908, + "step": 4295 + }, + { + "epoch": 0.13166605369621184, + "grad_norm": 4.543826838008357, + "learning_rate": 9.731413821079719e-06, + "loss": 0.6144, + "step": 4296 + }, + { + "epoch": 0.13169670221895305, + "grad_norm": 3.40707915331608, + "learning_rate": 9.731253318226232e-06, + "loss": 0.7639, + "step": 4297 + }, + { + "epoch": 0.13172735074169425, + "grad_norm": 3.182877765763194, + "learning_rate": 9.731092768754496e-06, + "loss": 0.7516, + "step": 4298 + }, + { + "epoch": 0.13175799926443546, + "grad_norm": 2.3592574520444964, + "learning_rate": 9.73093217266609e-06, + "loss": 0.8002, + "step": 4299 + }, + { + "epoch": 0.13178864778717667, + "grad_norm": 2.882726555218591, + "learning_rate": 9.7307715299626e-06, + "loss": 0.9086, + "step": 4300 + }, + { + "epoch": 0.13181929630991787, + "grad_norm": 2.276410603358051, + "learning_rate": 9.73061084064561e-06, + "loss": 0.8583, + "step": 4301 + }, + { + "epoch": 0.13184994483265908, + "grad_norm": 2.478665288504387, + "learning_rate": 9.730450104716697e-06, + "loss": 0.8446, + "step": 4302 + }, + { + "epoch": 0.13188059335540028, + "grad_norm": 2.2937847305462506, + "learning_rate": 9.73028932217745e-06, + "loss": 0.7208, + "step": 4303 + }, + { + "epoch": 0.13191124187814146, + "grad_norm": 2.2953399466992606, + "learning_rate": 9.730128493029454e-06, + "loss": 0.615, + "step": 4304 + }, + { + "epoch": 0.13194189040088267, + "grad_norm": 2.310958441968829, + "learning_rate": 9.729967617274291e-06, + "loss": 0.724, + "step": 4305 + }, + { + "epoch": 0.13197253892362387, + "grad_norm": 2.1781339779108286, + "learning_rate": 9.729806694913546e-06, + "loss": 0.8702, + "step": 4306 + }, + { + "epoch": 0.13200318744636508, + "grad_norm": 2.262077142328397, + "learning_rate": 9.729645725948807e-06, + "loss": 0.8162, + "step": 4307 + }, + { + "epoch": 0.13203383596910628, + "grad_norm": 1.7836946973736219, + "learning_rate": 9.729484710381656e-06, + "loss": 0.7018, + "step": 4308 + }, + { + "epoch": 0.1320644844918475, + "grad_norm": 2.0472612371240158, + "learning_rate": 9.729323648213684e-06, + "loss": 0.7792, + "step": 4309 + }, + { + "epoch": 0.1320951330145887, + "grad_norm": 2.1927073742965018, + "learning_rate": 9.729162539446476e-06, + "loss": 0.7995, + "step": 4310 + }, + { + "epoch": 0.1321257815373299, + "grad_norm": 2.2211844655754334, + "learning_rate": 9.729001384081617e-06, + "loss": 0.8501, + "step": 4311 + }, + { + "epoch": 0.1321564300600711, + "grad_norm": 1.9911564321646955, + "learning_rate": 9.7288401821207e-06, + "loss": 0.7953, + "step": 4312 + }, + { + "epoch": 0.1321870785828123, + "grad_norm": 2.2994855570573995, + "learning_rate": 9.72867893356531e-06, + "loss": 0.7435, + "step": 4313 + }, + { + "epoch": 0.13221772710555352, + "grad_norm": 8.133818101585026, + "learning_rate": 9.728517638417037e-06, + "loss": 0.6185, + "step": 4314 + }, + { + "epoch": 0.13224837562829472, + "grad_norm": 3.191519265989249, + "learning_rate": 9.728356296677469e-06, + "loss": 0.5826, + "step": 4315 + }, + { + "epoch": 0.13227902415103593, + "grad_norm": 1.91960515461799, + "learning_rate": 9.728194908348197e-06, + "loss": 0.6292, + "step": 4316 + }, + { + "epoch": 0.13230967267377713, + "grad_norm": 1.9727360432137708, + "learning_rate": 9.728033473430812e-06, + "loss": 0.7888, + "step": 4317 + }, + { + "epoch": 0.13234032119651834, + "grad_norm": 1.950026666883897, + "learning_rate": 9.7278719919269e-06, + "loss": 0.8407, + "step": 4318 + }, + { + "epoch": 0.13237096971925952, + "grad_norm": 
2.129118079048573, + "learning_rate": 9.72771046383806e-06, + "loss": 0.846, + "step": 4319 + }, + { + "epoch": 0.13240161824200072, + "grad_norm": 2.0423971519067803, + "learning_rate": 9.727548889165876e-06, + "loss": 0.763, + "step": 4320 + }, + { + "epoch": 0.13243226676474193, + "grad_norm": 2.1332421076022485, + "learning_rate": 9.727387267911944e-06, + "loss": 0.87, + "step": 4321 + }, + { + "epoch": 0.13246291528748314, + "grad_norm": 17.326002327996076, + "learning_rate": 9.727225600077856e-06, + "loss": 0.6814, + "step": 4322 + }, + { + "epoch": 0.13249356381022434, + "grad_norm": 8.623220715275393, + "learning_rate": 9.727063885665206e-06, + "loss": 0.8702, + "step": 4323 + }, + { + "epoch": 0.13252421233296555, + "grad_norm": 5.525876269600761, + "learning_rate": 9.726902124675585e-06, + "loss": 0.9604, + "step": 4324 + }, + { + "epoch": 0.13255486085570675, + "grad_norm": 3.3991927740390873, + "learning_rate": 9.726740317110588e-06, + "loss": 0.8989, + "step": 4325 + }, + { + "epoch": 0.13258550937844796, + "grad_norm": 2.1885727426647645, + "learning_rate": 9.726578462971808e-06, + "loss": 0.7941, + "step": 4326 + }, + { + "epoch": 0.13261615790118916, + "grad_norm": 2.206438476620381, + "learning_rate": 9.726416562260842e-06, + "loss": 0.7818, + "step": 4327 + }, + { + "epoch": 0.13264680642393037, + "grad_norm": 2.508881042899862, + "learning_rate": 9.726254614979284e-06, + "loss": 0.7887, + "step": 4328 + }, + { + "epoch": 0.13267745494667157, + "grad_norm": 3.5970123489253165, + "learning_rate": 9.726092621128731e-06, + "loss": 0.8932, + "step": 4329 + }, + { + "epoch": 0.13270810346941278, + "grad_norm": 2.9588811396211825, + "learning_rate": 9.725930580710777e-06, + "loss": 0.8084, + "step": 4330 + }, + { + "epoch": 0.13273875199215399, + "grad_norm": 2.1524020664803074, + "learning_rate": 9.725768493727021e-06, + "loss": 0.7441, + "step": 4331 + }, + { + "epoch": 0.1327694005148952, + "grad_norm": 2.062229632221437, + "learning_rate": 9.725606360179058e-06, + "loss": 0.7761, + "step": 4332 + }, + { + "epoch": 0.1328000490376364, + "grad_norm": 1.9899465379966927, + "learning_rate": 9.725444180068487e-06, + "loss": 0.7249, + "step": 4333 + }, + { + "epoch": 0.1328306975603776, + "grad_norm": 1.978389803346329, + "learning_rate": 9.725281953396905e-06, + "loss": 0.7487, + "step": 4334 + }, + { + "epoch": 0.13286134608311878, + "grad_norm": 2.0960007446697584, + "learning_rate": 9.725119680165911e-06, + "loss": 0.7454, + "step": 4335 + }, + { + "epoch": 0.13289199460586, + "grad_norm": 2.4978622373399135, + "learning_rate": 9.724957360377103e-06, + "loss": 0.7571, + "step": 4336 + }, + { + "epoch": 0.1329226431286012, + "grad_norm": 1.9143040020971256, + "learning_rate": 9.724794994032082e-06, + "loss": 0.8035, + "step": 4337 + }, + { + "epoch": 0.1329532916513424, + "grad_norm": 1.9080543848389633, + "learning_rate": 9.724632581132447e-06, + "loss": 0.6231, + "step": 4338 + }, + { + "epoch": 0.1329839401740836, + "grad_norm": 2.052760319299078, + "learning_rate": 9.7244701216798e-06, + "loss": 0.8396, + "step": 4339 + }, + { + "epoch": 0.1330145886968248, + "grad_norm": 1.8563658701638526, + "learning_rate": 9.724307615675737e-06, + "loss": 0.6625, + "step": 4340 + }, + { + "epoch": 0.13304523721956601, + "grad_norm": 1.9159596286562957, + "learning_rate": 9.724145063121863e-06, + "loss": 0.7451, + "step": 4341 + }, + { + "epoch": 0.13307588574230722, + "grad_norm": 2.0423771680864093, + "learning_rate": 9.723982464019781e-06, + "loss": 0.7565, + "step": 4342 + }, + { + 
"epoch": 0.13310653426504843, + "grad_norm": 2.6672550152633216, + "learning_rate": 9.723819818371089e-06, + "loss": 0.8003, + "step": 4343 + }, + { + "epoch": 0.13313718278778963, + "grad_norm": 1.9487985424611356, + "learning_rate": 9.723657126177393e-06, + "loss": 0.79, + "step": 4344 + }, + { + "epoch": 0.13316783131053084, + "grad_norm": 2.1233275555359823, + "learning_rate": 9.723494387440295e-06, + "loss": 0.7344, + "step": 4345 + }, + { + "epoch": 0.13319847983327204, + "grad_norm": 2.098607051651493, + "learning_rate": 9.723331602161396e-06, + "loss": 0.6986, + "step": 4346 + }, + { + "epoch": 0.13322912835601325, + "grad_norm": 2.04750214121753, + "learning_rate": 9.723168770342304e-06, + "loss": 0.8887, + "step": 4347 + }, + { + "epoch": 0.13325977687875445, + "grad_norm": 2.6349447939448942, + "learning_rate": 9.723005891984622e-06, + "loss": 0.7341, + "step": 4348 + }, + { + "epoch": 0.13329042540149566, + "grad_norm": 1.9593506609949785, + "learning_rate": 9.722842967089953e-06, + "loss": 0.8015, + "step": 4349 + }, + { + "epoch": 0.13332107392423684, + "grad_norm": 1.854441918851124, + "learning_rate": 9.722679995659904e-06, + "loss": 0.7919, + "step": 4350 + }, + { + "epoch": 0.13335172244697804, + "grad_norm": 2.2694964481326374, + "learning_rate": 9.722516977696083e-06, + "loss": 0.7761, + "step": 4351 + }, + { + "epoch": 0.13338237096971925, + "grad_norm": 1.8279949352032696, + "learning_rate": 9.722353913200091e-06, + "loss": 0.8064, + "step": 4352 + }, + { + "epoch": 0.13341301949246046, + "grad_norm": 2.042466809674826, + "learning_rate": 9.72219080217354e-06, + "loss": 0.8401, + "step": 4353 + }, + { + "epoch": 0.13344366801520166, + "grad_norm": 1.7677596593035292, + "learning_rate": 9.722027644618033e-06, + "loss": 0.7647, + "step": 4354 + }, + { + "epoch": 0.13347431653794287, + "grad_norm": 7.9991340030947855, + "learning_rate": 9.72186444053518e-06, + "loss": 0.7365, + "step": 4355 + }, + { + "epoch": 0.13350496506068407, + "grad_norm": 2.102599926923018, + "learning_rate": 9.72170118992659e-06, + "loss": 0.8412, + "step": 4356 + }, + { + "epoch": 0.13353561358342528, + "grad_norm": 2.1366965479609266, + "learning_rate": 9.721537892793868e-06, + "loss": 0.798, + "step": 4357 + }, + { + "epoch": 0.13356626210616648, + "grad_norm": 2.1592725198792277, + "learning_rate": 9.721374549138626e-06, + "loss": 0.7554, + "step": 4358 + }, + { + "epoch": 0.1335969106289077, + "grad_norm": 1.5765636231098645, + "learning_rate": 9.721211158962471e-06, + "loss": 0.7303, + "step": 4359 + }, + { + "epoch": 0.1336275591516489, + "grad_norm": 1.8771518511899583, + "learning_rate": 9.721047722267016e-06, + "loss": 0.7454, + "step": 4360 + }, + { + "epoch": 0.1336582076743901, + "grad_norm": 2.4999468150951585, + "learning_rate": 9.72088423905387e-06, + "loss": 0.8387, + "step": 4361 + }, + { + "epoch": 0.1336888561971313, + "grad_norm": 3.316215734004627, + "learning_rate": 9.720720709324644e-06, + "loss": 0.7498, + "step": 4362 + }, + { + "epoch": 0.1337195047198725, + "grad_norm": 2.128388601668882, + "learning_rate": 9.720557133080948e-06, + "loss": 0.7851, + "step": 4363 + }, + { + "epoch": 0.13375015324261372, + "grad_norm": 1.8738443801440898, + "learning_rate": 9.720393510324395e-06, + "loss": 0.7611, + "step": 4364 + }, + { + "epoch": 0.13378080176535492, + "grad_norm": 2.008605194746792, + "learning_rate": 9.720229841056598e-06, + "loss": 0.8511, + "step": 4365 + }, + { + "epoch": 0.1338114502880961, + "grad_norm": 1.846418714383518, + "learning_rate": 
9.720066125279167e-06, + "loss": 0.8076, + "step": 4366 + }, + { + "epoch": 0.1338420988108373, + "grad_norm": 1.730463774377677, + "learning_rate": 9.719902362993719e-06, + "loss": 0.7125, + "step": 4367 + }, + { + "epoch": 0.1338727473335785, + "grad_norm": 38.391207760026965, + "learning_rate": 9.719738554201863e-06, + "loss": 0.702, + "step": 4368 + }, + { + "epoch": 0.13390339585631972, + "grad_norm": 2.213862948345738, + "learning_rate": 9.719574698905216e-06, + "loss": 0.8565, + "step": 4369 + }, + { + "epoch": 0.13393404437906092, + "grad_norm": 2.4186159877570144, + "learning_rate": 9.719410797105393e-06, + "loss": 0.6147, + "step": 4370 + }, + { + "epoch": 0.13396469290180213, + "grad_norm": 2.295374552642242, + "learning_rate": 9.719246848804008e-06, + "loss": 0.8533, + "step": 4371 + }, + { + "epoch": 0.13399534142454333, + "grad_norm": 1.8965441155093075, + "learning_rate": 9.719082854002675e-06, + "loss": 0.6982, + "step": 4372 + }, + { + "epoch": 0.13402598994728454, + "grad_norm": 4.2472972669098, + "learning_rate": 9.71891881270301e-06, + "loss": 0.6394, + "step": 4373 + }, + { + "epoch": 0.13405663847002575, + "grad_norm": 1.9571560518846498, + "learning_rate": 9.718754724906634e-06, + "loss": 0.8107, + "step": 4374 + }, + { + "epoch": 0.13408728699276695, + "grad_norm": 1.4163836065919577, + "learning_rate": 9.718590590615157e-06, + "loss": 0.5422, + "step": 4375 + }, + { + "epoch": 0.13411793551550816, + "grad_norm": 1.9815967177021734, + "learning_rate": 9.718426409830201e-06, + "loss": 0.8303, + "step": 4376 + }, + { + "epoch": 0.13414858403824936, + "grad_norm": 1.6773222734071884, + "learning_rate": 9.718262182553384e-06, + "loss": 0.7466, + "step": 4377 + }, + { + "epoch": 0.13417923256099057, + "grad_norm": 3.1085597586788976, + "learning_rate": 9.71809790878632e-06, + "loss": 0.5702, + "step": 4378 + }, + { + "epoch": 0.13420988108373177, + "grad_norm": 1.717942585005802, + "learning_rate": 9.717933588530632e-06, + "loss": 0.7137, + "step": 4379 + }, + { + "epoch": 0.13424052960647298, + "grad_norm": 1.7235014720470594, + "learning_rate": 9.717769221787936e-06, + "loss": 0.7103, + "step": 4380 + }, + { + "epoch": 0.13427117812921416, + "grad_norm": 1.9973212535784604, + "learning_rate": 9.717604808559854e-06, + "loss": 0.7069, + "step": 4381 + }, + { + "epoch": 0.13430182665195536, + "grad_norm": 1.8972697925305166, + "learning_rate": 9.717440348848004e-06, + "loss": 0.8695, + "step": 4382 + }, + { + "epoch": 0.13433247517469657, + "grad_norm": 1.9338506957104857, + "learning_rate": 9.717275842654006e-06, + "loss": 0.698, + "step": 4383 + }, + { + "epoch": 0.13436312369743778, + "grad_norm": 1.869138775272314, + "learning_rate": 9.717111289979484e-06, + "loss": 0.6708, + "step": 4384 + }, + { + "epoch": 0.13439377222017898, + "grad_norm": 2.2415410680427073, + "learning_rate": 9.716946690826056e-06, + "loss": 0.5562, + "step": 4385 + }, + { + "epoch": 0.1344244207429202, + "grad_norm": 1.7389616133697272, + "learning_rate": 9.716782045195348e-06, + "loss": 0.7996, + "step": 4386 + }, + { + "epoch": 0.1344550692656614, + "grad_norm": 2.016624008171407, + "learning_rate": 9.716617353088977e-06, + "loss": 0.9199, + "step": 4387 + }, + { + "epoch": 0.1344857177884026, + "grad_norm": 1.636515909576831, + "learning_rate": 9.716452614508569e-06, + "loss": 0.8546, + "step": 4388 + }, + { + "epoch": 0.1345163663111438, + "grad_norm": 1.9441487910438018, + "learning_rate": 9.716287829455748e-06, + "loss": 0.7557, + "step": 4389 + }, + { + "epoch": 0.134547014833885, + 
"grad_norm": 1.0302647681145105, + "learning_rate": 9.716122997932135e-06, + "loss": 0.5394, + "step": 4390 + }, + { + "epoch": 0.13457766335662621, + "grad_norm": 1.0832543501514242, + "learning_rate": 9.715958119939355e-06, + "loss": 0.5318, + "step": 4391 + }, + { + "epoch": 0.13460831187936742, + "grad_norm": 1.8537132937685292, + "learning_rate": 9.715793195479035e-06, + "loss": 0.759, + "step": 4392 + }, + { + "epoch": 0.13463896040210863, + "grad_norm": 3.1052153758112233, + "learning_rate": 9.715628224552795e-06, + "loss": 0.8251, + "step": 4393 + }, + { + "epoch": 0.13466960892484983, + "grad_norm": 2.0940315620435515, + "learning_rate": 9.715463207162267e-06, + "loss": 0.7708, + "step": 4394 + }, + { + "epoch": 0.13470025744759104, + "grad_norm": 1.9212176429850405, + "learning_rate": 9.71529814330907e-06, + "loss": 0.7468, + "step": 4395 + }, + { + "epoch": 0.13473090597033224, + "grad_norm": 1.8008155297729327, + "learning_rate": 9.715133032994837e-06, + "loss": 0.7273, + "step": 4396 + }, + { + "epoch": 0.13476155449307342, + "grad_norm": 1.0028262170129458, + "learning_rate": 9.71496787622119e-06, + "loss": 0.5083, + "step": 4397 + }, + { + "epoch": 0.13479220301581463, + "grad_norm": 1.883556224282165, + "learning_rate": 9.71480267298976e-06, + "loss": 0.7346, + "step": 4398 + }, + { + "epoch": 0.13482285153855583, + "grad_norm": 2.4307306186635036, + "learning_rate": 9.71463742330217e-06, + "loss": 0.7519, + "step": 4399 + }, + { + "epoch": 0.13485350006129704, + "grad_norm": 0.9916701414372555, + "learning_rate": 9.714472127160054e-06, + "loss": 0.5542, + "step": 4400 + }, + { + "epoch": 0.13488414858403824, + "grad_norm": 1.882247855940592, + "learning_rate": 9.714306784565037e-06, + "loss": 0.7052, + "step": 4401 + }, + { + "epoch": 0.13491479710677945, + "grad_norm": 1.9249132065611598, + "learning_rate": 9.714141395518748e-06, + "loss": 0.7724, + "step": 4402 + }, + { + "epoch": 0.13494544562952066, + "grad_norm": 1.7908998783296453, + "learning_rate": 9.71397596002282e-06, + "loss": 0.7175, + "step": 4403 + }, + { + "epoch": 0.13497609415226186, + "grad_norm": 2.0275400944388378, + "learning_rate": 9.713810478078878e-06, + "loss": 0.8158, + "step": 4404 + }, + { + "epoch": 0.13500674267500307, + "grad_norm": 2.0666559879920037, + "learning_rate": 9.713644949688556e-06, + "loss": 0.7738, + "step": 4405 + }, + { + "epoch": 0.13503739119774427, + "grad_norm": 1.7664049377098454, + "learning_rate": 9.713479374853486e-06, + "loss": 0.6862, + "step": 4406 + }, + { + "epoch": 0.13506803972048548, + "grad_norm": 1.65144824990935, + "learning_rate": 9.713313753575296e-06, + "loss": 0.7071, + "step": 4407 + }, + { + "epoch": 0.13509868824322668, + "grad_norm": 1.9376786012481435, + "learning_rate": 9.713148085855619e-06, + "loss": 0.7202, + "step": 4408 + }, + { + "epoch": 0.1351293367659679, + "grad_norm": 1.226724094473204, + "learning_rate": 9.71298237169609e-06, + "loss": 0.533, + "step": 4409 + }, + { + "epoch": 0.1351599852887091, + "grad_norm": 2.049356105380898, + "learning_rate": 9.712816611098339e-06, + "loss": 0.6994, + "step": 4410 + }, + { + "epoch": 0.1351906338114503, + "grad_norm": 1.3617576468909505, + "learning_rate": 9.712650804064e-06, + "loss": 0.5154, + "step": 4411 + }, + { + "epoch": 0.13522128233419148, + "grad_norm": 0.8468617563370583, + "learning_rate": 9.712484950594707e-06, + "loss": 0.5334, + "step": 4412 + }, + { + "epoch": 0.13525193085693268, + "grad_norm": 2.0536954174826465, + "learning_rate": 9.712319050692093e-06, + "loss": 0.8271, + 
"step": 4413 + }, + { + "epoch": 0.1352825793796739, + "grad_norm": 1.6297918040722232, + "learning_rate": 9.712153104357796e-06, + "loss": 0.6535, + "step": 4414 + }, + { + "epoch": 0.1353132279024151, + "grad_norm": 1.8907288003001324, + "learning_rate": 9.711987111593446e-06, + "loss": 0.8041, + "step": 4415 + }, + { + "epoch": 0.1353438764251563, + "grad_norm": 1.7958801241217897, + "learning_rate": 9.711821072400683e-06, + "loss": 0.8301, + "step": 4416 + }, + { + "epoch": 0.1353745249478975, + "grad_norm": 1.6180376562264416, + "learning_rate": 9.711654986781142e-06, + "loss": 0.6871, + "step": 4417 + }, + { + "epoch": 0.1354051734706387, + "grad_norm": 1.9227090486964815, + "learning_rate": 9.711488854736457e-06, + "loss": 0.7175, + "step": 4418 + }, + { + "epoch": 0.13543582199337992, + "grad_norm": 1.9267597068641478, + "learning_rate": 9.711322676268269e-06, + "loss": 0.7932, + "step": 4419 + }, + { + "epoch": 0.13546647051612112, + "grad_norm": 1.6978639762769645, + "learning_rate": 9.711156451378212e-06, + "loss": 0.7654, + "step": 4420 + }, + { + "epoch": 0.13549711903886233, + "grad_norm": 1.886882605504737, + "learning_rate": 9.710990180067926e-06, + "loss": 0.7503, + "step": 4421 + }, + { + "epoch": 0.13552776756160353, + "grad_norm": 1.812487448574536, + "learning_rate": 9.710823862339048e-06, + "loss": 0.7693, + "step": 4422 + }, + { + "epoch": 0.13555841608434474, + "grad_norm": 1.8275466412607622, + "learning_rate": 9.710657498193215e-06, + "loss": 0.7913, + "step": 4423 + }, + { + "epoch": 0.13558906460708595, + "grad_norm": 1.6819394089044097, + "learning_rate": 9.71049108763207e-06, + "loss": 0.7386, + "step": 4424 + }, + { + "epoch": 0.13561971312982715, + "grad_norm": 1.8991673739057116, + "learning_rate": 9.710324630657252e-06, + "loss": 0.7035, + "step": 4425 + }, + { + "epoch": 0.13565036165256836, + "grad_norm": 1.8855050012801107, + "learning_rate": 9.7101581272704e-06, + "loss": 0.7646, + "step": 4426 + }, + { + "epoch": 0.13568101017530956, + "grad_norm": 1.836772671569591, + "learning_rate": 9.709991577473154e-06, + "loss": 0.8006, + "step": 4427 + }, + { + "epoch": 0.13571165869805074, + "grad_norm": 1.9920895022295142, + "learning_rate": 9.709824981267155e-06, + "loss": 0.8233, + "step": 4428 + }, + { + "epoch": 0.13574230722079195, + "grad_norm": 1.7967135370472707, + "learning_rate": 9.709658338654046e-06, + "loss": 0.8005, + "step": 4429 + }, + { + "epoch": 0.13577295574353315, + "grad_norm": 1.7763982432254377, + "learning_rate": 9.70949164963547e-06, + "loss": 0.7329, + "step": 4430 + }, + { + "epoch": 0.13580360426627436, + "grad_norm": 1.9184599994789755, + "learning_rate": 9.709324914213068e-06, + "loss": 0.7659, + "step": 4431 + }, + { + "epoch": 0.13583425278901556, + "grad_norm": 2.34302703759835, + "learning_rate": 9.70915813238848e-06, + "loss": 0.8135, + "step": 4432 + }, + { + "epoch": 0.13586490131175677, + "grad_norm": 1.6458663743879542, + "learning_rate": 9.708991304163353e-06, + "loss": 0.7733, + "step": 4433 + }, + { + "epoch": 0.13589554983449798, + "grad_norm": 2.4920143056817037, + "learning_rate": 9.708824429539332e-06, + "loss": 0.6763, + "step": 4434 + }, + { + "epoch": 0.13592619835723918, + "grad_norm": 1.8298778583352155, + "learning_rate": 9.708657508518056e-06, + "loss": 0.7233, + "step": 4435 + }, + { + "epoch": 0.1359568468799804, + "grad_norm": 1.8892386626749809, + "learning_rate": 9.708490541101174e-06, + "loss": 0.8618, + "step": 4436 + }, + { + "epoch": 0.1359874954027216, + "grad_norm": 2.0917210294430992, + 
"learning_rate": 9.70832352729033e-06, + "loss": 0.7361, + "step": 4437 + }, + { + "epoch": 0.1360181439254628, + "grad_norm": 1.864258844868636, + "learning_rate": 9.70815646708717e-06, + "loss": 0.7729, + "step": 4438 + }, + { + "epoch": 0.136048792448204, + "grad_norm": 2.0505198753786424, + "learning_rate": 9.707989360493339e-06, + "loss": 0.7233, + "step": 4439 + }, + { + "epoch": 0.1360794409709452, + "grad_norm": 1.9495732911190298, + "learning_rate": 9.707822207510486e-06, + "loss": 0.5352, + "step": 4440 + }, + { + "epoch": 0.13611008949368641, + "grad_norm": 1.7811866704476615, + "learning_rate": 9.707655008140255e-06, + "loss": 0.7161, + "step": 4441 + }, + { + "epoch": 0.13614073801642762, + "grad_norm": 1.0469547161656636, + "learning_rate": 9.707487762384294e-06, + "loss": 0.5287, + "step": 4442 + }, + { + "epoch": 0.1361713865391688, + "grad_norm": 1.7209573535644478, + "learning_rate": 9.707320470244253e-06, + "loss": 0.6872, + "step": 4443 + }, + { + "epoch": 0.13620203506191, + "grad_norm": 1.1243535677955965, + "learning_rate": 9.707153131721777e-06, + "loss": 0.537, + "step": 4444 + }, + { + "epoch": 0.1362326835846512, + "grad_norm": 2.076487425194706, + "learning_rate": 9.706985746818519e-06, + "loss": 0.8213, + "step": 4445 + }, + { + "epoch": 0.13626333210739242, + "grad_norm": 1.9470841539368955, + "learning_rate": 9.706818315536127e-06, + "loss": 0.6997, + "step": 4446 + }, + { + "epoch": 0.13629398063013362, + "grad_norm": 1.3342028948292817, + "learning_rate": 9.706650837876246e-06, + "loss": 0.55, + "step": 4447 + }, + { + "epoch": 0.13632462915287483, + "grad_norm": 2.015957909063176, + "learning_rate": 9.706483313840533e-06, + "loss": 0.7823, + "step": 4448 + }, + { + "epoch": 0.13635527767561603, + "grad_norm": 1.55088933420369, + "learning_rate": 9.706315743430635e-06, + "loss": 0.7524, + "step": 4449 + }, + { + "epoch": 0.13638592619835724, + "grad_norm": 1.5990697082681746, + "learning_rate": 9.706148126648203e-06, + "loss": 0.6542, + "step": 4450 + }, + { + "epoch": 0.13641657472109844, + "grad_norm": 0.9360702819457677, + "learning_rate": 9.70598046349489e-06, + "loss": 0.5213, + "step": 4451 + }, + { + "epoch": 0.13644722324383965, + "grad_norm": 1.9080195639869317, + "learning_rate": 9.705812753972348e-06, + "loss": 0.6833, + "step": 4452 + }, + { + "epoch": 0.13647787176658085, + "grad_norm": 2.0576387934702725, + "learning_rate": 9.705644998082228e-06, + "loss": 0.8436, + "step": 4453 + }, + { + "epoch": 0.13650852028932206, + "grad_norm": 1.769287613096271, + "learning_rate": 9.705477195826183e-06, + "loss": 0.7394, + "step": 4454 + }, + { + "epoch": 0.13653916881206327, + "grad_norm": 2.196560242094752, + "learning_rate": 9.705309347205869e-06, + "loss": 0.7958, + "step": 4455 + }, + { + "epoch": 0.13656981733480447, + "grad_norm": 2.0151997209959993, + "learning_rate": 9.705141452222937e-06, + "loss": 0.7015, + "step": 4456 + }, + { + "epoch": 0.13660046585754568, + "grad_norm": 2.05236260013077, + "learning_rate": 9.704973510879044e-06, + "loss": 0.8075, + "step": 4457 + }, + { + "epoch": 0.13663111438028688, + "grad_norm": 1.1135420243325846, + "learning_rate": 9.704805523175842e-06, + "loss": 0.5111, + "step": 4458 + }, + { + "epoch": 0.13666176290302806, + "grad_norm": 2.1630578647772403, + "learning_rate": 9.704637489114987e-06, + "loss": 0.6595, + "step": 4459 + }, + { + "epoch": 0.13669241142576927, + "grad_norm": 1.7742686258462343, + "learning_rate": 9.704469408698136e-06, + "loss": 0.8239, + "step": 4460 + }, + { + "epoch": 
0.13672305994851047, + "grad_norm": 1.8508426665170508, + "learning_rate": 9.704301281926943e-06, + "loss": 0.6354, + "step": 4461 + }, + { + "epoch": 0.13675370847125168, + "grad_norm": 2.0854682127638573, + "learning_rate": 9.704133108803067e-06, + "loss": 0.7962, + "step": 4462 + }, + { + "epoch": 0.13678435699399288, + "grad_norm": 1.708533473587686, + "learning_rate": 9.703964889328164e-06, + "loss": 0.6628, + "step": 4463 + }, + { + "epoch": 0.1368150055167341, + "grad_norm": 1.863380040935953, + "learning_rate": 9.703796623503891e-06, + "loss": 0.7618, + "step": 4464 + }, + { + "epoch": 0.1368456540394753, + "grad_norm": 1.6551041321991629, + "learning_rate": 9.703628311331904e-06, + "loss": 0.7799, + "step": 4465 + }, + { + "epoch": 0.1368763025622165, + "grad_norm": 0.8974367186385043, + "learning_rate": 9.703459952813868e-06, + "loss": 0.5106, + "step": 4466 + }, + { + "epoch": 0.1369069510849577, + "grad_norm": 1.7097860207583526, + "learning_rate": 9.703291547951434e-06, + "loss": 0.6393, + "step": 4467 + }, + { + "epoch": 0.1369375996076989, + "grad_norm": 1.9637865872449407, + "learning_rate": 9.703123096746267e-06, + "loss": 0.7912, + "step": 4468 + }, + { + "epoch": 0.13696824813044012, + "grad_norm": 1.8076812739796644, + "learning_rate": 9.702954599200025e-06, + "loss": 0.7585, + "step": 4469 + }, + { + "epoch": 0.13699889665318132, + "grad_norm": 1.7485349193010524, + "learning_rate": 9.702786055314368e-06, + "loss": 0.6587, + "step": 4470 + }, + { + "epoch": 0.13702954517592253, + "grad_norm": 1.7575719637422815, + "learning_rate": 9.702617465090955e-06, + "loss": 0.703, + "step": 4471 + }, + { + "epoch": 0.13706019369866373, + "grad_norm": 1.883728848897849, + "learning_rate": 9.70244882853145e-06, + "loss": 0.7637, + "step": 4472 + }, + { + "epoch": 0.13709084222140494, + "grad_norm": 1.9845998719276963, + "learning_rate": 9.702280145637516e-06, + "loss": 0.7493, + "step": 4473 + }, + { + "epoch": 0.13712149074414612, + "grad_norm": 2.1570584975086895, + "learning_rate": 9.702111416410809e-06, + "loss": 0.7715, + "step": 4474 + }, + { + "epoch": 0.13715213926688732, + "grad_norm": 2.1479344153467657, + "learning_rate": 9.701942640852996e-06, + "loss": 0.8455, + "step": 4475 + }, + { + "epoch": 0.13718278778962853, + "grad_norm": 1.88836487612854, + "learning_rate": 9.70177381896574e-06, + "loss": 0.7295, + "step": 4476 + }, + { + "epoch": 0.13721343631236974, + "grad_norm": 2.2359443580211384, + "learning_rate": 9.701604950750703e-06, + "loss": 0.8044, + "step": 4477 + }, + { + "epoch": 0.13724408483511094, + "grad_norm": 1.7888826644856013, + "learning_rate": 9.701436036209549e-06, + "loss": 0.7498, + "step": 4478 + }, + { + "epoch": 0.13727473335785215, + "grad_norm": 2.014097142276588, + "learning_rate": 9.701267075343943e-06, + "loss": 0.7772, + "step": 4479 + }, + { + "epoch": 0.13730538188059335, + "grad_norm": 1.7857455678144656, + "learning_rate": 9.70109806815555e-06, + "loss": 0.8191, + "step": 4480 + }, + { + "epoch": 0.13733603040333456, + "grad_norm": 1.7860840972262582, + "learning_rate": 9.700929014646035e-06, + "loss": 0.6699, + "step": 4481 + }, + { + "epoch": 0.13736667892607576, + "grad_norm": 1.8719270586313759, + "learning_rate": 9.700759914817064e-06, + "loss": 0.6938, + "step": 4482 + }, + { + "epoch": 0.13739732744881697, + "grad_norm": 0.9887606759079871, + "learning_rate": 9.700590768670302e-06, + "loss": 0.5144, + "step": 4483 + }, + { + "epoch": 0.13742797597155818, + "grad_norm": 1.6777755675766262, + "learning_rate": 
9.700421576207417e-06, + "loss": 0.6409, + "step": 4484 + }, + { + "epoch": 0.13745862449429938, + "grad_norm": 2.02210521602702, + "learning_rate": 9.700252337430075e-06, + "loss": 0.8019, + "step": 4485 + }, + { + "epoch": 0.1374892730170406, + "grad_norm": 0.8747731465428763, + "learning_rate": 9.700083052339944e-06, + "loss": 0.5057, + "step": 4486 + }, + { + "epoch": 0.1375199215397818, + "grad_norm": 1.739879634897783, + "learning_rate": 9.699913720938694e-06, + "loss": 0.7189, + "step": 4487 + }, + { + "epoch": 0.137550570062523, + "grad_norm": 1.7831281512000337, + "learning_rate": 9.69974434322799e-06, + "loss": 0.6669, + "step": 4488 + }, + { + "epoch": 0.1375812185852642, + "grad_norm": 1.7631218358684435, + "learning_rate": 9.699574919209502e-06, + "loss": 0.7179, + "step": 4489 + }, + { + "epoch": 0.13761186710800538, + "grad_norm": 1.895531455715369, + "learning_rate": 9.6994054488849e-06, + "loss": 0.815, + "step": 4490 + }, + { + "epoch": 0.1376425156307466, + "grad_norm": 1.6469451508772281, + "learning_rate": 9.699235932255855e-06, + "loss": 0.7248, + "step": 4491 + }, + { + "epoch": 0.1376731641534878, + "grad_norm": 2.0982929762787403, + "learning_rate": 9.699066369324034e-06, + "loss": 0.6378, + "step": 4492 + }, + { + "epoch": 0.137703812676229, + "grad_norm": 1.9576665723263762, + "learning_rate": 9.698896760091112e-06, + "loss": 0.7733, + "step": 4493 + }, + { + "epoch": 0.1377344611989702, + "grad_norm": 1.902721751844345, + "learning_rate": 9.698727104558756e-06, + "loss": 0.7971, + "step": 4494 + }, + { + "epoch": 0.1377651097217114, + "grad_norm": 1.6866614786587157, + "learning_rate": 9.698557402728642e-06, + "loss": 0.7979, + "step": 4495 + }, + { + "epoch": 0.13779575824445262, + "grad_norm": 1.952069749336564, + "learning_rate": 9.698387654602437e-06, + "loss": 0.7751, + "step": 4496 + }, + { + "epoch": 0.13782640676719382, + "grad_norm": 1.014717844528978, + "learning_rate": 9.698217860181817e-06, + "loss": 0.542, + "step": 4497 + }, + { + "epoch": 0.13785705528993503, + "grad_norm": 1.8753932923150154, + "learning_rate": 9.698048019468455e-06, + "loss": 0.824, + "step": 4498 + }, + { + "epoch": 0.13788770381267623, + "grad_norm": 1.8310764853545036, + "learning_rate": 9.697878132464024e-06, + "loss": 0.7328, + "step": 4499 + }, + { + "epoch": 0.13791835233541744, + "grad_norm": 1.7884663500894944, + "learning_rate": 9.697708199170198e-06, + "loss": 0.7872, + "step": 4500 + }, + { + "epoch": 0.13794900085815864, + "grad_norm": 1.63609618379203, + "learning_rate": 9.697538219588652e-06, + "loss": 0.8256, + "step": 4501 + }, + { + "epoch": 0.13797964938089985, + "grad_norm": 1.7623001161209668, + "learning_rate": 9.697368193721057e-06, + "loss": 0.7309, + "step": 4502 + }, + { + "epoch": 0.13801029790364105, + "grad_norm": 0.7993896292923013, + "learning_rate": 9.697198121569093e-06, + "loss": 0.5019, + "step": 4503 + }, + { + "epoch": 0.13804094642638226, + "grad_norm": 2.0789954568192353, + "learning_rate": 9.697028003134434e-06, + "loss": 0.8266, + "step": 4504 + }, + { + "epoch": 0.13807159494912344, + "grad_norm": 2.090774757174744, + "learning_rate": 9.696857838418755e-06, + "loss": 0.6996, + "step": 4505 + }, + { + "epoch": 0.13810224347186464, + "grad_norm": 0.8856417728220688, + "learning_rate": 9.696687627423738e-06, + "loss": 0.544, + "step": 4506 + }, + { + "epoch": 0.13813289199460585, + "grad_norm": 1.92700621131547, + "learning_rate": 9.696517370151053e-06, + "loss": 0.7549, + "step": 4507 + }, + { + "epoch": 0.13816354051734706, + "grad_norm": 
1.9809006475231181, + "learning_rate": 9.696347066602381e-06, + "loss": 0.7996, + "step": 4508 + }, + { + "epoch": 0.13819418904008826, + "grad_norm": 1.7375697965065409, + "learning_rate": 9.6961767167794e-06, + "loss": 0.7096, + "step": 4509 + }, + { + "epoch": 0.13822483756282947, + "grad_norm": 1.957057501903984, + "learning_rate": 9.696006320683787e-06, + "loss": 0.8123, + "step": 4510 + }, + { + "epoch": 0.13825548608557067, + "grad_norm": 1.8581756609857913, + "learning_rate": 9.695835878317223e-06, + "loss": 0.7816, + "step": 4511 + }, + { + "epoch": 0.13828613460831188, + "grad_norm": 1.8211935904379264, + "learning_rate": 9.695665389681389e-06, + "loss": 0.7244, + "step": 4512 + }, + { + "epoch": 0.13831678313105308, + "grad_norm": 1.8728854037777547, + "learning_rate": 9.69549485477796e-06, + "loss": 0.6612, + "step": 4513 + }, + { + "epoch": 0.1383474316537943, + "grad_norm": 1.8562296297052183, + "learning_rate": 9.695324273608619e-06, + "loss": 0.7186, + "step": 4514 + }, + { + "epoch": 0.1383780801765355, + "grad_norm": 1.8860974282612177, + "learning_rate": 9.695153646175047e-06, + "loss": 0.7766, + "step": 4515 + }, + { + "epoch": 0.1384087286992767, + "grad_norm": 1.0197702617360107, + "learning_rate": 9.694982972478923e-06, + "loss": 0.5403, + "step": 4516 + }, + { + "epoch": 0.1384393772220179, + "grad_norm": 3.193616274260521, + "learning_rate": 9.694812252521933e-06, + "loss": 0.7394, + "step": 4517 + }, + { + "epoch": 0.1384700257447591, + "grad_norm": 1.9315484985759312, + "learning_rate": 9.694641486305756e-06, + "loss": 0.7819, + "step": 4518 + }, + { + "epoch": 0.13850067426750032, + "grad_norm": 1.8383009441786728, + "learning_rate": 9.694470673832075e-06, + "loss": 0.8371, + "step": 4519 + }, + { + "epoch": 0.13853132279024152, + "grad_norm": 2.1580423236793935, + "learning_rate": 9.694299815102572e-06, + "loss": 0.6926, + "step": 4520 + }, + { + "epoch": 0.1385619713129827, + "grad_norm": 1.916505113883462, + "learning_rate": 9.694128910118934e-06, + "loss": 0.741, + "step": 4521 + }, + { + "epoch": 0.1385926198357239, + "grad_norm": 1.8622488549410645, + "learning_rate": 9.693957958882843e-06, + "loss": 0.7374, + "step": 4522 + }, + { + "epoch": 0.1386232683584651, + "grad_norm": 0.9119258352955781, + "learning_rate": 9.693786961395982e-06, + "loss": 0.5197, + "step": 4523 + }, + { + "epoch": 0.13865391688120632, + "grad_norm": 1.9297042421131025, + "learning_rate": 9.693615917660036e-06, + "loss": 0.7231, + "step": 4524 + }, + { + "epoch": 0.13868456540394752, + "grad_norm": 1.724739070261407, + "learning_rate": 9.693444827676694e-06, + "loss": 0.7189, + "step": 4525 + }, + { + "epoch": 0.13871521392668873, + "grad_norm": 1.975594559376815, + "learning_rate": 9.693273691447637e-06, + "loss": 0.7913, + "step": 4526 + }, + { + "epoch": 0.13874586244942994, + "grad_norm": 0.843970452006816, + "learning_rate": 9.693102508974555e-06, + "loss": 0.4835, + "step": 4527 + }, + { + "epoch": 0.13877651097217114, + "grad_norm": 0.8514219831747067, + "learning_rate": 9.692931280259133e-06, + "loss": 0.5143, + "step": 4528 + }, + { + "epoch": 0.13880715949491235, + "grad_norm": 1.8177949527539492, + "learning_rate": 9.692760005303057e-06, + "loss": 0.7172, + "step": 4529 + }, + { + "epoch": 0.13883780801765355, + "grad_norm": 2.573028378590129, + "learning_rate": 9.692588684108018e-06, + "loss": 0.8848, + "step": 4530 + }, + { + "epoch": 0.13886845654039476, + "grad_norm": 0.8427184132862107, + "learning_rate": 9.6924173166757e-06, + "loss": 0.4951, + "step": 4531 + }, + 
{ + "epoch": 0.13889910506313596, + "grad_norm": 0.8624239654654199, + "learning_rate": 9.692245903007795e-06, + "loss": 0.5316, + "step": 4532 + }, + { + "epoch": 0.13892975358587717, + "grad_norm": 1.9006329792651984, + "learning_rate": 9.69207444310599e-06, + "loss": 0.7008, + "step": 4533 + }, + { + "epoch": 0.13896040210861837, + "grad_norm": 2.040045036608528, + "learning_rate": 9.691902936971975e-06, + "loss": 0.7802, + "step": 4534 + }, + { + "epoch": 0.13899105063135958, + "grad_norm": 1.8239596991445857, + "learning_rate": 9.691731384607441e-06, + "loss": 0.6783, + "step": 4535 + }, + { + "epoch": 0.13902169915410076, + "grad_norm": 1.79047823423201, + "learning_rate": 9.691559786014076e-06, + "loss": 0.7492, + "step": 4536 + }, + { + "epoch": 0.13905234767684196, + "grad_norm": 2.091987767748127, + "learning_rate": 9.691388141193571e-06, + "loss": 0.772, + "step": 4537 + }, + { + "epoch": 0.13908299619958317, + "grad_norm": 1.8693925860882536, + "learning_rate": 9.691216450147622e-06, + "loss": 0.767, + "step": 4538 + }, + { + "epoch": 0.13911364472232438, + "grad_norm": 1.037968945332575, + "learning_rate": 9.691044712877914e-06, + "loss": 0.5198, + "step": 4539 + }, + { + "epoch": 0.13914429324506558, + "grad_norm": 1.9020902454548585, + "learning_rate": 9.690872929386143e-06, + "loss": 0.7877, + "step": 4540 + }, + { + "epoch": 0.1391749417678068, + "grad_norm": 1.5689890321282969, + "learning_rate": 9.690701099674e-06, + "loss": 0.7759, + "step": 4541 + }, + { + "epoch": 0.139205590290548, + "grad_norm": 1.8506223884869804, + "learning_rate": 9.69052922374318e-06, + "loss": 0.6683, + "step": 4542 + }, + { + "epoch": 0.1392362388132892, + "grad_norm": 0.8627939996962602, + "learning_rate": 9.690357301595375e-06, + "loss": 0.5267, + "step": 4543 + }, + { + "epoch": 0.1392668873360304, + "grad_norm": 1.662718086550571, + "learning_rate": 9.690185333232278e-06, + "loss": 0.8327, + "step": 4544 + }, + { + "epoch": 0.1392975358587716, + "grad_norm": 2.0408489525168494, + "learning_rate": 9.690013318655588e-06, + "loss": 0.7894, + "step": 4545 + }, + { + "epoch": 0.13932818438151282, + "grad_norm": 1.7917950845600379, + "learning_rate": 9.689841257866994e-06, + "loss": 0.7884, + "step": 4546 + }, + { + "epoch": 0.13935883290425402, + "grad_norm": 0.8935001906430371, + "learning_rate": 9.689669150868196e-06, + "loss": 0.4839, + "step": 4547 + }, + { + "epoch": 0.13938948142699523, + "grad_norm": 2.1508889411435033, + "learning_rate": 9.689496997660887e-06, + "loss": 0.7286, + "step": 4548 + }, + { + "epoch": 0.13942012994973643, + "grad_norm": 2.358871374965612, + "learning_rate": 9.689324798246765e-06, + "loss": 0.8054, + "step": 4549 + }, + { + "epoch": 0.13945077847247764, + "grad_norm": 1.8435403755232007, + "learning_rate": 9.689152552627526e-06, + "loss": 0.7333, + "step": 4550 + }, + { + "epoch": 0.13948142699521884, + "grad_norm": 1.7799599639257386, + "learning_rate": 9.688980260804865e-06, + "loss": 0.674, + "step": 4551 + }, + { + "epoch": 0.13951207551796002, + "grad_norm": 0.8557509761856589, + "learning_rate": 9.688807922780483e-06, + "loss": 0.4916, + "step": 4552 + }, + { + "epoch": 0.13954272404070123, + "grad_norm": 1.6137992103463712, + "learning_rate": 9.688635538556079e-06, + "loss": 0.6212, + "step": 4553 + }, + { + "epoch": 0.13957337256344243, + "grad_norm": 1.9124875852428607, + "learning_rate": 9.688463108133345e-06, + "loss": 0.7687, + "step": 4554 + }, + { + "epoch": 0.13960402108618364, + "grad_norm": 1.8245492326240678, + "learning_rate": 
9.688290631513989e-06, + "loss": 0.7682, + "step": 4555 + }, + { + "epoch": 0.13963466960892484, + "grad_norm": 0.9108542822257696, + "learning_rate": 9.688118108699703e-06, + "loss": 0.5264, + "step": 4556 + }, + { + "epoch": 0.13966531813166605, + "grad_norm": 1.9596846156951737, + "learning_rate": 9.687945539692191e-06, + "loss": 0.6938, + "step": 4557 + }, + { + "epoch": 0.13969596665440726, + "grad_norm": 0.9035046231728185, + "learning_rate": 9.68777292449315e-06, + "loss": 0.53, + "step": 4558 + }, + { + "epoch": 0.13972661517714846, + "grad_norm": 0.8328674608167904, + "learning_rate": 9.687600263104287e-06, + "loss": 0.504, + "step": 4559 + }, + { + "epoch": 0.13975726369988967, + "grad_norm": 2.0496166912372606, + "learning_rate": 9.687427555527296e-06, + "loss": 0.7784, + "step": 4560 + }, + { + "epoch": 0.13978791222263087, + "grad_norm": 1.8574214632218584, + "learning_rate": 9.687254801763883e-06, + "loss": 0.7058, + "step": 4561 + }, + { + "epoch": 0.13981856074537208, + "grad_norm": 1.8906147235129158, + "learning_rate": 9.687082001815749e-06, + "loss": 0.7594, + "step": 4562 + }, + { + "epoch": 0.13984920926811328, + "grad_norm": 1.626571276329655, + "learning_rate": 9.686909155684596e-06, + "loss": 0.7809, + "step": 4563 + }, + { + "epoch": 0.1398798577908545, + "grad_norm": 1.9944846625238144, + "learning_rate": 9.68673626337213e-06, + "loss": 0.749, + "step": 4564 + }, + { + "epoch": 0.1399105063135957, + "grad_norm": 0.9821265583894822, + "learning_rate": 9.68656332488005e-06, + "loss": 0.5222, + "step": 4565 + }, + { + "epoch": 0.1399411548363369, + "grad_norm": 1.918619289500038, + "learning_rate": 9.686390340210064e-06, + "loss": 0.7878, + "step": 4566 + }, + { + "epoch": 0.13997180335907808, + "grad_norm": 2.243386450926643, + "learning_rate": 9.686217309363875e-06, + "loss": 0.7671, + "step": 4567 + }, + { + "epoch": 0.14000245188181928, + "grad_norm": 1.8515884313556517, + "learning_rate": 9.686044232343186e-06, + "loss": 0.7671, + "step": 4568 + }, + { + "epoch": 0.1400331004045605, + "grad_norm": 1.890448567960161, + "learning_rate": 9.685871109149706e-06, + "loss": 0.8004, + "step": 4569 + }, + { + "epoch": 0.1400637489273017, + "grad_norm": 1.885493053675027, + "learning_rate": 9.68569793978514e-06, + "loss": 0.6923, + "step": 4570 + }, + { + "epoch": 0.1400943974500429, + "grad_norm": 1.8419056156292881, + "learning_rate": 9.68552472425119e-06, + "loss": 0.7398, + "step": 4571 + }, + { + "epoch": 0.1401250459727841, + "grad_norm": 1.9710324288187853, + "learning_rate": 9.685351462549568e-06, + "loss": 0.8632, + "step": 4572 + }, + { + "epoch": 0.1401556944955253, + "grad_norm": 1.7808038412756524, + "learning_rate": 9.68517815468198e-06, + "loss": 0.723, + "step": 4573 + }, + { + "epoch": 0.14018634301826652, + "grad_norm": 0.8797137747595021, + "learning_rate": 9.68500480065013e-06, + "loss": 0.5198, + "step": 4574 + }, + { + "epoch": 0.14021699154100772, + "grad_norm": 1.9385943518272317, + "learning_rate": 9.684831400455731e-06, + "loss": 0.6555, + "step": 4575 + }, + { + "epoch": 0.14024764006374893, + "grad_norm": 2.0997365752539, + "learning_rate": 9.684657954100492e-06, + "loss": 0.7762, + "step": 4576 + }, + { + "epoch": 0.14027828858649014, + "grad_norm": 1.7624322611930856, + "learning_rate": 9.684484461586117e-06, + "loss": 0.599, + "step": 4577 + }, + { + "epoch": 0.14030893710923134, + "grad_norm": 1.9861423067025354, + "learning_rate": 9.684310922914318e-06, + "loss": 0.6133, + "step": 4578 + }, + { + "epoch": 0.14033958563197255, + 
"grad_norm": 1.6821564027264786, + "learning_rate": 9.684137338086805e-06, + "loss": 0.7454, + "step": 4579 + }, + { + "epoch": 0.14037023415471375, + "grad_norm": 1.9364285176938536, + "learning_rate": 9.683963707105288e-06, + "loss": 0.7441, + "step": 4580 + }, + { + "epoch": 0.14040088267745496, + "grad_norm": 1.884311395059368, + "learning_rate": 9.683790029971478e-06, + "loss": 0.6699, + "step": 4581 + }, + { + "epoch": 0.14043153120019616, + "grad_norm": 1.9470127051233839, + "learning_rate": 9.683616306687086e-06, + "loss": 0.7729, + "step": 4582 + }, + { + "epoch": 0.14046217972293734, + "grad_norm": 0.8531954229152967, + "learning_rate": 9.683442537253826e-06, + "loss": 0.5269, + "step": 4583 + }, + { + "epoch": 0.14049282824567855, + "grad_norm": 2.0019363357390616, + "learning_rate": 9.683268721673408e-06, + "loss": 0.7614, + "step": 4584 + }, + { + "epoch": 0.14052347676841975, + "grad_norm": 0.8425241503343531, + "learning_rate": 9.683094859947544e-06, + "loss": 0.5131, + "step": 4585 + }, + { + "epoch": 0.14055412529116096, + "grad_norm": 1.7296819502725604, + "learning_rate": 9.68292095207795e-06, + "loss": 0.7638, + "step": 4586 + }, + { + "epoch": 0.14058477381390216, + "grad_norm": 1.7420223144418805, + "learning_rate": 9.682746998066335e-06, + "loss": 0.8076, + "step": 4587 + }, + { + "epoch": 0.14061542233664337, + "grad_norm": 0.843698794419924, + "learning_rate": 9.682572997914417e-06, + "loss": 0.5077, + "step": 4588 + }, + { + "epoch": 0.14064607085938458, + "grad_norm": 2.1061120215507305, + "learning_rate": 9.68239895162391e-06, + "loss": 0.8442, + "step": 4589 + }, + { + "epoch": 0.14067671938212578, + "grad_norm": 1.8627347903271168, + "learning_rate": 9.682224859196528e-06, + "loss": 0.6824, + "step": 4590 + }, + { + "epoch": 0.140707367904867, + "grad_norm": 2.2559528834545612, + "learning_rate": 9.682050720633985e-06, + "loss": 0.6732, + "step": 4591 + }, + { + "epoch": 0.1407380164276082, + "grad_norm": 1.8328484475643534, + "learning_rate": 9.681876535937999e-06, + "loss": 0.7609, + "step": 4592 + }, + { + "epoch": 0.1407686649503494, + "grad_norm": 1.855891260478722, + "learning_rate": 9.681702305110285e-06, + "loss": 0.6991, + "step": 4593 + }, + { + "epoch": 0.1407993134730906, + "grad_norm": 1.8769519534946322, + "learning_rate": 9.681528028152562e-06, + "loss": 0.7869, + "step": 4594 + }, + { + "epoch": 0.1408299619958318, + "grad_norm": 1.8746128640097124, + "learning_rate": 9.681353705066544e-06, + "loss": 0.7861, + "step": 4595 + }, + { + "epoch": 0.14086061051857302, + "grad_norm": 2.0228895934172266, + "learning_rate": 9.681179335853951e-06, + "loss": 0.6836, + "step": 4596 + }, + { + "epoch": 0.14089125904131422, + "grad_norm": 0.9488712801208768, + "learning_rate": 9.6810049205165e-06, + "loss": 0.5239, + "step": 4597 + }, + { + "epoch": 0.14092190756405543, + "grad_norm": 1.831176847434413, + "learning_rate": 9.68083045905591e-06, + "loss": 0.8046, + "step": 4598 + }, + { + "epoch": 0.1409525560867966, + "grad_norm": 1.7998335690184608, + "learning_rate": 9.6806559514739e-06, + "loss": 0.6496, + "step": 4599 + }, + { + "epoch": 0.1409832046095378, + "grad_norm": 0.8861339810612073, + "learning_rate": 9.680481397772187e-06, + "loss": 0.4924, + "step": 4600 + }, + { + "epoch": 0.14101385313227902, + "grad_norm": 1.7632967407333813, + "learning_rate": 9.680306797952496e-06, + "loss": 0.783, + "step": 4601 + }, + { + "epoch": 0.14104450165502022, + "grad_norm": 1.80633611896751, + "learning_rate": 9.680132152016544e-06, + "loss": 0.8134, + "step": 
4602 + }, + { + "epoch": 0.14107515017776143, + "grad_norm": 1.712487440215308, + "learning_rate": 9.679957459966053e-06, + "loss": 0.6812, + "step": 4603 + }, + { + "epoch": 0.14110579870050263, + "grad_norm": 1.9856585571553766, + "learning_rate": 9.679782721802742e-06, + "loss": 0.7541, + "step": 4604 + }, + { + "epoch": 0.14113644722324384, + "grad_norm": 1.93091310059048, + "learning_rate": 9.679607937528335e-06, + "loss": 0.7048, + "step": 4605 + }, + { + "epoch": 0.14116709574598504, + "grad_norm": 1.737932091502883, + "learning_rate": 9.679433107144555e-06, + "loss": 0.7089, + "step": 4606 + }, + { + "epoch": 0.14119774426872625, + "grad_norm": 1.8391266730387914, + "learning_rate": 9.679258230653122e-06, + "loss": 0.8052, + "step": 4607 + }, + { + "epoch": 0.14122839279146746, + "grad_norm": 2.069645103326678, + "learning_rate": 9.679083308055761e-06, + "loss": 0.7564, + "step": 4608 + }, + { + "epoch": 0.14125904131420866, + "grad_norm": 1.88398870297276, + "learning_rate": 9.678908339354194e-06, + "loss": 0.6966, + "step": 4609 + }, + { + "epoch": 0.14128968983694987, + "grad_norm": 1.9465968751083556, + "learning_rate": 9.678733324550147e-06, + "loss": 0.7268, + "step": 4610 + }, + { + "epoch": 0.14132033835969107, + "grad_norm": 1.030919981604569, + "learning_rate": 9.678558263645343e-06, + "loss": 0.5037, + "step": 4611 + }, + { + "epoch": 0.14135098688243228, + "grad_norm": 1.9195886698435929, + "learning_rate": 9.678383156641507e-06, + "loss": 0.8167, + "step": 4612 + }, + { + "epoch": 0.14138163540517348, + "grad_norm": 1.8731609053549816, + "learning_rate": 9.678208003540366e-06, + "loss": 0.7087, + "step": 4613 + }, + { + "epoch": 0.14141228392791466, + "grad_norm": 0.865671046180436, + "learning_rate": 9.678032804343644e-06, + "loss": 0.4922, + "step": 4614 + }, + { + "epoch": 0.14144293245065587, + "grad_norm": 1.6317859820613836, + "learning_rate": 9.677857559053068e-06, + "loss": 0.7001, + "step": 4615 + }, + { + "epoch": 0.14147358097339707, + "grad_norm": 1.7784017800913616, + "learning_rate": 9.677682267670365e-06, + "loss": 0.7724, + "step": 4616 + }, + { + "epoch": 0.14150422949613828, + "grad_norm": 1.8380640210624268, + "learning_rate": 9.677506930197261e-06, + "loss": 0.7218, + "step": 4617 + }, + { + "epoch": 0.14153487801887948, + "grad_norm": 0.9738149228882358, + "learning_rate": 9.677331546635483e-06, + "loss": 0.5082, + "step": 4618 + }, + { + "epoch": 0.1415655265416207, + "grad_norm": 1.7596671080022428, + "learning_rate": 9.677156116986764e-06, + "loss": 0.699, + "step": 4619 + }, + { + "epoch": 0.1415961750643619, + "grad_norm": 1.975701320092645, + "learning_rate": 9.676980641252826e-06, + "loss": 0.7197, + "step": 4620 + }, + { + "epoch": 0.1416268235871031, + "grad_norm": 1.8598784552685064, + "learning_rate": 9.676805119435402e-06, + "loss": 0.6532, + "step": 4621 + }, + { + "epoch": 0.1416574721098443, + "grad_norm": 0.8244020670882576, + "learning_rate": 9.676629551536221e-06, + "loss": 0.4965, + "step": 4622 + }, + { + "epoch": 0.1416881206325855, + "grad_norm": 2.0195131878397894, + "learning_rate": 9.676453937557013e-06, + "loss": 0.7406, + "step": 4623 + }, + { + "epoch": 0.14171876915532672, + "grad_norm": 2.0035635349472676, + "learning_rate": 9.676278277499507e-06, + "loss": 0.7198, + "step": 4624 + }, + { + "epoch": 0.14174941767806792, + "grad_norm": 1.9516702152393814, + "learning_rate": 9.676102571365433e-06, + "loss": 0.7879, + "step": 4625 + }, + { + "epoch": 0.14178006620080913, + "grad_norm": 0.8556250947610435, + 
"learning_rate": 9.675926819156527e-06, + "loss": 0.5198, + "step": 4626 + }, + { + "epoch": 0.14181071472355034, + "grad_norm": 1.7408121372608478, + "learning_rate": 9.675751020874516e-06, + "loss": 0.6728, + "step": 4627 + }, + { + "epoch": 0.14184136324629154, + "grad_norm": 2.028642828376145, + "learning_rate": 9.675575176521134e-06, + "loss": 0.7434, + "step": 4628 + }, + { + "epoch": 0.14187201176903275, + "grad_norm": 1.9600697375309037, + "learning_rate": 9.675399286098113e-06, + "loss": 0.7534, + "step": 4629 + }, + { + "epoch": 0.14190266029177392, + "grad_norm": 2.1909587011004197, + "learning_rate": 9.675223349607187e-06, + "loss": 0.7603, + "step": 4630 + }, + { + "epoch": 0.14193330881451513, + "grad_norm": 2.0226806983853, + "learning_rate": 9.67504736705009e-06, + "loss": 0.8849, + "step": 4631 + }, + { + "epoch": 0.14196395733725634, + "grad_norm": 1.7877952176413714, + "learning_rate": 9.674871338428555e-06, + "loss": 0.7335, + "step": 4632 + }, + { + "epoch": 0.14199460585999754, + "grad_norm": 1.719591335600712, + "learning_rate": 9.674695263744315e-06, + "loss": 0.7267, + "step": 4633 + }, + { + "epoch": 0.14202525438273875, + "grad_norm": 1.8683838759671463, + "learning_rate": 9.674519142999108e-06, + "loss": 0.636, + "step": 4634 + }, + { + "epoch": 0.14205590290547995, + "grad_norm": 1.8163765833855932, + "learning_rate": 9.674342976194667e-06, + "loss": 0.7298, + "step": 4635 + }, + { + "epoch": 0.14208655142822116, + "grad_norm": 1.8365100837528556, + "learning_rate": 9.67416676333273e-06, + "loss": 0.7256, + "step": 4636 + }, + { + "epoch": 0.14211719995096236, + "grad_norm": 1.8245516160155941, + "learning_rate": 9.67399050441503e-06, + "loss": 0.6826, + "step": 4637 + }, + { + "epoch": 0.14214784847370357, + "grad_norm": 1.5522859890594094, + "learning_rate": 9.673814199443308e-06, + "loss": 0.6933, + "step": 4638 + }, + { + "epoch": 0.14217849699644478, + "grad_norm": 2.2014966282106103, + "learning_rate": 9.673637848419297e-06, + "loss": 0.914, + "step": 4639 + }, + { + "epoch": 0.14220914551918598, + "grad_norm": 2.0626387096918015, + "learning_rate": 9.673461451344736e-06, + "loss": 0.7283, + "step": 4640 + }, + { + "epoch": 0.1422397940419272, + "grad_norm": 0.9939850829578208, + "learning_rate": 9.673285008221364e-06, + "loss": 0.495, + "step": 4641 + }, + { + "epoch": 0.1422704425646684, + "grad_norm": 1.9852037339377415, + "learning_rate": 9.673108519050922e-06, + "loss": 0.6911, + "step": 4642 + }, + { + "epoch": 0.1423010910874096, + "grad_norm": 1.8674590002570668, + "learning_rate": 9.672931983835143e-06, + "loss": 0.6949, + "step": 4643 + }, + { + "epoch": 0.1423317396101508, + "grad_norm": 2.1323422103307834, + "learning_rate": 9.672755402575771e-06, + "loss": 0.6454, + "step": 4644 + }, + { + "epoch": 0.14236238813289198, + "grad_norm": 1.7967238226662534, + "learning_rate": 9.672578775274543e-06, + "loss": 0.778, + "step": 4645 + }, + { + "epoch": 0.1423930366556332, + "grad_norm": 2.1340588020195215, + "learning_rate": 9.672402101933201e-06, + "loss": 0.8214, + "step": 4646 + }, + { + "epoch": 0.1424236851783744, + "grad_norm": 1.8889542173467422, + "learning_rate": 9.672225382553486e-06, + "loss": 0.7177, + "step": 4647 + }, + { + "epoch": 0.1424543337011156, + "grad_norm": 1.7134926663204497, + "learning_rate": 9.67204861713714e-06, + "loss": 0.763, + "step": 4648 + }, + { + "epoch": 0.1424849822238568, + "grad_norm": 2.113972366028378, + "learning_rate": 9.671871805685902e-06, + "loss": 0.7563, + "step": 4649 + }, + { + "epoch": 
0.142515630746598, + "grad_norm": 1.6736234533083243, + "learning_rate": 9.671694948201517e-06, + "loss": 0.6907, + "step": 4650 + }, + { + "epoch": 0.14254627926933922, + "grad_norm": 1.857281029513005, + "learning_rate": 9.671518044685726e-06, + "loss": 0.7744, + "step": 4651 + }, + { + "epoch": 0.14257692779208042, + "grad_norm": 1.7437852200306623, + "learning_rate": 9.671341095140273e-06, + "loss": 0.7173, + "step": 4652 + }, + { + "epoch": 0.14260757631482163, + "grad_norm": 2.0781555646326266, + "learning_rate": 9.671164099566898e-06, + "loss": 0.8261, + "step": 4653 + }, + { + "epoch": 0.14263822483756283, + "grad_norm": 1.957297785973761, + "learning_rate": 9.67098705796735e-06, + "loss": 0.7483, + "step": 4654 + }, + { + "epoch": 0.14266887336030404, + "grad_norm": 2.0783909681519575, + "learning_rate": 9.670809970343372e-06, + "loss": 0.7584, + "step": 4655 + }, + { + "epoch": 0.14269952188304524, + "grad_norm": 1.8573239340553396, + "learning_rate": 9.670632836696707e-06, + "loss": 0.792, + "step": 4656 + }, + { + "epoch": 0.14273017040578645, + "grad_norm": 1.9689135528675448, + "learning_rate": 9.670455657029104e-06, + "loss": 0.7884, + "step": 4657 + }, + { + "epoch": 0.14276081892852766, + "grad_norm": 1.7468187800251964, + "learning_rate": 9.670278431342304e-06, + "loss": 0.6768, + "step": 4658 + }, + { + "epoch": 0.14279146745126886, + "grad_norm": 1.65153295453724, + "learning_rate": 9.670101159638057e-06, + "loss": 0.8801, + "step": 4659 + }, + { + "epoch": 0.14282211597401007, + "grad_norm": 1.5839926455895212, + "learning_rate": 9.669923841918107e-06, + "loss": 0.6247, + "step": 4660 + }, + { + "epoch": 0.14285276449675124, + "grad_norm": 1.775213242786564, + "learning_rate": 9.669746478184204e-06, + "loss": 0.7786, + "step": 4661 + }, + { + "epoch": 0.14288341301949245, + "grad_norm": 1.9544642850604157, + "learning_rate": 9.669569068438094e-06, + "loss": 0.8323, + "step": 4662 + }, + { + "epoch": 0.14291406154223366, + "grad_norm": 1.5603070940400945, + "learning_rate": 9.669391612681524e-06, + "loss": 0.6469, + "step": 4663 + }, + { + "epoch": 0.14294471006497486, + "grad_norm": 1.695815206499114, + "learning_rate": 9.669214110916246e-06, + "loss": 0.6611, + "step": 4664 + }, + { + "epoch": 0.14297535858771607, + "grad_norm": 1.9753775160929192, + "learning_rate": 9.669036563144004e-06, + "loss": 0.7884, + "step": 4665 + }, + { + "epoch": 0.14300600711045727, + "grad_norm": 2.0281529123093938, + "learning_rate": 9.668858969366551e-06, + "loss": 0.8299, + "step": 4666 + }, + { + "epoch": 0.14303665563319848, + "grad_norm": 1.9590287018036652, + "learning_rate": 9.668681329585637e-06, + "loss": 0.8571, + "step": 4667 + }, + { + "epoch": 0.14306730415593968, + "grad_norm": 1.8575420418934425, + "learning_rate": 9.668503643803011e-06, + "loss": 0.7368, + "step": 4668 + }, + { + "epoch": 0.1430979526786809, + "grad_norm": 1.8099258704948955, + "learning_rate": 9.668325912020424e-06, + "loss": 0.64, + "step": 4669 + }, + { + "epoch": 0.1431286012014221, + "grad_norm": 1.62294929359585, + "learning_rate": 9.668148134239626e-06, + "loss": 0.7472, + "step": 4670 + }, + { + "epoch": 0.1431592497241633, + "grad_norm": 1.6929187633625438, + "learning_rate": 9.66797031046237e-06, + "loss": 0.5805, + "step": 4671 + }, + { + "epoch": 0.1431898982469045, + "grad_norm": 1.930209266036854, + "learning_rate": 9.667792440690411e-06, + "loss": 0.657, + "step": 4672 + }, + { + "epoch": 0.1432205467696457, + "grad_norm": 1.2239155458863866, + "learning_rate": 9.667614524925496e-06, + 
"loss": 0.5101, + "step": 4673 + }, + { + "epoch": 0.14325119529238692, + "grad_norm": 1.863843828159239, + "learning_rate": 9.667436563169383e-06, + "loss": 0.7611, + "step": 4674 + }, + { + "epoch": 0.14328184381512812, + "grad_norm": 1.857161908681275, + "learning_rate": 9.667258555423822e-06, + "loss": 0.7124, + "step": 4675 + }, + { + "epoch": 0.1433124923378693, + "grad_norm": 1.7545571025432851, + "learning_rate": 9.667080501690569e-06, + "loss": 0.773, + "step": 4676 + }, + { + "epoch": 0.1433431408606105, + "grad_norm": 2.1096632778220705, + "learning_rate": 9.666902401971377e-06, + "loss": 0.7391, + "step": 4677 + }, + { + "epoch": 0.1433737893833517, + "grad_norm": 1.7952127664577544, + "learning_rate": 9.666724256268001e-06, + "loss": 0.8465, + "step": 4678 + }, + { + "epoch": 0.14340443790609292, + "grad_norm": 0.9771357159568739, + "learning_rate": 9.666546064582199e-06, + "loss": 0.5052, + "step": 4679 + }, + { + "epoch": 0.14343508642883412, + "grad_norm": 1.8825856459834605, + "learning_rate": 9.666367826915723e-06, + "loss": 0.6944, + "step": 4680 + }, + { + "epoch": 0.14346573495157533, + "grad_norm": 1.7322624001992266, + "learning_rate": 9.66618954327033e-06, + "loss": 0.7724, + "step": 4681 + }, + { + "epoch": 0.14349638347431654, + "grad_norm": 1.8422284543238134, + "learning_rate": 9.66601121364778e-06, + "loss": 0.678, + "step": 4682 + }, + { + "epoch": 0.14352703199705774, + "grad_norm": 1.8195953736235655, + "learning_rate": 9.665832838049826e-06, + "loss": 0.7226, + "step": 4683 + }, + { + "epoch": 0.14355768051979895, + "grad_norm": 2.1116232152922163, + "learning_rate": 9.665654416478227e-06, + "loss": 0.7684, + "step": 4684 + }, + { + "epoch": 0.14358832904254015, + "grad_norm": 1.952736553599624, + "learning_rate": 9.665475948934742e-06, + "loss": 0.7807, + "step": 4685 + }, + { + "epoch": 0.14361897756528136, + "grad_norm": 2.145494905126868, + "learning_rate": 9.665297435421128e-06, + "loss": 0.7926, + "step": 4686 + }, + { + "epoch": 0.14364962608802256, + "grad_norm": 1.8466519959988428, + "learning_rate": 9.665118875939145e-06, + "loss": 0.7791, + "step": 4687 + }, + { + "epoch": 0.14368027461076377, + "grad_norm": 1.9683922995655576, + "learning_rate": 9.664940270490553e-06, + "loss": 0.8328, + "step": 4688 + }, + { + "epoch": 0.14371092313350498, + "grad_norm": 1.8309870644372435, + "learning_rate": 9.66476161907711e-06, + "loss": 0.7934, + "step": 4689 + }, + { + "epoch": 0.14374157165624618, + "grad_norm": 1.7583012718157836, + "learning_rate": 9.664582921700578e-06, + "loss": 0.6461, + "step": 4690 + }, + { + "epoch": 0.1437722201789874, + "grad_norm": 1.686586213718502, + "learning_rate": 9.664404178362715e-06, + "loss": 0.7678, + "step": 4691 + }, + { + "epoch": 0.14380286870172856, + "grad_norm": 1.9014915101697882, + "learning_rate": 9.664225389065287e-06, + "loss": 0.744, + "step": 4692 + }, + { + "epoch": 0.14383351722446977, + "grad_norm": 1.8492374837791392, + "learning_rate": 9.664046553810051e-06, + "loss": 0.7294, + "step": 4693 + }, + { + "epoch": 0.14386416574721098, + "grad_norm": 1.8300894762203772, + "learning_rate": 9.663867672598772e-06, + "loss": 0.7274, + "step": 4694 + }, + { + "epoch": 0.14389481426995218, + "grad_norm": 1.9175106229090442, + "learning_rate": 9.663688745433211e-06, + "loss": 0.8758, + "step": 4695 + }, + { + "epoch": 0.1439254627926934, + "grad_norm": 1.0354430605970493, + "learning_rate": 9.663509772315132e-06, + "loss": 0.511, + "step": 4696 + }, + { + "epoch": 0.1439561113154346, + "grad_norm": 
2.0524594619405443, + "learning_rate": 9.663330753246298e-06, + "loss": 0.7958, + "step": 4697 + }, + { + "epoch": 0.1439867598381758, + "grad_norm": 2.06551121521444, + "learning_rate": 9.663151688228473e-06, + "loss": 0.7572, + "step": 4698 + }, + { + "epoch": 0.144017408360917, + "grad_norm": 0.8713116167698426, + "learning_rate": 9.66297257726342e-06, + "loss": 0.5245, + "step": 4699 + }, + { + "epoch": 0.1440480568836582, + "grad_norm": 1.6227332237548318, + "learning_rate": 9.662793420352906e-06, + "loss": 0.7143, + "step": 4700 + }, + { + "epoch": 0.14407870540639942, + "grad_norm": 1.6829449448299216, + "learning_rate": 9.662614217498696e-06, + "loss": 0.757, + "step": 4701 + }, + { + "epoch": 0.14410935392914062, + "grad_norm": 0.8512769550722208, + "learning_rate": 9.662434968702554e-06, + "loss": 0.5215, + "step": 4702 + }, + { + "epoch": 0.14414000245188183, + "grad_norm": 1.9150628890880192, + "learning_rate": 9.662255673966248e-06, + "loss": 0.8362, + "step": 4703 + }, + { + "epoch": 0.14417065097462303, + "grad_norm": 1.7547084389108967, + "learning_rate": 9.662076333291543e-06, + "loss": 0.7504, + "step": 4704 + }, + { + "epoch": 0.14420129949736424, + "grad_norm": 1.925788996459963, + "learning_rate": 9.66189694668021e-06, + "loss": 0.6677, + "step": 4705 + }, + { + "epoch": 0.14423194802010544, + "grad_norm": 1.8054288310120827, + "learning_rate": 9.66171751413401e-06, + "loss": 0.7975, + "step": 4706 + }, + { + "epoch": 0.14426259654284662, + "grad_norm": 2.0079427746776077, + "learning_rate": 9.661538035654716e-06, + "loss": 0.7447, + "step": 4707 + }, + { + "epoch": 0.14429324506558783, + "grad_norm": 1.8451064529634773, + "learning_rate": 9.661358511244095e-06, + "loss": 0.5374, + "step": 4708 + }, + { + "epoch": 0.14432389358832903, + "grad_norm": 1.8142984639381763, + "learning_rate": 9.661178940903916e-06, + "loss": 0.7106, + "step": 4709 + }, + { + "epoch": 0.14435454211107024, + "grad_norm": 1.0360440383402303, + "learning_rate": 9.660999324635948e-06, + "loss": 0.5157, + "step": 4710 + }, + { + "epoch": 0.14438519063381144, + "grad_norm": 1.938630759119246, + "learning_rate": 9.660819662441962e-06, + "loss": 0.669, + "step": 4711 + }, + { + "epoch": 0.14441583915655265, + "grad_norm": 1.8076424151260648, + "learning_rate": 9.660639954323726e-06, + "loss": 0.683, + "step": 4712 + }, + { + "epoch": 0.14444648767929386, + "grad_norm": 1.636902969227355, + "learning_rate": 9.660460200283013e-06, + "loss": 0.642, + "step": 4713 + }, + { + "epoch": 0.14447713620203506, + "grad_norm": 1.852610409515035, + "learning_rate": 9.660280400321593e-06, + "loss": 0.826, + "step": 4714 + }, + { + "epoch": 0.14450778472477627, + "grad_norm": 1.6472018521004055, + "learning_rate": 9.660100554441237e-06, + "loss": 0.6542, + "step": 4715 + }, + { + "epoch": 0.14453843324751747, + "grad_norm": 1.9575035375650358, + "learning_rate": 9.659920662643719e-06, + "loss": 0.7379, + "step": 4716 + }, + { + "epoch": 0.14456908177025868, + "grad_norm": 1.8365431419633476, + "learning_rate": 9.659740724930811e-06, + "loss": 0.8083, + "step": 4717 + }, + { + "epoch": 0.14459973029299988, + "grad_norm": 1.8984757058629764, + "learning_rate": 9.659560741304284e-06, + "loss": 0.7696, + "step": 4718 + }, + { + "epoch": 0.1446303788157411, + "grad_norm": 1.6557298038948969, + "learning_rate": 9.659380711765914e-06, + "loss": 0.7059, + "step": 4719 + }, + { + "epoch": 0.1446610273384823, + "grad_norm": 1.6873061482197285, + "learning_rate": 9.659200636317471e-06, + "loss": 0.7535, + "step": 4720 + }, + 
{ + "epoch": 0.1446916758612235, + "grad_norm": 1.886527514261137, + "learning_rate": 9.659020514960736e-06, + "loss": 0.8157, + "step": 4721 + }, + { + "epoch": 0.1447223243839647, + "grad_norm": 1.7006888763428387, + "learning_rate": 9.658840347697476e-06, + "loss": 0.7372, + "step": 4722 + }, + { + "epoch": 0.14475297290670588, + "grad_norm": 1.9189869538669622, + "learning_rate": 9.658660134529473e-06, + "loss": 0.7018, + "step": 4723 + }, + { + "epoch": 0.1447836214294471, + "grad_norm": 1.6846500292125064, + "learning_rate": 9.658479875458497e-06, + "loss": 0.7356, + "step": 4724 + }, + { + "epoch": 0.1448142699521883, + "grad_norm": 0.9713962895058946, + "learning_rate": 9.658299570486328e-06, + "loss": 0.5074, + "step": 4725 + }, + { + "epoch": 0.1448449184749295, + "grad_norm": 1.705558850176301, + "learning_rate": 9.658119219614744e-06, + "loss": 0.6341, + "step": 4726 + }, + { + "epoch": 0.1448755669976707, + "grad_norm": 1.5879906621768416, + "learning_rate": 9.657938822845517e-06, + "loss": 0.7854, + "step": 4727 + }, + { + "epoch": 0.1449062155204119, + "grad_norm": 1.6811498844726889, + "learning_rate": 9.657758380180426e-06, + "loss": 0.7545, + "step": 4728 + }, + { + "epoch": 0.14493686404315312, + "grad_norm": 1.9450757019303284, + "learning_rate": 9.657577891621252e-06, + "loss": 0.7854, + "step": 4729 + }, + { + "epoch": 0.14496751256589432, + "grad_norm": 0.8806277576743071, + "learning_rate": 9.65739735716977e-06, + "loss": 0.5171, + "step": 4730 + }, + { + "epoch": 0.14499816108863553, + "grad_norm": 1.6530752629957275, + "learning_rate": 9.657216776827763e-06, + "loss": 0.6672, + "step": 4731 + }, + { + "epoch": 0.14502880961137674, + "grad_norm": 1.8600346468249391, + "learning_rate": 9.657036150597004e-06, + "loss": 0.7, + "step": 4732 + }, + { + "epoch": 0.14505945813411794, + "grad_norm": 1.6579641057540273, + "learning_rate": 9.656855478479279e-06, + "loss": 0.7421, + "step": 4733 + }, + { + "epoch": 0.14509010665685915, + "grad_norm": 0.9436203736355935, + "learning_rate": 9.656674760476364e-06, + "loss": 0.4881, + "step": 4734 + }, + { + "epoch": 0.14512075517960035, + "grad_norm": 1.996369739579787, + "learning_rate": 9.65649399659004e-06, + "loss": 0.8144, + "step": 4735 + }, + { + "epoch": 0.14515140370234156, + "grad_norm": 1.757501986298747, + "learning_rate": 9.65631318682209e-06, + "loss": 0.7828, + "step": 4736 + }, + { + "epoch": 0.14518205222508276, + "grad_norm": 1.6659179669479773, + "learning_rate": 9.656132331174297e-06, + "loss": 0.7751, + "step": 4737 + }, + { + "epoch": 0.14521270074782394, + "grad_norm": 1.6455611551022535, + "learning_rate": 9.655951429648438e-06, + "loss": 0.7048, + "step": 4738 + }, + { + "epoch": 0.14524334927056515, + "grad_norm": 1.9727936966084625, + "learning_rate": 9.655770482246299e-06, + "loss": 0.8402, + "step": 4739 + }, + { + "epoch": 0.14527399779330635, + "grad_norm": 1.6642830974204603, + "learning_rate": 9.655589488969663e-06, + "loss": 0.6523, + "step": 4740 + }, + { + "epoch": 0.14530464631604756, + "grad_norm": 1.9298863563697801, + "learning_rate": 9.655408449820312e-06, + "loss": 0.6724, + "step": 4741 + }, + { + "epoch": 0.14533529483878876, + "grad_norm": 1.8924275533845918, + "learning_rate": 9.65522736480003e-06, + "loss": 0.7538, + "step": 4742 + }, + { + "epoch": 0.14536594336152997, + "grad_norm": 1.9981647043997304, + "learning_rate": 9.655046233910604e-06, + "loss": 0.697, + "step": 4743 + }, + { + "epoch": 0.14539659188427118, + "grad_norm": 1.8086142401363892, + "learning_rate": 
9.654865057153813e-06, + "loss": 0.7328, + "step": 4744 + }, + { + "epoch": 0.14542724040701238, + "grad_norm": 1.7538960902799114, + "learning_rate": 9.654683834531447e-06, + "loss": 0.6811, + "step": 4745 + }, + { + "epoch": 0.1454578889297536, + "grad_norm": 1.8714953530448353, + "learning_rate": 9.654502566045292e-06, + "loss": 0.7491, + "step": 4746 + }, + { + "epoch": 0.1454885374524948, + "grad_norm": 1.6403333523251495, + "learning_rate": 9.65432125169713e-06, + "loss": 0.6878, + "step": 4747 + }, + { + "epoch": 0.145519185975236, + "grad_norm": 1.7724410669594495, + "learning_rate": 9.654139891488751e-06, + "loss": 0.7164, + "step": 4748 + }, + { + "epoch": 0.1455498344979772, + "grad_norm": 1.940772975951159, + "learning_rate": 9.653958485421939e-06, + "loss": 0.7773, + "step": 4749 + }, + { + "epoch": 0.1455804830207184, + "grad_norm": 0.9923025245179172, + "learning_rate": 9.653777033498485e-06, + "loss": 0.5296, + "step": 4750 + }, + { + "epoch": 0.14561113154345962, + "grad_norm": 1.0060793508444512, + "learning_rate": 9.653595535720175e-06, + "loss": 0.5084, + "step": 4751 + }, + { + "epoch": 0.14564178006620082, + "grad_norm": 0.8629340322022764, + "learning_rate": 9.653413992088798e-06, + "loss": 0.5211, + "step": 4752 + }, + { + "epoch": 0.14567242858894203, + "grad_norm": 1.9201203277663443, + "learning_rate": 9.653232402606142e-06, + "loss": 0.7527, + "step": 4753 + }, + { + "epoch": 0.1457030771116832, + "grad_norm": 1.0125772437582239, + "learning_rate": 9.653050767273996e-06, + "loss": 0.5163, + "step": 4754 + }, + { + "epoch": 0.1457337256344244, + "grad_norm": 2.0864410677916303, + "learning_rate": 9.65286908609415e-06, + "loss": 0.7, + "step": 4755 + }, + { + "epoch": 0.14576437415716562, + "grad_norm": 1.9450347913757888, + "learning_rate": 9.652687359068396e-06, + "loss": 0.8739, + "step": 4756 + }, + { + "epoch": 0.14579502267990682, + "grad_norm": 1.7985121007660752, + "learning_rate": 9.652505586198523e-06, + "loss": 0.7331, + "step": 4757 + }, + { + "epoch": 0.14582567120264803, + "grad_norm": 1.7866407669566706, + "learning_rate": 9.65232376748632e-06, + "loss": 0.6951, + "step": 4758 + }, + { + "epoch": 0.14585631972538923, + "grad_norm": 1.6642236148229765, + "learning_rate": 9.65214190293358e-06, + "loss": 0.8072, + "step": 4759 + }, + { + "epoch": 0.14588696824813044, + "grad_norm": 1.8555198903716426, + "learning_rate": 9.651959992542097e-06, + "loss": 0.748, + "step": 4760 + }, + { + "epoch": 0.14591761677087164, + "grad_norm": 1.6429112611229075, + "learning_rate": 9.651778036313664e-06, + "loss": 0.7049, + "step": 4761 + }, + { + "epoch": 0.14594826529361285, + "grad_norm": 1.7016729950795106, + "learning_rate": 9.651596034250069e-06, + "loss": 0.8274, + "step": 4762 + }, + { + "epoch": 0.14597891381635406, + "grad_norm": 1.870714900146296, + "learning_rate": 9.651413986353109e-06, + "loss": 0.7792, + "step": 4763 + }, + { + "epoch": 0.14600956233909526, + "grad_norm": 1.1133266016293648, + "learning_rate": 9.651231892624577e-06, + "loss": 0.5195, + "step": 4764 + }, + { + "epoch": 0.14604021086183647, + "grad_norm": 1.939600911557928, + "learning_rate": 9.651049753066267e-06, + "loss": 0.7225, + "step": 4765 + }, + { + "epoch": 0.14607085938457767, + "grad_norm": 2.0100370421261937, + "learning_rate": 9.650867567679973e-06, + "loss": 0.7564, + "step": 4766 + }, + { + "epoch": 0.14610150790731888, + "grad_norm": 1.904026108397537, + "learning_rate": 9.650685336467492e-06, + "loss": 0.7672, + "step": 4767 + }, + { + "epoch": 0.14613215643006008, + 
"grad_norm": 1.8053409879527638, + "learning_rate": 9.650503059430618e-06, + "loss": 0.7899, + "step": 4768 + }, + { + "epoch": 0.14616280495280126, + "grad_norm": 1.8206336629293773, + "learning_rate": 9.650320736571146e-06, + "loss": 0.6994, + "step": 4769 + }, + { + "epoch": 0.14619345347554247, + "grad_norm": 1.6204515268421693, + "learning_rate": 9.650138367890876e-06, + "loss": 0.679, + "step": 4770 + }, + { + "epoch": 0.14622410199828367, + "grad_norm": 1.5516582253265725, + "learning_rate": 9.649955953391603e-06, + "loss": 0.6537, + "step": 4771 + }, + { + "epoch": 0.14625475052102488, + "grad_norm": 1.8637705402822673, + "learning_rate": 9.649773493075122e-06, + "loss": 0.7724, + "step": 4772 + }, + { + "epoch": 0.14628539904376608, + "grad_norm": 1.7262790174318796, + "learning_rate": 9.649590986943236e-06, + "loss": 0.7358, + "step": 4773 + }, + { + "epoch": 0.1463160475665073, + "grad_norm": 1.8572890298572622, + "learning_rate": 9.649408434997739e-06, + "loss": 0.7091, + "step": 4774 + }, + { + "epoch": 0.1463466960892485, + "grad_norm": 1.650012978049179, + "learning_rate": 9.64922583724043e-06, + "loss": 0.7984, + "step": 4775 + }, + { + "epoch": 0.1463773446119897, + "grad_norm": 1.61542332458243, + "learning_rate": 9.64904319367311e-06, + "loss": 0.6746, + "step": 4776 + }, + { + "epoch": 0.1464079931347309, + "grad_norm": 1.9071003911259083, + "learning_rate": 9.64886050429758e-06, + "loss": 0.7579, + "step": 4777 + }, + { + "epoch": 0.1464386416574721, + "grad_norm": 1.790161531733155, + "learning_rate": 9.648677769115637e-06, + "loss": 0.7087, + "step": 4778 + }, + { + "epoch": 0.14646929018021332, + "grad_norm": 1.8771342324129219, + "learning_rate": 9.64849498812908e-06, + "loss": 0.7036, + "step": 4779 + }, + { + "epoch": 0.14649993870295452, + "grad_norm": 2.0335186391408526, + "learning_rate": 9.648312161339715e-06, + "loss": 0.6817, + "step": 4780 + }, + { + "epoch": 0.14653058722569573, + "grad_norm": 1.8465907522527274, + "learning_rate": 9.64812928874934e-06, + "loss": 0.6658, + "step": 4781 + }, + { + "epoch": 0.14656123574843694, + "grad_norm": 1.8074348909788422, + "learning_rate": 9.64794637035976e-06, + "loss": 0.7183, + "step": 4782 + }, + { + "epoch": 0.14659188427117814, + "grad_norm": 1.7786921825652362, + "learning_rate": 9.647763406172772e-06, + "loss": 0.7619, + "step": 4783 + }, + { + "epoch": 0.14662253279391935, + "grad_norm": 1.86088531606038, + "learning_rate": 9.647580396190184e-06, + "loss": 0.7845, + "step": 4784 + }, + { + "epoch": 0.14665318131666052, + "grad_norm": 1.774833932765458, + "learning_rate": 9.647397340413795e-06, + "loss": 0.7578, + "step": 4785 + }, + { + "epoch": 0.14668382983940173, + "grad_norm": 1.829787150709969, + "learning_rate": 9.647214238845413e-06, + "loss": 0.7354, + "step": 4786 + }, + { + "epoch": 0.14671447836214294, + "grad_norm": 2.0141013058356565, + "learning_rate": 9.647031091486838e-06, + "loss": 0.6565, + "step": 4787 + }, + { + "epoch": 0.14674512688488414, + "grad_norm": 1.8322675147896257, + "learning_rate": 9.64684789833988e-06, + "loss": 0.8347, + "step": 4788 + }, + { + "epoch": 0.14677577540762535, + "grad_norm": 1.9235266688855912, + "learning_rate": 9.646664659406339e-06, + "loss": 0.7691, + "step": 4789 + }, + { + "epoch": 0.14680642393036655, + "grad_norm": 2.011585946042271, + "learning_rate": 9.646481374688022e-06, + "loss": 0.7702, + "step": 4790 + }, + { + "epoch": 0.14683707245310776, + "grad_norm": 1.7625973771478838, + "learning_rate": 9.646298044186735e-06, + "loss": 0.7906, + "step": 
4791 + }, + { + "epoch": 0.14686772097584896, + "grad_norm": 2.0319328741991467, + "learning_rate": 9.646114667904285e-06, + "loss": 0.7244, + "step": 4792 + }, + { + "epoch": 0.14689836949859017, + "grad_norm": 1.7125005520109458, + "learning_rate": 9.645931245842477e-06, + "loss": 0.6987, + "step": 4793 + }, + { + "epoch": 0.14692901802133138, + "grad_norm": 1.6870086136998261, + "learning_rate": 9.64574777800312e-06, + "loss": 0.748, + "step": 4794 + }, + { + "epoch": 0.14695966654407258, + "grad_norm": 1.7492896628577936, + "learning_rate": 9.645564264388024e-06, + "loss": 0.6444, + "step": 4795 + }, + { + "epoch": 0.1469903150668138, + "grad_norm": 1.9831633168544007, + "learning_rate": 9.645380704998993e-06, + "loss": 0.7371, + "step": 4796 + }, + { + "epoch": 0.147020963589555, + "grad_norm": 1.9268115976364952, + "learning_rate": 9.645197099837838e-06, + "loss": 0.7969, + "step": 4797 + }, + { + "epoch": 0.1470516121122962, + "grad_norm": 1.8844733493472108, + "learning_rate": 9.645013448906366e-06, + "loss": 0.7296, + "step": 4798 + }, + { + "epoch": 0.1470822606350374, + "grad_norm": 1.9642720622094652, + "learning_rate": 9.644829752206388e-06, + "loss": 0.8145, + "step": 4799 + }, + { + "epoch": 0.14711290915777858, + "grad_norm": 2.0156125832622696, + "learning_rate": 9.644646009739715e-06, + "loss": 0.888, + "step": 4800 + }, + { + "epoch": 0.1471435576805198, + "grad_norm": 2.1792958253478076, + "learning_rate": 9.644462221508157e-06, + "loss": 0.8768, + "step": 4801 + }, + { + "epoch": 0.147174206203261, + "grad_norm": 1.0702166707081038, + "learning_rate": 9.644278387513525e-06, + "loss": 0.5347, + "step": 4802 + }, + { + "epoch": 0.1472048547260022, + "grad_norm": 0.989173366516463, + "learning_rate": 9.644094507757627e-06, + "loss": 0.5187, + "step": 4803 + }, + { + "epoch": 0.1472355032487434, + "grad_norm": 1.8289903391509017, + "learning_rate": 9.643910582242279e-06, + "loss": 0.745, + "step": 4804 + }, + { + "epoch": 0.1472661517714846, + "grad_norm": 1.950525709750812, + "learning_rate": 9.643726610969293e-06, + "loss": 0.8165, + "step": 4805 + }, + { + "epoch": 0.14729680029422582, + "grad_norm": 2.1918974448823296, + "learning_rate": 9.643542593940478e-06, + "loss": 0.7809, + "step": 4806 + }, + { + "epoch": 0.14732744881696702, + "grad_norm": 1.864567093465016, + "learning_rate": 9.643358531157651e-06, + "loss": 0.8164, + "step": 4807 + }, + { + "epoch": 0.14735809733970823, + "grad_norm": 1.1346430176574782, + "learning_rate": 9.643174422622625e-06, + "loss": 0.5003, + "step": 4808 + }, + { + "epoch": 0.14738874586244943, + "grad_norm": 1.9005465752306572, + "learning_rate": 9.642990268337214e-06, + "loss": 0.8311, + "step": 4809 + }, + { + "epoch": 0.14741939438519064, + "grad_norm": 1.0949475366454546, + "learning_rate": 9.642806068303229e-06, + "loss": 0.5242, + "step": 4810 + }, + { + "epoch": 0.14745004290793184, + "grad_norm": 1.874701892777097, + "learning_rate": 9.642621822522491e-06, + "loss": 0.7319, + "step": 4811 + }, + { + "epoch": 0.14748069143067305, + "grad_norm": 1.955354367567878, + "learning_rate": 9.642437530996812e-06, + "loss": 0.7236, + "step": 4812 + }, + { + "epoch": 0.14751133995341426, + "grad_norm": 0.8536021513461448, + "learning_rate": 9.642253193728006e-06, + "loss": 0.4979, + "step": 4813 + }, + { + "epoch": 0.14754198847615546, + "grad_norm": 0.8616514565354272, + "learning_rate": 9.642068810717893e-06, + "loss": 0.5042, + "step": 4814 + }, + { + "epoch": 0.14757263699889667, + "grad_norm": 2.63370978939868, + "learning_rate": 
9.641884381968289e-06, + "loss": 0.782, + "step": 4815 + }, + { + "epoch": 0.14760328552163784, + "grad_norm": 1.9128056241908307, + "learning_rate": 9.641699907481008e-06, + "loss": 0.7586, + "step": 4816 + }, + { + "epoch": 0.14763393404437905, + "grad_norm": 2.2733318738338, + "learning_rate": 9.641515387257873e-06, + "loss": 0.7373, + "step": 4817 + }, + { + "epoch": 0.14766458256712026, + "grad_norm": 1.9349762970737299, + "learning_rate": 9.641330821300697e-06, + "loss": 0.8024, + "step": 4818 + }, + { + "epoch": 0.14769523108986146, + "grad_norm": 2.0274248744046828, + "learning_rate": 9.641146209611302e-06, + "loss": 0.8002, + "step": 4819 + }, + { + "epoch": 0.14772587961260267, + "grad_norm": 1.0027983295738125, + "learning_rate": 9.640961552191505e-06, + "loss": 0.5075, + "step": 4820 + }, + { + "epoch": 0.14775652813534387, + "grad_norm": 1.7418068558136603, + "learning_rate": 9.640776849043128e-06, + "loss": 0.7175, + "step": 4821 + }, + { + "epoch": 0.14778717665808508, + "grad_norm": 2.1776896254831266, + "learning_rate": 9.640592100167989e-06, + "loss": 0.6836, + "step": 4822 + }, + { + "epoch": 0.14781782518082628, + "grad_norm": 1.785648077115751, + "learning_rate": 9.640407305567907e-06, + "loss": 0.723, + "step": 4823 + }, + { + "epoch": 0.1478484737035675, + "grad_norm": 2.2510173739730464, + "learning_rate": 9.640222465244706e-06, + "loss": 0.7535, + "step": 4824 + }, + { + "epoch": 0.1478791222263087, + "grad_norm": 1.9200111196049368, + "learning_rate": 9.640037579200206e-06, + "loss": 0.7493, + "step": 4825 + }, + { + "epoch": 0.1479097707490499, + "grad_norm": 1.7128677446286404, + "learning_rate": 9.63985264743623e-06, + "loss": 0.7349, + "step": 4826 + }, + { + "epoch": 0.1479404192717911, + "grad_norm": 1.9102459605328392, + "learning_rate": 9.639667669954596e-06, + "loss": 0.6674, + "step": 4827 + }, + { + "epoch": 0.1479710677945323, + "grad_norm": 1.8171706243509302, + "learning_rate": 9.63948264675713e-06, + "loss": 0.817, + "step": 4828 + }, + { + "epoch": 0.14800171631727352, + "grad_norm": 1.761660204003078, + "learning_rate": 9.639297577845654e-06, + "loss": 0.7409, + "step": 4829 + }, + { + "epoch": 0.14803236484001472, + "grad_norm": 1.8098961361998671, + "learning_rate": 9.639112463221994e-06, + "loss": 0.6279, + "step": 4830 + }, + { + "epoch": 0.1480630133627559, + "grad_norm": 1.861801535150556, + "learning_rate": 9.638927302887968e-06, + "loss": 0.8065, + "step": 4831 + }, + { + "epoch": 0.1480936618854971, + "grad_norm": 1.9414158411650115, + "learning_rate": 9.638742096845408e-06, + "loss": 0.7434, + "step": 4832 + }, + { + "epoch": 0.1481243104082383, + "grad_norm": 1.8357741099781189, + "learning_rate": 9.638556845096134e-06, + "loss": 0.7347, + "step": 4833 + }, + { + "epoch": 0.14815495893097952, + "grad_norm": 1.8161813870539019, + "learning_rate": 9.638371547641972e-06, + "loss": 0.7318, + "step": 4834 + }, + { + "epoch": 0.14818560745372072, + "grad_norm": 2.9692579072730845, + "learning_rate": 9.63818620448475e-06, + "loss": 0.8766, + "step": 4835 + }, + { + "epoch": 0.14821625597646193, + "grad_norm": 1.9598949999615665, + "learning_rate": 9.638000815626292e-06, + "loss": 0.7765, + "step": 4836 + }, + { + "epoch": 0.14824690449920314, + "grad_norm": 1.9513263084875467, + "learning_rate": 9.637815381068424e-06, + "loss": 0.7654, + "step": 4837 + }, + { + "epoch": 0.14827755302194434, + "grad_norm": 1.908693007539495, + "learning_rate": 9.637629900812975e-06, + "loss": 0.6912, + "step": 4838 + }, + { + "epoch": 0.14830820154468555, + 
"grad_norm": 1.7850047277983903, + "learning_rate": 9.637444374861774e-06, + "loss": 0.8176, + "step": 4839 + }, + { + "epoch": 0.14833885006742675, + "grad_norm": 1.8421093523244976, + "learning_rate": 9.637258803216643e-06, + "loss": 0.7301, + "step": 4840 + }, + { + "epoch": 0.14836949859016796, + "grad_norm": 0.9980811687051077, + "learning_rate": 9.637073185879418e-06, + "loss": 0.5358, + "step": 4841 + }, + { + "epoch": 0.14840014711290916, + "grad_norm": 0.9133807607342997, + "learning_rate": 9.636887522851924e-06, + "loss": 0.5417, + "step": 4842 + }, + { + "epoch": 0.14843079563565037, + "grad_norm": 1.9086102856318414, + "learning_rate": 9.63670181413599e-06, + "loss": 0.7473, + "step": 4843 + }, + { + "epoch": 0.14846144415839158, + "grad_norm": 1.8576704616565254, + "learning_rate": 9.636516059733445e-06, + "loss": 0.7279, + "step": 4844 + }, + { + "epoch": 0.14849209268113278, + "grad_norm": 2.2371026696345773, + "learning_rate": 9.636330259646122e-06, + "loss": 0.6991, + "step": 4845 + }, + { + "epoch": 0.148522741203874, + "grad_norm": 2.1074713432357464, + "learning_rate": 9.636144413875852e-06, + "loss": 0.6844, + "step": 4846 + }, + { + "epoch": 0.14855338972661516, + "grad_norm": 1.790848593718653, + "learning_rate": 9.635958522424464e-06, + "loss": 0.7383, + "step": 4847 + }, + { + "epoch": 0.14858403824935637, + "grad_norm": 1.8249828082557231, + "learning_rate": 9.635772585293792e-06, + "loss": 0.682, + "step": 4848 + }, + { + "epoch": 0.14861468677209758, + "grad_norm": 1.092532767176621, + "learning_rate": 9.635586602485665e-06, + "loss": 0.5199, + "step": 4849 + }, + { + "epoch": 0.14864533529483878, + "grad_norm": 0.9691615142719298, + "learning_rate": 9.635400574001918e-06, + "loss": 0.522, + "step": 4850 + }, + { + "epoch": 0.14867598381758, + "grad_norm": 0.7630978535627854, + "learning_rate": 9.635214499844383e-06, + "loss": 0.4899, + "step": 4851 + }, + { + "epoch": 0.1487066323403212, + "grad_norm": 2.321207024941387, + "learning_rate": 9.635028380014893e-06, + "loss": 0.7923, + "step": 4852 + }, + { + "epoch": 0.1487372808630624, + "grad_norm": 1.9624361442548472, + "learning_rate": 9.634842214515283e-06, + "loss": 0.7952, + "step": 4853 + }, + { + "epoch": 0.1487679293858036, + "grad_norm": 1.7319050453274083, + "learning_rate": 9.634656003347387e-06, + "loss": 0.6617, + "step": 4854 + }, + { + "epoch": 0.1487985779085448, + "grad_norm": 1.1661195594335076, + "learning_rate": 9.634469746513038e-06, + "loss": 0.5199, + "step": 4855 + }, + { + "epoch": 0.14882922643128602, + "grad_norm": 1.70202382774432, + "learning_rate": 9.634283444014075e-06, + "loss": 0.6678, + "step": 4856 + }, + { + "epoch": 0.14885987495402722, + "grad_norm": 1.8517555093824078, + "learning_rate": 9.63409709585233e-06, + "loss": 0.7249, + "step": 4857 + }, + { + "epoch": 0.14889052347676843, + "grad_norm": 2.070510369126959, + "learning_rate": 9.633910702029641e-06, + "loss": 0.837, + "step": 4858 + }, + { + "epoch": 0.14892117199950963, + "grad_norm": 1.8295925057649591, + "learning_rate": 9.633724262547843e-06, + "loss": 0.7249, + "step": 4859 + }, + { + "epoch": 0.14895182052225084, + "grad_norm": 1.9562720566473224, + "learning_rate": 9.633537777408777e-06, + "loss": 0.6856, + "step": 4860 + }, + { + "epoch": 0.14898246904499204, + "grad_norm": 0.9674533718111031, + "learning_rate": 9.633351246614275e-06, + "loss": 0.5123, + "step": 4861 + }, + { + "epoch": 0.14901311756773322, + "grad_norm": 1.6587814949821522, + "learning_rate": 9.633164670166179e-06, + "loss": 0.6947, + 
"step": 4862 + }, + { + "epoch": 0.14904376609047443, + "grad_norm": 1.68888003351922, + "learning_rate": 9.632978048066325e-06, + "loss": 0.7087, + "step": 4863 + }, + { + "epoch": 0.14907441461321563, + "grad_norm": 1.9158510367191572, + "learning_rate": 9.632791380316556e-06, + "loss": 0.7664, + "step": 4864 + }, + { + "epoch": 0.14910506313595684, + "grad_norm": 1.7377329270192177, + "learning_rate": 9.632604666918705e-06, + "loss": 0.8081, + "step": 4865 + }, + { + "epoch": 0.14913571165869804, + "grad_norm": 1.8156427358413616, + "learning_rate": 9.632417907874617e-06, + "loss": 0.7368, + "step": 4866 + }, + { + "epoch": 0.14916636018143925, + "grad_norm": 1.6997369996345522, + "learning_rate": 9.632231103186128e-06, + "loss": 0.7878, + "step": 4867 + }, + { + "epoch": 0.14919700870418046, + "grad_norm": 1.6423591608426962, + "learning_rate": 9.632044252855082e-06, + "loss": 0.7558, + "step": 4868 + }, + { + "epoch": 0.14922765722692166, + "grad_norm": 1.7874722121528934, + "learning_rate": 9.631857356883318e-06, + "loss": 0.6612, + "step": 4869 + }, + { + "epoch": 0.14925830574966287, + "grad_norm": 1.8827345916212987, + "learning_rate": 9.631670415272679e-06, + "loss": 0.8335, + "step": 4870 + }, + { + "epoch": 0.14928895427240407, + "grad_norm": 1.676737068670265, + "learning_rate": 9.631483428025007e-06, + "loss": 0.7127, + "step": 4871 + }, + { + "epoch": 0.14931960279514528, + "grad_norm": 1.9144567647657256, + "learning_rate": 9.631296395142142e-06, + "loss": 0.7549, + "step": 4872 + }, + { + "epoch": 0.14935025131788648, + "grad_norm": 1.685604531783356, + "learning_rate": 9.631109316625931e-06, + "loss": 0.7644, + "step": 4873 + }, + { + "epoch": 0.1493808998406277, + "grad_norm": 1.4941990323132015, + "learning_rate": 9.630922192478213e-06, + "loss": 0.6857, + "step": 4874 + }, + { + "epoch": 0.1494115483633689, + "grad_norm": 1.960935991860923, + "learning_rate": 9.630735022700835e-06, + "loss": 0.7767, + "step": 4875 + }, + { + "epoch": 0.1494421968861101, + "grad_norm": 1.8383144924933796, + "learning_rate": 9.630547807295639e-06, + "loss": 0.7261, + "step": 4876 + }, + { + "epoch": 0.1494728454088513, + "grad_norm": 1.8155330907060545, + "learning_rate": 9.63036054626447e-06, + "loss": 0.7558, + "step": 4877 + }, + { + "epoch": 0.14950349393159248, + "grad_norm": 2.017123346076423, + "learning_rate": 9.630173239609176e-06, + "loss": 0.7976, + "step": 4878 + }, + { + "epoch": 0.1495341424543337, + "grad_norm": 1.9844242511490455, + "learning_rate": 9.629985887331598e-06, + "loss": 0.7146, + "step": 4879 + }, + { + "epoch": 0.1495647909770749, + "grad_norm": 1.721521299533863, + "learning_rate": 9.629798489433586e-06, + "loss": 0.7817, + "step": 4880 + }, + { + "epoch": 0.1495954394998161, + "grad_norm": 1.8100472700162746, + "learning_rate": 9.629611045916985e-06, + "loss": 0.701, + "step": 4881 + }, + { + "epoch": 0.1496260880225573, + "grad_norm": 1.8314306673743885, + "learning_rate": 9.629423556783641e-06, + "loss": 0.7514, + "step": 4882 + }, + { + "epoch": 0.1496567365452985, + "grad_norm": 2.055279193121491, + "learning_rate": 9.629236022035404e-06, + "loss": 0.8103, + "step": 4883 + }, + { + "epoch": 0.14968738506803972, + "grad_norm": 1.977395604693014, + "learning_rate": 9.629048441674117e-06, + "loss": 0.8266, + "step": 4884 + }, + { + "epoch": 0.14971803359078092, + "grad_norm": 1.8036209607269873, + "learning_rate": 9.628860815701635e-06, + "loss": 0.7066, + "step": 4885 + }, + { + "epoch": 0.14974868211352213, + "grad_norm": 1.7478338207587072, + 
"learning_rate": 9.6286731441198e-06, + "loss": 0.6503, + "step": 4886 + }, + { + "epoch": 0.14977933063626334, + "grad_norm": 1.8739826243521163, + "learning_rate": 9.628485426930465e-06, + "loss": 0.6825, + "step": 4887 + }, + { + "epoch": 0.14980997915900454, + "grad_norm": 2.0355017837395613, + "learning_rate": 9.62829766413548e-06, + "loss": 0.761, + "step": 4888 + }, + { + "epoch": 0.14984062768174575, + "grad_norm": 1.8951397969930073, + "learning_rate": 9.628109855736692e-06, + "loss": 0.7069, + "step": 4889 + }, + { + "epoch": 0.14987127620448695, + "grad_norm": 1.9294217432988616, + "learning_rate": 9.627922001735955e-06, + "loss": 0.7811, + "step": 4890 + }, + { + "epoch": 0.14990192472722816, + "grad_norm": 1.8617332373504707, + "learning_rate": 9.627734102135118e-06, + "loss": 0.7637, + "step": 4891 + }, + { + "epoch": 0.14993257324996936, + "grad_norm": 1.83388729154305, + "learning_rate": 9.627546156936033e-06, + "loss": 0.7728, + "step": 4892 + }, + { + "epoch": 0.14996322177271054, + "grad_norm": 2.0560835009524845, + "learning_rate": 9.627358166140551e-06, + "loss": 0.7407, + "step": 4893 + }, + { + "epoch": 0.14999387029545175, + "grad_norm": 2.038576146385384, + "learning_rate": 9.627170129750526e-06, + "loss": 0.7549, + "step": 4894 + }, + { + "epoch": 0.15002451881819295, + "grad_norm": 1.8236780781875452, + "learning_rate": 9.626982047767808e-06, + "loss": 0.8084, + "step": 4895 + }, + { + "epoch": 0.15005516734093416, + "grad_norm": 1.6499985019312138, + "learning_rate": 9.626793920194254e-06, + "loss": 0.6311, + "step": 4896 + }, + { + "epoch": 0.15008581586367536, + "grad_norm": 1.7855228089360686, + "learning_rate": 9.626605747031715e-06, + "loss": 0.7185, + "step": 4897 + }, + { + "epoch": 0.15011646438641657, + "grad_norm": 1.791978070534017, + "learning_rate": 9.626417528282047e-06, + "loss": 0.7455, + "step": 4898 + }, + { + "epoch": 0.15014711290915778, + "grad_norm": 1.6695827216113124, + "learning_rate": 9.6262292639471e-06, + "loss": 0.7015, + "step": 4899 + }, + { + "epoch": 0.15017776143189898, + "grad_norm": 1.7975564535084028, + "learning_rate": 9.626040954028735e-06, + "loss": 0.6519, + "step": 4900 + }, + { + "epoch": 0.1502084099546402, + "grad_norm": 1.9394062485437105, + "learning_rate": 9.625852598528804e-06, + "loss": 0.7786, + "step": 4901 + }, + { + "epoch": 0.1502390584773814, + "grad_norm": 1.1238128606665363, + "learning_rate": 9.625664197449165e-06, + "loss": 0.5016, + "step": 4902 + }, + { + "epoch": 0.1502697070001226, + "grad_norm": 1.9586184954369323, + "learning_rate": 9.625475750791672e-06, + "loss": 0.7088, + "step": 4903 + }, + { + "epoch": 0.1503003555228638, + "grad_norm": 0.8744025433851047, + "learning_rate": 9.625287258558183e-06, + "loss": 0.4971, + "step": 4904 + }, + { + "epoch": 0.150331004045605, + "grad_norm": 2.1568483095690594, + "learning_rate": 9.625098720750557e-06, + "loss": 0.6989, + "step": 4905 + }, + { + "epoch": 0.15036165256834622, + "grad_norm": 1.8578425456087218, + "learning_rate": 9.624910137370647e-06, + "loss": 0.7575, + "step": 4906 + }, + { + "epoch": 0.15039230109108742, + "grad_norm": 1.5424188464438946, + "learning_rate": 9.624721508420316e-06, + "loss": 0.6929, + "step": 4907 + }, + { + "epoch": 0.15042294961382863, + "grad_norm": 1.9509540527541973, + "learning_rate": 9.624532833901419e-06, + "loss": 0.7221, + "step": 4908 + }, + { + "epoch": 0.1504535981365698, + "grad_norm": 1.9249011243763086, + "learning_rate": 9.624344113815818e-06, + "loss": 0.7648, + "step": 4909 + }, + { + "epoch": 
0.150484246659311, + "grad_norm": 2.022911545483853, + "learning_rate": 9.624155348165372e-06, + "loss": 0.7334, + "step": 4910 + }, + { + "epoch": 0.15051489518205222, + "grad_norm": 1.793305676802103, + "learning_rate": 9.623966536951939e-06, + "loss": 0.7977, + "step": 4911 + }, + { + "epoch": 0.15054554370479342, + "grad_norm": 1.861403449538561, + "learning_rate": 9.62377768017738e-06, + "loss": 0.7475, + "step": 4912 + }, + { + "epoch": 0.15057619222753463, + "grad_norm": 1.8057001135184148, + "learning_rate": 9.623588777843558e-06, + "loss": 0.7525, + "step": 4913 + }, + { + "epoch": 0.15060684075027583, + "grad_norm": 2.043706573037171, + "learning_rate": 9.623399829952332e-06, + "loss": 0.6812, + "step": 4914 + }, + { + "epoch": 0.15063748927301704, + "grad_norm": 1.943667825206583, + "learning_rate": 9.623210836505565e-06, + "loss": 0.783, + "step": 4915 + }, + { + "epoch": 0.15066813779575824, + "grad_norm": 1.9006193892338594, + "learning_rate": 9.623021797505118e-06, + "loss": 0.7987, + "step": 4916 + }, + { + "epoch": 0.15069878631849945, + "grad_norm": 1.902716001891339, + "learning_rate": 9.622832712952856e-06, + "loss": 0.6842, + "step": 4917 + }, + { + "epoch": 0.15072943484124066, + "grad_norm": 1.810981515079503, + "learning_rate": 9.62264358285064e-06, + "loss": 0.8996, + "step": 4918 + }, + { + "epoch": 0.15076008336398186, + "grad_norm": 1.969168384562869, + "learning_rate": 9.622454407200333e-06, + "loss": 0.7667, + "step": 4919 + }, + { + "epoch": 0.15079073188672307, + "grad_norm": 1.8880485803433655, + "learning_rate": 9.622265186003799e-06, + "loss": 0.7513, + "step": 4920 + }, + { + "epoch": 0.15082138040946427, + "grad_norm": 2.1642767012186113, + "learning_rate": 9.622075919262905e-06, + "loss": 0.727, + "step": 4921 + }, + { + "epoch": 0.15085202893220548, + "grad_norm": 1.8801012400320958, + "learning_rate": 9.621886606979514e-06, + "loss": 0.7699, + "step": 4922 + }, + { + "epoch": 0.15088267745494668, + "grad_norm": 1.2651197864852899, + "learning_rate": 9.621697249155493e-06, + "loss": 0.5023, + "step": 4923 + }, + { + "epoch": 0.15091332597768786, + "grad_norm": 1.1046027330214496, + "learning_rate": 9.621507845792705e-06, + "loss": 0.5008, + "step": 4924 + }, + { + "epoch": 0.15094397450042907, + "grad_norm": 1.9148735565662414, + "learning_rate": 9.621318396893018e-06, + "loss": 0.778, + "step": 4925 + }, + { + "epoch": 0.15097462302317027, + "grad_norm": 1.8766240349604522, + "learning_rate": 9.621128902458299e-06, + "loss": 0.8087, + "step": 4926 + }, + { + "epoch": 0.15100527154591148, + "grad_norm": 2.0876955097989662, + "learning_rate": 9.620939362490414e-06, + "loss": 0.8235, + "step": 4927 + }, + { + "epoch": 0.15103592006865268, + "grad_norm": 1.060933283149895, + "learning_rate": 9.62074977699123e-06, + "loss": 0.4924, + "step": 4928 + }, + { + "epoch": 0.1510665685913939, + "grad_norm": 1.1538218052813587, + "learning_rate": 9.620560145962618e-06, + "loss": 0.5123, + "step": 4929 + }, + { + "epoch": 0.1510972171141351, + "grad_norm": 1.8562087045901994, + "learning_rate": 9.620370469406443e-06, + "loss": 0.7233, + "step": 4930 + }, + { + "epoch": 0.1511278656368763, + "grad_norm": 2.3690880357900905, + "learning_rate": 9.620180747324577e-06, + "loss": 0.6634, + "step": 4931 + }, + { + "epoch": 0.1511585141596175, + "grad_norm": 1.8311699943146824, + "learning_rate": 9.619990979718889e-06, + "loss": 0.736, + "step": 4932 + }, + { + "epoch": 0.1511891626823587, + "grad_norm": 1.8926710001422808, + "learning_rate": 9.619801166591247e-06, + 
"loss": 0.8137, + "step": 4933 + }, + { + "epoch": 0.15121981120509992, + "grad_norm": 1.5512956037743426, + "learning_rate": 9.61961130794352e-06, + "loss": 0.6633, + "step": 4934 + }, + { + "epoch": 0.15125045972784112, + "grad_norm": 2.0010402321968472, + "learning_rate": 9.619421403777583e-06, + "loss": 0.8445, + "step": 4935 + }, + { + "epoch": 0.15128110825058233, + "grad_norm": 1.9266469595640987, + "learning_rate": 9.619231454095304e-06, + "loss": 0.7361, + "step": 4936 + }, + { + "epoch": 0.15131175677332354, + "grad_norm": 1.6238897006349466, + "learning_rate": 9.619041458898557e-06, + "loss": 0.6521, + "step": 4937 + }, + { + "epoch": 0.15134240529606474, + "grad_norm": 1.2626966426136528, + "learning_rate": 9.61885141818921e-06, + "loss": 0.5123, + "step": 4938 + }, + { + "epoch": 0.15137305381880595, + "grad_norm": 1.0829647220586824, + "learning_rate": 9.61866133196914e-06, + "loss": 0.4999, + "step": 4939 + }, + { + "epoch": 0.15140370234154713, + "grad_norm": 1.8359877407829717, + "learning_rate": 9.618471200240219e-06, + "loss": 0.7085, + "step": 4940 + }, + { + "epoch": 0.15143435086428833, + "grad_norm": 1.716696361165358, + "learning_rate": 9.618281023004318e-06, + "loss": 0.7998, + "step": 4941 + }, + { + "epoch": 0.15146499938702954, + "grad_norm": 1.820644811791315, + "learning_rate": 9.618090800263313e-06, + "loss": 0.816, + "step": 4942 + }, + { + "epoch": 0.15149564790977074, + "grad_norm": 2.05889173450834, + "learning_rate": 9.617900532019078e-06, + "loss": 0.7662, + "step": 4943 + }, + { + "epoch": 0.15152629643251195, + "grad_norm": 1.806752422244448, + "learning_rate": 9.617710218273486e-06, + "loss": 0.766, + "step": 4944 + }, + { + "epoch": 0.15155694495525315, + "grad_norm": 1.9610818530044487, + "learning_rate": 9.617519859028415e-06, + "loss": 0.6777, + "step": 4945 + }, + { + "epoch": 0.15158759347799436, + "grad_norm": 1.9727728561501796, + "learning_rate": 9.61732945428574e-06, + "loss": 0.7507, + "step": 4946 + }, + { + "epoch": 0.15161824200073556, + "grad_norm": 2.010934893245397, + "learning_rate": 9.617139004047335e-06, + "loss": 0.7741, + "step": 4947 + }, + { + "epoch": 0.15164889052347677, + "grad_norm": 1.8310095389517476, + "learning_rate": 9.61694850831508e-06, + "loss": 0.8402, + "step": 4948 + }, + { + "epoch": 0.15167953904621798, + "grad_norm": 1.9514450249192663, + "learning_rate": 9.616757967090848e-06, + "loss": 0.7399, + "step": 4949 + }, + { + "epoch": 0.15171018756895918, + "grad_norm": 1.8271939613741288, + "learning_rate": 9.61656738037652e-06, + "loss": 0.7766, + "step": 4950 + }, + { + "epoch": 0.1517408360917004, + "grad_norm": 1.6622455548750146, + "learning_rate": 9.616376748173973e-06, + "loss": 0.5035, + "step": 4951 + }, + { + "epoch": 0.1517714846144416, + "grad_norm": 1.857851798974954, + "learning_rate": 9.616186070485082e-06, + "loss": 0.7307, + "step": 4952 + }, + { + "epoch": 0.1518021331371828, + "grad_norm": 2.358927792029898, + "learning_rate": 9.61599534731173e-06, + "loss": 0.7406, + "step": 4953 + }, + { + "epoch": 0.151832781659924, + "grad_norm": 0.9845846523551288, + "learning_rate": 9.615804578655796e-06, + "loss": 0.511, + "step": 4954 + }, + { + "epoch": 0.15186343018266518, + "grad_norm": 2.027406899280921, + "learning_rate": 9.615613764519155e-06, + "loss": 0.7844, + "step": 4955 + }, + { + "epoch": 0.1518940787054064, + "grad_norm": 1.9620824993404762, + "learning_rate": 9.615422904903695e-06, + "loss": 0.7796, + "step": 4956 + }, + { + "epoch": 0.1519247272281476, + "grad_norm": 2.102523991015329, + 
"learning_rate": 9.61523199981129e-06, + "loss": 0.8417, + "step": 4957 + }, + { + "epoch": 0.1519553757508888, + "grad_norm": 1.702552194195034, + "learning_rate": 9.615041049243825e-06, + "loss": 0.6837, + "step": 4958 + }, + { + "epoch": 0.15198602427363, + "grad_norm": 2.0003801509956594, + "learning_rate": 9.614850053203178e-06, + "loss": 0.7611, + "step": 4959 + }, + { + "epoch": 0.1520166727963712, + "grad_norm": 1.9898306162227992, + "learning_rate": 9.614659011691232e-06, + "loss": 0.7447, + "step": 4960 + }, + { + "epoch": 0.15204732131911242, + "grad_norm": 1.885675763553351, + "learning_rate": 9.61446792470987e-06, + "loss": 0.7258, + "step": 4961 + }, + { + "epoch": 0.15207796984185362, + "grad_norm": 2.0727705193722317, + "learning_rate": 9.614276792260978e-06, + "loss": 0.6849, + "step": 4962 + }, + { + "epoch": 0.15210861836459483, + "grad_norm": 1.6471528035201624, + "learning_rate": 9.614085614346433e-06, + "loss": 0.7236, + "step": 4963 + }, + { + "epoch": 0.15213926688733603, + "grad_norm": 1.3254722447343434, + "learning_rate": 9.613894390968121e-06, + "loss": 0.5226, + "step": 4964 + }, + { + "epoch": 0.15216991541007724, + "grad_norm": 2.1050857952199915, + "learning_rate": 9.61370312212793e-06, + "loss": 0.7965, + "step": 4965 + }, + { + "epoch": 0.15220056393281844, + "grad_norm": 1.8649773394980012, + "learning_rate": 9.61351180782774e-06, + "loss": 0.8079, + "step": 4966 + }, + { + "epoch": 0.15223121245555965, + "grad_norm": 0.9215812296264748, + "learning_rate": 9.61332044806944e-06, + "loss": 0.5065, + "step": 4967 + }, + { + "epoch": 0.15226186097830086, + "grad_norm": 1.8785560774864332, + "learning_rate": 9.61312904285491e-06, + "loss": 0.7555, + "step": 4968 + }, + { + "epoch": 0.15229250950104206, + "grad_norm": 1.7542519065394, + "learning_rate": 9.612937592186041e-06, + "loss": 0.6362, + "step": 4969 + }, + { + "epoch": 0.15232315802378327, + "grad_norm": 2.0284587943750148, + "learning_rate": 9.612746096064718e-06, + "loss": 0.679, + "step": 4970 + }, + { + "epoch": 0.15235380654652445, + "grad_norm": 0.9435527801406817, + "learning_rate": 9.612554554492825e-06, + "loss": 0.502, + "step": 4971 + }, + { + "epoch": 0.15238445506926565, + "grad_norm": 1.7330113220106413, + "learning_rate": 9.612362967472254e-06, + "loss": 0.7655, + "step": 4972 + }, + { + "epoch": 0.15241510359200686, + "grad_norm": 0.8750767653185756, + "learning_rate": 9.612171335004892e-06, + "loss": 0.4878, + "step": 4973 + }, + { + "epoch": 0.15244575211474806, + "grad_norm": 2.1891459640823676, + "learning_rate": 9.611979657092622e-06, + "loss": 0.8293, + "step": 4974 + }, + { + "epoch": 0.15247640063748927, + "grad_norm": 1.8300687620442613, + "learning_rate": 9.61178793373734e-06, + "loss": 0.7736, + "step": 4975 + }, + { + "epoch": 0.15250704916023047, + "grad_norm": 0.8540374587367847, + "learning_rate": 9.611596164940929e-06, + "loss": 0.5196, + "step": 4976 + }, + { + "epoch": 0.15253769768297168, + "grad_norm": 2.120033881137624, + "learning_rate": 9.611404350705283e-06, + "loss": 0.7305, + "step": 4977 + }, + { + "epoch": 0.15256834620571288, + "grad_norm": 1.9359405557341316, + "learning_rate": 9.611212491032289e-06, + "loss": 0.7215, + "step": 4978 + }, + { + "epoch": 0.1525989947284541, + "grad_norm": 1.859049912922564, + "learning_rate": 9.611020585923838e-06, + "loss": 0.7804, + "step": 4979 + }, + { + "epoch": 0.1526296432511953, + "grad_norm": 1.9416806444536179, + "learning_rate": 9.610828635381822e-06, + "loss": 0.7971, + "step": 4980 + }, + { + "epoch": 
0.1526602917739365, + "grad_norm": 1.741337793273298, + "learning_rate": 9.610636639408132e-06, + "loss": 0.7215, + "step": 4981 + }, + { + "epoch": 0.1526909402966777, + "grad_norm": 1.7724356162366306, + "learning_rate": 9.610444598004658e-06, + "loss": 0.7426, + "step": 4982 + }, + { + "epoch": 0.1527215888194189, + "grad_norm": 0.9943146411569626, + "learning_rate": 9.610252511173297e-06, + "loss": 0.5268, + "step": 4983 + }, + { + "epoch": 0.15275223734216012, + "grad_norm": 2.0508957218730193, + "learning_rate": 9.610060378915935e-06, + "loss": 0.6606, + "step": 4984 + }, + { + "epoch": 0.15278288586490132, + "grad_norm": 2.486130574711528, + "learning_rate": 9.60986820123447e-06, + "loss": 0.729, + "step": 4985 + }, + { + "epoch": 0.1528135343876425, + "grad_norm": 1.9631669533824776, + "learning_rate": 9.609675978130795e-06, + "loss": 0.7487, + "step": 4986 + }, + { + "epoch": 0.1528441829103837, + "grad_norm": 1.9772670843178737, + "learning_rate": 9.609483709606802e-06, + "loss": 0.8225, + "step": 4987 + }, + { + "epoch": 0.1528748314331249, + "grad_norm": 1.9347426073861826, + "learning_rate": 9.609291395664387e-06, + "loss": 0.731, + "step": 4988 + }, + { + "epoch": 0.15290547995586612, + "grad_norm": 1.8844134184237011, + "learning_rate": 9.609099036305443e-06, + "loss": 0.8138, + "step": 4989 + }, + { + "epoch": 0.15293612847860732, + "grad_norm": 1.911166342382071, + "learning_rate": 9.608906631531869e-06, + "loss": 0.6511, + "step": 4990 + }, + { + "epoch": 0.15296677700134853, + "grad_norm": 1.857424655042543, + "learning_rate": 9.608714181345558e-06, + "loss": 0.779, + "step": 4991 + }, + { + "epoch": 0.15299742552408974, + "grad_norm": 2.0463717559299424, + "learning_rate": 9.608521685748406e-06, + "loss": 0.9343, + "step": 4992 + }, + { + "epoch": 0.15302807404683094, + "grad_norm": 1.748026707961763, + "learning_rate": 9.608329144742312e-06, + "loss": 0.7167, + "step": 4993 + }, + { + "epoch": 0.15305872256957215, + "grad_norm": 1.7983359403085313, + "learning_rate": 9.608136558329172e-06, + "loss": 0.8008, + "step": 4994 + }, + { + "epoch": 0.15308937109231335, + "grad_norm": 0.9282221979478622, + "learning_rate": 9.607943926510883e-06, + "loss": 0.5139, + "step": 4995 + }, + { + "epoch": 0.15312001961505456, + "grad_norm": 1.7064415312822678, + "learning_rate": 9.607751249289342e-06, + "loss": 0.8497, + "step": 4996 + }, + { + "epoch": 0.15315066813779576, + "grad_norm": 1.7859972059577651, + "learning_rate": 9.607558526666451e-06, + "loss": 0.7237, + "step": 4997 + }, + { + "epoch": 0.15318131666053697, + "grad_norm": 1.747965821508336, + "learning_rate": 9.607365758644107e-06, + "loss": 0.6625, + "step": 4998 + }, + { + "epoch": 0.15321196518327818, + "grad_norm": 1.6928933720057302, + "learning_rate": 9.607172945224208e-06, + "loss": 0.641, + "step": 4999 + }, + { + "epoch": 0.15324261370601938, + "grad_norm": 2.559162463398214, + "learning_rate": 9.606980086408657e-06, + "loss": 0.7298, + "step": 5000 + }, + { + "epoch": 0.1532732622287606, + "grad_norm": 1.9823396237492439, + "learning_rate": 9.60678718219935e-06, + "loss": 0.7669, + "step": 5001 + }, + { + "epoch": 0.15330391075150177, + "grad_norm": 1.8028221231128476, + "learning_rate": 9.606594232598192e-06, + "loss": 0.7954, + "step": 5002 + }, + { + "epoch": 0.15333455927424297, + "grad_norm": 1.7564974370508564, + "learning_rate": 9.606401237607081e-06, + "loss": 0.7458, + "step": 5003 + }, + { + "epoch": 0.15336520779698418, + "grad_norm": 1.8905490084242071, + "learning_rate": 9.606208197227922e-06, + 
"loss": 0.8244, + "step": 5004 + }, + { + "epoch": 0.15339585631972538, + "grad_norm": 0.9778481255490409, + "learning_rate": 9.606015111462614e-06, + "loss": 0.5137, + "step": 5005 + }, + { + "epoch": 0.1534265048424666, + "grad_norm": 1.7698628504608398, + "learning_rate": 9.605821980313061e-06, + "loss": 0.7883, + "step": 5006 + }, + { + "epoch": 0.1534571533652078, + "grad_norm": 1.9232486742355663, + "learning_rate": 9.605628803781165e-06, + "loss": 0.796, + "step": 5007 + }, + { + "epoch": 0.153487801887949, + "grad_norm": 1.9588176604012892, + "learning_rate": 9.60543558186883e-06, + "loss": 0.6426, + "step": 5008 + }, + { + "epoch": 0.1535184504106902, + "grad_norm": 2.5059429848920294, + "learning_rate": 9.605242314577961e-06, + "loss": 0.7323, + "step": 5009 + }, + { + "epoch": 0.1535490989334314, + "grad_norm": 1.8906135740951564, + "learning_rate": 9.605049001910458e-06, + "loss": 0.8668, + "step": 5010 + }, + { + "epoch": 0.15357974745617262, + "grad_norm": 1.8712167907372284, + "learning_rate": 9.604855643868231e-06, + "loss": 0.7903, + "step": 5011 + }, + { + "epoch": 0.15361039597891382, + "grad_norm": 1.8795116596153976, + "learning_rate": 9.604662240453185e-06, + "loss": 0.8012, + "step": 5012 + }, + { + "epoch": 0.15364104450165503, + "grad_norm": 1.902521943900373, + "learning_rate": 9.604468791667221e-06, + "loss": 0.7296, + "step": 5013 + }, + { + "epoch": 0.15367169302439623, + "grad_norm": 1.850874391100053, + "learning_rate": 9.60427529751225e-06, + "loss": 0.6674, + "step": 5014 + }, + { + "epoch": 0.15370234154713744, + "grad_norm": 1.6082597273118906, + "learning_rate": 9.604081757990175e-06, + "loss": 0.773, + "step": 5015 + }, + { + "epoch": 0.15373299006987864, + "grad_norm": 2.0137017323957265, + "learning_rate": 9.603888173102904e-06, + "loss": 0.7318, + "step": 5016 + }, + { + "epoch": 0.15376363859261982, + "grad_norm": 1.9672086178507375, + "learning_rate": 9.603694542852346e-06, + "loss": 0.809, + "step": 5017 + }, + { + "epoch": 0.15379428711536103, + "grad_norm": 1.9777378783535216, + "learning_rate": 9.603500867240408e-06, + "loss": 0.8054, + "step": 5018 + }, + { + "epoch": 0.15382493563810223, + "grad_norm": 1.7041048745384892, + "learning_rate": 9.603307146268998e-06, + "loss": 0.7276, + "step": 5019 + }, + { + "epoch": 0.15385558416084344, + "grad_norm": 2.1913129895473564, + "learning_rate": 9.603113379940024e-06, + "loss": 0.7248, + "step": 5020 + }, + { + "epoch": 0.15388623268358465, + "grad_norm": 1.694984739947166, + "learning_rate": 9.602919568255396e-06, + "loss": 0.6593, + "step": 5021 + }, + { + "epoch": 0.15391688120632585, + "grad_norm": 1.9553571639928207, + "learning_rate": 9.602725711217025e-06, + "loss": 0.7268, + "step": 5022 + }, + { + "epoch": 0.15394752972906706, + "grad_norm": 2.084110222850507, + "learning_rate": 9.60253180882682e-06, + "loss": 0.9045, + "step": 5023 + }, + { + "epoch": 0.15397817825180826, + "grad_norm": 1.8205497777438984, + "learning_rate": 9.60233786108669e-06, + "loss": 0.7484, + "step": 5024 + }, + { + "epoch": 0.15400882677454947, + "grad_norm": 1.8317679413711365, + "learning_rate": 9.602143867998548e-06, + "loss": 0.738, + "step": 5025 + }, + { + "epoch": 0.15403947529729067, + "grad_norm": 2.3360845061608906, + "learning_rate": 9.601949829564305e-06, + "loss": 0.8491, + "step": 5026 + }, + { + "epoch": 0.15407012382003188, + "grad_norm": 1.8206608756981533, + "learning_rate": 9.601755745785873e-06, + "loss": 0.7342, + "step": 5027 + }, + { + "epoch": 0.15410077234277308, + "grad_norm": 
1.7960065328139503, + "learning_rate": 9.601561616665164e-06, + "loss": 0.7772, + "step": 5028 + }, + { + "epoch": 0.1541314208655143, + "grad_norm": 1.9156586129654503, + "learning_rate": 9.601367442204093e-06, + "loss": 0.7148, + "step": 5029 + }, + { + "epoch": 0.1541620693882555, + "grad_norm": 1.7955958157434158, + "learning_rate": 9.601173222404568e-06, + "loss": 0.6924, + "step": 5030 + }, + { + "epoch": 0.1541927179109967, + "grad_norm": 0.9926660541108828, + "learning_rate": 9.600978957268508e-06, + "loss": 0.4953, + "step": 5031 + }, + { + "epoch": 0.1542233664337379, + "grad_norm": 1.869260423948571, + "learning_rate": 9.600784646797825e-06, + "loss": 0.8095, + "step": 5032 + }, + { + "epoch": 0.15425401495647909, + "grad_norm": 1.8736199442154888, + "learning_rate": 9.600590290994434e-06, + "loss": 0.8104, + "step": 5033 + }, + { + "epoch": 0.1542846634792203, + "grad_norm": 1.8882361163227945, + "learning_rate": 9.600395889860248e-06, + "loss": 0.7156, + "step": 5034 + }, + { + "epoch": 0.1543153120019615, + "grad_norm": 1.8564231524361279, + "learning_rate": 9.600201443397185e-06, + "loss": 0.8155, + "step": 5035 + }, + { + "epoch": 0.1543459605247027, + "grad_norm": 2.2277225106167675, + "learning_rate": 9.600006951607162e-06, + "loss": 0.7388, + "step": 5036 + }, + { + "epoch": 0.1543766090474439, + "grad_norm": 1.8850902787197519, + "learning_rate": 9.599812414492092e-06, + "loss": 0.7347, + "step": 5037 + }, + { + "epoch": 0.1544072575701851, + "grad_norm": 2.172666331564381, + "learning_rate": 9.599617832053893e-06, + "loss": 0.6994, + "step": 5038 + }, + { + "epoch": 0.15443790609292632, + "grad_norm": 1.7873216627739106, + "learning_rate": 9.599423204294484e-06, + "loss": 0.7906, + "step": 5039 + }, + { + "epoch": 0.15446855461566752, + "grad_norm": 1.7649473111261356, + "learning_rate": 9.599228531215779e-06, + "loss": 0.7365, + "step": 5040 + }, + { + "epoch": 0.15449920313840873, + "grad_norm": 1.882598349276251, + "learning_rate": 9.5990338128197e-06, + "loss": 0.6814, + "step": 5041 + }, + { + "epoch": 0.15452985166114994, + "grad_norm": 1.8005367236312326, + "learning_rate": 9.598839049108164e-06, + "loss": 0.7766, + "step": 5042 + }, + { + "epoch": 0.15456050018389114, + "grad_norm": 1.8676300143341749, + "learning_rate": 9.59864424008309e-06, + "loss": 0.8083, + "step": 5043 + }, + { + "epoch": 0.15459114870663235, + "grad_norm": 0.8933753121276299, + "learning_rate": 9.598449385746399e-06, + "loss": 0.5202, + "step": 5044 + }, + { + "epoch": 0.15462179722937355, + "grad_norm": 1.9288547771502222, + "learning_rate": 9.59825448610001e-06, + "loss": 0.7729, + "step": 5045 + }, + { + "epoch": 0.15465244575211476, + "grad_norm": 1.9835557833099156, + "learning_rate": 9.598059541145841e-06, + "loss": 0.7543, + "step": 5046 + }, + { + "epoch": 0.15468309427485596, + "grad_norm": 1.6393293340864799, + "learning_rate": 9.597864550885816e-06, + "loss": 0.7848, + "step": 5047 + }, + { + "epoch": 0.15471374279759714, + "grad_norm": 1.9581320970023075, + "learning_rate": 9.597669515321853e-06, + "loss": 0.8559, + "step": 5048 + }, + { + "epoch": 0.15474439132033835, + "grad_norm": 1.8592104119709134, + "learning_rate": 9.597474434455878e-06, + "loss": 0.774, + "step": 5049 + }, + { + "epoch": 0.15477503984307955, + "grad_norm": 0.8450053069918761, + "learning_rate": 9.597279308289811e-06, + "loss": 0.5318, + "step": 5050 + }, + { + "epoch": 0.15480568836582076, + "grad_norm": 1.700358065853225, + "learning_rate": 9.597084136825573e-06, + "loss": 0.7333, + "step": 5051 + }, 
+ { + "epoch": 0.15483633688856197, + "grad_norm": 1.8457705053108413, + "learning_rate": 9.59688892006509e-06, + "loss": 0.7991, + "step": 5052 + }, + { + "epoch": 0.15486698541130317, + "grad_norm": 1.8801634908376847, + "learning_rate": 9.596693658010286e-06, + "loss": 0.7468, + "step": 5053 + }, + { + "epoch": 0.15489763393404438, + "grad_norm": 1.9370980308496963, + "learning_rate": 9.596498350663082e-06, + "loss": 0.7887, + "step": 5054 + }, + { + "epoch": 0.15492828245678558, + "grad_norm": 2.095288059168411, + "learning_rate": 9.596302998025403e-06, + "loss": 0.6768, + "step": 5055 + }, + { + "epoch": 0.1549589309795268, + "grad_norm": 1.9986292262217749, + "learning_rate": 9.596107600099176e-06, + "loss": 0.7365, + "step": 5056 + }, + { + "epoch": 0.154989579502268, + "grad_norm": 1.821893691405891, + "learning_rate": 9.595912156886323e-06, + "loss": 0.8031, + "step": 5057 + }, + { + "epoch": 0.1550202280250092, + "grad_norm": 1.8349222451562888, + "learning_rate": 9.595716668388773e-06, + "loss": 0.6913, + "step": 5058 + }, + { + "epoch": 0.1550508765477504, + "grad_norm": 1.70321420403657, + "learning_rate": 9.59552113460845e-06, + "loss": 0.7822, + "step": 5059 + }, + { + "epoch": 0.1550815250704916, + "grad_norm": 1.9096561931442857, + "learning_rate": 9.595325555547281e-06, + "loss": 0.6968, + "step": 5060 + }, + { + "epoch": 0.15511217359323282, + "grad_norm": 0.898474363234282, + "learning_rate": 9.595129931207194e-06, + "loss": 0.5164, + "step": 5061 + }, + { + "epoch": 0.15514282211597402, + "grad_norm": 1.73024570630405, + "learning_rate": 9.594934261590117e-06, + "loss": 0.6985, + "step": 5062 + }, + { + "epoch": 0.15517347063871523, + "grad_norm": 0.8650357337815794, + "learning_rate": 9.594738546697977e-06, + "loss": 0.5046, + "step": 5063 + }, + { + "epoch": 0.1552041191614564, + "grad_norm": 0.8277615999566638, + "learning_rate": 9.594542786532702e-06, + "loss": 0.4978, + "step": 5064 + }, + { + "epoch": 0.1552347676841976, + "grad_norm": 1.8918084223596983, + "learning_rate": 9.594346981096221e-06, + "loss": 0.8306, + "step": 5065 + }, + { + "epoch": 0.15526541620693882, + "grad_norm": 1.9124935864487576, + "learning_rate": 9.594151130390463e-06, + "loss": 0.7203, + "step": 5066 + }, + { + "epoch": 0.15529606472968002, + "grad_norm": 0.8783548540608576, + "learning_rate": 9.593955234417361e-06, + "loss": 0.5293, + "step": 5067 + }, + { + "epoch": 0.15532671325242123, + "grad_norm": 1.8399394948497365, + "learning_rate": 9.593759293178839e-06, + "loss": 0.7489, + "step": 5068 + }, + { + "epoch": 0.15535736177516243, + "grad_norm": 1.7328212241977998, + "learning_rate": 9.593563306676835e-06, + "loss": 0.6969, + "step": 5069 + }, + { + "epoch": 0.15538801029790364, + "grad_norm": 2.0916824427488443, + "learning_rate": 9.593367274913274e-06, + "loss": 0.8496, + "step": 5070 + }, + { + "epoch": 0.15541865882064484, + "grad_norm": 1.697821301618154, + "learning_rate": 9.59317119789009e-06, + "loss": 0.7489, + "step": 5071 + }, + { + "epoch": 0.15544930734338605, + "grad_norm": 1.8111843028782069, + "learning_rate": 9.592975075609216e-06, + "loss": 0.7393, + "step": 5072 + }, + { + "epoch": 0.15547995586612726, + "grad_norm": 1.5374956618027378, + "learning_rate": 9.592778908072583e-06, + "loss": 0.6875, + "step": 5073 + }, + { + "epoch": 0.15551060438886846, + "grad_norm": 1.8114345962831297, + "learning_rate": 9.592582695282124e-06, + "loss": 0.8158, + "step": 5074 + }, + { + "epoch": 0.15554125291160967, + "grad_norm": 1.9911900070046253, + "learning_rate": 
9.592386437239773e-06, + "loss": 0.629, + "step": 5075 + }, + { + "epoch": 0.15557190143435087, + "grad_norm": 0.9916124610203599, + "learning_rate": 9.592190133947465e-06, + "loss": 0.5325, + "step": 5076 + }, + { + "epoch": 0.15560254995709208, + "grad_norm": 2.0988693938367913, + "learning_rate": 9.59199378540713e-06, + "loss": 0.7624, + "step": 5077 + }, + { + "epoch": 0.15563319847983328, + "grad_norm": 1.6916139598888045, + "learning_rate": 9.591797391620708e-06, + "loss": 0.6509, + "step": 5078 + }, + { + "epoch": 0.15566384700257446, + "grad_norm": 1.7727003229326794, + "learning_rate": 9.591600952590129e-06, + "loss": 0.6526, + "step": 5079 + }, + { + "epoch": 0.15569449552531567, + "grad_norm": 1.4229197738967037, + "learning_rate": 9.59140446831733e-06, + "loss": 0.5015, + "step": 5080 + }, + { + "epoch": 0.15572514404805687, + "grad_norm": 1.9760648141308779, + "learning_rate": 9.591207938804252e-06, + "loss": 0.8086, + "step": 5081 + }, + { + "epoch": 0.15575579257079808, + "grad_norm": 1.6272913497989536, + "learning_rate": 9.591011364052825e-06, + "loss": 0.7596, + "step": 5082 + }, + { + "epoch": 0.15578644109353929, + "grad_norm": 1.7987948912162233, + "learning_rate": 9.59081474406499e-06, + "loss": 0.696, + "step": 5083 + }, + { + "epoch": 0.1558170896162805, + "grad_norm": 1.827964707615435, + "learning_rate": 9.590618078842679e-06, + "loss": 0.7857, + "step": 5084 + }, + { + "epoch": 0.1558477381390217, + "grad_norm": 1.789233427938964, + "learning_rate": 9.590421368387837e-06, + "loss": 0.7254, + "step": 5085 + }, + { + "epoch": 0.1558783866617629, + "grad_norm": 1.8169096656651034, + "learning_rate": 9.590224612702398e-06, + "loss": 0.7358, + "step": 5086 + }, + { + "epoch": 0.1559090351845041, + "grad_norm": 1.850002736986028, + "learning_rate": 9.590027811788301e-06, + "loss": 0.8665, + "step": 5087 + }, + { + "epoch": 0.1559396837072453, + "grad_norm": 1.6841708766387864, + "learning_rate": 9.589830965647487e-06, + "loss": 0.7547, + "step": 5088 + }, + { + "epoch": 0.15597033222998652, + "grad_norm": 1.8902917368364884, + "learning_rate": 9.589634074281891e-06, + "loss": 0.869, + "step": 5089 + }, + { + "epoch": 0.15600098075272772, + "grad_norm": 1.0317472094022113, + "learning_rate": 9.589437137693459e-06, + "loss": 0.5157, + "step": 5090 + }, + { + "epoch": 0.15603162927546893, + "grad_norm": 1.7483247691041017, + "learning_rate": 9.589240155884128e-06, + "loss": 0.7192, + "step": 5091 + }, + { + "epoch": 0.15606227779821014, + "grad_norm": 1.7113885344873239, + "learning_rate": 9.589043128855838e-06, + "loss": 0.7177, + "step": 5092 + }, + { + "epoch": 0.15609292632095134, + "grad_norm": 1.7187625803699333, + "learning_rate": 9.588846056610533e-06, + "loss": 0.7844, + "step": 5093 + }, + { + "epoch": 0.15612357484369255, + "grad_norm": 1.7093701697730832, + "learning_rate": 9.588648939150153e-06, + "loss": 0.7083, + "step": 5094 + }, + { + "epoch": 0.15615422336643373, + "grad_norm": 1.9755750104931336, + "learning_rate": 9.588451776476643e-06, + "loss": 0.8044, + "step": 5095 + }, + { + "epoch": 0.15618487188917493, + "grad_norm": 0.8831459209650383, + "learning_rate": 9.588254568591942e-06, + "loss": 0.5172, + "step": 5096 + }, + { + "epoch": 0.15621552041191614, + "grad_norm": 1.7603305191856293, + "learning_rate": 9.588057315497995e-06, + "loss": 0.7561, + "step": 5097 + }, + { + "epoch": 0.15624616893465734, + "grad_norm": 1.7586976049834457, + "learning_rate": 9.587860017196747e-06, + "loss": 0.7298, + "step": 5098 + }, + { + "epoch": 
0.15627681745739855, + "grad_norm": 1.6965168590618427, + "learning_rate": 9.587662673690137e-06, + "loss": 0.5977, + "step": 5099 + }, + { + "epoch": 0.15630746598013975, + "grad_norm": 1.8219887751221908, + "learning_rate": 9.587465284980115e-06, + "loss": 0.6792, + "step": 5100 + }, + { + "epoch": 0.15633811450288096, + "grad_norm": 1.7716365035851085, + "learning_rate": 9.587267851068624e-06, + "loss": 0.6612, + "step": 5101 + }, + { + "epoch": 0.15636876302562217, + "grad_norm": 2.110526822306012, + "learning_rate": 9.587070371957608e-06, + "loss": 0.7039, + "step": 5102 + }, + { + "epoch": 0.15639941154836337, + "grad_norm": 1.9430997188437937, + "learning_rate": 9.586872847649016e-06, + "loss": 0.754, + "step": 5103 + }, + { + "epoch": 0.15643006007110458, + "grad_norm": 1.870425736375899, + "learning_rate": 9.58667527814479e-06, + "loss": 0.8692, + "step": 5104 + }, + { + "epoch": 0.15646070859384578, + "grad_norm": 1.8401177042762624, + "learning_rate": 9.58647766344688e-06, + "loss": 0.7296, + "step": 5105 + }, + { + "epoch": 0.156491357116587, + "grad_norm": 1.9119662168182623, + "learning_rate": 9.58628000355723e-06, + "loss": 0.7314, + "step": 5106 + }, + { + "epoch": 0.1565220056393282, + "grad_norm": 1.968428344052431, + "learning_rate": 9.586082298477794e-06, + "loss": 0.6272, + "step": 5107 + }, + { + "epoch": 0.1565526541620694, + "grad_norm": 1.8313396531397326, + "learning_rate": 9.585884548210513e-06, + "loss": 0.6872, + "step": 5108 + }, + { + "epoch": 0.1565833026848106, + "grad_norm": 1.0181517441881718, + "learning_rate": 9.585686752757339e-06, + "loss": 0.5116, + "step": 5109 + }, + { + "epoch": 0.15661395120755178, + "grad_norm": 1.7221674184168783, + "learning_rate": 9.58548891212022e-06, + "loss": 0.9026, + "step": 5110 + }, + { + "epoch": 0.156644599730293, + "grad_norm": 1.8111589430743487, + "learning_rate": 9.585291026301105e-06, + "loss": 0.8136, + "step": 5111 + }, + { + "epoch": 0.1566752482530342, + "grad_norm": 1.7870403139349413, + "learning_rate": 9.585093095301944e-06, + "loss": 0.7837, + "step": 5112 + }, + { + "epoch": 0.1567058967757754, + "grad_norm": 1.4777355090266222, + "learning_rate": 9.584895119124688e-06, + "loss": 0.6185, + "step": 5113 + }, + { + "epoch": 0.1567365452985166, + "grad_norm": 1.7329681721827148, + "learning_rate": 9.584697097771287e-06, + "loss": 0.653, + "step": 5114 + }, + { + "epoch": 0.1567671938212578, + "grad_norm": 1.8407558476668129, + "learning_rate": 9.584499031243693e-06, + "loss": 0.7807, + "step": 5115 + }, + { + "epoch": 0.15679784234399902, + "grad_norm": 1.9698417993495616, + "learning_rate": 9.584300919543856e-06, + "loss": 0.7281, + "step": 5116 + }, + { + "epoch": 0.15682849086674022, + "grad_norm": 1.5954792668322741, + "learning_rate": 9.58410276267373e-06, + "loss": 0.7202, + "step": 5117 + }, + { + "epoch": 0.15685913938948143, + "grad_norm": 1.9156200788911737, + "learning_rate": 9.583904560635267e-06, + "loss": 0.8528, + "step": 5118 + }, + { + "epoch": 0.15688978791222263, + "grad_norm": 0.981594511247581, + "learning_rate": 9.583706313430418e-06, + "loss": 0.5318, + "step": 5119 + }, + { + "epoch": 0.15692043643496384, + "grad_norm": 2.5833646235387113, + "learning_rate": 9.583508021061141e-06, + "loss": 0.6451, + "step": 5120 + }, + { + "epoch": 0.15695108495770504, + "grad_norm": 2.0317285771869416, + "learning_rate": 9.583309683529384e-06, + "loss": 0.8006, + "step": 5121 + }, + { + "epoch": 0.15698173348044625, + "grad_norm": 1.8355131820098078, + "learning_rate": 9.583111300837105e-06, + 
"loss": 0.7651, + "step": 5122 + }, + { + "epoch": 0.15701238200318746, + "grad_norm": 1.885492692384682, + "learning_rate": 9.582912872986256e-06, + "loss": 0.7983, + "step": 5123 + }, + { + "epoch": 0.15704303052592866, + "grad_norm": 1.9670629896976872, + "learning_rate": 9.582714399978796e-06, + "loss": 0.7911, + "step": 5124 + }, + { + "epoch": 0.15707367904866987, + "grad_norm": 1.7756485982147419, + "learning_rate": 9.582515881816678e-06, + "loss": 0.7933, + "step": 5125 + }, + { + "epoch": 0.15710432757141105, + "grad_norm": 0.8623989997286543, + "learning_rate": 9.582317318501859e-06, + "loss": 0.5388, + "step": 5126 + }, + { + "epoch": 0.15713497609415225, + "grad_norm": 1.999203164431094, + "learning_rate": 9.582118710036293e-06, + "loss": 0.6752, + "step": 5127 + }, + { + "epoch": 0.15716562461689346, + "grad_norm": 1.804816836061846, + "learning_rate": 9.58192005642194e-06, + "loss": 0.6766, + "step": 5128 + }, + { + "epoch": 0.15719627313963466, + "grad_norm": 0.81428512691434, + "learning_rate": 9.581721357660757e-06, + "loss": 0.4879, + "step": 5129 + }, + { + "epoch": 0.15722692166237587, + "grad_norm": 1.622389324538969, + "learning_rate": 9.581522613754702e-06, + "loss": 0.5278, + "step": 5130 + }, + { + "epoch": 0.15725757018511707, + "grad_norm": 0.768464865539869, + "learning_rate": 9.58132382470573e-06, + "loss": 0.4916, + "step": 5131 + }, + { + "epoch": 0.15728821870785828, + "grad_norm": 1.7964402035675044, + "learning_rate": 9.581124990515805e-06, + "loss": 0.7336, + "step": 5132 + }, + { + "epoch": 0.15731886723059949, + "grad_norm": 1.9380135334950317, + "learning_rate": 9.58092611118688e-06, + "loss": 0.7479, + "step": 5133 + }, + { + "epoch": 0.1573495157533407, + "grad_norm": 1.8257303066577855, + "learning_rate": 9.580727186720919e-06, + "loss": 0.8102, + "step": 5134 + }, + { + "epoch": 0.1573801642760819, + "grad_norm": 1.9039582670459556, + "learning_rate": 9.580528217119882e-06, + "loss": 0.7647, + "step": 5135 + }, + { + "epoch": 0.1574108127988231, + "grad_norm": 1.6411048719196422, + "learning_rate": 9.580329202385729e-06, + "loss": 0.731, + "step": 5136 + }, + { + "epoch": 0.1574414613215643, + "grad_norm": 2.1016600343895804, + "learning_rate": 9.580130142520419e-06, + "loss": 0.7654, + "step": 5137 + }, + { + "epoch": 0.1574721098443055, + "grad_norm": 1.7432868374248809, + "learning_rate": 9.579931037525915e-06, + "loss": 0.6748, + "step": 5138 + }, + { + "epoch": 0.15750275836704672, + "grad_norm": 1.6068917965083642, + "learning_rate": 9.57973188740418e-06, + "loss": 0.7557, + "step": 5139 + }, + { + "epoch": 0.15753340688978792, + "grad_norm": 1.9844809145822924, + "learning_rate": 9.579532692157174e-06, + "loss": 0.7924, + "step": 5140 + }, + { + "epoch": 0.1575640554125291, + "grad_norm": 1.77067728439259, + "learning_rate": 9.57933345178686e-06, + "loss": 0.8518, + "step": 5141 + }, + { + "epoch": 0.1575947039352703, + "grad_norm": 1.816567274494751, + "learning_rate": 9.579134166295203e-06, + "loss": 0.751, + "step": 5142 + }, + { + "epoch": 0.15762535245801151, + "grad_norm": 2.048672428683356, + "learning_rate": 9.578934835684166e-06, + "loss": 0.6409, + "step": 5143 + }, + { + "epoch": 0.15765600098075272, + "grad_norm": 1.4953270398664988, + "learning_rate": 9.57873545995571e-06, + "loss": 0.6965, + "step": 5144 + }, + { + "epoch": 0.15768664950349393, + "grad_norm": 1.7465775697767942, + "learning_rate": 9.578536039111806e-06, + "loss": 0.6926, + "step": 5145 + }, + { + "epoch": 0.15771729802623513, + "grad_norm": 1.6845502784245825, 
+ "learning_rate": 9.578336573154411e-06, + "loss": 0.7228, + "step": 5146 + }, + { + "epoch": 0.15774794654897634, + "grad_norm": 1.8996823091411785, + "learning_rate": 9.578137062085496e-06, + "loss": 0.8652, + "step": 5147 + }, + { + "epoch": 0.15777859507171754, + "grad_norm": 1.7485123849079585, + "learning_rate": 9.577937505907024e-06, + "loss": 0.6952, + "step": 5148 + }, + { + "epoch": 0.15780924359445875, + "grad_norm": 1.711725358587465, + "learning_rate": 9.577737904620963e-06, + "loss": 0.7136, + "step": 5149 + }, + { + "epoch": 0.15783989211719995, + "grad_norm": 1.7035182918301366, + "learning_rate": 9.57753825822928e-06, + "loss": 0.7487, + "step": 5150 + }, + { + "epoch": 0.15787054063994116, + "grad_norm": 1.9589086225210777, + "learning_rate": 9.57733856673394e-06, + "loss": 0.7343, + "step": 5151 + }, + { + "epoch": 0.15790118916268236, + "grad_norm": 1.1071186588933413, + "learning_rate": 9.577138830136914e-06, + "loss": 0.5099, + "step": 5152 + }, + { + "epoch": 0.15793183768542357, + "grad_norm": 1.6262478571984178, + "learning_rate": 9.576939048440167e-06, + "loss": 0.7173, + "step": 5153 + }, + { + "epoch": 0.15796248620816478, + "grad_norm": 1.965753813870681, + "learning_rate": 9.576739221645669e-06, + "loss": 0.7898, + "step": 5154 + }, + { + "epoch": 0.15799313473090598, + "grad_norm": 2.0600993815500095, + "learning_rate": 9.576539349755387e-06, + "loss": 0.8077, + "step": 5155 + }, + { + "epoch": 0.1580237832536472, + "grad_norm": 1.88933676987593, + "learning_rate": 9.576339432771293e-06, + "loss": 0.811, + "step": 5156 + }, + { + "epoch": 0.15805443177638837, + "grad_norm": 1.9845059537048446, + "learning_rate": 9.576139470695353e-06, + "loss": 0.8165, + "step": 5157 + }, + { + "epoch": 0.15808508029912957, + "grad_norm": 1.7896327695185645, + "learning_rate": 9.575939463529544e-06, + "loss": 0.7877, + "step": 5158 + }, + { + "epoch": 0.15811572882187078, + "grad_norm": 1.8547976083637447, + "learning_rate": 9.57573941127583e-06, + "loss": 0.7137, + "step": 5159 + }, + { + "epoch": 0.15814637734461198, + "grad_norm": 1.7705575200042662, + "learning_rate": 9.575539313936186e-06, + "loss": 0.6463, + "step": 5160 + }, + { + "epoch": 0.1581770258673532, + "grad_norm": 1.948829150856235, + "learning_rate": 9.575339171512582e-06, + "loss": 0.7897, + "step": 5161 + }, + { + "epoch": 0.1582076743900944, + "grad_norm": 1.6938357120528083, + "learning_rate": 9.57513898400699e-06, + "loss": 0.752, + "step": 5162 + }, + { + "epoch": 0.1582383229128356, + "grad_norm": 1.884229125820616, + "learning_rate": 9.574938751421384e-06, + "loss": 0.7773, + "step": 5163 + }, + { + "epoch": 0.1582689714355768, + "grad_norm": 1.8192303133349992, + "learning_rate": 9.574738473757737e-06, + "loss": 0.7376, + "step": 5164 + }, + { + "epoch": 0.158299619958318, + "grad_norm": 0.9791251324683953, + "learning_rate": 9.574538151018018e-06, + "loss": 0.4905, + "step": 5165 + }, + { + "epoch": 0.15833026848105922, + "grad_norm": 2.072805768222092, + "learning_rate": 9.574337783204206e-06, + "loss": 0.82, + "step": 5166 + }, + { + "epoch": 0.15836091700380042, + "grad_norm": 1.7501174242734596, + "learning_rate": 9.574137370318275e-06, + "loss": 0.7433, + "step": 5167 + }, + { + "epoch": 0.15839156552654163, + "grad_norm": 1.6700361975512705, + "learning_rate": 9.573936912362195e-06, + "loss": 0.7762, + "step": 5168 + }, + { + "epoch": 0.15842221404928283, + "grad_norm": 1.7551248604073364, + "learning_rate": 9.573736409337946e-06, + "loss": 0.7034, + "step": 5169 + }, + { + "epoch": 
0.15845286257202404, + "grad_norm": 1.8660811951080485, + "learning_rate": 9.573535861247502e-06, + "loss": 0.7484, + "step": 5170 + }, + { + "epoch": 0.15848351109476524, + "grad_norm": 0.8183240121213802, + "learning_rate": 9.573335268092839e-06, + "loss": 0.5002, + "step": 5171 + }, + { + "epoch": 0.15851415961750642, + "grad_norm": 2.0223563347108344, + "learning_rate": 9.573134629875934e-06, + "loss": 0.6269, + "step": 5172 + }, + { + "epoch": 0.15854480814024763, + "grad_norm": 1.9147797702857003, + "learning_rate": 9.572933946598761e-06, + "loss": 0.7286, + "step": 5173 + }, + { + "epoch": 0.15857545666298883, + "grad_norm": 1.7513197868909949, + "learning_rate": 9.572733218263302e-06, + "loss": 0.7404, + "step": 5174 + }, + { + "epoch": 0.15860610518573004, + "grad_norm": 1.7710954894262578, + "learning_rate": 9.572532444871532e-06, + "loss": 0.8412, + "step": 5175 + }, + { + "epoch": 0.15863675370847125, + "grad_norm": 1.8846914204581158, + "learning_rate": 9.57233162642543e-06, + "loss": 0.805, + "step": 5176 + }, + { + "epoch": 0.15866740223121245, + "grad_norm": 2.124526185565614, + "learning_rate": 9.572130762926975e-06, + "loss": 0.787, + "step": 5177 + }, + { + "epoch": 0.15869805075395366, + "grad_norm": 1.909714225277626, + "learning_rate": 9.571929854378144e-06, + "loss": 0.7818, + "step": 5178 + }, + { + "epoch": 0.15872869927669486, + "grad_norm": 1.8205343620082437, + "learning_rate": 9.57172890078092e-06, + "loss": 0.7685, + "step": 5179 + }, + { + "epoch": 0.15875934779943607, + "grad_norm": 1.836033171117063, + "learning_rate": 9.57152790213728e-06, + "loss": 0.7408, + "step": 5180 + }, + { + "epoch": 0.15878999632217727, + "grad_norm": 1.85042073208907, + "learning_rate": 9.571326858449209e-06, + "loss": 0.7443, + "step": 5181 + }, + { + "epoch": 0.15882064484491848, + "grad_norm": 1.7707381755660112, + "learning_rate": 9.57112576971868e-06, + "loss": 0.7889, + "step": 5182 + }, + { + "epoch": 0.15885129336765968, + "grad_norm": 2.103559516110049, + "learning_rate": 9.570924635947682e-06, + "loss": 0.8502, + "step": 5183 + }, + { + "epoch": 0.1588819418904009, + "grad_norm": 1.7294096177420013, + "learning_rate": 9.570723457138196e-06, + "loss": 0.6565, + "step": 5184 + }, + { + "epoch": 0.1589125904131421, + "grad_norm": 1.7587448966069499, + "learning_rate": 9.5705222332922e-06, + "loss": 0.791, + "step": 5185 + }, + { + "epoch": 0.1589432389358833, + "grad_norm": 1.9994427157625665, + "learning_rate": 9.570320964411678e-06, + "loss": 0.7592, + "step": 5186 + }, + { + "epoch": 0.1589738874586245, + "grad_norm": 0.9310869003705924, + "learning_rate": 9.570119650498617e-06, + "loss": 0.5087, + "step": 5187 + }, + { + "epoch": 0.15900453598136569, + "grad_norm": 1.5695740470254809, + "learning_rate": 9.569918291554995e-06, + "loss": 0.6543, + "step": 5188 + }, + { + "epoch": 0.1590351845041069, + "grad_norm": 1.9003394171872867, + "learning_rate": 9.569716887582801e-06, + "loss": 0.8319, + "step": 5189 + }, + { + "epoch": 0.1590658330268481, + "grad_norm": 1.6609522054370895, + "learning_rate": 9.569515438584016e-06, + "loss": 0.7252, + "step": 5190 + }, + { + "epoch": 0.1590964815495893, + "grad_norm": 2.155328417154746, + "learning_rate": 9.569313944560628e-06, + "loss": 0.838, + "step": 5191 + }, + { + "epoch": 0.1591271300723305, + "grad_norm": 0.8054455276553973, + "learning_rate": 9.569112405514619e-06, + "loss": 0.5043, + "step": 5192 + }, + { + "epoch": 0.15915777859507171, + "grad_norm": 1.7894937365833252, + "learning_rate": 9.568910821447976e-06, + 
"loss": 0.8163, + "step": 5193 + }, + { + "epoch": 0.15918842711781292, + "grad_norm": 1.866013813406795, + "learning_rate": 9.568709192362687e-06, + "loss": 0.7581, + "step": 5194 + }, + { + "epoch": 0.15921907564055413, + "grad_norm": 2.0158349096573014, + "learning_rate": 9.568507518260737e-06, + "loss": 0.7561, + "step": 5195 + }, + { + "epoch": 0.15924972416329533, + "grad_norm": 1.710618038787171, + "learning_rate": 9.568305799144112e-06, + "loss": 0.7409, + "step": 5196 + }, + { + "epoch": 0.15928037268603654, + "grad_norm": 1.7880544498924413, + "learning_rate": 9.568104035014802e-06, + "loss": 0.6762, + "step": 5197 + }, + { + "epoch": 0.15931102120877774, + "grad_norm": 2.1665143902142154, + "learning_rate": 9.567902225874794e-06, + "loss": 0.7166, + "step": 5198 + }, + { + "epoch": 0.15934166973151895, + "grad_norm": 1.835338469599353, + "learning_rate": 9.567700371726079e-06, + "loss": 0.7646, + "step": 5199 + }, + { + "epoch": 0.15937231825426015, + "grad_norm": 2.1312260505531517, + "learning_rate": 9.56749847257064e-06, + "loss": 0.7677, + "step": 5200 + }, + { + "epoch": 0.15940296677700136, + "grad_norm": 1.5916713804527276, + "learning_rate": 9.567296528410472e-06, + "loss": 0.7065, + "step": 5201 + }, + { + "epoch": 0.15943361529974256, + "grad_norm": 1.7375223391449093, + "learning_rate": 9.567094539247562e-06, + "loss": 0.7285, + "step": 5202 + }, + { + "epoch": 0.15946426382248374, + "grad_norm": 1.794115265733253, + "learning_rate": 9.566892505083903e-06, + "loss": 0.721, + "step": 5203 + }, + { + "epoch": 0.15949491234522495, + "grad_norm": 1.7589518597390243, + "learning_rate": 9.566690425921482e-06, + "loss": 0.6997, + "step": 5204 + }, + { + "epoch": 0.15952556086796615, + "grad_norm": 1.8854569961967653, + "learning_rate": 9.56648830176229e-06, + "loss": 0.7514, + "step": 5205 + }, + { + "epoch": 0.15955620939070736, + "grad_norm": 0.9203731857189505, + "learning_rate": 9.566286132608322e-06, + "loss": 0.5129, + "step": 5206 + }, + { + "epoch": 0.15958685791344857, + "grad_norm": 1.7323005662571178, + "learning_rate": 9.566083918461569e-06, + "loss": 0.7241, + "step": 5207 + }, + { + "epoch": 0.15961750643618977, + "grad_norm": 2.2089416037188396, + "learning_rate": 9.565881659324021e-06, + "loss": 0.713, + "step": 5208 + }, + { + "epoch": 0.15964815495893098, + "grad_norm": 1.800122765823468, + "learning_rate": 9.565679355197674e-06, + "loss": 0.8144, + "step": 5209 + }, + { + "epoch": 0.15967880348167218, + "grad_norm": 1.6291878420981833, + "learning_rate": 9.565477006084521e-06, + "loss": 0.654, + "step": 5210 + }, + { + "epoch": 0.1597094520044134, + "grad_norm": 1.6131924184349844, + "learning_rate": 9.565274611986555e-06, + "loss": 0.6229, + "step": 5211 + }, + { + "epoch": 0.1597401005271546, + "grad_norm": 2.2346537353454434, + "learning_rate": 9.565072172905768e-06, + "loss": 0.7101, + "step": 5212 + }, + { + "epoch": 0.1597707490498958, + "grad_norm": 1.648742957468223, + "learning_rate": 9.564869688844158e-06, + "loss": 0.6656, + "step": 5213 + }, + { + "epoch": 0.159801397572637, + "grad_norm": 1.4918100496141857, + "learning_rate": 9.564667159803719e-06, + "loss": 0.6277, + "step": 5214 + }, + { + "epoch": 0.1598320460953782, + "grad_norm": 1.7675426678855002, + "learning_rate": 9.564464585786447e-06, + "loss": 0.7479, + "step": 5215 + }, + { + "epoch": 0.15986269461811942, + "grad_norm": 1.8030841129177897, + "learning_rate": 9.564261966794337e-06, + "loss": 0.72, + "step": 5216 + }, + { + "epoch": 0.15989334314086062, + "grad_norm": 
1.9414341478178234, + "learning_rate": 9.564059302829386e-06, + "loss": 0.6939, + "step": 5217 + }, + { + "epoch": 0.15992399166360183, + "grad_norm": 2.0862410114520746, + "learning_rate": 9.563856593893593e-06, + "loss": 0.7457, + "step": 5218 + }, + { + "epoch": 0.159954640186343, + "grad_norm": 2.040763419839761, + "learning_rate": 9.563653839988951e-06, + "loss": 0.7439, + "step": 5219 + }, + { + "epoch": 0.1599852887090842, + "grad_norm": 1.6688979415177365, + "learning_rate": 9.56345104111746e-06, + "loss": 0.7172, + "step": 5220 + }, + { + "epoch": 0.16001593723182542, + "grad_norm": 1.711306119991752, + "learning_rate": 9.563248197281119e-06, + "loss": 0.7854, + "step": 5221 + }, + { + "epoch": 0.16004658575456662, + "grad_norm": 1.9540236678702811, + "learning_rate": 9.563045308481926e-06, + "loss": 0.7887, + "step": 5222 + }, + { + "epoch": 0.16007723427730783, + "grad_norm": 1.909866166135983, + "learning_rate": 9.56284237472188e-06, + "loss": 0.7792, + "step": 5223 + }, + { + "epoch": 0.16010788280004903, + "grad_norm": 1.7829715536939565, + "learning_rate": 9.562639396002979e-06, + "loss": 0.7793, + "step": 5224 + }, + { + "epoch": 0.16013853132279024, + "grad_norm": 1.6754080284388688, + "learning_rate": 9.562436372327227e-06, + "loss": 0.6567, + "step": 5225 + }, + { + "epoch": 0.16016917984553145, + "grad_norm": 1.6033056907595429, + "learning_rate": 9.562233303696623e-06, + "loss": 0.7545, + "step": 5226 + }, + { + "epoch": 0.16019982836827265, + "grad_norm": 1.8099694583760564, + "learning_rate": 9.562030190113163e-06, + "loss": 0.7037, + "step": 5227 + }, + { + "epoch": 0.16023047689101386, + "grad_norm": 1.7911243757971251, + "learning_rate": 9.561827031578855e-06, + "loss": 0.7615, + "step": 5228 + }, + { + "epoch": 0.16026112541375506, + "grad_norm": 1.6916240778727367, + "learning_rate": 9.561623828095697e-06, + "loss": 0.6629, + "step": 5229 + }, + { + "epoch": 0.16029177393649627, + "grad_norm": 1.8888899785322126, + "learning_rate": 9.561420579665692e-06, + "loss": 0.7715, + "step": 5230 + }, + { + "epoch": 0.16032242245923747, + "grad_norm": 0.9990148376521791, + "learning_rate": 9.561217286290845e-06, + "loss": 0.4896, + "step": 5231 + }, + { + "epoch": 0.16035307098197868, + "grad_norm": 0.9405603400603475, + "learning_rate": 9.561013947973155e-06, + "loss": 0.5193, + "step": 5232 + }, + { + "epoch": 0.16038371950471988, + "grad_norm": 1.9901582571365997, + "learning_rate": 9.560810564714629e-06, + "loss": 0.6346, + "step": 5233 + }, + { + "epoch": 0.16041436802746106, + "grad_norm": 1.6048918025009407, + "learning_rate": 9.560607136517268e-06, + "loss": 0.6451, + "step": 5234 + }, + { + "epoch": 0.16044501655020227, + "grad_norm": 0.8675820519493376, + "learning_rate": 9.56040366338308e-06, + "loss": 0.508, + "step": 5235 + }, + { + "epoch": 0.16047566507294347, + "grad_norm": 2.013517652914433, + "learning_rate": 9.560200145314067e-06, + "loss": 0.7706, + "step": 5236 + }, + { + "epoch": 0.16050631359568468, + "grad_norm": 1.887849789632235, + "learning_rate": 9.559996582312235e-06, + "loss": 0.7882, + "step": 5237 + }, + { + "epoch": 0.16053696211842589, + "grad_norm": 1.8260237326842437, + "learning_rate": 9.55979297437959e-06, + "loss": 0.626, + "step": 5238 + }, + { + "epoch": 0.1605676106411671, + "grad_norm": 1.877788992709773, + "learning_rate": 9.559589321518137e-06, + "loss": 0.8109, + "step": 5239 + }, + { + "epoch": 0.1605982591639083, + "grad_norm": 1.0076907375377278, + "learning_rate": 9.559385623729886e-06, + "loss": 0.4926, + "step": 5240 + 
}, + { + "epoch": 0.1606289076866495, + "grad_norm": 2.0976787269824615, + "learning_rate": 9.55918188101684e-06, + "loss": 0.8437, + "step": 5241 + }, + { + "epoch": 0.1606595562093907, + "grad_norm": 1.9285394374915235, + "learning_rate": 9.558978093381008e-06, + "loss": 0.7237, + "step": 5242 + }, + { + "epoch": 0.1606902047321319, + "grad_norm": 1.6845894797419314, + "learning_rate": 9.5587742608244e-06, + "loss": 0.7044, + "step": 5243 + }, + { + "epoch": 0.16072085325487312, + "grad_norm": 1.870202057821533, + "learning_rate": 9.558570383349023e-06, + "loss": 0.7719, + "step": 5244 + }, + { + "epoch": 0.16075150177761433, + "grad_norm": 1.7708570966554127, + "learning_rate": 9.558366460956885e-06, + "loss": 0.7794, + "step": 5245 + }, + { + "epoch": 0.16078215030035553, + "grad_norm": 2.01710267542916, + "learning_rate": 9.558162493649996e-06, + "loss": 0.7493, + "step": 5246 + }, + { + "epoch": 0.16081279882309674, + "grad_norm": 1.528674346399743, + "learning_rate": 9.557958481430365e-06, + "loss": 0.6496, + "step": 5247 + }, + { + "epoch": 0.16084344734583794, + "grad_norm": 1.8770314448356344, + "learning_rate": 9.557754424300004e-06, + "loss": 0.7759, + "step": 5248 + }, + { + "epoch": 0.16087409586857915, + "grad_norm": 1.8947055304802072, + "learning_rate": 9.557550322260921e-06, + "loss": 0.6921, + "step": 5249 + }, + { + "epoch": 0.16090474439132033, + "grad_norm": 1.9139943611329806, + "learning_rate": 9.55734617531513e-06, + "loss": 0.7334, + "step": 5250 + }, + { + "epoch": 0.16093539291406153, + "grad_norm": 0.9012654489679223, + "learning_rate": 9.557141983464641e-06, + "loss": 0.4932, + "step": 5251 + }, + { + "epoch": 0.16096604143680274, + "grad_norm": 1.7491244811077082, + "learning_rate": 9.556937746711466e-06, + "loss": 0.5818, + "step": 5252 + }, + { + "epoch": 0.16099668995954394, + "grad_norm": 1.6267890440439494, + "learning_rate": 9.556733465057617e-06, + "loss": 0.7609, + "step": 5253 + }, + { + "epoch": 0.16102733848228515, + "grad_norm": 1.8492437693949122, + "learning_rate": 9.556529138505108e-06, + "loss": 0.777, + "step": 5254 + }, + { + "epoch": 0.16105798700502635, + "grad_norm": 1.7336068301117808, + "learning_rate": 9.556324767055952e-06, + "loss": 0.7078, + "step": 5255 + }, + { + "epoch": 0.16108863552776756, + "grad_norm": 1.8493106991338062, + "learning_rate": 9.556120350712158e-06, + "loss": 0.7497, + "step": 5256 + }, + { + "epoch": 0.16111928405050877, + "grad_norm": 1.7377414262276025, + "learning_rate": 9.55591588947575e-06, + "loss": 0.8271, + "step": 5257 + }, + { + "epoch": 0.16114993257324997, + "grad_norm": 1.909030868453334, + "learning_rate": 9.555711383348734e-06, + "loss": 0.6799, + "step": 5258 + }, + { + "epoch": 0.16118058109599118, + "grad_norm": 1.9537414208139117, + "learning_rate": 9.555506832333131e-06, + "loss": 0.62, + "step": 5259 + }, + { + "epoch": 0.16121122961873238, + "grad_norm": 1.8215003633876372, + "learning_rate": 9.55530223643095e-06, + "loss": 0.7997, + "step": 5260 + }, + { + "epoch": 0.1612418781414736, + "grad_norm": 1.7959656529071688, + "learning_rate": 9.555097595644212e-06, + "loss": 0.7943, + "step": 5261 + }, + { + "epoch": 0.1612725266642148, + "grad_norm": 1.7495300553826338, + "learning_rate": 9.554892909974933e-06, + "loss": 0.6771, + "step": 5262 + }, + { + "epoch": 0.161303175186956, + "grad_norm": 1.8513871734950385, + "learning_rate": 9.554688179425126e-06, + "loss": 0.8212, + "step": 5263 + }, + { + "epoch": 0.1613338237096972, + "grad_norm": 1.6312657167575322, + "learning_rate": 
9.554483403996813e-06, + "loss": 0.7688, + "step": 5264 + }, + { + "epoch": 0.16136447223243838, + "grad_norm": 1.9476432423769248, + "learning_rate": 9.554278583692009e-06, + "loss": 0.7886, + "step": 5265 + }, + { + "epoch": 0.1613951207551796, + "grad_norm": 1.7621709884850458, + "learning_rate": 9.554073718512735e-06, + "loss": 0.8081, + "step": 5266 + }, + { + "epoch": 0.1614257692779208, + "grad_norm": 1.8020118989106522, + "learning_rate": 9.553868808461004e-06, + "loss": 0.7099, + "step": 5267 + }, + { + "epoch": 0.161456417800662, + "grad_norm": 1.9215822553642763, + "learning_rate": 9.553663853538841e-06, + "loss": 0.6985, + "step": 5268 + }, + { + "epoch": 0.1614870663234032, + "grad_norm": 1.79424563291065, + "learning_rate": 9.553458853748263e-06, + "loss": 0.7495, + "step": 5269 + }, + { + "epoch": 0.1615177148461444, + "grad_norm": 1.928854672483742, + "learning_rate": 9.553253809091287e-06, + "loss": 0.7569, + "step": 5270 + }, + { + "epoch": 0.16154836336888562, + "grad_norm": 1.7936185948667627, + "learning_rate": 9.55304871956994e-06, + "loss": 0.7678, + "step": 5271 + }, + { + "epoch": 0.16157901189162682, + "grad_norm": 1.6961272604574766, + "learning_rate": 9.552843585186237e-06, + "loss": 0.6757, + "step": 5272 + }, + { + "epoch": 0.16160966041436803, + "grad_norm": 1.9568450293962025, + "learning_rate": 9.552638405942201e-06, + "loss": 0.7213, + "step": 5273 + }, + { + "epoch": 0.16164030893710923, + "grad_norm": 1.9351943730151921, + "learning_rate": 9.552433181839855e-06, + "loss": 0.6969, + "step": 5274 + }, + { + "epoch": 0.16167095745985044, + "grad_norm": 1.873924178892835, + "learning_rate": 9.55222791288122e-06, + "loss": 0.7259, + "step": 5275 + }, + { + "epoch": 0.16170160598259165, + "grad_norm": 1.7331795926345352, + "learning_rate": 9.552022599068317e-06, + "loss": 0.7542, + "step": 5276 + }, + { + "epoch": 0.16173225450533285, + "grad_norm": 1.8518036267491649, + "learning_rate": 9.551817240403172e-06, + "loss": 0.7496, + "step": 5277 + }, + { + "epoch": 0.16176290302807406, + "grad_norm": 1.9438299895837454, + "learning_rate": 9.551611836887807e-06, + "loss": 0.7131, + "step": 5278 + }, + { + "epoch": 0.16179355155081526, + "grad_norm": 1.8856120175227336, + "learning_rate": 9.551406388524244e-06, + "loss": 0.7656, + "step": 5279 + }, + { + "epoch": 0.16182420007355647, + "grad_norm": 2.034442754612429, + "learning_rate": 9.551200895314512e-06, + "loss": 0.7944, + "step": 5280 + }, + { + "epoch": 0.16185484859629765, + "grad_norm": 0.9297178154583118, + "learning_rate": 9.550995357260633e-06, + "loss": 0.4984, + "step": 5281 + }, + { + "epoch": 0.16188549711903885, + "grad_norm": 2.0494183702103745, + "learning_rate": 9.550789774364632e-06, + "loss": 0.7084, + "step": 5282 + }, + { + "epoch": 0.16191614564178006, + "grad_norm": 1.8618457518295324, + "learning_rate": 9.550584146628534e-06, + "loss": 0.6661, + "step": 5283 + }, + { + "epoch": 0.16194679416452126, + "grad_norm": 1.8509036548283657, + "learning_rate": 9.550378474054367e-06, + "loss": 0.7265, + "step": 5284 + }, + { + "epoch": 0.16197744268726247, + "grad_norm": 1.8483183148267104, + "learning_rate": 9.550172756644156e-06, + "loss": 0.7677, + "step": 5285 + }, + { + "epoch": 0.16200809121000367, + "grad_norm": 1.8163027893828434, + "learning_rate": 9.549966994399928e-06, + "loss": 0.6851, + "step": 5286 + }, + { + "epoch": 0.16203873973274488, + "grad_norm": 1.83930571021459, + "learning_rate": 9.549761187323714e-06, + "loss": 0.8411, + "step": 5287 + }, + { + "epoch": 0.16206938825548609, 
+ "grad_norm": 1.875423759712754, + "learning_rate": 9.549555335417535e-06, + "loss": 0.7064, + "step": 5288 + }, + { + "epoch": 0.1621000367782273, + "grad_norm": 1.751587604378209, + "learning_rate": 9.549349438683426e-06, + "loss": 0.8131, + "step": 5289 + }, + { + "epoch": 0.1621306853009685, + "grad_norm": 1.620273724676893, + "learning_rate": 9.549143497123412e-06, + "loss": 0.71, + "step": 5290 + }, + { + "epoch": 0.1621613338237097, + "grad_norm": 2.588047749339332, + "learning_rate": 9.548937510739524e-06, + "loss": 0.7356, + "step": 5291 + }, + { + "epoch": 0.1621919823464509, + "grad_norm": 1.8299980419678592, + "learning_rate": 9.54873147953379e-06, + "loss": 0.7552, + "step": 5292 + }, + { + "epoch": 0.1622226308691921, + "grad_norm": 1.645471716053331, + "learning_rate": 9.548525403508241e-06, + "loss": 0.6955, + "step": 5293 + }, + { + "epoch": 0.16225327939193332, + "grad_norm": 0.9589866400447533, + "learning_rate": 9.548319282664906e-06, + "loss": 0.491, + "step": 5294 + }, + { + "epoch": 0.16228392791467453, + "grad_norm": 1.9232753365040958, + "learning_rate": 9.54811311700582e-06, + "loss": 0.7644, + "step": 5295 + }, + { + "epoch": 0.1623145764374157, + "grad_norm": 1.9682978644966584, + "learning_rate": 9.54790690653301e-06, + "loss": 0.7886, + "step": 5296 + }, + { + "epoch": 0.1623452249601569, + "grad_norm": 2.0803951158686544, + "learning_rate": 9.54770065124851e-06, + "loss": 0.7538, + "step": 5297 + }, + { + "epoch": 0.16237587348289811, + "grad_norm": 1.7910867449432095, + "learning_rate": 9.547494351154352e-06, + "loss": 0.8983, + "step": 5298 + }, + { + "epoch": 0.16240652200563932, + "grad_norm": 1.5685662963932423, + "learning_rate": 9.547288006252568e-06, + "loss": 0.6282, + "step": 5299 + }, + { + "epoch": 0.16243717052838053, + "grad_norm": 0.9008621178035504, + "learning_rate": 9.547081616545193e-06, + "loss": 0.5159, + "step": 5300 + }, + { + "epoch": 0.16246781905112173, + "grad_norm": 1.6490931584307444, + "learning_rate": 9.54687518203426e-06, + "loss": 0.701, + "step": 5301 + }, + { + "epoch": 0.16249846757386294, + "grad_norm": 1.9602271128570157, + "learning_rate": 9.546668702721801e-06, + "loss": 0.6736, + "step": 5302 + }, + { + "epoch": 0.16252911609660414, + "grad_norm": 1.9027590345994356, + "learning_rate": 9.546462178609852e-06, + "loss": 0.6468, + "step": 5303 + }, + { + "epoch": 0.16255976461934535, + "grad_norm": 1.8209204979817322, + "learning_rate": 9.546255609700447e-06, + "loss": 0.8081, + "step": 5304 + }, + { + "epoch": 0.16259041314208655, + "grad_norm": 1.8530160060297516, + "learning_rate": 9.546048995995625e-06, + "loss": 0.7805, + "step": 5305 + }, + { + "epoch": 0.16262106166482776, + "grad_norm": 0.8693658102122664, + "learning_rate": 9.545842337497417e-06, + "loss": 0.5173, + "step": 5306 + }, + { + "epoch": 0.16265171018756897, + "grad_norm": 1.6642858828961802, + "learning_rate": 9.545635634207862e-06, + "loss": 0.7811, + "step": 5307 + }, + { + "epoch": 0.16268235871031017, + "grad_norm": 1.7222848265539283, + "learning_rate": 9.545428886128996e-06, + "loss": 0.7665, + "step": 5308 + }, + { + "epoch": 0.16271300723305138, + "grad_norm": 1.9444420802251952, + "learning_rate": 9.545222093262856e-06, + "loss": 0.6814, + "step": 5309 + }, + { + "epoch": 0.16274365575579258, + "grad_norm": 1.7570068126849956, + "learning_rate": 9.54501525561148e-06, + "loss": 0.8085, + "step": 5310 + }, + { + "epoch": 0.1627743042785338, + "grad_norm": 1.9879347373441765, + "learning_rate": 9.544808373176906e-06, + "loss": 0.765, + "step": 
5311 + }, + { + "epoch": 0.16280495280127497, + "grad_norm": 0.8570063195517861, + "learning_rate": 9.544601445961172e-06, + "loss": 0.5139, + "step": 5312 + }, + { + "epoch": 0.16283560132401617, + "grad_norm": 1.6246220195054208, + "learning_rate": 9.544394473966317e-06, + "loss": 0.6117, + "step": 5313 + }, + { + "epoch": 0.16286624984675738, + "grad_norm": 1.9384473484125313, + "learning_rate": 9.54418745719438e-06, + "loss": 0.7212, + "step": 5314 + }, + { + "epoch": 0.16289689836949858, + "grad_norm": 2.053315517066517, + "learning_rate": 9.543980395647403e-06, + "loss": 0.7792, + "step": 5315 + }, + { + "epoch": 0.1629275468922398, + "grad_norm": 1.707917819399618, + "learning_rate": 9.543773289327423e-06, + "loss": 0.6523, + "step": 5316 + }, + { + "epoch": 0.162958195414981, + "grad_norm": 0.8176992323886978, + "learning_rate": 9.543566138236483e-06, + "loss": 0.5134, + "step": 5317 + }, + { + "epoch": 0.1629888439377222, + "grad_norm": 2.0152099902354106, + "learning_rate": 9.543358942376623e-06, + "loss": 0.7492, + "step": 5318 + }, + { + "epoch": 0.1630194924604634, + "grad_norm": 0.84892937990634, + "learning_rate": 9.543151701749885e-06, + "loss": 0.5153, + "step": 5319 + }, + { + "epoch": 0.1630501409832046, + "grad_norm": 2.137225941519722, + "learning_rate": 9.54294441635831e-06, + "loss": 0.7604, + "step": 5320 + }, + { + "epoch": 0.16308078950594582, + "grad_norm": 1.9452544202167656, + "learning_rate": 9.542737086203943e-06, + "loss": 0.7229, + "step": 5321 + }, + { + "epoch": 0.16311143802868702, + "grad_norm": 1.770620888449625, + "learning_rate": 9.542529711288824e-06, + "loss": 0.6511, + "step": 5322 + }, + { + "epoch": 0.16314208655142823, + "grad_norm": 1.7523378280493491, + "learning_rate": 9.542322291614999e-06, + "loss": 0.7779, + "step": 5323 + }, + { + "epoch": 0.16317273507416943, + "grad_norm": 2.2062111865061196, + "learning_rate": 9.542114827184507e-06, + "loss": 0.7993, + "step": 5324 + }, + { + "epoch": 0.16320338359691064, + "grad_norm": 1.8791163507389126, + "learning_rate": 9.541907317999397e-06, + "loss": 0.8583, + "step": 5325 + }, + { + "epoch": 0.16323403211965185, + "grad_norm": 1.9050463317533237, + "learning_rate": 9.541699764061714e-06, + "loss": 0.7092, + "step": 5326 + }, + { + "epoch": 0.16326468064239302, + "grad_norm": 1.9998966280709332, + "learning_rate": 9.5414921653735e-06, + "loss": 0.7846, + "step": 5327 + }, + { + "epoch": 0.16329532916513423, + "grad_norm": 1.6941256417433437, + "learning_rate": 9.5412845219368e-06, + "loss": 0.7275, + "step": 5328 + }, + { + "epoch": 0.16332597768787543, + "grad_norm": 1.8930378049053456, + "learning_rate": 9.541076833753665e-06, + "loss": 0.761, + "step": 5329 + }, + { + "epoch": 0.16335662621061664, + "grad_norm": 1.69002932838127, + "learning_rate": 9.540869100826136e-06, + "loss": 0.7731, + "step": 5330 + }, + { + "epoch": 0.16338727473335785, + "grad_norm": 2.2724721956087874, + "learning_rate": 9.540661323156261e-06, + "loss": 0.8033, + "step": 5331 + }, + { + "epoch": 0.16341792325609905, + "grad_norm": 1.0458573042384547, + "learning_rate": 9.54045350074609e-06, + "loss": 0.484, + "step": 5332 + }, + { + "epoch": 0.16344857177884026, + "grad_norm": 2.1268870301014333, + "learning_rate": 9.540245633597667e-06, + "loss": 0.7564, + "step": 5333 + }, + { + "epoch": 0.16347922030158146, + "grad_norm": 0.9052896927325926, + "learning_rate": 9.540037721713045e-06, + "loss": 0.5046, + "step": 5334 + }, + { + "epoch": 0.16350986882432267, + "grad_norm": 1.979776793619579, + "learning_rate": 
9.539829765094265e-06, + "loss": 0.9123, + "step": 5335 + }, + { + "epoch": 0.16354051734706387, + "grad_norm": 1.8061850531080035, + "learning_rate": 9.539621763743384e-06, + "loss": 0.708, + "step": 5336 + }, + { + "epoch": 0.16357116586980508, + "grad_norm": 1.5944410603029322, + "learning_rate": 9.539413717662449e-06, + "loss": 0.6935, + "step": 5337 + }, + { + "epoch": 0.16360181439254629, + "grad_norm": 2.030871447719239, + "learning_rate": 9.53920562685351e-06, + "loss": 0.7605, + "step": 5338 + }, + { + "epoch": 0.1636324629152875, + "grad_norm": 1.683267920298032, + "learning_rate": 9.538997491318613e-06, + "loss": 0.8174, + "step": 5339 + }, + { + "epoch": 0.1636631114380287, + "grad_norm": 1.9935031704796395, + "learning_rate": 9.538789311059815e-06, + "loss": 0.7359, + "step": 5340 + }, + { + "epoch": 0.1636937599607699, + "grad_norm": 2.0821164460220287, + "learning_rate": 9.538581086079164e-06, + "loss": 0.7044, + "step": 5341 + }, + { + "epoch": 0.1637244084835111, + "grad_norm": 1.7179318527666925, + "learning_rate": 9.538372816378711e-06, + "loss": 0.7839, + "step": 5342 + }, + { + "epoch": 0.16375505700625229, + "grad_norm": 1.8548400661908466, + "learning_rate": 9.538164501960511e-06, + "loss": 0.7482, + "step": 5343 + }, + { + "epoch": 0.1637857055289935, + "grad_norm": 1.2061613399708688, + "learning_rate": 9.537956142826615e-06, + "loss": 0.5047, + "step": 5344 + }, + { + "epoch": 0.1638163540517347, + "grad_norm": 1.5185011355954516, + "learning_rate": 9.537747738979076e-06, + "loss": 0.6214, + "step": 5345 + }, + { + "epoch": 0.1638470025744759, + "grad_norm": 0.9111957060021851, + "learning_rate": 9.537539290419945e-06, + "loss": 0.4841, + "step": 5346 + }, + { + "epoch": 0.1638776510972171, + "grad_norm": 1.71010123117021, + "learning_rate": 9.537330797151282e-06, + "loss": 0.6786, + "step": 5347 + }, + { + "epoch": 0.16390829961995831, + "grad_norm": 2.0169762006929504, + "learning_rate": 9.537122259175135e-06, + "loss": 0.8134, + "step": 5348 + }, + { + "epoch": 0.16393894814269952, + "grad_norm": 1.649879924848106, + "learning_rate": 9.536913676493564e-06, + "loss": 0.6782, + "step": 5349 + }, + { + "epoch": 0.16396959666544073, + "grad_norm": 1.066636885640487, + "learning_rate": 9.53670504910862e-06, + "loss": 0.5255, + "step": 5350 + }, + { + "epoch": 0.16400024518818193, + "grad_norm": 1.7911230903341813, + "learning_rate": 9.536496377022362e-06, + "loss": 0.8061, + "step": 5351 + }, + { + "epoch": 0.16403089371092314, + "grad_norm": 1.915447082221522, + "learning_rate": 9.536287660236842e-06, + "loss": 0.72, + "step": 5352 + }, + { + "epoch": 0.16406154223366434, + "grad_norm": 1.9643709975892816, + "learning_rate": 9.53607889875412e-06, + "loss": 0.7291, + "step": 5353 + }, + { + "epoch": 0.16409219075640555, + "grad_norm": 1.8769334219580385, + "learning_rate": 9.535870092576253e-06, + "loss": 0.6626, + "step": 5354 + }, + { + "epoch": 0.16412283927914675, + "grad_norm": 1.9667035018781915, + "learning_rate": 9.535661241705296e-06, + "loss": 0.8227, + "step": 5355 + }, + { + "epoch": 0.16415348780188796, + "grad_norm": 1.856952135208599, + "learning_rate": 9.53545234614331e-06, + "loss": 0.6573, + "step": 5356 + }, + { + "epoch": 0.16418413632462917, + "grad_norm": 1.6586555240327931, + "learning_rate": 9.53524340589235e-06, + "loss": 0.6585, + "step": 5357 + }, + { + "epoch": 0.16421478484737034, + "grad_norm": 1.7138801543686837, + "learning_rate": 9.535034420954476e-06, + "loss": 0.7817, + "step": 5358 + }, + { + "epoch": 0.16424543337011155, + 
"grad_norm": 1.8152349043323157, + "learning_rate": 9.53482539133175e-06, + "loss": 0.7412, + "step": 5359 + }, + { + "epoch": 0.16427608189285275, + "grad_norm": 1.7849101862389976, + "learning_rate": 9.534616317026227e-06, + "loss": 0.72, + "step": 5360 + }, + { + "epoch": 0.16430673041559396, + "grad_norm": 1.94729670924048, + "learning_rate": 9.53440719803997e-06, + "loss": 0.6859, + "step": 5361 + }, + { + "epoch": 0.16433737893833517, + "grad_norm": 1.7063559055313442, + "learning_rate": 9.534198034375039e-06, + "loss": 0.6858, + "step": 5362 + }, + { + "epoch": 0.16436802746107637, + "grad_norm": 2.0191726024434984, + "learning_rate": 9.533988826033494e-06, + "loss": 0.7094, + "step": 5363 + }, + { + "epoch": 0.16439867598381758, + "grad_norm": 1.8883539051676899, + "learning_rate": 9.533779573017397e-06, + "loss": 0.71, + "step": 5364 + }, + { + "epoch": 0.16442932450655878, + "grad_norm": 1.7354217792649826, + "learning_rate": 9.53357027532881e-06, + "loss": 0.6854, + "step": 5365 + }, + { + "epoch": 0.1644599730293, + "grad_norm": 1.898105057711462, + "learning_rate": 9.533360932969795e-06, + "loss": 0.8048, + "step": 5366 + }, + { + "epoch": 0.1644906215520412, + "grad_norm": 1.9620184144814825, + "learning_rate": 9.533151545942414e-06, + "loss": 0.7637, + "step": 5367 + }, + { + "epoch": 0.1645212700747824, + "grad_norm": 1.9008099967488723, + "learning_rate": 9.532942114248734e-06, + "loss": 0.7084, + "step": 5368 + }, + { + "epoch": 0.1645519185975236, + "grad_norm": 0.953344923752192, + "learning_rate": 9.532732637890813e-06, + "loss": 0.506, + "step": 5369 + }, + { + "epoch": 0.1645825671202648, + "grad_norm": 1.798536883085912, + "learning_rate": 9.532523116870718e-06, + "loss": 0.7399, + "step": 5370 + }, + { + "epoch": 0.16461321564300602, + "grad_norm": 1.7271827342839856, + "learning_rate": 9.532313551190513e-06, + "loss": 0.666, + "step": 5371 + }, + { + "epoch": 0.16464386416574722, + "grad_norm": 1.8093364840797412, + "learning_rate": 9.532103940852263e-06, + "loss": 0.7908, + "step": 5372 + }, + { + "epoch": 0.16467451268848843, + "grad_norm": 2.7020149803837907, + "learning_rate": 9.531894285858032e-06, + "loss": 0.7158, + "step": 5373 + }, + { + "epoch": 0.1647051612112296, + "grad_norm": 2.0181760654581473, + "learning_rate": 9.53168458620989e-06, + "loss": 0.7718, + "step": 5374 + }, + { + "epoch": 0.1647358097339708, + "grad_norm": 1.9017574149026013, + "learning_rate": 9.531474841909898e-06, + "loss": 0.6958, + "step": 5375 + }, + { + "epoch": 0.16476645825671202, + "grad_norm": 0.9350237192257729, + "learning_rate": 9.531265052960126e-06, + "loss": 0.485, + "step": 5376 + }, + { + "epoch": 0.16479710677945322, + "grad_norm": 1.8045201128384252, + "learning_rate": 9.531055219362639e-06, + "loss": 0.7984, + "step": 5377 + }, + { + "epoch": 0.16482775530219443, + "grad_norm": 2.1292986082500596, + "learning_rate": 9.530845341119506e-06, + "loss": 0.8851, + "step": 5378 + }, + { + "epoch": 0.16485840382493563, + "grad_norm": 0.8121071762702956, + "learning_rate": 9.530635418232795e-06, + "loss": 0.5057, + "step": 5379 + }, + { + "epoch": 0.16488905234767684, + "grad_norm": 2.0857651598462055, + "learning_rate": 9.530425450704574e-06, + "loss": 0.7647, + "step": 5380 + }, + { + "epoch": 0.16491970087041805, + "grad_norm": 1.8859008494598022, + "learning_rate": 9.530215438536912e-06, + "loss": 0.7578, + "step": 5381 + }, + { + "epoch": 0.16495034939315925, + "grad_norm": 1.8656903285413058, + "learning_rate": 9.530005381731876e-06, + "loss": 0.746, + "step": 5382 
+ }, + { + "epoch": 0.16498099791590046, + "grad_norm": 0.8251935214977781, + "learning_rate": 9.529795280291542e-06, + "loss": 0.4976, + "step": 5383 + }, + { + "epoch": 0.16501164643864166, + "grad_norm": 1.501805557300514, + "learning_rate": 9.529585134217973e-06, + "loss": 0.7066, + "step": 5384 + }, + { + "epoch": 0.16504229496138287, + "grad_norm": 1.8965411056688455, + "learning_rate": 9.529374943513244e-06, + "loss": 0.7439, + "step": 5385 + }, + { + "epoch": 0.16507294348412407, + "grad_norm": 1.793999903779015, + "learning_rate": 9.529164708179424e-06, + "loss": 0.7244, + "step": 5386 + }, + { + "epoch": 0.16510359200686528, + "grad_norm": 1.7856380055619527, + "learning_rate": 9.528954428218586e-06, + "loss": 0.7669, + "step": 5387 + }, + { + "epoch": 0.16513424052960649, + "grad_norm": 1.7794961769016475, + "learning_rate": 9.528744103632802e-06, + "loss": 0.5376, + "step": 5388 + }, + { + "epoch": 0.16516488905234766, + "grad_norm": 1.7239791013727623, + "learning_rate": 9.52853373442414e-06, + "loss": 0.6672, + "step": 5389 + }, + { + "epoch": 0.16519553757508887, + "grad_norm": 1.8876435871982191, + "learning_rate": 9.52832332059468e-06, + "loss": 0.7338, + "step": 5390 + }, + { + "epoch": 0.16522618609783007, + "grad_norm": 1.812318745802571, + "learning_rate": 9.528112862146492e-06, + "loss": 0.6914, + "step": 5391 + }, + { + "epoch": 0.16525683462057128, + "grad_norm": 1.7756778855970903, + "learning_rate": 9.527902359081649e-06, + "loss": 0.8213, + "step": 5392 + }, + { + "epoch": 0.16528748314331249, + "grad_norm": 1.8585456430337592, + "learning_rate": 9.527691811402224e-06, + "loss": 0.7148, + "step": 5393 + }, + { + "epoch": 0.1653181316660537, + "grad_norm": 2.368653444535271, + "learning_rate": 9.527481219110293e-06, + "loss": 0.6773, + "step": 5394 + }, + { + "epoch": 0.1653487801887949, + "grad_norm": 1.7180923132691213, + "learning_rate": 9.527270582207933e-06, + "loss": 0.7252, + "step": 5395 + }, + { + "epoch": 0.1653794287115361, + "grad_norm": 1.9380765108815994, + "learning_rate": 9.527059900697216e-06, + "loss": 0.7288, + "step": 5396 + }, + { + "epoch": 0.1654100772342773, + "grad_norm": 1.68506241363974, + "learning_rate": 9.52684917458022e-06, + "loss": 0.7191, + "step": 5397 + }, + { + "epoch": 0.16544072575701851, + "grad_norm": 1.7387529290779076, + "learning_rate": 9.526638403859021e-06, + "loss": 0.7592, + "step": 5398 + }, + { + "epoch": 0.16547137427975972, + "grad_norm": 1.9296519643991579, + "learning_rate": 9.526427588535696e-06, + "loss": 0.7953, + "step": 5399 + }, + { + "epoch": 0.16550202280250093, + "grad_norm": 2.0758155032619636, + "learning_rate": 9.526216728612321e-06, + "loss": 0.7043, + "step": 5400 + }, + { + "epoch": 0.16553267132524213, + "grad_norm": 2.163862544375518, + "learning_rate": 9.526005824090975e-06, + "loss": 0.8597, + "step": 5401 + }, + { + "epoch": 0.16556331984798334, + "grad_norm": 1.9796024009648219, + "learning_rate": 9.525794874973735e-06, + "loss": 0.7186, + "step": 5402 + }, + { + "epoch": 0.16559396837072454, + "grad_norm": 1.8829065135493939, + "learning_rate": 9.525583881262681e-06, + "loss": 0.8402, + "step": 5403 + }, + { + "epoch": 0.16562461689346575, + "grad_norm": 1.7985781527080176, + "learning_rate": 9.52537284295989e-06, + "loss": 0.7644, + "step": 5404 + }, + { + "epoch": 0.16565526541620693, + "grad_norm": 1.8707129503605113, + "learning_rate": 9.525161760067443e-06, + "loss": 0.7662, + "step": 5405 + }, + { + "epoch": 0.16568591393894813, + "grad_norm": 1.021364357409291, + "learning_rate": 
9.52495063258742e-06, + "loss": 0.5267, + "step": 5406 + }, + { + "epoch": 0.16571656246168934, + "grad_norm": 2.069298851563156, + "learning_rate": 9.5247394605219e-06, + "loss": 0.8088, + "step": 5407 + }, + { + "epoch": 0.16574721098443054, + "grad_norm": 1.710058036864639, + "learning_rate": 9.524528243872964e-06, + "loss": 0.6215, + "step": 5408 + }, + { + "epoch": 0.16577785950717175, + "grad_norm": 1.8597544351232445, + "learning_rate": 9.524316982642693e-06, + "loss": 0.7404, + "step": 5409 + }, + { + "epoch": 0.16580850802991295, + "grad_norm": 0.9116620395255023, + "learning_rate": 9.524105676833172e-06, + "loss": 0.4978, + "step": 5410 + }, + { + "epoch": 0.16583915655265416, + "grad_norm": 1.6391889113624345, + "learning_rate": 9.523894326446478e-06, + "loss": 0.6883, + "step": 5411 + }, + { + "epoch": 0.16586980507539537, + "grad_norm": 1.8261936454204857, + "learning_rate": 9.523682931484696e-06, + "loss": 0.6242, + "step": 5412 + }, + { + "epoch": 0.16590045359813657, + "grad_norm": 1.8536516225786246, + "learning_rate": 9.523471491949909e-06, + "loss": 0.6721, + "step": 5413 + }, + { + "epoch": 0.16593110212087778, + "grad_norm": 1.975390838594922, + "learning_rate": 9.5232600078442e-06, + "loss": 0.7448, + "step": 5414 + }, + { + "epoch": 0.16596175064361898, + "grad_norm": 1.591249440101633, + "learning_rate": 9.523048479169653e-06, + "loss": 0.7634, + "step": 5415 + }, + { + "epoch": 0.1659923991663602, + "grad_norm": 1.6098119940598528, + "learning_rate": 9.522836905928352e-06, + "loss": 0.6763, + "step": 5416 + }, + { + "epoch": 0.1660230476891014, + "grad_norm": 1.8173270714596186, + "learning_rate": 9.522625288122381e-06, + "loss": 0.732, + "step": 5417 + }, + { + "epoch": 0.1660536962118426, + "grad_norm": 1.6050977814786098, + "learning_rate": 9.522413625753827e-06, + "loss": 0.7643, + "step": 5418 + }, + { + "epoch": 0.1660843447345838, + "grad_norm": 1.9022924326012198, + "learning_rate": 9.522201918824774e-06, + "loss": 0.7996, + "step": 5419 + }, + { + "epoch": 0.16611499325732498, + "grad_norm": 1.9228794820754243, + "learning_rate": 9.521990167337309e-06, + "loss": 0.7895, + "step": 5420 + }, + { + "epoch": 0.1661456417800662, + "grad_norm": 1.7090650024792267, + "learning_rate": 9.521778371293517e-06, + "loss": 0.7849, + "step": 5421 + }, + { + "epoch": 0.1661762903028074, + "grad_norm": 1.6236754787029553, + "learning_rate": 9.521566530695485e-06, + "loss": 0.746, + "step": 5422 + }, + { + "epoch": 0.1662069388255486, + "grad_norm": 1.667806214910666, + "learning_rate": 9.521354645545303e-06, + "loss": 0.7645, + "step": 5423 + }, + { + "epoch": 0.1662375873482898, + "grad_norm": 1.6371464540046308, + "learning_rate": 9.521142715845055e-06, + "loss": 0.6435, + "step": 5424 + }, + { + "epoch": 0.166268235871031, + "grad_norm": 1.7787959596729792, + "learning_rate": 9.520930741596831e-06, + "loss": 0.7014, + "step": 5425 + }, + { + "epoch": 0.16629888439377222, + "grad_norm": 0.9253453966690562, + "learning_rate": 9.520718722802722e-06, + "loss": 0.5019, + "step": 5426 + }, + { + "epoch": 0.16632953291651342, + "grad_norm": 1.823887564781995, + "learning_rate": 9.520506659464812e-06, + "loss": 0.7187, + "step": 5427 + }, + { + "epoch": 0.16636018143925463, + "grad_norm": 1.7637253127235484, + "learning_rate": 9.520294551585195e-06, + "loss": 0.8109, + "step": 5428 + }, + { + "epoch": 0.16639082996199583, + "grad_norm": 1.84279847906109, + "learning_rate": 9.520082399165958e-06, + "loss": 0.6669, + "step": 5429 + }, + { + "epoch": 0.16642147848473704, + 
"grad_norm": 1.849534419109039, + "learning_rate": 9.519870202209194e-06, + "loss": 0.7212, + "step": 5430 + }, + { + "epoch": 0.16645212700747825, + "grad_norm": 1.6916688720866593, + "learning_rate": 9.519657960716992e-06, + "loss": 0.7485, + "step": 5431 + }, + { + "epoch": 0.16648277553021945, + "grad_norm": 2.031513400472209, + "learning_rate": 9.519445674691443e-06, + "loss": 0.7296, + "step": 5432 + }, + { + "epoch": 0.16651342405296066, + "grad_norm": 1.6736283714886864, + "learning_rate": 9.51923334413464e-06, + "loss": 0.7322, + "step": 5433 + }, + { + "epoch": 0.16654407257570186, + "grad_norm": 1.7323716273924272, + "learning_rate": 9.519020969048676e-06, + "loss": 0.6994, + "step": 5434 + }, + { + "epoch": 0.16657472109844307, + "grad_norm": 1.7559784895564017, + "learning_rate": 9.518808549435639e-06, + "loss": 0.764, + "step": 5435 + }, + { + "epoch": 0.16660536962118425, + "grad_norm": 1.7600403179648296, + "learning_rate": 9.518596085297627e-06, + "loss": 0.7129, + "step": 5436 + }, + { + "epoch": 0.16663601814392545, + "grad_norm": 1.5463253237048211, + "learning_rate": 9.518383576636732e-06, + "loss": 0.6817, + "step": 5437 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 1.8647408814153528, + "learning_rate": 9.518171023455047e-06, + "loss": 0.7377, + "step": 5438 + }, + { + "epoch": 0.16669731518940786, + "grad_norm": 1.036902163487068, + "learning_rate": 9.517958425754668e-06, + "loss": 0.5237, + "step": 5439 + }, + { + "epoch": 0.16672796371214907, + "grad_norm": 1.807475685154996, + "learning_rate": 9.517745783537686e-06, + "loss": 0.7814, + "step": 5440 + }, + { + "epoch": 0.16675861223489027, + "grad_norm": 1.8299387695747615, + "learning_rate": 9.517533096806201e-06, + "loss": 0.7907, + "step": 5441 + }, + { + "epoch": 0.16678926075763148, + "grad_norm": 1.726581943955212, + "learning_rate": 9.517320365562306e-06, + "loss": 0.7402, + "step": 5442 + }, + { + "epoch": 0.16681990928037269, + "grad_norm": 1.9178962688585173, + "learning_rate": 9.517107589808098e-06, + "loss": 0.7451, + "step": 5443 + }, + { + "epoch": 0.1668505578031139, + "grad_norm": 1.6857538914664711, + "learning_rate": 9.516894769545672e-06, + "loss": 0.7455, + "step": 5444 + }, + { + "epoch": 0.1668812063258551, + "grad_norm": 1.9930622089253875, + "learning_rate": 9.516681904777128e-06, + "loss": 0.7531, + "step": 5445 + }, + { + "epoch": 0.1669118548485963, + "grad_norm": 2.045853546549637, + "learning_rate": 9.51646899550456e-06, + "loss": 0.7811, + "step": 5446 + }, + { + "epoch": 0.1669425033713375, + "grad_norm": 2.1555486191284996, + "learning_rate": 9.516256041730068e-06, + "loss": 0.7472, + "step": 5447 + }, + { + "epoch": 0.16697315189407871, + "grad_norm": 2.1529474618314532, + "learning_rate": 9.516043043455749e-06, + "loss": 0.8654, + "step": 5448 + }, + { + "epoch": 0.16700380041681992, + "grad_norm": 2.0062246048563397, + "learning_rate": 9.515830000683703e-06, + "loss": 0.7499, + "step": 5449 + }, + { + "epoch": 0.16703444893956113, + "grad_norm": 1.7021518086794665, + "learning_rate": 9.515616913416029e-06, + "loss": 0.6847, + "step": 5450 + }, + { + "epoch": 0.1670650974623023, + "grad_norm": 1.9691448872546435, + "learning_rate": 9.515403781654825e-06, + "loss": 0.7459, + "step": 5451 + }, + { + "epoch": 0.1670957459850435, + "grad_norm": 1.7285806452509818, + "learning_rate": 9.515190605402194e-06, + "loss": 0.7308, + "step": 5452 + }, + { + "epoch": 0.16712639450778471, + "grad_norm": 1.894898970754207, + "learning_rate": 9.514977384660233e-06, + "loss": 0.8083, + 
"step": 5453 + }, + { + "epoch": 0.16715704303052592, + "grad_norm": 1.9773477554155199, + "learning_rate": 9.514764119431047e-06, + "loss": 0.8218, + "step": 5454 + }, + { + "epoch": 0.16718769155326713, + "grad_norm": 1.017370417204914, + "learning_rate": 9.514550809716731e-06, + "loss": 0.4784, + "step": 5455 + }, + { + "epoch": 0.16721834007600833, + "grad_norm": 2.010808982024262, + "learning_rate": 9.514337455519394e-06, + "loss": 0.7719, + "step": 5456 + }, + { + "epoch": 0.16724898859874954, + "grad_norm": 1.8587077761170436, + "learning_rate": 9.514124056841133e-06, + "loss": 0.7491, + "step": 5457 + }, + { + "epoch": 0.16727963712149074, + "grad_norm": 1.6297461030891143, + "learning_rate": 9.513910613684054e-06, + "loss": 0.7237, + "step": 5458 + }, + { + "epoch": 0.16731028564423195, + "grad_norm": 2.0571528711353286, + "learning_rate": 9.513697126050258e-06, + "loss": 0.7581, + "step": 5459 + }, + { + "epoch": 0.16734093416697315, + "grad_norm": 1.7790772896191094, + "learning_rate": 9.51348359394185e-06, + "loss": 0.7244, + "step": 5460 + }, + { + "epoch": 0.16737158268971436, + "grad_norm": 1.9233053539340674, + "learning_rate": 9.513270017360933e-06, + "loss": 0.6211, + "step": 5461 + }, + { + "epoch": 0.16740223121245557, + "grad_norm": 1.954495869855543, + "learning_rate": 9.513056396309613e-06, + "loss": 0.7273, + "step": 5462 + }, + { + "epoch": 0.16743287973519677, + "grad_norm": 1.7896427975668279, + "learning_rate": 9.512842730789992e-06, + "loss": 0.7182, + "step": 5463 + }, + { + "epoch": 0.16746352825793798, + "grad_norm": 1.7008880495539762, + "learning_rate": 9.512629020804176e-06, + "loss": 0.7672, + "step": 5464 + }, + { + "epoch": 0.16749417678067918, + "grad_norm": 1.9366389336209664, + "learning_rate": 9.512415266354274e-06, + "loss": 0.799, + "step": 5465 + }, + { + "epoch": 0.1675248253034204, + "grad_norm": 1.76034455789019, + "learning_rate": 9.512201467442389e-06, + "loss": 0.6823, + "step": 5466 + }, + { + "epoch": 0.16755547382616157, + "grad_norm": 1.7465154229438766, + "learning_rate": 9.511987624070629e-06, + "loss": 0.7737, + "step": 5467 + }, + { + "epoch": 0.16758612234890277, + "grad_norm": 2.033438208547295, + "learning_rate": 9.5117737362411e-06, + "loss": 0.827, + "step": 5468 + }, + { + "epoch": 0.16761677087164398, + "grad_norm": 1.0457738640128849, + "learning_rate": 9.51155980395591e-06, + "loss": 0.5098, + "step": 5469 + }, + { + "epoch": 0.16764741939438518, + "grad_norm": 1.8159343907690362, + "learning_rate": 9.511345827217167e-06, + "loss": 0.7745, + "step": 5470 + }, + { + "epoch": 0.1676780679171264, + "grad_norm": 2.066034311050583, + "learning_rate": 9.511131806026979e-06, + "loss": 0.7197, + "step": 5471 + }, + { + "epoch": 0.1677087164398676, + "grad_norm": 2.1490379589572504, + "learning_rate": 9.510917740387456e-06, + "loss": 0.7422, + "step": 5472 + }, + { + "epoch": 0.1677393649626088, + "grad_norm": 1.9264437812416917, + "learning_rate": 9.510703630300704e-06, + "loss": 0.8313, + "step": 5473 + }, + { + "epoch": 0.16777001348535, + "grad_norm": 1.982206300595434, + "learning_rate": 9.510489475768836e-06, + "loss": 0.7774, + "step": 5474 + }, + { + "epoch": 0.1678006620080912, + "grad_norm": 1.8426164059806065, + "learning_rate": 9.510275276793963e-06, + "loss": 0.7441, + "step": 5475 + }, + { + "epoch": 0.16783131053083242, + "grad_norm": 1.7727790461296162, + "learning_rate": 9.510061033378191e-06, + "loss": 0.6452, + "step": 5476 + }, + { + "epoch": 0.16786195905357362, + "grad_norm": 2.24109180328191, + 
"learning_rate": 9.509846745523635e-06, + "loss": 0.8155, + "step": 5477 + }, + { + "epoch": 0.16789260757631483, + "grad_norm": 1.7201340203306235, + "learning_rate": 9.509632413232406e-06, + "loss": 0.6873, + "step": 5478 + }, + { + "epoch": 0.16792325609905603, + "grad_norm": 1.9871060744149815, + "learning_rate": 9.509418036506614e-06, + "loss": 0.7078, + "step": 5479 + }, + { + "epoch": 0.16795390462179724, + "grad_norm": 1.6597596448368357, + "learning_rate": 9.509203615348372e-06, + "loss": 0.6501, + "step": 5480 + }, + { + "epoch": 0.16798455314453845, + "grad_norm": 1.8656893974107611, + "learning_rate": 9.508989149759792e-06, + "loss": 0.7567, + "step": 5481 + }, + { + "epoch": 0.16801520166727962, + "grad_norm": 1.7701731621538874, + "learning_rate": 9.508774639742992e-06, + "loss": 0.7137, + "step": 5482 + }, + { + "epoch": 0.16804585019002083, + "grad_norm": 1.6930146117983216, + "learning_rate": 9.508560085300078e-06, + "loss": 0.7326, + "step": 5483 + }, + { + "epoch": 0.16807649871276203, + "grad_norm": 1.7776202681141993, + "learning_rate": 9.508345486433171e-06, + "loss": 0.7315, + "step": 5484 + }, + { + "epoch": 0.16810714723550324, + "grad_norm": 2.124745187624052, + "learning_rate": 9.508130843144382e-06, + "loss": 0.7473, + "step": 5485 + }, + { + "epoch": 0.16813779575824445, + "grad_norm": 1.8198404235305183, + "learning_rate": 9.507916155435824e-06, + "loss": 0.7151, + "step": 5486 + }, + { + "epoch": 0.16816844428098565, + "grad_norm": 1.8245501008033016, + "learning_rate": 9.507701423309616e-06, + "loss": 0.7091, + "step": 5487 + }, + { + "epoch": 0.16819909280372686, + "grad_norm": 1.9010354157192642, + "learning_rate": 9.507486646767872e-06, + "loss": 0.7806, + "step": 5488 + }, + { + "epoch": 0.16822974132646806, + "grad_norm": 1.9814750685361169, + "learning_rate": 9.507271825812709e-06, + "loss": 0.7933, + "step": 5489 + }, + { + "epoch": 0.16826038984920927, + "grad_norm": 1.7438998344702756, + "learning_rate": 9.507056960446243e-06, + "loss": 0.6959, + "step": 5490 + }, + { + "epoch": 0.16829103837195047, + "grad_norm": 1.850882408980375, + "learning_rate": 9.506842050670593e-06, + "loss": 0.7576, + "step": 5491 + }, + { + "epoch": 0.16832168689469168, + "grad_norm": 1.7764173683916067, + "learning_rate": 9.506627096487875e-06, + "loss": 0.7465, + "step": 5492 + }, + { + "epoch": 0.16835233541743289, + "grad_norm": 2.1229604405082316, + "learning_rate": 9.506412097900206e-06, + "loss": 0.8174, + "step": 5493 + }, + { + "epoch": 0.1683829839401741, + "grad_norm": 2.0076607738672543, + "learning_rate": 9.506197054909708e-06, + "loss": 0.732, + "step": 5494 + }, + { + "epoch": 0.1684136324629153, + "grad_norm": 1.757287476750036, + "learning_rate": 9.505981967518493e-06, + "loss": 0.6812, + "step": 5495 + }, + { + "epoch": 0.1684442809856565, + "grad_norm": 1.7893971600538634, + "learning_rate": 9.50576683572869e-06, + "loss": 0.7139, + "step": 5496 + }, + { + "epoch": 0.1684749295083977, + "grad_norm": 1.7174080177307827, + "learning_rate": 9.50555165954241e-06, + "loss": 0.7454, + "step": 5497 + }, + { + "epoch": 0.1685055780311389, + "grad_norm": 2.0026269410067625, + "learning_rate": 9.505336438961778e-06, + "loss": 0.8025, + "step": 5498 + }, + { + "epoch": 0.1685362265538801, + "grad_norm": 1.8229852335675327, + "learning_rate": 9.505121173988913e-06, + "loss": 0.7698, + "step": 5499 + }, + { + "epoch": 0.1685668750766213, + "grad_norm": 1.963248199644064, + "learning_rate": 9.504905864625935e-06, + "loss": 0.8709, + "step": 5500 + }, + { + "epoch": 
0.1685975235993625, + "grad_norm": 2.0342411519674646, + "learning_rate": 9.50469051087497e-06, + "loss": 0.7847, + "step": 5501 + }, + { + "epoch": 0.1686281721221037, + "grad_norm": 1.9171392727648264, + "learning_rate": 9.504475112738134e-06, + "loss": 0.7535, + "step": 5502 + }, + { + "epoch": 0.16865882064484491, + "grad_norm": 1.9497524925689589, + "learning_rate": 9.504259670217553e-06, + "loss": 0.7147, + "step": 5503 + }, + { + "epoch": 0.16868946916758612, + "grad_norm": 1.014306441099802, + "learning_rate": 9.50404418331535e-06, + "loss": 0.509, + "step": 5504 + }, + { + "epoch": 0.16872011769032733, + "grad_norm": 0.9017151082014905, + "learning_rate": 9.503828652033647e-06, + "loss": 0.5215, + "step": 5505 + }, + { + "epoch": 0.16875076621306853, + "grad_norm": 2.176527382442755, + "learning_rate": 9.503613076374568e-06, + "loss": 0.7985, + "step": 5506 + }, + { + "epoch": 0.16878141473580974, + "grad_norm": 0.8225099076132971, + "learning_rate": 9.503397456340235e-06, + "loss": 0.5104, + "step": 5507 + }, + { + "epoch": 0.16881206325855094, + "grad_norm": 1.9182867337045713, + "learning_rate": 9.503181791932777e-06, + "loss": 0.7006, + "step": 5508 + }, + { + "epoch": 0.16884271178129215, + "grad_norm": 0.9697791693037344, + "learning_rate": 9.502966083154314e-06, + "loss": 0.5158, + "step": 5509 + }, + { + "epoch": 0.16887336030403335, + "grad_norm": 1.864158127601389, + "learning_rate": 9.502750330006977e-06, + "loss": 0.7072, + "step": 5510 + }, + { + "epoch": 0.16890400882677456, + "grad_norm": 1.9278805134811938, + "learning_rate": 9.502534532492889e-06, + "loss": 0.7429, + "step": 5511 + }, + { + "epoch": 0.16893465734951577, + "grad_norm": 1.9151665438495942, + "learning_rate": 9.502318690614175e-06, + "loss": 0.7053, + "step": 5512 + }, + { + "epoch": 0.16896530587225694, + "grad_norm": 0.837025844295299, + "learning_rate": 9.502102804372962e-06, + "loss": 0.5084, + "step": 5513 + }, + { + "epoch": 0.16899595439499815, + "grad_norm": 1.94000270621854, + "learning_rate": 9.501886873771378e-06, + "loss": 0.7184, + "step": 5514 + }, + { + "epoch": 0.16902660291773935, + "grad_norm": 1.8176772579204366, + "learning_rate": 9.501670898811552e-06, + "loss": 0.7053, + "step": 5515 + }, + { + "epoch": 0.16905725144048056, + "grad_norm": 2.3309158674831782, + "learning_rate": 9.50145487949561e-06, + "loss": 0.7277, + "step": 5516 + }, + { + "epoch": 0.16908789996322177, + "grad_norm": 1.8520655466656628, + "learning_rate": 9.501238815825684e-06, + "loss": 0.7541, + "step": 5517 + }, + { + "epoch": 0.16911854848596297, + "grad_norm": 0.8879078159735497, + "learning_rate": 9.501022707803898e-06, + "loss": 0.5034, + "step": 5518 + }, + { + "epoch": 0.16914919700870418, + "grad_norm": 1.6474204648484125, + "learning_rate": 9.500806555432384e-06, + "loss": 0.7156, + "step": 5519 + }, + { + "epoch": 0.16917984553144538, + "grad_norm": 1.7630416718405577, + "learning_rate": 9.50059035871327e-06, + "loss": 0.6972, + "step": 5520 + }, + { + "epoch": 0.1692104940541866, + "grad_norm": 1.8216747797102113, + "learning_rate": 9.500374117648689e-06, + "loss": 0.6855, + "step": 5521 + }, + { + "epoch": 0.1692411425769278, + "grad_norm": 1.8900442752918787, + "learning_rate": 9.500157832240772e-06, + "loss": 0.6696, + "step": 5522 + }, + { + "epoch": 0.169271791099669, + "grad_norm": 1.695197679849267, + "learning_rate": 9.499941502491646e-06, + "loss": 0.7182, + "step": 5523 + }, + { + "epoch": 0.1693024396224102, + "grad_norm": 1.8285610294456798, + "learning_rate": 9.499725128403446e-06, + 
"loss": 0.6968, + "step": 5524 + }, + { + "epoch": 0.1693330881451514, + "grad_norm": 2.167589468795949, + "learning_rate": 9.499508709978303e-06, + "loss": 0.7228, + "step": 5525 + }, + { + "epoch": 0.16936373666789262, + "grad_norm": 1.7471382566500087, + "learning_rate": 9.499292247218348e-06, + "loss": 0.7641, + "step": 5526 + }, + { + "epoch": 0.16939438519063382, + "grad_norm": 1.8683291548857413, + "learning_rate": 9.499075740125719e-06, + "loss": 0.794, + "step": 5527 + }, + { + "epoch": 0.16942503371337503, + "grad_norm": 1.7403271739818917, + "learning_rate": 9.498859188702541e-06, + "loss": 0.7272, + "step": 5528 + }, + { + "epoch": 0.1694556822361162, + "grad_norm": 1.6424977748424467, + "learning_rate": 9.498642592950955e-06, + "loss": 0.6736, + "step": 5529 + }, + { + "epoch": 0.1694863307588574, + "grad_norm": 1.9045924335415325, + "learning_rate": 9.498425952873092e-06, + "loss": 0.666, + "step": 5530 + }, + { + "epoch": 0.16951697928159862, + "grad_norm": 2.0705648583240945, + "learning_rate": 9.498209268471089e-06, + "loss": 0.7459, + "step": 5531 + }, + { + "epoch": 0.16954762780433982, + "grad_norm": 1.9215409874614207, + "learning_rate": 9.497992539747076e-06, + "loss": 0.7211, + "step": 5532 + }, + { + "epoch": 0.16957827632708103, + "grad_norm": 2.0207599330039026, + "learning_rate": 9.497775766703193e-06, + "loss": 0.7519, + "step": 5533 + }, + { + "epoch": 0.16960892484982223, + "grad_norm": 0.9672937553044787, + "learning_rate": 9.497558949341575e-06, + "loss": 0.5101, + "step": 5534 + }, + { + "epoch": 0.16963957337256344, + "grad_norm": 1.9043492890411755, + "learning_rate": 9.497342087664355e-06, + "loss": 0.779, + "step": 5535 + }, + { + "epoch": 0.16967022189530465, + "grad_norm": 1.9911951628431142, + "learning_rate": 9.497125181673676e-06, + "loss": 0.8471, + "step": 5536 + }, + { + "epoch": 0.16970087041804585, + "grad_norm": 1.8698186494112718, + "learning_rate": 9.496908231371672e-06, + "loss": 0.774, + "step": 5537 + }, + { + "epoch": 0.16973151894078706, + "grad_norm": 0.8675126865583478, + "learning_rate": 9.49669123676048e-06, + "loss": 0.5386, + "step": 5538 + }, + { + "epoch": 0.16976216746352826, + "grad_norm": 1.696544582245715, + "learning_rate": 9.496474197842238e-06, + "loss": 0.7006, + "step": 5539 + }, + { + "epoch": 0.16979281598626947, + "grad_norm": 1.678144135959628, + "learning_rate": 9.496257114619085e-06, + "loss": 0.7355, + "step": 5540 + }, + { + "epoch": 0.16982346450901067, + "grad_norm": 0.7959035891663752, + "learning_rate": 9.496039987093162e-06, + "loss": 0.4955, + "step": 5541 + }, + { + "epoch": 0.16985411303175188, + "grad_norm": 1.8337893751494705, + "learning_rate": 9.495822815266605e-06, + "loss": 0.6581, + "step": 5542 + }, + { + "epoch": 0.16988476155449309, + "grad_norm": 1.744607630730739, + "learning_rate": 9.495605599141555e-06, + "loss": 0.8055, + "step": 5543 + }, + { + "epoch": 0.16991541007723426, + "grad_norm": 1.89154177690632, + "learning_rate": 9.495388338720155e-06, + "loss": 0.6705, + "step": 5544 + }, + { + "epoch": 0.16994605859997547, + "grad_norm": 1.90296685520996, + "learning_rate": 9.495171034004542e-06, + "loss": 0.7413, + "step": 5545 + }, + { + "epoch": 0.16997670712271667, + "grad_norm": 0.8426752267403551, + "learning_rate": 9.494953684996859e-06, + "loss": 0.4955, + "step": 5546 + }, + { + "epoch": 0.17000735564545788, + "grad_norm": 2.0434655123596737, + "learning_rate": 9.494736291699247e-06, + "loss": 0.7425, + "step": 5547 + }, + { + "epoch": 0.1700380041681991, + "grad_norm": 
1.7863756488392646, + "learning_rate": 9.49451885411385e-06, + "loss": 0.7577, + "step": 5548 + }, + { + "epoch": 0.1700686526909403, + "grad_norm": 1.7339654887271438, + "learning_rate": 9.494301372242807e-06, + "loss": 0.78, + "step": 5549 + }, + { + "epoch": 0.1700993012136815, + "grad_norm": 1.6646715923958633, + "learning_rate": 9.494083846088263e-06, + "loss": 0.777, + "step": 5550 + }, + { + "epoch": 0.1701299497364227, + "grad_norm": 1.9488066058810407, + "learning_rate": 9.493866275652359e-06, + "loss": 0.678, + "step": 5551 + }, + { + "epoch": 0.1701605982591639, + "grad_norm": 2.2094140399298214, + "learning_rate": 9.493648660937244e-06, + "loss": 0.7822, + "step": 5552 + }, + { + "epoch": 0.17019124678190511, + "grad_norm": 1.9288883293173522, + "learning_rate": 9.493431001945056e-06, + "loss": 0.7217, + "step": 5553 + }, + { + "epoch": 0.17022189530464632, + "grad_norm": 0.9059222004483666, + "learning_rate": 9.493213298677945e-06, + "loss": 0.5023, + "step": 5554 + }, + { + "epoch": 0.17025254382738753, + "grad_norm": 1.9591856020246328, + "learning_rate": 9.492995551138054e-06, + "loss": 0.7661, + "step": 5555 + }, + { + "epoch": 0.17028319235012873, + "grad_norm": 1.9901915127765635, + "learning_rate": 9.492777759327528e-06, + "loss": 0.7789, + "step": 5556 + }, + { + "epoch": 0.17031384087286994, + "grad_norm": 0.8265248570277627, + "learning_rate": 9.492559923248512e-06, + "loss": 0.5174, + "step": 5557 + }, + { + "epoch": 0.17034448939561114, + "grad_norm": 1.8175697528866224, + "learning_rate": 9.492342042903153e-06, + "loss": 0.7992, + "step": 5558 + }, + { + "epoch": 0.17037513791835235, + "grad_norm": 1.7026369830287142, + "learning_rate": 9.4921241182936e-06, + "loss": 0.798, + "step": 5559 + }, + { + "epoch": 0.17040578644109353, + "grad_norm": 1.8365765355718582, + "learning_rate": 9.491906149421998e-06, + "loss": 0.8357, + "step": 5560 + }, + { + "epoch": 0.17043643496383473, + "grad_norm": 1.8689410202208092, + "learning_rate": 9.491688136290496e-06, + "loss": 0.6387, + "step": 5561 + }, + { + "epoch": 0.17046708348657594, + "grad_norm": 0.9449054412905855, + "learning_rate": 9.491470078901241e-06, + "loss": 0.4911, + "step": 5562 + }, + { + "epoch": 0.17049773200931714, + "grad_norm": 0.8381348412995975, + "learning_rate": 9.491251977256383e-06, + "loss": 0.5083, + "step": 5563 + }, + { + "epoch": 0.17052838053205835, + "grad_norm": 1.7810244193928708, + "learning_rate": 9.49103383135807e-06, + "loss": 0.7586, + "step": 5564 + }, + { + "epoch": 0.17055902905479955, + "grad_norm": 1.6898728338878204, + "learning_rate": 9.49081564120845e-06, + "loss": 0.6828, + "step": 5565 + }, + { + "epoch": 0.17058967757754076, + "grad_norm": 0.8472065990940847, + "learning_rate": 9.490597406809676e-06, + "loss": 0.4927, + "step": 5566 + }, + { + "epoch": 0.17062032610028197, + "grad_norm": 1.8219561919154088, + "learning_rate": 9.490379128163897e-06, + "loss": 0.6322, + "step": 5567 + }, + { + "epoch": 0.17065097462302317, + "grad_norm": 1.9702984858732568, + "learning_rate": 9.490160805273262e-06, + "loss": 0.7589, + "step": 5568 + }, + { + "epoch": 0.17068162314576438, + "grad_norm": 1.8242034188121286, + "learning_rate": 9.489942438139925e-06, + "loss": 0.7625, + "step": 5569 + }, + { + "epoch": 0.17071227166850558, + "grad_norm": 2.0241515052260404, + "learning_rate": 9.489724026766037e-06, + "loss": 0.8416, + "step": 5570 + }, + { + "epoch": 0.1707429201912468, + "grad_norm": 1.9475342864203056, + "learning_rate": 9.489505571153747e-06, + "loss": 0.8376, + "step": 5571 + 
}, + { + "epoch": 0.170773568713988, + "grad_norm": 1.036601714044455, + "learning_rate": 9.489287071305212e-06, + "loss": 0.5127, + "step": 5572 + }, + { + "epoch": 0.1708042172367292, + "grad_norm": 1.7488910147395813, + "learning_rate": 9.489068527222583e-06, + "loss": 0.7965, + "step": 5573 + }, + { + "epoch": 0.1708348657594704, + "grad_norm": 1.724361703104298, + "learning_rate": 9.488849938908011e-06, + "loss": 0.6521, + "step": 5574 + }, + { + "epoch": 0.17086551428221158, + "grad_norm": 0.8340528362803336, + "learning_rate": 9.488631306363654e-06, + "loss": 0.5037, + "step": 5575 + }, + { + "epoch": 0.1708961628049528, + "grad_norm": 1.8754462168799986, + "learning_rate": 9.488412629591663e-06, + "loss": 0.7815, + "step": 5576 + }, + { + "epoch": 0.170926811327694, + "grad_norm": 1.728811572456509, + "learning_rate": 9.488193908594195e-06, + "loss": 0.6607, + "step": 5577 + }, + { + "epoch": 0.1709574598504352, + "grad_norm": 2.0117586826140665, + "learning_rate": 9.487975143373404e-06, + "loss": 0.7497, + "step": 5578 + }, + { + "epoch": 0.1709881083731764, + "grad_norm": 1.7285182378160624, + "learning_rate": 9.487756333931446e-06, + "loss": 0.7275, + "step": 5579 + }, + { + "epoch": 0.1710187568959176, + "grad_norm": 1.7674552161953856, + "learning_rate": 9.487537480270474e-06, + "loss": 0.6322, + "step": 5580 + }, + { + "epoch": 0.17104940541865882, + "grad_norm": 1.6736177809618296, + "learning_rate": 9.48731858239265e-06, + "loss": 0.7295, + "step": 5581 + }, + { + "epoch": 0.17108005394140002, + "grad_norm": 1.822560446510806, + "learning_rate": 9.487099640300126e-06, + "loss": 0.7553, + "step": 5582 + }, + { + "epoch": 0.17111070246414123, + "grad_norm": 2.1345117097523443, + "learning_rate": 9.486880653995063e-06, + "loss": 0.8613, + "step": 5583 + }, + { + "epoch": 0.17114135098688243, + "grad_norm": 0.8920315851310969, + "learning_rate": 9.486661623479616e-06, + "loss": 0.5002, + "step": 5584 + }, + { + "epoch": 0.17117199950962364, + "grad_norm": 2.1187480201999387, + "learning_rate": 9.486442548755942e-06, + "loss": 0.8078, + "step": 5585 + }, + { + "epoch": 0.17120264803236485, + "grad_norm": 1.901177766714581, + "learning_rate": 9.486223429826205e-06, + "loss": 0.6658, + "step": 5586 + }, + { + "epoch": 0.17123329655510605, + "grad_norm": 1.879590751797583, + "learning_rate": 9.486004266692558e-06, + "loss": 0.765, + "step": 5587 + }, + { + "epoch": 0.17126394507784726, + "grad_norm": 2.525390745048308, + "learning_rate": 9.485785059357166e-06, + "loss": 0.7588, + "step": 5588 + }, + { + "epoch": 0.17129459360058846, + "grad_norm": 1.7522281874614511, + "learning_rate": 9.485565807822183e-06, + "loss": 0.6594, + "step": 5589 + }, + { + "epoch": 0.17132524212332967, + "grad_norm": 1.6764767456852814, + "learning_rate": 9.485346512089775e-06, + "loss": 0.8068, + "step": 5590 + }, + { + "epoch": 0.17135589064607085, + "grad_norm": 1.6508874202991037, + "learning_rate": 9.485127172162098e-06, + "loss": 0.6005, + "step": 5591 + }, + { + "epoch": 0.17138653916881205, + "grad_norm": 2.8408581208042616, + "learning_rate": 9.484907788041318e-06, + "loss": 0.8585, + "step": 5592 + }, + { + "epoch": 0.17141718769155326, + "grad_norm": 1.9637616170384895, + "learning_rate": 9.484688359729592e-06, + "loss": 0.7038, + "step": 5593 + }, + { + "epoch": 0.17144783621429446, + "grad_norm": 1.7715594285230847, + "learning_rate": 9.484468887229085e-06, + "loss": 0.6853, + "step": 5594 + }, + { + "epoch": 0.17147848473703567, + "grad_norm": 1.7086918340714838, + "learning_rate": 
9.484249370541958e-06, + "loss": 0.7313, + "step": 5595 + }, + { + "epoch": 0.17150913325977687, + "grad_norm": 1.9223822514755264, + "learning_rate": 9.484029809670377e-06, + "loss": 0.7324, + "step": 5596 + }, + { + "epoch": 0.17153978178251808, + "grad_norm": 1.6440911201073685, + "learning_rate": 9.483810204616498e-06, + "loss": 0.6379, + "step": 5597 + }, + { + "epoch": 0.17157043030525929, + "grad_norm": 1.7309607334812827, + "learning_rate": 9.483590555382493e-06, + "loss": 0.7241, + "step": 5598 + }, + { + "epoch": 0.1716010788280005, + "grad_norm": 1.880777608601573, + "learning_rate": 9.483370861970525e-06, + "loss": 0.7705, + "step": 5599 + }, + { + "epoch": 0.1716317273507417, + "grad_norm": 1.5920856674930546, + "learning_rate": 9.483151124382755e-06, + "loss": 0.7603, + "step": 5600 + }, + { + "epoch": 0.1716623758734829, + "grad_norm": 3.7281861204042115, + "learning_rate": 9.48293134262135e-06, + "loss": 0.6483, + "step": 5601 + }, + { + "epoch": 0.1716930243962241, + "grad_norm": 1.7896933677079843, + "learning_rate": 9.482711516688475e-06, + "loss": 0.8072, + "step": 5602 + }, + { + "epoch": 0.17172367291896531, + "grad_norm": 1.7706841892328256, + "learning_rate": 9.482491646586297e-06, + "loss": 0.7944, + "step": 5603 + }, + { + "epoch": 0.17175432144170652, + "grad_norm": 2.0831252149922497, + "learning_rate": 9.48227173231698e-06, + "loss": 0.7471, + "step": 5604 + }, + { + "epoch": 0.17178496996444773, + "grad_norm": 1.6076261783011339, + "learning_rate": 9.482051773882695e-06, + "loss": 0.6795, + "step": 5605 + }, + { + "epoch": 0.1718156184871889, + "grad_norm": 1.951804480975737, + "learning_rate": 9.481831771285606e-06, + "loss": 0.7253, + "step": 5606 + }, + { + "epoch": 0.1718462670099301, + "grad_norm": 1.0595576843982157, + "learning_rate": 9.48161172452788e-06, + "loss": 0.5331, + "step": 5607 + }, + { + "epoch": 0.17187691553267131, + "grad_norm": 0.8913173549110113, + "learning_rate": 9.481391633611689e-06, + "loss": 0.4983, + "step": 5608 + }, + { + "epoch": 0.17190756405541252, + "grad_norm": 2.270999841788014, + "learning_rate": 9.4811714985392e-06, + "loss": 0.6623, + "step": 5609 + }, + { + "epoch": 0.17193821257815373, + "grad_norm": 2.7165255723475252, + "learning_rate": 9.480951319312582e-06, + "loss": 0.7475, + "step": 5610 + }, + { + "epoch": 0.17196886110089493, + "grad_norm": 2.102811543989467, + "learning_rate": 9.480731095934003e-06, + "loss": 0.7229, + "step": 5611 + }, + { + "epoch": 0.17199950962363614, + "grad_norm": 1.8112937830897908, + "learning_rate": 9.480510828405636e-06, + "loss": 0.7045, + "step": 5612 + }, + { + "epoch": 0.17203015814637734, + "grad_norm": 1.8803285267077816, + "learning_rate": 9.480290516729648e-06, + "loss": 0.7522, + "step": 5613 + }, + { + "epoch": 0.17206080666911855, + "grad_norm": 1.8650194371128652, + "learning_rate": 9.480070160908212e-06, + "loss": 0.7341, + "step": 5614 + }, + { + "epoch": 0.17209145519185975, + "grad_norm": 1.9118596016851557, + "learning_rate": 9.479849760943498e-06, + "loss": 0.7449, + "step": 5615 + }, + { + "epoch": 0.17212210371460096, + "grad_norm": 2.9802878913310966, + "learning_rate": 9.479629316837676e-06, + "loss": 0.7256, + "step": 5616 + }, + { + "epoch": 0.17215275223734217, + "grad_norm": 1.662082224785129, + "learning_rate": 9.479408828592923e-06, + "loss": 0.7585, + "step": 5617 + }, + { + "epoch": 0.17218340076008337, + "grad_norm": 1.9160705149652364, + "learning_rate": 9.479188296211407e-06, + "loss": 0.7936, + "step": 5618 + }, + { + "epoch": 0.17221404928282458, 
+ "grad_norm": 2.0440206185556535, + "learning_rate": 9.478967719695303e-06, + "loss": 0.7073, + "step": 5619 + }, + { + "epoch": 0.17224469780556578, + "grad_norm": 1.9620773911857585, + "learning_rate": 9.478747099046786e-06, + "loss": 0.6686, + "step": 5620 + }, + { + "epoch": 0.172275346328307, + "grad_norm": 1.4944159544388, + "learning_rate": 9.478526434268026e-06, + "loss": 0.5257, + "step": 5621 + }, + { + "epoch": 0.17230599485104817, + "grad_norm": 1.738394245877667, + "learning_rate": 9.478305725361198e-06, + "loss": 0.6833, + "step": 5622 + }, + { + "epoch": 0.17233664337378937, + "grad_norm": 1.0080459329460913, + "learning_rate": 9.478084972328481e-06, + "loss": 0.5106, + "step": 5623 + }, + { + "epoch": 0.17236729189653058, + "grad_norm": 2.0001179114587164, + "learning_rate": 9.477864175172044e-06, + "loss": 0.7608, + "step": 5624 + }, + { + "epoch": 0.17239794041927178, + "grad_norm": 1.565293584950513, + "learning_rate": 9.477643333894067e-06, + "loss": 0.7595, + "step": 5625 + }, + { + "epoch": 0.172428588942013, + "grad_norm": 1.8085211965054093, + "learning_rate": 9.477422448496724e-06, + "loss": 0.7772, + "step": 5626 + }, + { + "epoch": 0.1724592374647542, + "grad_norm": 1.219569404242758, + "learning_rate": 9.477201518982193e-06, + "loss": 0.5177, + "step": 5627 + }, + { + "epoch": 0.1724898859874954, + "grad_norm": 2.0838874454152725, + "learning_rate": 9.47698054535265e-06, + "loss": 0.7372, + "step": 5628 + }, + { + "epoch": 0.1725205345102366, + "grad_norm": 1.6521752745506797, + "learning_rate": 9.47675952761027e-06, + "loss": 0.7316, + "step": 5629 + }, + { + "epoch": 0.1725511830329778, + "grad_norm": 2.242054650126467, + "learning_rate": 9.476538465757236e-06, + "loss": 0.6729, + "step": 5630 + }, + { + "epoch": 0.17258183155571902, + "grad_norm": 1.795190666818639, + "learning_rate": 9.47631735979572e-06, + "loss": 0.7842, + "step": 5631 + }, + { + "epoch": 0.17261248007846022, + "grad_norm": 1.8028045328048803, + "learning_rate": 9.476096209727907e-06, + "loss": 0.7107, + "step": 5632 + }, + { + "epoch": 0.17264312860120143, + "grad_norm": 1.2901466031276299, + "learning_rate": 9.47587501555597e-06, + "loss": 0.5336, + "step": 5633 + }, + { + "epoch": 0.17267377712394263, + "grad_norm": 1.0552267401706736, + "learning_rate": 9.475653777282093e-06, + "loss": 0.5046, + "step": 5634 + }, + { + "epoch": 0.17270442564668384, + "grad_norm": 1.7458911655328584, + "learning_rate": 9.475432494908454e-06, + "loss": 0.6676, + "step": 5635 + }, + { + "epoch": 0.17273507416942505, + "grad_norm": 2.2121640158650453, + "learning_rate": 9.475211168437234e-06, + "loss": 0.7809, + "step": 5636 + }, + { + "epoch": 0.17276572269216625, + "grad_norm": 1.6785634953832562, + "learning_rate": 9.474989797870611e-06, + "loss": 0.5551, + "step": 5637 + }, + { + "epoch": 0.17279637121490743, + "grad_norm": 1.7324797802519132, + "learning_rate": 9.47476838321077e-06, + "loss": 0.7316, + "step": 5638 + }, + { + "epoch": 0.17282701973764864, + "grad_norm": 1.7567134972033513, + "learning_rate": 9.474546924459892e-06, + "loss": 0.7662, + "step": 5639 + }, + { + "epoch": 0.17285766826038984, + "grad_norm": 2.0125655095687534, + "learning_rate": 9.474325421620158e-06, + "loss": 0.7997, + "step": 5640 + }, + { + "epoch": 0.17288831678313105, + "grad_norm": 1.7819064675523584, + "learning_rate": 9.47410387469375e-06, + "loss": 0.7039, + "step": 5641 + }, + { + "epoch": 0.17291896530587225, + "grad_norm": 1.754039963689201, + "learning_rate": 9.473882283682852e-06, + "loss": 0.9138, + "step": 
5642 + }, + { + "epoch": 0.17294961382861346, + "grad_norm": 1.7395684131263631, + "learning_rate": 9.473660648589648e-06, + "loss": 0.7124, + "step": 5643 + }, + { + "epoch": 0.17298026235135466, + "grad_norm": 1.7891786688806783, + "learning_rate": 9.47343896941632e-06, + "loss": 0.7742, + "step": 5644 + }, + { + "epoch": 0.17301091087409587, + "grad_norm": 1.7872151910760445, + "learning_rate": 9.473217246165055e-06, + "loss": 0.7795, + "step": 5645 + }, + { + "epoch": 0.17304155939683707, + "grad_norm": 1.732046919326293, + "learning_rate": 9.472995478838034e-06, + "loss": 0.7635, + "step": 5646 + }, + { + "epoch": 0.17307220791957828, + "grad_norm": 1.8313294630648098, + "learning_rate": 9.472773667437444e-06, + "loss": 0.7383, + "step": 5647 + }, + { + "epoch": 0.17310285644231949, + "grad_norm": 1.782775267105498, + "learning_rate": 9.47255181196547e-06, + "loss": 0.7416, + "step": 5648 + }, + { + "epoch": 0.1731335049650607, + "grad_norm": 2.23810907072418, + "learning_rate": 9.4723299124243e-06, + "loss": 0.8019, + "step": 5649 + }, + { + "epoch": 0.1731641534878019, + "grad_norm": 1.7716707917989054, + "learning_rate": 9.47210796881612e-06, + "loss": 0.7511, + "step": 5650 + }, + { + "epoch": 0.1731948020105431, + "grad_norm": 1.7913801103709626, + "learning_rate": 9.471885981143114e-06, + "loss": 0.7474, + "step": 5651 + }, + { + "epoch": 0.1732254505332843, + "grad_norm": 1.5655199130623774, + "learning_rate": 9.471663949407472e-06, + "loss": 0.5391, + "step": 5652 + }, + { + "epoch": 0.1732560990560255, + "grad_norm": 1.6409591034069644, + "learning_rate": 9.471441873611382e-06, + "loss": 0.6943, + "step": 5653 + }, + { + "epoch": 0.1732867475787667, + "grad_norm": 2.06048143718854, + "learning_rate": 9.47121975375703e-06, + "loss": 0.7677, + "step": 5654 + }, + { + "epoch": 0.1733173961015079, + "grad_norm": 1.7895606366421748, + "learning_rate": 9.470997589846607e-06, + "loss": 0.8348, + "step": 5655 + }, + { + "epoch": 0.1733480446242491, + "grad_norm": 1.9782408963539468, + "learning_rate": 9.4707753818823e-06, + "loss": 0.7167, + "step": 5656 + }, + { + "epoch": 0.1733786931469903, + "grad_norm": 1.9525568529364614, + "learning_rate": 9.470553129866297e-06, + "loss": 0.7803, + "step": 5657 + }, + { + "epoch": 0.17340934166973151, + "grad_norm": 1.6807147446955009, + "learning_rate": 9.470330833800794e-06, + "loss": 0.7289, + "step": 5658 + }, + { + "epoch": 0.17343999019247272, + "grad_norm": 1.0078984431385547, + "learning_rate": 9.470108493687976e-06, + "loss": 0.5127, + "step": 5659 + }, + { + "epoch": 0.17347063871521393, + "grad_norm": 1.6804530456025093, + "learning_rate": 9.469886109530034e-06, + "loss": 0.7023, + "step": 5660 + }, + { + "epoch": 0.17350128723795513, + "grad_norm": 1.923265257945296, + "learning_rate": 9.469663681329161e-06, + "loss": 0.7189, + "step": 5661 + }, + { + "epoch": 0.17353193576069634, + "grad_norm": 1.893439206654434, + "learning_rate": 9.469441209087549e-06, + "loss": 0.7613, + "step": 5662 + }, + { + "epoch": 0.17356258428343754, + "grad_norm": 1.8087184272340588, + "learning_rate": 9.469218692807389e-06, + "loss": 0.768, + "step": 5663 + }, + { + "epoch": 0.17359323280617875, + "grad_norm": 1.782536913687053, + "learning_rate": 9.468996132490874e-06, + "loss": 0.8007, + "step": 5664 + }, + { + "epoch": 0.17362388132891995, + "grad_norm": 1.6359487854624317, + "learning_rate": 9.468773528140195e-06, + "loss": 0.7161, + "step": 5665 + }, + { + "epoch": 0.17365452985166116, + "grad_norm": 0.8770797796832163, + "learning_rate": 
9.46855087975755e-06, + "loss": 0.504, + "step": 5666 + }, + { + "epoch": 0.17368517837440237, + "grad_norm": 0.8528935343402324, + "learning_rate": 9.468328187345128e-06, + "loss": 0.5134, + "step": 5667 + }, + { + "epoch": 0.17371582689714357, + "grad_norm": 1.907883058439203, + "learning_rate": 9.468105450905125e-06, + "loss": 0.7295, + "step": 5668 + }, + { + "epoch": 0.17374647541988475, + "grad_norm": 2.154504080176605, + "learning_rate": 9.467882670439736e-06, + "loss": 0.7795, + "step": 5669 + }, + { + "epoch": 0.17377712394262596, + "grad_norm": 1.6860584607169002, + "learning_rate": 9.467659845951156e-06, + "loss": 0.6398, + "step": 5670 + }, + { + "epoch": 0.17380777246536716, + "grad_norm": 1.9633194133828853, + "learning_rate": 9.46743697744158e-06, + "loss": 0.8103, + "step": 5671 + }, + { + "epoch": 0.17383842098810837, + "grad_norm": 3.310618644639345, + "learning_rate": 9.467214064913205e-06, + "loss": 0.6911, + "step": 5672 + }, + { + "epoch": 0.17386906951084957, + "grad_norm": 1.7380345441304608, + "learning_rate": 9.466991108368226e-06, + "loss": 0.7571, + "step": 5673 + }, + { + "epoch": 0.17389971803359078, + "grad_norm": 1.8787861412714502, + "learning_rate": 9.466768107808842e-06, + "loss": 0.7154, + "step": 5674 + }, + { + "epoch": 0.17393036655633198, + "grad_norm": 2.1090923419103382, + "learning_rate": 9.466545063237248e-06, + "loss": 0.7576, + "step": 5675 + }, + { + "epoch": 0.1739610150790732, + "grad_norm": 1.94899055841435, + "learning_rate": 9.466321974655644e-06, + "loss": 0.7556, + "step": 5676 + }, + { + "epoch": 0.1739916636018144, + "grad_norm": 2.0312141467882845, + "learning_rate": 9.466098842066224e-06, + "loss": 0.7731, + "step": 5677 + }, + { + "epoch": 0.1740223121245556, + "grad_norm": 1.9001155124647058, + "learning_rate": 9.465875665471193e-06, + "loss": 0.7618, + "step": 5678 + }, + { + "epoch": 0.1740529606472968, + "grad_norm": 1.9193515474190415, + "learning_rate": 9.465652444872744e-06, + "loss": 0.6816, + "step": 5679 + }, + { + "epoch": 0.174083609170038, + "grad_norm": 1.8244027229281947, + "learning_rate": 9.46542918027308e-06, + "loss": 0.6901, + "step": 5680 + }, + { + "epoch": 0.17411425769277922, + "grad_norm": 1.7478453482610379, + "learning_rate": 9.465205871674399e-06, + "loss": 0.6531, + "step": 5681 + }, + { + "epoch": 0.17414490621552042, + "grad_norm": 1.6271301644102623, + "learning_rate": 9.464982519078903e-06, + "loss": 0.723, + "step": 5682 + }, + { + "epoch": 0.17417555473826163, + "grad_norm": 1.1109077111438201, + "learning_rate": 9.46475912248879e-06, + "loss": 0.4973, + "step": 5683 + }, + { + "epoch": 0.1742062032610028, + "grad_norm": 2.008110275209393, + "learning_rate": 9.464535681906264e-06, + "loss": 0.7659, + "step": 5684 + }, + { + "epoch": 0.174236851783744, + "grad_norm": 2.0178058861438086, + "learning_rate": 9.464312197333526e-06, + "loss": 0.7204, + "step": 5685 + }, + { + "epoch": 0.17426750030648522, + "grad_norm": 0.8182564779178952, + "learning_rate": 9.464088668772777e-06, + "loss": 0.494, + "step": 5686 + }, + { + "epoch": 0.17429814882922642, + "grad_norm": 1.8634150788638992, + "learning_rate": 9.463865096226221e-06, + "loss": 0.7544, + "step": 5687 + }, + { + "epoch": 0.17432879735196763, + "grad_norm": 1.7694653967205287, + "learning_rate": 9.46364147969606e-06, + "loss": 0.7231, + "step": 5688 + }, + { + "epoch": 0.17435944587470883, + "grad_norm": 1.8253642866151585, + "learning_rate": 9.463417819184498e-06, + "loss": 0.7556, + "step": 5689 + }, + { + "epoch": 0.17439009439745004, + 
"grad_norm": 1.648285046814354, + "learning_rate": 9.463194114693736e-06, + "loss": 0.6993, + "step": 5690 + }, + { + "epoch": 0.17442074292019125, + "grad_norm": 1.0175106398902356, + "learning_rate": 9.462970366225983e-06, + "loss": 0.5166, + "step": 5691 + }, + { + "epoch": 0.17445139144293245, + "grad_norm": 1.8447598253121582, + "learning_rate": 9.46274657378344e-06, + "loss": 0.7492, + "step": 5692 + }, + { + "epoch": 0.17448203996567366, + "grad_norm": 1.9478960125352531, + "learning_rate": 9.462522737368311e-06, + "loss": 0.7056, + "step": 5693 + }, + { + "epoch": 0.17451268848841486, + "grad_norm": 1.8991166421133308, + "learning_rate": 9.462298856982808e-06, + "loss": 0.6617, + "step": 5694 + }, + { + "epoch": 0.17454333701115607, + "grad_norm": 1.8237835800549111, + "learning_rate": 9.46207493262913e-06, + "loss": 0.6844, + "step": 5695 + }, + { + "epoch": 0.17457398553389727, + "grad_norm": 1.895926334144448, + "learning_rate": 9.461850964309485e-06, + "loss": 0.8083, + "step": 5696 + }, + { + "epoch": 0.17460463405663848, + "grad_norm": 1.810099581271447, + "learning_rate": 9.461626952026083e-06, + "loss": 0.691, + "step": 5697 + }, + { + "epoch": 0.17463528257937969, + "grad_norm": 1.7164126427140733, + "learning_rate": 9.46140289578113e-06, + "loss": 0.7643, + "step": 5698 + }, + { + "epoch": 0.1746659311021209, + "grad_norm": 1.8493599413449915, + "learning_rate": 9.461178795576829e-06, + "loss": 0.7551, + "step": 5699 + }, + { + "epoch": 0.17469657962486207, + "grad_norm": 1.9224785992158577, + "learning_rate": 9.460954651415395e-06, + "loss": 0.6596, + "step": 5700 + }, + { + "epoch": 0.17472722814760328, + "grad_norm": 1.6739978184329856, + "learning_rate": 9.460730463299032e-06, + "loss": 0.7488, + "step": 5701 + }, + { + "epoch": 0.17475787667034448, + "grad_norm": 1.7549680199934088, + "learning_rate": 9.46050623122995e-06, + "loss": 0.7266, + "step": 5702 + }, + { + "epoch": 0.1747885251930857, + "grad_norm": 1.8869092030040246, + "learning_rate": 9.46028195521036e-06, + "loss": 0.8391, + "step": 5703 + }, + { + "epoch": 0.1748191737158269, + "grad_norm": 1.9365671836017007, + "learning_rate": 9.46005763524247e-06, + "loss": 0.673, + "step": 5704 + }, + { + "epoch": 0.1748498222385681, + "grad_norm": 1.7810581639376322, + "learning_rate": 9.459833271328491e-06, + "loss": 0.7477, + "step": 5705 + }, + { + "epoch": 0.1748804707613093, + "grad_norm": 1.9651816792962449, + "learning_rate": 9.459608863470635e-06, + "loss": 0.8218, + "step": 5706 + }, + { + "epoch": 0.1749111192840505, + "grad_norm": 0.974668901115887, + "learning_rate": 9.45938441167111e-06, + "loss": 0.5049, + "step": 5707 + }, + { + "epoch": 0.17494176780679171, + "grad_norm": 1.9261307462517137, + "learning_rate": 9.45915991593213e-06, + "loss": 0.7834, + "step": 5708 + }, + { + "epoch": 0.17497241632953292, + "grad_norm": 1.8228576801153493, + "learning_rate": 9.458935376255907e-06, + "loss": 0.7682, + "step": 5709 + }, + { + "epoch": 0.17500306485227413, + "grad_norm": 1.6565656558492585, + "learning_rate": 9.458710792644652e-06, + "loss": 0.7401, + "step": 5710 + }, + { + "epoch": 0.17503371337501533, + "grad_norm": 1.6861600650614865, + "learning_rate": 9.45848616510058e-06, + "loss": 0.5776, + "step": 5711 + }, + { + "epoch": 0.17506436189775654, + "grad_norm": 1.8785862479466169, + "learning_rate": 9.458261493625903e-06, + "loss": 0.7302, + "step": 5712 + }, + { + "epoch": 0.17509501042049774, + "grad_norm": 0.8720517184062498, + "learning_rate": 9.458036778222833e-06, + "loss": 0.5207, + "step": 
5713 + }, + { + "epoch": 0.17512565894323895, + "grad_norm": 1.9471812491046847, + "learning_rate": 9.457812018893587e-06, + "loss": 0.6655, + "step": 5714 + }, + { + "epoch": 0.17515630746598013, + "grad_norm": 1.827370846207321, + "learning_rate": 9.457587215640379e-06, + "loss": 0.7788, + "step": 5715 + }, + { + "epoch": 0.17518695598872133, + "grad_norm": 1.7859459719251893, + "learning_rate": 9.457362368465424e-06, + "loss": 0.7595, + "step": 5716 + }, + { + "epoch": 0.17521760451146254, + "grad_norm": 0.7965323432242155, + "learning_rate": 9.457137477370936e-06, + "loss": 0.522, + "step": 5717 + }, + { + "epoch": 0.17524825303420374, + "grad_norm": 1.7491988991682585, + "learning_rate": 9.456912542359132e-06, + "loss": 0.8001, + "step": 5718 + }, + { + "epoch": 0.17527890155694495, + "grad_norm": 1.907090838101954, + "learning_rate": 9.45668756343223e-06, + "loss": 0.7132, + "step": 5719 + }, + { + "epoch": 0.17530955007968615, + "grad_norm": 1.5985983888101893, + "learning_rate": 9.456462540592442e-06, + "loss": 0.6302, + "step": 5720 + }, + { + "epoch": 0.17534019860242736, + "grad_norm": 1.9006113898705714, + "learning_rate": 9.456237473841991e-06, + "loss": 0.7369, + "step": 5721 + }, + { + "epoch": 0.17537084712516857, + "grad_norm": 2.217326772664082, + "learning_rate": 9.456012363183091e-06, + "loss": 0.7961, + "step": 5722 + }, + { + "epoch": 0.17540149564790977, + "grad_norm": 1.8036619024156408, + "learning_rate": 9.455787208617962e-06, + "loss": 0.7531, + "step": 5723 + }, + { + "epoch": 0.17543214417065098, + "grad_norm": 1.9075384458434117, + "learning_rate": 9.455562010148821e-06, + "loss": 0.7936, + "step": 5724 + }, + { + "epoch": 0.17546279269339218, + "grad_norm": 1.5886458510557788, + "learning_rate": 9.455336767777888e-06, + "loss": 0.6519, + "step": 5725 + }, + { + "epoch": 0.1754934412161334, + "grad_norm": 1.7399761909356275, + "learning_rate": 9.45511148150738e-06, + "loss": 0.685, + "step": 5726 + }, + { + "epoch": 0.1755240897388746, + "grad_norm": 1.0901201679087262, + "learning_rate": 9.45488615133952e-06, + "loss": 0.5066, + "step": 5727 + }, + { + "epoch": 0.1755547382616158, + "grad_norm": 1.9006868968498747, + "learning_rate": 9.454660777276528e-06, + "loss": 0.7361, + "step": 5728 + }, + { + "epoch": 0.175585386784357, + "grad_norm": 1.7845863131590807, + "learning_rate": 9.454435359320622e-06, + "loss": 0.7225, + "step": 5729 + }, + { + "epoch": 0.1756160353070982, + "grad_norm": 0.8113712613035167, + "learning_rate": 9.454209897474025e-06, + "loss": 0.5131, + "step": 5730 + }, + { + "epoch": 0.1756466838298394, + "grad_norm": 0.827095645517229, + "learning_rate": 9.45398439173896e-06, + "loss": 0.5036, + "step": 5731 + }, + { + "epoch": 0.1756773323525806, + "grad_norm": 1.6342563460544417, + "learning_rate": 9.453758842117645e-06, + "loss": 0.7327, + "step": 5732 + }, + { + "epoch": 0.1757079808753218, + "grad_norm": 0.8334942014634006, + "learning_rate": 9.453533248612305e-06, + "loss": 0.4961, + "step": 5733 + }, + { + "epoch": 0.175738629398063, + "grad_norm": 1.7480244296604095, + "learning_rate": 9.453307611225162e-06, + "loss": 0.7513, + "step": 5734 + }, + { + "epoch": 0.1757692779208042, + "grad_norm": 1.5123369590340752, + "learning_rate": 9.45308192995844e-06, + "loss": 0.7334, + "step": 5735 + }, + { + "epoch": 0.17579992644354542, + "grad_norm": 2.022638334932778, + "learning_rate": 9.452856204814364e-06, + "loss": 0.7208, + "step": 5736 + }, + { + "epoch": 0.17583057496628662, + "grad_norm": 1.84029326619947, + "learning_rate": 
9.452630435795155e-06, + "loss": 0.7079, + "step": 5737 + }, + { + "epoch": 0.17586122348902783, + "grad_norm": 1.668218577933512, + "learning_rate": 9.452404622903039e-06, + "loss": 0.6915, + "step": 5738 + }, + { + "epoch": 0.17589187201176903, + "grad_norm": 0.8890738600091329, + "learning_rate": 9.452178766140241e-06, + "loss": 0.5195, + "step": 5739 + }, + { + "epoch": 0.17592252053451024, + "grad_norm": 1.8680109347309453, + "learning_rate": 9.451952865508986e-06, + "loss": 0.7467, + "step": 5740 + }, + { + "epoch": 0.17595316905725145, + "grad_norm": 1.7143289575964116, + "learning_rate": 9.451726921011501e-06, + "loss": 0.7499, + "step": 5741 + }, + { + "epoch": 0.17598381757999265, + "grad_norm": 2.0626602435004435, + "learning_rate": 9.451500932650014e-06, + "loss": 0.812, + "step": 5742 + }, + { + "epoch": 0.17601446610273386, + "grad_norm": 1.9012573876591592, + "learning_rate": 9.451274900426746e-06, + "loss": 0.7166, + "step": 5743 + }, + { + "epoch": 0.17604511462547506, + "grad_norm": 1.7035906306306412, + "learning_rate": 9.451048824343929e-06, + "loss": 0.6817, + "step": 5744 + }, + { + "epoch": 0.17607576314821627, + "grad_norm": 1.8638621908707322, + "learning_rate": 9.450822704403788e-06, + "loss": 0.7752, + "step": 5745 + }, + { + "epoch": 0.17610641167095745, + "grad_norm": 1.6003726877449505, + "learning_rate": 9.450596540608553e-06, + "loss": 0.6392, + "step": 5746 + }, + { + "epoch": 0.17613706019369865, + "grad_norm": 1.6529601051680127, + "learning_rate": 9.450370332960452e-06, + "loss": 0.7044, + "step": 5747 + }, + { + "epoch": 0.17616770871643986, + "grad_norm": 0.9191211621205951, + "learning_rate": 9.450144081461711e-06, + "loss": 0.5197, + "step": 5748 + }, + { + "epoch": 0.17619835723918106, + "grad_norm": 1.8345111203617988, + "learning_rate": 9.449917786114564e-06, + "loss": 0.6627, + "step": 5749 + }, + { + "epoch": 0.17622900576192227, + "grad_norm": 1.8837104171824144, + "learning_rate": 9.449691446921238e-06, + "loss": 0.7072, + "step": 5750 + }, + { + "epoch": 0.17625965428466348, + "grad_norm": 0.8359224169650957, + "learning_rate": 9.449465063883964e-06, + "loss": 0.4894, + "step": 5751 + }, + { + "epoch": 0.17629030280740468, + "grad_norm": 1.8678176680049385, + "learning_rate": 9.449238637004973e-06, + "loss": 0.7649, + "step": 5752 + }, + { + "epoch": 0.1763209513301459, + "grad_norm": 1.722350232307049, + "learning_rate": 9.449012166286493e-06, + "loss": 0.6888, + "step": 5753 + }, + { + "epoch": 0.1763515998528871, + "grad_norm": 1.8521802576395903, + "learning_rate": 9.44878565173076e-06, + "loss": 0.7244, + "step": 5754 + }, + { + "epoch": 0.1763822483756283, + "grad_norm": 1.788802200357734, + "learning_rate": 9.448559093340003e-06, + "loss": 0.6922, + "step": 5755 + }, + { + "epoch": 0.1764128968983695, + "grad_norm": 2.026475708765547, + "learning_rate": 9.448332491116454e-06, + "loss": 0.8257, + "step": 5756 + }, + { + "epoch": 0.1764435454211107, + "grad_norm": 1.9826421669401368, + "learning_rate": 9.448105845062348e-06, + "loss": 0.7097, + "step": 5757 + }, + { + "epoch": 0.17647419394385191, + "grad_norm": 1.6058442454553261, + "learning_rate": 9.447879155179916e-06, + "loss": 0.7718, + "step": 5758 + }, + { + "epoch": 0.17650484246659312, + "grad_norm": 2.043485300613341, + "learning_rate": 9.447652421471394e-06, + "loss": 0.6689, + "step": 5759 + }, + { + "epoch": 0.17653549098933433, + "grad_norm": 1.7590226259778299, + "learning_rate": 9.447425643939014e-06, + "loss": 0.793, + "step": 5760 + }, + { + "epoch": 
0.17656613951207553, + "grad_norm": 1.8460788159725612, + "learning_rate": 9.447198822585011e-06, + "loss": 0.72, + "step": 5761 + }, + { + "epoch": 0.1765967880348167, + "grad_norm": 1.878590516513209, + "learning_rate": 9.44697195741162e-06, + "loss": 0.7661, + "step": 5762 + }, + { + "epoch": 0.17662743655755792, + "grad_norm": 0.8950772274981739, + "learning_rate": 9.446745048421077e-06, + "loss": 0.4869, + "step": 5763 + }, + { + "epoch": 0.17665808508029912, + "grad_norm": 1.7756787091676256, + "learning_rate": 9.446518095615618e-06, + "loss": 0.7665, + "step": 5764 + }, + { + "epoch": 0.17668873360304033, + "grad_norm": 1.9000531864856771, + "learning_rate": 9.446291098997477e-06, + "loss": 0.7793, + "step": 5765 + }, + { + "epoch": 0.17671938212578153, + "grad_norm": 0.8787859751913973, + "learning_rate": 9.446064058568894e-06, + "loss": 0.4924, + "step": 5766 + }, + { + "epoch": 0.17675003064852274, + "grad_norm": 1.6707331137080594, + "learning_rate": 9.445836974332103e-06, + "loss": 0.7315, + "step": 5767 + }, + { + "epoch": 0.17678067917126394, + "grad_norm": 1.9418733296388548, + "learning_rate": 9.445609846289342e-06, + "loss": 0.8675, + "step": 5768 + }, + { + "epoch": 0.17681132769400515, + "grad_norm": 2.287554131954926, + "learning_rate": 9.44538267444285e-06, + "loss": 0.8014, + "step": 5769 + }, + { + "epoch": 0.17684197621674635, + "grad_norm": 1.8018527715932224, + "learning_rate": 9.445155458794867e-06, + "loss": 0.7484, + "step": 5770 + }, + { + "epoch": 0.17687262473948756, + "grad_norm": 1.637121740953951, + "learning_rate": 9.444928199347627e-06, + "loss": 0.8071, + "step": 5771 + }, + { + "epoch": 0.17690327326222877, + "grad_norm": 1.7014331525059323, + "learning_rate": 9.444700896103373e-06, + "loss": 0.7811, + "step": 5772 + }, + { + "epoch": 0.17693392178496997, + "grad_norm": 1.9595151400722906, + "learning_rate": 9.444473549064346e-06, + "loss": 0.703, + "step": 5773 + }, + { + "epoch": 0.17696457030771118, + "grad_norm": 1.6441382586565167, + "learning_rate": 9.444246158232783e-06, + "loss": 0.6767, + "step": 5774 + }, + { + "epoch": 0.17699521883045238, + "grad_norm": 1.7715483824540856, + "learning_rate": 9.444018723610925e-06, + "loss": 0.7514, + "step": 5775 + }, + { + "epoch": 0.1770258673531936, + "grad_norm": 1.8113219553116782, + "learning_rate": 9.443791245201013e-06, + "loss": 0.7127, + "step": 5776 + }, + { + "epoch": 0.17705651587593477, + "grad_norm": 1.6823826241099253, + "learning_rate": 9.443563723005288e-06, + "loss": 0.7488, + "step": 5777 + }, + { + "epoch": 0.17708716439867597, + "grad_norm": 2.1519720361651715, + "learning_rate": 9.443336157025995e-06, + "loss": 0.6278, + "step": 5778 + }, + { + "epoch": 0.17711781292141718, + "grad_norm": 1.9314738508750795, + "learning_rate": 9.443108547265375e-06, + "loss": 0.7274, + "step": 5779 + }, + { + "epoch": 0.17714846144415838, + "grad_norm": 1.9399494064439793, + "learning_rate": 9.442880893725667e-06, + "loss": 0.7324, + "step": 5780 + }, + { + "epoch": 0.1771791099668996, + "grad_norm": 2.081476505273356, + "learning_rate": 9.442653196409117e-06, + "loss": 0.6347, + "step": 5781 + }, + { + "epoch": 0.1772097584896408, + "grad_norm": 1.9561710542089297, + "learning_rate": 9.44242545531797e-06, + "loss": 0.819, + "step": 5782 + }, + { + "epoch": 0.177240407012382, + "grad_norm": 1.8839511350435951, + "learning_rate": 9.442197670454466e-06, + "loss": 0.7583, + "step": 5783 + }, + { + "epoch": 0.1772710555351232, + "grad_norm": 1.7881559514458165, + "learning_rate": 9.441969841820853e-06, + 
"loss": 0.6896, + "step": 5784 + }, + { + "epoch": 0.1773017040578644, + "grad_norm": 1.6921306598464383, + "learning_rate": 9.441741969419374e-06, + "loss": 0.7519, + "step": 5785 + }, + { + "epoch": 0.17733235258060562, + "grad_norm": 1.7406216780323696, + "learning_rate": 9.441514053252276e-06, + "loss": 0.7622, + "step": 5786 + }, + { + "epoch": 0.17736300110334682, + "grad_norm": 1.84754985439868, + "learning_rate": 9.441286093321803e-06, + "loss": 0.7304, + "step": 5787 + }, + { + "epoch": 0.17739364962608803, + "grad_norm": 1.773207354028045, + "learning_rate": 9.441058089630201e-06, + "loss": 0.7435, + "step": 5788 + }, + { + "epoch": 0.17742429814882923, + "grad_norm": 1.7348983307309274, + "learning_rate": 9.44083004217972e-06, + "loss": 0.7167, + "step": 5789 + }, + { + "epoch": 0.17745494667157044, + "grad_norm": 1.697935832040471, + "learning_rate": 9.440601950972603e-06, + "loss": 0.7077, + "step": 5790 + }, + { + "epoch": 0.17748559519431165, + "grad_norm": 1.7519345724779718, + "learning_rate": 9.440373816011097e-06, + "loss": 0.7111, + "step": 5791 + }, + { + "epoch": 0.17751624371705285, + "grad_norm": 1.86244096973403, + "learning_rate": 9.440145637297453e-06, + "loss": 0.7558, + "step": 5792 + }, + { + "epoch": 0.17754689223979403, + "grad_norm": 1.7381317150683129, + "learning_rate": 9.439917414833919e-06, + "loss": 0.7619, + "step": 5793 + }, + { + "epoch": 0.17757754076253524, + "grad_norm": 1.9197654254052978, + "learning_rate": 9.43968914862274e-06, + "loss": 0.7731, + "step": 5794 + }, + { + "epoch": 0.17760818928527644, + "grad_norm": 1.6771565914169102, + "learning_rate": 9.439460838666172e-06, + "loss": 0.732, + "step": 5795 + }, + { + "epoch": 0.17763883780801765, + "grad_norm": 1.8422831502969956, + "learning_rate": 9.439232484966458e-06, + "loss": 0.7412, + "step": 5796 + }, + { + "epoch": 0.17766948633075885, + "grad_norm": 1.8128049759179174, + "learning_rate": 9.439004087525849e-06, + "loss": 0.7007, + "step": 5797 + }, + { + "epoch": 0.17770013485350006, + "grad_norm": 1.0279292336439276, + "learning_rate": 9.4387756463466e-06, + "loss": 0.4943, + "step": 5798 + }, + { + "epoch": 0.17773078337624126, + "grad_norm": 0.9657390024736109, + "learning_rate": 9.438547161430957e-06, + "loss": 0.5027, + "step": 5799 + }, + { + "epoch": 0.17776143189898247, + "grad_norm": 0.8213963403381883, + "learning_rate": 9.438318632781174e-06, + "loss": 0.5125, + "step": 5800 + }, + { + "epoch": 0.17779208042172367, + "grad_norm": 1.624050665816552, + "learning_rate": 9.4380900603995e-06, + "loss": 0.7315, + "step": 5801 + }, + { + "epoch": 0.17782272894446488, + "grad_norm": 2.146100549297567, + "learning_rate": 9.437861444288193e-06, + "loss": 0.8071, + "step": 5802 + }, + { + "epoch": 0.1778533774672061, + "grad_norm": 1.698076272359264, + "learning_rate": 9.437632784449498e-06, + "loss": 0.7273, + "step": 5803 + }, + { + "epoch": 0.1778840259899473, + "grad_norm": 1.9517697054730918, + "learning_rate": 9.437404080885673e-06, + "loss": 0.7291, + "step": 5804 + }, + { + "epoch": 0.1779146745126885, + "grad_norm": 2.014269851595732, + "learning_rate": 9.437175333598971e-06, + "loss": 0.7332, + "step": 5805 + }, + { + "epoch": 0.1779453230354297, + "grad_norm": 1.7206998013096297, + "learning_rate": 9.436946542591644e-06, + "loss": 0.7391, + "step": 5806 + }, + { + "epoch": 0.1779759715581709, + "grad_norm": 2.3020720158606767, + "learning_rate": 9.436717707865948e-06, + "loss": 0.8716, + "step": 5807 + }, + { + "epoch": 0.1780066200809121, + "grad_norm": 
2.0515013316860173, + "learning_rate": 9.436488829424138e-06, + "loss": 0.6837, + "step": 5808 + }, + { + "epoch": 0.1780372686036533, + "grad_norm": 1.840107840106971, + "learning_rate": 9.436259907268466e-06, + "loss": 0.7369, + "step": 5809 + }, + { + "epoch": 0.1780679171263945, + "grad_norm": 1.9964817958978358, + "learning_rate": 9.436030941401192e-06, + "loss": 0.6995, + "step": 5810 + }, + { + "epoch": 0.1780985656491357, + "grad_norm": 1.6486556771487852, + "learning_rate": 9.43580193182457e-06, + "loss": 0.6285, + "step": 5811 + }, + { + "epoch": 0.1781292141718769, + "grad_norm": 1.9204768571910427, + "learning_rate": 9.435572878540857e-06, + "loss": 0.6611, + "step": 5812 + }, + { + "epoch": 0.17815986269461812, + "grad_norm": 1.80181079983011, + "learning_rate": 9.435343781552308e-06, + "loss": 0.6332, + "step": 5813 + }, + { + "epoch": 0.17819051121735932, + "grad_norm": 1.8829445782025904, + "learning_rate": 9.435114640861183e-06, + "loss": 0.7841, + "step": 5814 + }, + { + "epoch": 0.17822115974010053, + "grad_norm": 2.0202055203944043, + "learning_rate": 9.434885456469739e-06, + "loss": 0.6966, + "step": 5815 + }, + { + "epoch": 0.17825180826284173, + "grad_norm": 1.708249615895977, + "learning_rate": 9.434656228380233e-06, + "loss": 0.7739, + "step": 5816 + }, + { + "epoch": 0.17828245678558294, + "grad_norm": 1.8185136130993558, + "learning_rate": 9.434426956594926e-06, + "loss": 0.734, + "step": 5817 + }, + { + "epoch": 0.17831310530832414, + "grad_norm": 1.9269190071897233, + "learning_rate": 9.434197641116074e-06, + "loss": 0.8191, + "step": 5818 + }, + { + "epoch": 0.17834375383106535, + "grad_norm": 1.6579585711912772, + "learning_rate": 9.433968281945939e-06, + "loss": 0.8347, + "step": 5819 + }, + { + "epoch": 0.17837440235380655, + "grad_norm": 1.8842667214275879, + "learning_rate": 9.43373887908678e-06, + "loss": 0.7757, + "step": 5820 + }, + { + "epoch": 0.17840505087654776, + "grad_norm": 1.862754653015809, + "learning_rate": 9.433509432540856e-06, + "loss": 0.8586, + "step": 5821 + }, + { + "epoch": 0.17843569939928897, + "grad_norm": 1.8249276408953818, + "learning_rate": 9.433279942310431e-06, + "loss": 0.79, + "step": 5822 + }, + { + "epoch": 0.17846634792203017, + "grad_norm": 1.6630379813333447, + "learning_rate": 9.433050408397763e-06, + "loss": 0.596, + "step": 5823 + }, + { + "epoch": 0.17849699644477135, + "grad_norm": 1.7142087636894647, + "learning_rate": 9.432820830805116e-06, + "loss": 0.7994, + "step": 5824 + }, + { + "epoch": 0.17852764496751256, + "grad_norm": 1.3920370920686236, + "learning_rate": 9.432591209534752e-06, + "loss": 0.5252, + "step": 5825 + }, + { + "epoch": 0.17855829349025376, + "grad_norm": 1.794202467788712, + "learning_rate": 9.43236154458893e-06, + "loss": 0.7274, + "step": 5826 + }, + { + "epoch": 0.17858894201299497, + "grad_norm": 1.7804792596810841, + "learning_rate": 9.432131835969918e-06, + "loss": 0.7802, + "step": 5827 + }, + { + "epoch": 0.17861959053573617, + "grad_norm": 1.8532558601856488, + "learning_rate": 9.431902083679976e-06, + "loss": 0.7878, + "step": 5828 + }, + { + "epoch": 0.17865023905847738, + "grad_norm": 1.4008980277209808, + "learning_rate": 9.43167228772137e-06, + "loss": 0.5955, + "step": 5829 + }, + { + "epoch": 0.17868088758121858, + "grad_norm": 0.8593681639325206, + "learning_rate": 9.431442448096363e-06, + "loss": 0.5076, + "step": 5830 + }, + { + "epoch": 0.1787115361039598, + "grad_norm": 0.8499073296302261, + "learning_rate": 9.431212564807217e-06, + "loss": 0.5078, + "step": 5831 + }, 
+ { + "epoch": 0.178742184626701, + "grad_norm": 1.8997848063645866, + "learning_rate": 9.430982637856202e-06, + "loss": 0.8148, + "step": 5832 + }, + { + "epoch": 0.1787728331494422, + "grad_norm": 1.8708416160741905, + "learning_rate": 9.430752667245581e-06, + "loss": 0.6323, + "step": 5833 + }, + { + "epoch": 0.1788034816721834, + "grad_norm": 1.803201825024467, + "learning_rate": 9.43052265297762e-06, + "loss": 0.6891, + "step": 5834 + }, + { + "epoch": 0.1788341301949246, + "grad_norm": 1.842901838003687, + "learning_rate": 9.430292595054586e-06, + "loss": 0.7656, + "step": 5835 + }, + { + "epoch": 0.17886477871766582, + "grad_norm": 1.6228283274703377, + "learning_rate": 9.430062493478746e-06, + "loss": 0.7131, + "step": 5836 + }, + { + "epoch": 0.17889542724040702, + "grad_norm": 1.6818408156257145, + "learning_rate": 9.429832348252365e-06, + "loss": 0.7484, + "step": 5837 + }, + { + "epoch": 0.17892607576314823, + "grad_norm": 1.833636850374293, + "learning_rate": 9.429602159377715e-06, + "loss": 0.7903, + "step": 5838 + }, + { + "epoch": 0.1789567242858894, + "grad_norm": 1.8002980059140181, + "learning_rate": 9.42937192685706e-06, + "loss": 0.6904, + "step": 5839 + }, + { + "epoch": 0.1789873728086306, + "grad_norm": 1.1618374484895906, + "learning_rate": 9.42914165069267e-06, + "loss": 0.5208, + "step": 5840 + }, + { + "epoch": 0.17901802133137182, + "grad_norm": 1.7529790766707414, + "learning_rate": 9.428911330886816e-06, + "loss": 0.7528, + "step": 5841 + }, + { + "epoch": 0.17904866985411302, + "grad_norm": 1.7631926354186496, + "learning_rate": 9.428680967441764e-06, + "loss": 0.7405, + "step": 5842 + }, + { + "epoch": 0.17907931837685423, + "grad_norm": 1.6881117719227696, + "learning_rate": 9.428450560359786e-06, + "loss": 0.7177, + "step": 5843 + }, + { + "epoch": 0.17910996689959544, + "grad_norm": 0.8576333463548007, + "learning_rate": 9.428220109643149e-06, + "loss": 0.4861, + "step": 5844 + }, + { + "epoch": 0.17914061542233664, + "grad_norm": 1.5855762403069071, + "learning_rate": 9.427989615294128e-06, + "loss": 0.6883, + "step": 5845 + }, + { + "epoch": 0.17917126394507785, + "grad_norm": 1.8791246739721876, + "learning_rate": 9.427759077314993e-06, + "loss": 0.7195, + "step": 5846 + }, + { + "epoch": 0.17920191246781905, + "grad_norm": 1.6436413413208617, + "learning_rate": 9.427528495708015e-06, + "loss": 0.6737, + "step": 5847 + }, + { + "epoch": 0.17923256099056026, + "grad_norm": 2.739527540491803, + "learning_rate": 9.427297870475465e-06, + "loss": 0.789, + "step": 5848 + }, + { + "epoch": 0.17926320951330146, + "grad_norm": 1.7539723618598084, + "learning_rate": 9.427067201619618e-06, + "loss": 0.6749, + "step": 5849 + }, + { + "epoch": 0.17929385803604267, + "grad_norm": 1.7666798832033093, + "learning_rate": 9.426836489142743e-06, + "loss": 0.8171, + "step": 5850 + }, + { + "epoch": 0.17932450655878387, + "grad_norm": 2.287176571195317, + "learning_rate": 9.426605733047116e-06, + "loss": 0.7108, + "step": 5851 + }, + { + "epoch": 0.17935515508152508, + "grad_norm": 1.6595303304547382, + "learning_rate": 9.426374933335009e-06, + "loss": 0.636, + "step": 5852 + }, + { + "epoch": 0.1793858036042663, + "grad_norm": 1.7489518983703711, + "learning_rate": 9.4261440900087e-06, + "loss": 0.5999, + "step": 5853 + }, + { + "epoch": 0.1794164521270075, + "grad_norm": 1.5584982831244998, + "learning_rate": 9.42591320307046e-06, + "loss": 0.643, + "step": 5854 + }, + { + "epoch": 0.17944710064974867, + "grad_norm": 1.9325728443637875, + "learning_rate": 
9.425682272522562e-06, + "loss": 0.7839, + "step": 5855 + }, + { + "epoch": 0.17947774917248988, + "grad_norm": 2.0418863205894695, + "learning_rate": 9.425451298367287e-06, + "loss": 0.7307, + "step": 5856 + }, + { + "epoch": 0.17950839769523108, + "grad_norm": 1.0376679606486359, + "learning_rate": 9.425220280606908e-06, + "loss": 0.5136, + "step": 5857 + }, + { + "epoch": 0.1795390462179723, + "grad_norm": 1.8445126219758383, + "learning_rate": 9.424989219243701e-06, + "loss": 0.6974, + "step": 5858 + }, + { + "epoch": 0.1795696947407135, + "grad_norm": 1.674247926387593, + "learning_rate": 9.424758114279942e-06, + "loss": 0.7051, + "step": 5859 + }, + { + "epoch": 0.1796003432634547, + "grad_norm": 1.833805607481279, + "learning_rate": 9.42452696571791e-06, + "loss": 0.6951, + "step": 5860 + }, + { + "epoch": 0.1796309917861959, + "grad_norm": 0.8669664978337449, + "learning_rate": 9.424295773559882e-06, + "loss": 0.4968, + "step": 5861 + }, + { + "epoch": 0.1796616403089371, + "grad_norm": 1.9772877900384296, + "learning_rate": 9.424064537808135e-06, + "loss": 0.8199, + "step": 5862 + }, + { + "epoch": 0.17969228883167832, + "grad_norm": 1.7833461603942777, + "learning_rate": 9.42383325846495e-06, + "loss": 0.7738, + "step": 5863 + }, + { + "epoch": 0.17972293735441952, + "grad_norm": 1.9413549355091715, + "learning_rate": 9.423601935532603e-06, + "loss": 0.7832, + "step": 5864 + }, + { + "epoch": 0.17975358587716073, + "grad_norm": 1.825302951449379, + "learning_rate": 9.423370569013373e-06, + "loss": 0.7821, + "step": 5865 + }, + { + "epoch": 0.17978423439990193, + "grad_norm": 0.9008691673368242, + "learning_rate": 9.423139158909542e-06, + "loss": 0.5033, + "step": 5866 + }, + { + "epoch": 0.17981488292264314, + "grad_norm": 1.748489111936005, + "learning_rate": 9.42290770522339e-06, + "loss": 0.7256, + "step": 5867 + }, + { + "epoch": 0.17984553144538434, + "grad_norm": 1.8359770806050117, + "learning_rate": 9.422676207957195e-06, + "loss": 0.7589, + "step": 5868 + }, + { + "epoch": 0.17987617996812555, + "grad_norm": 1.7336921697256225, + "learning_rate": 9.422444667113239e-06, + "loss": 0.735, + "step": 5869 + }, + { + "epoch": 0.17990682849086673, + "grad_norm": 1.8536967967621325, + "learning_rate": 9.422213082693807e-06, + "loss": 0.7565, + "step": 5870 + }, + { + "epoch": 0.17993747701360793, + "grad_norm": 0.9111016951447877, + "learning_rate": 9.421981454701176e-06, + "loss": 0.5203, + "step": 5871 + }, + { + "epoch": 0.17996812553634914, + "grad_norm": 1.7870567166921574, + "learning_rate": 9.421749783137632e-06, + "loss": 0.8034, + "step": 5872 + }, + { + "epoch": 0.17999877405909034, + "grad_norm": 1.5235667873245717, + "learning_rate": 9.421518068005455e-06, + "loss": 0.6519, + "step": 5873 + }, + { + "epoch": 0.18002942258183155, + "grad_norm": 0.7877934145340947, + "learning_rate": 9.42128630930693e-06, + "loss": 0.4949, + "step": 5874 + }, + { + "epoch": 0.18006007110457276, + "grad_norm": 1.6553215860885608, + "learning_rate": 9.421054507044339e-06, + "loss": 0.6168, + "step": 5875 + }, + { + "epoch": 0.18009071962731396, + "grad_norm": 1.6675075946318538, + "learning_rate": 9.420822661219966e-06, + "loss": 0.8061, + "step": 5876 + }, + { + "epoch": 0.18012136815005517, + "grad_norm": 2.0763552869000454, + "learning_rate": 9.420590771836098e-06, + "loss": 0.8351, + "step": 5877 + }, + { + "epoch": 0.18015201667279637, + "grad_norm": 1.760104476054214, + "learning_rate": 9.420358838895016e-06, + "loss": 0.7627, + "step": 5878 + }, + { + "epoch": 0.18018266519553758, 
+ "grad_norm": 1.9044952113476528, + "learning_rate": 9.420126862399008e-06, + "loss": 0.6426, + "step": 5879 + }, + { + "epoch": 0.18021331371827878, + "grad_norm": 1.7761179356669967, + "learning_rate": 9.41989484235036e-06, + "loss": 0.7657, + "step": 5880 + }, + { + "epoch": 0.18024396224102, + "grad_norm": 1.7682850471853415, + "learning_rate": 9.419662778751356e-06, + "loss": 0.6504, + "step": 5881 + }, + { + "epoch": 0.1802746107637612, + "grad_norm": 1.78466159021523, + "learning_rate": 9.419430671604286e-06, + "loss": 0.746, + "step": 5882 + }, + { + "epoch": 0.1803052592865024, + "grad_norm": 1.900552099274463, + "learning_rate": 9.419198520911433e-06, + "loss": 0.7653, + "step": 5883 + }, + { + "epoch": 0.1803359078092436, + "grad_norm": 2.0220287042761753, + "learning_rate": 9.418966326675088e-06, + "loss": 0.7916, + "step": 5884 + }, + { + "epoch": 0.1803665563319848, + "grad_norm": 1.8954621996777128, + "learning_rate": 9.418734088897534e-06, + "loss": 0.8, + "step": 5885 + }, + { + "epoch": 0.180397204854726, + "grad_norm": 1.701517641623818, + "learning_rate": 9.418501807581065e-06, + "loss": 0.7581, + "step": 5886 + }, + { + "epoch": 0.1804278533774672, + "grad_norm": 0.9723197191331902, + "learning_rate": 9.418269482727966e-06, + "loss": 0.503, + "step": 5887 + }, + { + "epoch": 0.1804585019002084, + "grad_norm": 0.8942186675342775, + "learning_rate": 9.418037114340528e-06, + "loss": 0.4657, + "step": 5888 + }, + { + "epoch": 0.1804891504229496, + "grad_norm": 1.8216773164793205, + "learning_rate": 9.41780470242104e-06, + "loss": 0.7336, + "step": 5889 + }, + { + "epoch": 0.1805197989456908, + "grad_norm": 1.7150182287798925, + "learning_rate": 9.417572246971791e-06, + "loss": 0.7963, + "step": 5890 + }, + { + "epoch": 0.18055044746843202, + "grad_norm": 1.8288328899091826, + "learning_rate": 9.417339747995074e-06, + "loss": 0.7084, + "step": 5891 + }, + { + "epoch": 0.18058109599117322, + "grad_norm": 1.697856111416525, + "learning_rate": 9.417107205493177e-06, + "loss": 0.8015, + "step": 5892 + }, + { + "epoch": 0.18061174451391443, + "grad_norm": 1.7424036440705317, + "learning_rate": 9.416874619468393e-06, + "loss": 0.6258, + "step": 5893 + }, + { + "epoch": 0.18064239303665564, + "grad_norm": 1.023700087824917, + "learning_rate": 9.416641989923012e-06, + "loss": 0.5004, + "step": 5894 + }, + { + "epoch": 0.18067304155939684, + "grad_norm": 1.6553259592850358, + "learning_rate": 9.41640931685933e-06, + "loss": 0.7095, + "step": 5895 + }, + { + "epoch": 0.18070369008213805, + "grad_norm": 1.8054364207843125, + "learning_rate": 9.416176600279635e-06, + "loss": 0.6849, + "step": 5896 + }, + { + "epoch": 0.18073433860487925, + "grad_norm": 0.9629790063270299, + "learning_rate": 9.41594384018622e-06, + "loss": 0.4853, + "step": 5897 + }, + { + "epoch": 0.18076498712762046, + "grad_norm": 0.820096691234335, + "learning_rate": 9.415711036581385e-06, + "loss": 0.4669, + "step": 5898 + }, + { + "epoch": 0.18079563565036166, + "grad_norm": 1.8031337471508013, + "learning_rate": 9.415478189467418e-06, + "loss": 0.7105, + "step": 5899 + }, + { + "epoch": 0.18082628417310287, + "grad_norm": 1.9692857915560416, + "learning_rate": 9.415245298846614e-06, + "loss": 0.7169, + "step": 5900 + }, + { + "epoch": 0.18085693269584405, + "grad_norm": 2.05253403449661, + "learning_rate": 9.415012364721267e-06, + "loss": 0.8093, + "step": 5901 + }, + { + "epoch": 0.18088758121858525, + "grad_norm": 0.9398405252202149, + "learning_rate": 9.414779387093675e-06, + "loss": 0.4952, + "step": 5902 + 
}, + { + "epoch": 0.18091822974132646, + "grad_norm": 1.8780503261581183, + "learning_rate": 9.414546365966133e-06, + "loss": 0.7068, + "step": 5903 + }, + { + "epoch": 0.18094887826406766, + "grad_norm": 2.007650527224393, + "learning_rate": 9.414313301340936e-06, + "loss": 0.8406, + "step": 5904 + }, + { + "epoch": 0.18097952678680887, + "grad_norm": 0.9265484828219778, + "learning_rate": 9.41408019322038e-06, + "loss": 0.4999, + "step": 5905 + }, + { + "epoch": 0.18101017530955008, + "grad_norm": 2.251518868719559, + "learning_rate": 9.413847041606761e-06, + "loss": 0.7935, + "step": 5906 + }, + { + "epoch": 0.18104082383229128, + "grad_norm": 1.7515609352776633, + "learning_rate": 9.413613846502379e-06, + "loss": 0.7437, + "step": 5907 + }, + { + "epoch": 0.1810714723550325, + "grad_norm": 2.030025331733851, + "learning_rate": 9.41338060790953e-06, + "loss": 0.7105, + "step": 5908 + }, + { + "epoch": 0.1811021208777737, + "grad_norm": 1.5624696730941026, + "learning_rate": 9.413147325830513e-06, + "loss": 0.5705, + "step": 5909 + }, + { + "epoch": 0.1811327694005149, + "grad_norm": 1.6730331528310387, + "learning_rate": 9.412914000267626e-06, + "loss": 0.7379, + "step": 5910 + }, + { + "epoch": 0.1811634179232561, + "grad_norm": 1.924874718474037, + "learning_rate": 9.41268063122317e-06, + "loss": 0.8655, + "step": 5911 + }, + { + "epoch": 0.1811940664459973, + "grad_norm": 1.4302058443312613, + "learning_rate": 9.412447218699442e-06, + "loss": 0.5174, + "step": 5912 + }, + { + "epoch": 0.18122471496873852, + "grad_norm": 1.7962428938987272, + "learning_rate": 9.41221376269874e-06, + "loss": 0.8752, + "step": 5913 + }, + { + "epoch": 0.18125536349147972, + "grad_norm": 1.6351270314175965, + "learning_rate": 9.41198026322337e-06, + "loss": 0.6943, + "step": 5914 + }, + { + "epoch": 0.18128601201422093, + "grad_norm": 0.8212248861089475, + "learning_rate": 9.411746720275628e-06, + "loss": 0.5045, + "step": 5915 + }, + { + "epoch": 0.18131666053696213, + "grad_norm": 1.9144234508007552, + "learning_rate": 9.411513133857816e-06, + "loss": 0.6778, + "step": 5916 + }, + { + "epoch": 0.1813473090597033, + "grad_norm": 1.9787127661364483, + "learning_rate": 9.411279503972239e-06, + "loss": 0.7675, + "step": 5917 + }, + { + "epoch": 0.18137795758244452, + "grad_norm": 1.6547472275215418, + "learning_rate": 9.411045830621194e-06, + "loss": 0.7603, + "step": 5918 + }, + { + "epoch": 0.18140860610518572, + "grad_norm": 1.728444374853868, + "learning_rate": 9.410812113806987e-06, + "loss": 0.7526, + "step": 5919 + }, + { + "epoch": 0.18143925462792693, + "grad_norm": 1.7644044786072728, + "learning_rate": 9.41057835353192e-06, + "loss": 0.6697, + "step": 5920 + }, + { + "epoch": 0.18146990315066813, + "grad_norm": 1.9227866471595856, + "learning_rate": 9.410344549798296e-06, + "loss": 0.7824, + "step": 5921 + }, + { + "epoch": 0.18150055167340934, + "grad_norm": 1.7397994517582958, + "learning_rate": 9.410110702608418e-06, + "loss": 0.8022, + "step": 5922 + }, + { + "epoch": 0.18153120019615054, + "grad_norm": 1.9331807422829224, + "learning_rate": 9.409876811964591e-06, + "loss": 0.7754, + "step": 5923 + }, + { + "epoch": 0.18156184871889175, + "grad_norm": 1.966337520318365, + "learning_rate": 9.409642877869118e-06, + "loss": 0.7455, + "step": 5924 + }, + { + "epoch": 0.18159249724163296, + "grad_norm": 1.895823409938758, + "learning_rate": 9.409408900324308e-06, + "loss": 0.7007, + "step": 5925 + }, + { + "epoch": 0.18162314576437416, + "grad_norm": 2.2630209204578042, + "learning_rate": 
9.409174879332463e-06, + "loss": 0.7465, + "step": 5926 + }, + { + "epoch": 0.18165379428711537, + "grad_norm": 2.019188641307699, + "learning_rate": 9.408940814895889e-06, + "loss": 0.7127, + "step": 5927 + }, + { + "epoch": 0.18168444280985657, + "grad_norm": 1.6470928102020768, + "learning_rate": 9.408706707016895e-06, + "loss": 0.7403, + "step": 5928 + }, + { + "epoch": 0.18171509133259778, + "grad_norm": 1.5823122173820414, + "learning_rate": 9.408472555697783e-06, + "loss": 0.742, + "step": 5929 + }, + { + "epoch": 0.18174573985533898, + "grad_norm": 1.6627852223745696, + "learning_rate": 9.408238360940864e-06, + "loss": 0.7567, + "step": 5930 + }, + { + "epoch": 0.1817763883780802, + "grad_norm": 1.6788507353882842, + "learning_rate": 9.408004122748447e-06, + "loss": 0.7445, + "step": 5931 + }, + { + "epoch": 0.18180703690082137, + "grad_norm": 0.942548000525406, + "learning_rate": 9.407769841122834e-06, + "loss": 0.5153, + "step": 5932 + }, + { + "epoch": 0.18183768542356257, + "grad_norm": 0.946038584291695, + "learning_rate": 9.40753551606634e-06, + "loss": 0.5044, + "step": 5933 + }, + { + "epoch": 0.18186833394630378, + "grad_norm": 0.862710727285362, + "learning_rate": 9.40730114758127e-06, + "loss": 0.5147, + "step": 5934 + }, + { + "epoch": 0.18189898246904498, + "grad_norm": 1.7546040312192377, + "learning_rate": 9.407066735669931e-06, + "loss": 0.7564, + "step": 5935 + }, + { + "epoch": 0.1819296309917862, + "grad_norm": 1.822255124830463, + "learning_rate": 9.40683228033464e-06, + "loss": 0.7825, + "step": 5936 + }, + { + "epoch": 0.1819602795145274, + "grad_norm": 1.929065066111895, + "learning_rate": 9.4065977815777e-06, + "loss": 0.7109, + "step": 5937 + }, + { + "epoch": 0.1819909280372686, + "grad_norm": 2.067983857000198, + "learning_rate": 9.406363239401427e-06, + "loss": 0.7715, + "step": 5938 + }, + { + "epoch": 0.1820215765600098, + "grad_norm": 1.9774966560245866, + "learning_rate": 9.406128653808128e-06, + "loss": 0.7473, + "step": 5939 + }, + { + "epoch": 0.182052225082751, + "grad_norm": 1.8097886069702653, + "learning_rate": 9.405894024800118e-06, + "loss": 0.7327, + "step": 5940 + }, + { + "epoch": 0.18208287360549222, + "grad_norm": 1.7713107435889393, + "learning_rate": 9.405659352379704e-06, + "loss": 0.6606, + "step": 5941 + }, + { + "epoch": 0.18211352212823342, + "grad_norm": 1.8857476442591896, + "learning_rate": 9.405424636549202e-06, + "loss": 0.6404, + "step": 5942 + }, + { + "epoch": 0.18214417065097463, + "grad_norm": 1.033573687794708, + "learning_rate": 9.405189877310925e-06, + "loss": 0.5085, + "step": 5943 + }, + { + "epoch": 0.18217481917371584, + "grad_norm": 1.867701621220288, + "learning_rate": 9.404955074667185e-06, + "loss": 0.8281, + "step": 5944 + }, + { + "epoch": 0.18220546769645704, + "grad_norm": 0.9193685023382324, + "learning_rate": 9.404720228620294e-06, + "loss": 0.4896, + "step": 5945 + }, + { + "epoch": 0.18223611621919825, + "grad_norm": 0.8088833593442949, + "learning_rate": 9.404485339172568e-06, + "loss": 0.4976, + "step": 5946 + }, + { + "epoch": 0.18226676474193945, + "grad_norm": 2.3315614967834173, + "learning_rate": 9.404250406326323e-06, + "loss": 0.6394, + "step": 5947 + }, + { + "epoch": 0.18229741326468063, + "grad_norm": 2.0095454203586764, + "learning_rate": 9.40401543008387e-06, + "loss": 0.82, + "step": 5948 + }, + { + "epoch": 0.18232806178742184, + "grad_norm": 2.0027629773638775, + "learning_rate": 9.403780410447528e-06, + "loss": 0.7078, + "step": 5949 + }, + { + "epoch": 0.18235871031016304, + 
"grad_norm": 1.8490761625467476, + "learning_rate": 9.40354534741961e-06, + "loss": 0.7464, + "step": 5950 + }, + { + "epoch": 0.18238935883290425, + "grad_norm": 2.2216842566096964, + "learning_rate": 9.403310241002433e-06, + "loss": 0.7619, + "step": 5951 + }, + { + "epoch": 0.18242000735564545, + "grad_norm": 1.731643714918723, + "learning_rate": 9.403075091198311e-06, + "loss": 0.6995, + "step": 5952 + }, + { + "epoch": 0.18245065587838666, + "grad_norm": 1.8918727722107291, + "learning_rate": 9.402839898009566e-06, + "loss": 0.7208, + "step": 5953 + }, + { + "epoch": 0.18248130440112786, + "grad_norm": 1.8125472683795554, + "learning_rate": 9.402604661438513e-06, + "loss": 0.6763, + "step": 5954 + }, + { + "epoch": 0.18251195292386907, + "grad_norm": 1.9885520880427772, + "learning_rate": 9.40236938148747e-06, + "loss": 0.722, + "step": 5955 + }, + { + "epoch": 0.18254260144661028, + "grad_norm": 1.9421514615552873, + "learning_rate": 9.402134058158753e-06, + "loss": 0.7437, + "step": 5956 + }, + { + "epoch": 0.18257324996935148, + "grad_norm": 1.640275569298694, + "learning_rate": 9.401898691454686e-06, + "loss": 0.7058, + "step": 5957 + }, + { + "epoch": 0.1826038984920927, + "grad_norm": 1.863913232832668, + "learning_rate": 9.401663281377583e-06, + "loss": 0.6951, + "step": 5958 + }, + { + "epoch": 0.1826345470148339, + "grad_norm": 1.9474825572424828, + "learning_rate": 9.401427827929766e-06, + "loss": 0.7391, + "step": 5959 + }, + { + "epoch": 0.1826651955375751, + "grad_norm": 2.1537499225732706, + "learning_rate": 9.401192331113553e-06, + "loss": 0.8694, + "step": 5960 + }, + { + "epoch": 0.1826958440603163, + "grad_norm": 1.864231698251316, + "learning_rate": 9.400956790931268e-06, + "loss": 0.7681, + "step": 5961 + }, + { + "epoch": 0.1827264925830575, + "grad_norm": 1.7175570744464295, + "learning_rate": 9.400721207385228e-06, + "loss": 0.7682, + "step": 5962 + }, + { + "epoch": 0.1827571411057987, + "grad_norm": 2.0077175519463624, + "learning_rate": 9.400485580477757e-06, + "loss": 0.7913, + "step": 5963 + }, + { + "epoch": 0.1827877896285399, + "grad_norm": 1.786972682535072, + "learning_rate": 9.400249910211176e-06, + "loss": 0.7171, + "step": 5964 + }, + { + "epoch": 0.1828184381512811, + "grad_norm": 1.9735110753304501, + "learning_rate": 9.400014196587805e-06, + "loss": 0.7776, + "step": 5965 + }, + { + "epoch": 0.1828490866740223, + "grad_norm": 1.6968834090954898, + "learning_rate": 9.39977843960997e-06, + "loss": 0.809, + "step": 5966 + }, + { + "epoch": 0.1828797351967635, + "grad_norm": 1.6375045860285529, + "learning_rate": 9.399542639279992e-06, + "loss": 0.5735, + "step": 5967 + }, + { + "epoch": 0.18291038371950472, + "grad_norm": 1.7900332882378185, + "learning_rate": 9.399306795600193e-06, + "loss": 0.7773, + "step": 5968 + }, + { + "epoch": 0.18294103224224592, + "grad_norm": 1.8602994650700393, + "learning_rate": 9.399070908572902e-06, + "loss": 0.7324, + "step": 5969 + }, + { + "epoch": 0.18297168076498713, + "grad_norm": 1.9856513474175341, + "learning_rate": 9.398834978200438e-06, + "loss": 0.7566, + "step": 5970 + }, + { + "epoch": 0.18300232928772833, + "grad_norm": 1.61716327841474, + "learning_rate": 9.398599004485127e-06, + "loss": 0.5258, + "step": 5971 + }, + { + "epoch": 0.18303297781046954, + "grad_norm": 1.9883399255322698, + "learning_rate": 9.398362987429294e-06, + "loss": 0.8403, + "step": 5972 + }, + { + "epoch": 0.18306362633321074, + "grad_norm": 1.722061875914933, + "learning_rate": 9.398126927035267e-06, + "loss": 0.6248, + "step": 
5973 + }, + { + "epoch": 0.18309427485595195, + "grad_norm": 1.7183736482247518, + "learning_rate": 9.397890823305369e-06, + "loss": 0.7619, + "step": 5974 + }, + { + "epoch": 0.18312492337869316, + "grad_norm": 1.5766243236414756, + "learning_rate": 9.397654676241927e-06, + "loss": 0.6888, + "step": 5975 + }, + { + "epoch": 0.18315557190143436, + "grad_norm": 1.8006138727167589, + "learning_rate": 9.39741848584727e-06, + "loss": 0.7703, + "step": 5976 + }, + { + "epoch": 0.18318622042417557, + "grad_norm": 1.8917619515906081, + "learning_rate": 9.397182252123722e-06, + "loss": 0.8246, + "step": 5977 + }, + { + "epoch": 0.18321686894691677, + "grad_norm": 1.6399151436312283, + "learning_rate": 9.396945975073613e-06, + "loss": 0.7043, + "step": 5978 + }, + { + "epoch": 0.18324751746965795, + "grad_norm": 1.5773928795971084, + "learning_rate": 9.39670965469927e-06, + "loss": 0.6775, + "step": 5979 + }, + { + "epoch": 0.18327816599239916, + "grad_norm": 1.6735389142931647, + "learning_rate": 9.396473291003021e-06, + "loss": 0.7435, + "step": 5980 + }, + { + "epoch": 0.18330881451514036, + "grad_norm": 1.824882336206107, + "learning_rate": 9.396236883987196e-06, + "loss": 0.886, + "step": 5981 + }, + { + "epoch": 0.18333946303788157, + "grad_norm": 1.7842283314587843, + "learning_rate": 9.396000433654124e-06, + "loss": 0.7072, + "step": 5982 + }, + { + "epoch": 0.18337011156062277, + "grad_norm": 1.6962439181203603, + "learning_rate": 9.395763940006136e-06, + "loss": 0.7135, + "step": 5983 + }, + { + "epoch": 0.18340076008336398, + "grad_norm": 1.2221809390410017, + "learning_rate": 9.395527403045562e-06, + "loss": 0.519, + "step": 5984 + }, + { + "epoch": 0.18343140860610518, + "grad_norm": 2.195465596858276, + "learning_rate": 9.395290822774729e-06, + "loss": 0.8412, + "step": 5985 + }, + { + "epoch": 0.1834620571288464, + "grad_norm": 1.970648775183849, + "learning_rate": 9.395054199195974e-06, + "loss": 0.768, + "step": 5986 + }, + { + "epoch": 0.1834927056515876, + "grad_norm": 0.8603254859820966, + "learning_rate": 9.394817532311625e-06, + "loss": 0.4963, + "step": 5987 + }, + { + "epoch": 0.1835233541743288, + "grad_norm": 2.0122668599031055, + "learning_rate": 9.394580822124012e-06, + "loss": 0.8297, + "step": 5988 + }, + { + "epoch": 0.18355400269707, + "grad_norm": 0.8325015246783091, + "learning_rate": 9.39434406863547e-06, + "loss": 0.5079, + "step": 5989 + }, + { + "epoch": 0.1835846512198112, + "grad_norm": 1.701202195799264, + "learning_rate": 9.394107271848334e-06, + "loss": 0.6605, + "step": 5990 + }, + { + "epoch": 0.18361529974255242, + "grad_norm": 1.845093790440228, + "learning_rate": 9.393870431764933e-06, + "loss": 0.7094, + "step": 5991 + }, + { + "epoch": 0.18364594826529362, + "grad_norm": 0.8540739898338569, + "learning_rate": 9.393633548387603e-06, + "loss": 0.4977, + "step": 5992 + }, + { + "epoch": 0.18367659678803483, + "grad_norm": 0.8927996255042399, + "learning_rate": 9.393396621718678e-06, + "loss": 0.4874, + "step": 5993 + }, + { + "epoch": 0.183707245310776, + "grad_norm": 1.8393562158022714, + "learning_rate": 9.39315965176049e-06, + "loss": 0.8619, + "step": 5994 + }, + { + "epoch": 0.1837378938335172, + "grad_norm": 1.9121435488769403, + "learning_rate": 9.392922638515379e-06, + "loss": 0.7639, + "step": 5995 + }, + { + "epoch": 0.18376854235625842, + "grad_norm": 1.7600825987508888, + "learning_rate": 9.392685581985674e-06, + "loss": 0.7146, + "step": 5996 + }, + { + "epoch": 0.18379919087899962, + "grad_norm": 2.1332925373677325, + "learning_rate": 
9.392448482173717e-06, + "loss": 0.7871, + "step": 5997 + }, + { + "epoch": 0.18382983940174083, + "grad_norm": 1.8196935971884771, + "learning_rate": 9.392211339081839e-06, + "loss": 0.6258, + "step": 5998 + }, + { + "epoch": 0.18386048792448204, + "grad_norm": 1.9166275866857498, + "learning_rate": 9.39197415271238e-06, + "loss": 0.7268, + "step": 5999 + }, + { + "epoch": 0.18389113644722324, + "grad_norm": 2.153482561211565, + "learning_rate": 9.391736923067675e-06, + "loss": 0.6728, + "step": 6000 + }, + { + "epoch": 0.18392178496996445, + "grad_norm": 1.8481849400439936, + "learning_rate": 9.391499650150065e-06, + "loss": 0.7346, + "step": 6001 + }, + { + "epoch": 0.18395243349270565, + "grad_norm": 0.8947172521856843, + "learning_rate": 9.391262333961883e-06, + "loss": 0.5177, + "step": 6002 + }, + { + "epoch": 0.18398308201544686, + "grad_norm": 2.1733519378103745, + "learning_rate": 9.39102497450547e-06, + "loss": 0.6121, + "step": 6003 + }, + { + "epoch": 0.18401373053818806, + "grad_norm": 0.8330123935934787, + "learning_rate": 9.390787571783165e-06, + "loss": 0.4981, + "step": 6004 + }, + { + "epoch": 0.18404437906092927, + "grad_norm": 1.7395049057079408, + "learning_rate": 9.390550125797306e-06, + "loss": 0.7018, + "step": 6005 + }, + { + "epoch": 0.18407502758367048, + "grad_norm": 1.8229574602679626, + "learning_rate": 9.390312636550232e-06, + "loss": 0.7863, + "step": 6006 + }, + { + "epoch": 0.18410567610641168, + "grad_norm": 0.8923606622012665, + "learning_rate": 9.390075104044286e-06, + "loss": 0.4969, + "step": 6007 + }, + { + "epoch": 0.1841363246291529, + "grad_norm": 1.812353402005203, + "learning_rate": 9.389837528281807e-06, + "loss": 0.7106, + "step": 6008 + }, + { + "epoch": 0.1841669731518941, + "grad_norm": 2.5663881819056904, + "learning_rate": 9.389599909265135e-06, + "loss": 0.7319, + "step": 6009 + }, + { + "epoch": 0.18419762167463527, + "grad_norm": 1.969191516116712, + "learning_rate": 9.389362246996611e-06, + "loss": 0.759, + "step": 6010 + }, + { + "epoch": 0.18422827019737648, + "grad_norm": 1.9052967649504766, + "learning_rate": 9.38912454147858e-06, + "loss": 0.7453, + "step": 6011 + }, + { + "epoch": 0.18425891872011768, + "grad_norm": 1.742897634089683, + "learning_rate": 9.38888679271338e-06, + "loss": 0.7789, + "step": 6012 + }, + { + "epoch": 0.1842895672428589, + "grad_norm": 1.836263897709223, + "learning_rate": 9.388649000703357e-06, + "loss": 0.7003, + "step": 6013 + }, + { + "epoch": 0.1843202157656001, + "grad_norm": 1.982871954481266, + "learning_rate": 9.38841116545085e-06, + "loss": 0.729, + "step": 6014 + }, + { + "epoch": 0.1843508642883413, + "grad_norm": 1.848857878751707, + "learning_rate": 9.388173286958207e-06, + "loss": 0.7119, + "step": 6015 + }, + { + "epoch": 0.1843815128110825, + "grad_norm": 1.9974769163501627, + "learning_rate": 9.387935365227769e-06, + "loss": 0.7648, + "step": 6016 + }, + { + "epoch": 0.1844121613338237, + "grad_norm": 1.5672422741060017, + "learning_rate": 9.387697400261882e-06, + "loss": 0.6647, + "step": 6017 + }, + { + "epoch": 0.18444280985656492, + "grad_norm": 1.9365917585550299, + "learning_rate": 9.38745939206289e-06, + "loss": 0.7442, + "step": 6018 + }, + { + "epoch": 0.18447345837930612, + "grad_norm": 1.9339704237058664, + "learning_rate": 9.387221340633137e-06, + "loss": 0.7594, + "step": 6019 + }, + { + "epoch": 0.18450410690204733, + "grad_norm": 2.063422237191947, + "learning_rate": 9.386983245974972e-06, + "loss": 0.7134, + "step": 6020 + }, + { + "epoch": 0.18453475542478853, + 
"grad_norm": 1.6004546343852848, + "learning_rate": 9.386745108090736e-06, + "loss": 0.7793, + "step": 6021 + }, + { + "epoch": 0.18456540394752974, + "grad_norm": 0.9593048769648255, + "learning_rate": 9.38650692698278e-06, + "loss": 0.4913, + "step": 6022 + }, + { + "epoch": 0.18459605247027094, + "grad_norm": 1.722149653791059, + "learning_rate": 9.386268702653447e-06, + "loss": 0.6793, + "step": 6023 + }, + { + "epoch": 0.18462670099301215, + "grad_norm": 1.8741981610845067, + "learning_rate": 9.386030435105085e-06, + "loss": 0.7244, + "step": 6024 + }, + { + "epoch": 0.18465734951575333, + "grad_norm": 1.669154339619704, + "learning_rate": 9.385792124340045e-06, + "loss": 0.7032, + "step": 6025 + }, + { + "epoch": 0.18468799803849453, + "grad_norm": 2.0825856098066753, + "learning_rate": 9.385553770360674e-06, + "loss": 0.7057, + "step": 6026 + }, + { + "epoch": 0.18471864656123574, + "grad_norm": 1.8955900094195834, + "learning_rate": 9.385315373169319e-06, + "loss": 0.7381, + "step": 6027 + }, + { + "epoch": 0.18474929508397694, + "grad_norm": 1.8388494625354044, + "learning_rate": 9.385076932768328e-06, + "loss": 0.7832, + "step": 6028 + }, + { + "epoch": 0.18477994360671815, + "grad_norm": 0.8389900675218156, + "learning_rate": 9.384838449160055e-06, + "loss": 0.4922, + "step": 6029 + }, + { + "epoch": 0.18481059212945936, + "grad_norm": 1.670381625245891, + "learning_rate": 9.384599922346843e-06, + "loss": 0.7383, + "step": 6030 + }, + { + "epoch": 0.18484124065220056, + "grad_norm": 2.156665148794103, + "learning_rate": 9.384361352331048e-06, + "loss": 0.723, + "step": 6031 + }, + { + "epoch": 0.18487188917494177, + "grad_norm": 1.7447200961402756, + "learning_rate": 9.38412273911502e-06, + "loss": 0.7086, + "step": 6032 + }, + { + "epoch": 0.18490253769768297, + "grad_norm": 1.8069365856982655, + "learning_rate": 9.383884082701107e-06, + "loss": 0.7797, + "step": 6033 + }, + { + "epoch": 0.18493318622042418, + "grad_norm": 0.8577202690971957, + "learning_rate": 9.383645383091663e-06, + "loss": 0.5087, + "step": 6034 + }, + { + "epoch": 0.18496383474316538, + "grad_norm": 0.821211010963006, + "learning_rate": 9.383406640289041e-06, + "loss": 0.4995, + "step": 6035 + }, + { + "epoch": 0.1849944832659066, + "grad_norm": 2.0085648794720385, + "learning_rate": 9.383167854295589e-06, + "loss": 0.8597, + "step": 6036 + }, + { + "epoch": 0.1850251317886478, + "grad_norm": 1.5845697014470403, + "learning_rate": 9.382929025113665e-06, + "loss": 0.7506, + "step": 6037 + }, + { + "epoch": 0.185055780311389, + "grad_norm": 1.6257969812110467, + "learning_rate": 9.38269015274562e-06, + "loss": 0.68, + "step": 6038 + }, + { + "epoch": 0.1850864288341302, + "grad_norm": 1.7937707249320836, + "learning_rate": 9.382451237193806e-06, + "loss": 0.6587, + "step": 6039 + }, + { + "epoch": 0.1851170773568714, + "grad_norm": 1.7989994498252715, + "learning_rate": 9.382212278460578e-06, + "loss": 0.7346, + "step": 6040 + }, + { + "epoch": 0.1851477258796126, + "grad_norm": 0.8402352495185432, + "learning_rate": 9.381973276548292e-06, + "loss": 0.5008, + "step": 6041 + }, + { + "epoch": 0.1851783744023538, + "grad_norm": 1.825902516356122, + "learning_rate": 9.381734231459303e-06, + "loss": 0.7224, + "step": 6042 + }, + { + "epoch": 0.185209022925095, + "grad_norm": 1.9263478979417714, + "learning_rate": 9.381495143195966e-06, + "loss": 0.7253, + "step": 6043 + }, + { + "epoch": 0.1852396714478362, + "grad_norm": 0.833013181558193, + "learning_rate": 9.381256011760635e-06, + "loss": 0.4893, + "step": 
6044 + }, + { + "epoch": 0.1852703199705774, + "grad_norm": 1.856660696967754, + "learning_rate": 9.381016837155668e-06, + "loss": 0.7316, + "step": 6045 + }, + { + "epoch": 0.18530096849331862, + "grad_norm": 1.8851575038984068, + "learning_rate": 9.38077761938342e-06, + "loss": 0.7643, + "step": 6046 + }, + { + "epoch": 0.18533161701605982, + "grad_norm": 1.7398323282502746, + "learning_rate": 9.380538358446252e-06, + "loss": 0.7566, + "step": 6047 + }, + { + "epoch": 0.18536226553880103, + "grad_norm": 1.6181466671112867, + "learning_rate": 9.380299054346516e-06, + "loss": 0.6961, + "step": 6048 + }, + { + "epoch": 0.18539291406154224, + "grad_norm": 1.841349009315116, + "learning_rate": 9.380059707086573e-06, + "loss": 0.7834, + "step": 6049 + }, + { + "epoch": 0.18542356258428344, + "grad_norm": 1.576530756529921, + "learning_rate": 9.379820316668782e-06, + "loss": 0.7523, + "step": 6050 + }, + { + "epoch": 0.18545421110702465, + "grad_norm": 1.7352382740084888, + "learning_rate": 9.379580883095501e-06, + "loss": 0.6686, + "step": 6051 + }, + { + "epoch": 0.18548485962976585, + "grad_norm": 1.8530065473565982, + "learning_rate": 9.379341406369088e-06, + "loss": 0.7085, + "step": 6052 + }, + { + "epoch": 0.18551550815250706, + "grad_norm": 2.0386255814421075, + "learning_rate": 9.379101886491907e-06, + "loss": 0.6919, + "step": 6053 + }, + { + "epoch": 0.18554615667524826, + "grad_norm": 1.7016237163685035, + "learning_rate": 9.37886232346631e-06, + "loss": 0.6356, + "step": 6054 + }, + { + "epoch": 0.18557680519798947, + "grad_norm": 2.1893077156220984, + "learning_rate": 9.378622717294665e-06, + "loss": 0.7963, + "step": 6055 + }, + { + "epoch": 0.18560745372073065, + "grad_norm": 2.0147253924027395, + "learning_rate": 9.378383067979329e-06, + "loss": 0.7637, + "step": 6056 + }, + { + "epoch": 0.18563810224347185, + "grad_norm": 1.7836107723133456, + "learning_rate": 9.378143375522664e-06, + "loss": 0.7047, + "step": 6057 + }, + { + "epoch": 0.18566875076621306, + "grad_norm": 1.7101469590991971, + "learning_rate": 9.377903639927032e-06, + "loss": 0.7188, + "step": 6058 + }, + { + "epoch": 0.18569939928895426, + "grad_norm": 1.8014107871629048, + "learning_rate": 9.377663861194795e-06, + "loss": 0.8301, + "step": 6059 + }, + { + "epoch": 0.18573004781169547, + "grad_norm": 1.8458966794731964, + "learning_rate": 9.377424039328317e-06, + "loss": 0.6922, + "step": 6060 + }, + { + "epoch": 0.18576069633443668, + "grad_norm": 1.5422658453443157, + "learning_rate": 9.37718417432996e-06, + "loss": 0.7465, + "step": 6061 + }, + { + "epoch": 0.18579134485717788, + "grad_norm": 1.6674315345610147, + "learning_rate": 9.376944266202088e-06, + "loss": 0.7306, + "step": 6062 + }, + { + "epoch": 0.1858219933799191, + "grad_norm": 1.691178208778852, + "learning_rate": 9.376704314947062e-06, + "loss": 0.708, + "step": 6063 + }, + { + "epoch": 0.1858526419026603, + "grad_norm": 1.788966744306337, + "learning_rate": 9.376464320567251e-06, + "loss": 0.7999, + "step": 6064 + }, + { + "epoch": 0.1858832904254015, + "grad_norm": 1.9784248545658467, + "learning_rate": 9.376224283065017e-06, + "loss": 0.708, + "step": 6065 + }, + { + "epoch": 0.1859139389481427, + "grad_norm": 1.7292984697107316, + "learning_rate": 9.375984202442724e-06, + "loss": 0.7885, + "step": 6066 + }, + { + "epoch": 0.1859445874708839, + "grad_norm": 1.8290085882474019, + "learning_rate": 9.37574407870274e-06, + "loss": 0.7551, + "step": 6067 + }, + { + "epoch": 0.18597523599362512, + "grad_norm": 0.9897211602697353, + 
"learning_rate": 9.375503911847427e-06, + "loss": 0.5038, + "step": 6068 + }, + { + "epoch": 0.18600588451636632, + "grad_norm": 1.6120182012328828, + "learning_rate": 9.375263701879158e-06, + "loss": 0.7132, + "step": 6069 + }, + { + "epoch": 0.18603653303910753, + "grad_norm": 1.9654345369053197, + "learning_rate": 9.375023448800296e-06, + "loss": 0.7666, + "step": 6070 + }, + { + "epoch": 0.18606718156184873, + "grad_norm": 1.6762043152478683, + "learning_rate": 9.374783152613206e-06, + "loss": 0.7754, + "step": 6071 + }, + { + "epoch": 0.1860978300845899, + "grad_norm": 1.548859820105742, + "learning_rate": 9.374542813320261e-06, + "loss": 0.6588, + "step": 6072 + }, + { + "epoch": 0.18612847860733112, + "grad_norm": 1.6359812886937461, + "learning_rate": 9.374302430923827e-06, + "loss": 0.7415, + "step": 6073 + }, + { + "epoch": 0.18615912713007232, + "grad_norm": 1.7689550867590582, + "learning_rate": 9.37406200542627e-06, + "loss": 0.7962, + "step": 6074 + }, + { + "epoch": 0.18618977565281353, + "grad_norm": 1.7736617580175926, + "learning_rate": 9.373821536829962e-06, + "loss": 0.8057, + "step": 6075 + }, + { + "epoch": 0.18622042417555473, + "grad_norm": 1.755750652376938, + "learning_rate": 9.37358102513727e-06, + "loss": 0.7844, + "step": 6076 + }, + { + "epoch": 0.18625107269829594, + "grad_norm": 1.8544846709544396, + "learning_rate": 9.373340470350567e-06, + "loss": 0.7564, + "step": 6077 + }, + { + "epoch": 0.18628172122103714, + "grad_norm": 1.9488230475650627, + "learning_rate": 9.373099872472219e-06, + "loss": 0.8228, + "step": 6078 + }, + { + "epoch": 0.18631236974377835, + "grad_norm": 1.7492441910677607, + "learning_rate": 9.3728592315046e-06, + "loss": 0.7258, + "step": 6079 + }, + { + "epoch": 0.18634301826651956, + "grad_norm": 0.8856880783147925, + "learning_rate": 9.37261854745008e-06, + "loss": 0.4849, + "step": 6080 + }, + { + "epoch": 0.18637366678926076, + "grad_norm": 1.8066370642828071, + "learning_rate": 9.372377820311032e-06, + "loss": 0.7952, + "step": 6081 + }, + { + "epoch": 0.18640431531200197, + "grad_norm": 2.0020768887726836, + "learning_rate": 9.372137050089826e-06, + "loss": 0.8071, + "step": 6082 + }, + { + "epoch": 0.18643496383474317, + "grad_norm": 1.721043845979057, + "learning_rate": 9.371896236788834e-06, + "loss": 0.7752, + "step": 6083 + }, + { + "epoch": 0.18646561235748438, + "grad_norm": 1.7902813011835992, + "learning_rate": 9.37165538041043e-06, + "loss": 0.6857, + "step": 6084 + }, + { + "epoch": 0.18649626088022558, + "grad_norm": 1.70382728469193, + "learning_rate": 9.371414480956988e-06, + "loss": 0.7435, + "step": 6085 + }, + { + "epoch": 0.1865269094029668, + "grad_norm": 1.6890227259203374, + "learning_rate": 9.37117353843088e-06, + "loss": 0.7258, + "step": 6086 + }, + { + "epoch": 0.18655755792570797, + "grad_norm": 1.8834756565230149, + "learning_rate": 9.37093255283448e-06, + "loss": 0.6676, + "step": 6087 + }, + { + "epoch": 0.18658820644844917, + "grad_norm": 1.994028523692978, + "learning_rate": 9.370691524170166e-06, + "loss": 0.7946, + "step": 6088 + }, + { + "epoch": 0.18661885497119038, + "grad_norm": 1.8888748671077595, + "learning_rate": 9.370450452440307e-06, + "loss": 0.687, + "step": 6089 + }, + { + "epoch": 0.18664950349393158, + "grad_norm": 1.622536653862141, + "learning_rate": 9.370209337647282e-06, + "loss": 0.6612, + "step": 6090 + }, + { + "epoch": 0.1866801520166728, + "grad_norm": 1.9415217779691185, + "learning_rate": 9.369968179793467e-06, + "loss": 0.8009, + "step": 6091 + }, + { + "epoch": 
0.186710800539414, + "grad_norm": 1.599342104516634, + "learning_rate": 9.369726978881237e-06, + "loss": 0.6958, + "step": 6092 + }, + { + "epoch": 0.1867414490621552, + "grad_norm": 2.0044331543305653, + "learning_rate": 9.369485734912971e-06, + "loss": 0.8118, + "step": 6093 + }, + { + "epoch": 0.1867720975848964, + "grad_norm": 1.8486833206719242, + "learning_rate": 9.369244447891041e-06, + "loss": 0.6606, + "step": 6094 + }, + { + "epoch": 0.1868027461076376, + "grad_norm": 1.8827300610216104, + "learning_rate": 9.36900311781783e-06, + "loss": 0.7611, + "step": 6095 + }, + { + "epoch": 0.18683339463037882, + "grad_norm": 1.7335954522335493, + "learning_rate": 9.368761744695711e-06, + "loss": 0.6498, + "step": 6096 + }, + { + "epoch": 0.18686404315312002, + "grad_norm": 1.9038276072333022, + "learning_rate": 9.368520328527066e-06, + "loss": 0.6848, + "step": 6097 + }, + { + "epoch": 0.18689469167586123, + "grad_norm": 1.6820185075170795, + "learning_rate": 9.368278869314274e-06, + "loss": 0.7581, + "step": 6098 + }, + { + "epoch": 0.18692534019860244, + "grad_norm": 2.0570538614580998, + "learning_rate": 9.36803736705971e-06, + "loss": 0.8138, + "step": 6099 + }, + { + "epoch": 0.18695598872134364, + "grad_norm": 1.798685742305569, + "learning_rate": 9.367795821765758e-06, + "loss": 0.7717, + "step": 6100 + }, + { + "epoch": 0.18698663724408485, + "grad_norm": 2.2954320611767707, + "learning_rate": 9.367554233434795e-06, + "loss": 0.7601, + "step": 6101 + }, + { + "epoch": 0.18701728576682605, + "grad_norm": 1.7689933351695182, + "learning_rate": 9.367312602069203e-06, + "loss": 0.7895, + "step": 6102 + }, + { + "epoch": 0.18704793428956723, + "grad_norm": 1.8000485858312056, + "learning_rate": 9.367070927671361e-06, + "loss": 0.7309, + "step": 6103 + }, + { + "epoch": 0.18707858281230844, + "grad_norm": 1.7709806810000908, + "learning_rate": 9.366829210243655e-06, + "loss": 0.7808, + "step": 6104 + }, + { + "epoch": 0.18710923133504964, + "grad_norm": 0.9310030585571947, + "learning_rate": 9.366587449788463e-06, + "loss": 0.5183, + "step": 6105 + }, + { + "epoch": 0.18713987985779085, + "grad_norm": 1.848203804644696, + "learning_rate": 9.366345646308165e-06, + "loss": 0.7746, + "step": 6106 + }, + { + "epoch": 0.18717052838053205, + "grad_norm": 1.798279340381654, + "learning_rate": 9.366103799805148e-06, + "loss": 0.8005, + "step": 6107 + }, + { + "epoch": 0.18720117690327326, + "grad_norm": 1.7328179438743792, + "learning_rate": 9.365861910281795e-06, + "loss": 0.6666, + "step": 6108 + }, + { + "epoch": 0.18723182542601446, + "grad_norm": 1.7324310063564776, + "learning_rate": 9.365619977740484e-06, + "loss": 0.7349, + "step": 6109 + }, + { + "epoch": 0.18726247394875567, + "grad_norm": 1.9439283414558126, + "learning_rate": 9.365378002183605e-06, + "loss": 0.7251, + "step": 6110 + }, + { + "epoch": 0.18729312247149688, + "grad_norm": 1.7222199916072451, + "learning_rate": 9.365135983613537e-06, + "loss": 0.7686, + "step": 6111 + }, + { + "epoch": 0.18732377099423808, + "grad_norm": 0.969122359930575, + "learning_rate": 9.36489392203267e-06, + "loss": 0.5151, + "step": 6112 + }, + { + "epoch": 0.1873544195169793, + "grad_norm": 1.8966023792653937, + "learning_rate": 9.364651817443384e-06, + "loss": 0.8182, + "step": 6113 + }, + { + "epoch": 0.1873850680397205, + "grad_norm": 1.8373051616548042, + "learning_rate": 9.364409669848069e-06, + "loss": 0.687, + "step": 6114 + }, + { + "epoch": 0.1874157165624617, + "grad_norm": 1.9391163905166362, + "learning_rate": 9.364167479249108e-06, 
+ "loss": 0.7115, + "step": 6115 + }, + { + "epoch": 0.1874463650852029, + "grad_norm": 0.8195443425408866, + "learning_rate": 9.363925245648888e-06, + "loss": 0.4889, + "step": 6116 + }, + { + "epoch": 0.1874770136079441, + "grad_norm": 0.8384404345906605, + "learning_rate": 9.363682969049797e-06, + "loss": 0.5239, + "step": 6117 + }, + { + "epoch": 0.1875076621306853, + "grad_norm": 1.8935740750223533, + "learning_rate": 9.363440649454218e-06, + "loss": 0.7449, + "step": 6118 + }, + { + "epoch": 0.1875383106534265, + "grad_norm": 1.6960198792873649, + "learning_rate": 9.363198286864545e-06, + "loss": 0.7247, + "step": 6119 + }, + { + "epoch": 0.1875689591761677, + "grad_norm": 0.8084156424576531, + "learning_rate": 9.362955881283162e-06, + "loss": 0.4831, + "step": 6120 + }, + { + "epoch": 0.1875996076989089, + "grad_norm": 0.82052421049134, + "learning_rate": 9.36271343271246e-06, + "loss": 0.5038, + "step": 6121 + }, + { + "epoch": 0.1876302562216501, + "grad_norm": 1.6423208088198396, + "learning_rate": 9.362470941154825e-06, + "loss": 0.7883, + "step": 6122 + }, + { + "epoch": 0.18766090474439132, + "grad_norm": 1.7262668208335594, + "learning_rate": 9.362228406612645e-06, + "loss": 0.8193, + "step": 6123 + }, + { + "epoch": 0.18769155326713252, + "grad_norm": 2.038993727489435, + "learning_rate": 9.361985829088316e-06, + "loss": 0.8212, + "step": 6124 + }, + { + "epoch": 0.18772220178987373, + "grad_norm": 1.6777009561119822, + "learning_rate": 9.361743208584223e-06, + "loss": 0.6883, + "step": 6125 + }, + { + "epoch": 0.18775285031261493, + "grad_norm": 1.785008210008594, + "learning_rate": 9.36150054510276e-06, + "loss": 0.7119, + "step": 6126 + }, + { + "epoch": 0.18778349883535614, + "grad_norm": 1.7197690315895087, + "learning_rate": 9.361257838646313e-06, + "loss": 0.6982, + "step": 6127 + }, + { + "epoch": 0.18781414735809734, + "grad_norm": 1.9213770060977193, + "learning_rate": 9.361015089217277e-06, + "loss": 0.8237, + "step": 6128 + }, + { + "epoch": 0.18784479588083855, + "grad_norm": 1.676657033580465, + "learning_rate": 9.360772296818046e-06, + "loss": 0.7073, + "step": 6129 + }, + { + "epoch": 0.18787544440357976, + "grad_norm": 1.687821946614801, + "learning_rate": 9.360529461451009e-06, + "loss": 0.7341, + "step": 6130 + }, + { + "epoch": 0.18790609292632096, + "grad_norm": 1.7858395163733738, + "learning_rate": 9.36028658311856e-06, + "loss": 0.6904, + "step": 6131 + }, + { + "epoch": 0.18793674144906217, + "grad_norm": 2.1194127646186605, + "learning_rate": 9.360043661823089e-06, + "loss": 0.7806, + "step": 6132 + }, + { + "epoch": 0.18796738997180337, + "grad_norm": 1.829260013788456, + "learning_rate": 9.359800697566994e-06, + "loss": 0.7754, + "step": 6133 + }, + { + "epoch": 0.18799803849454455, + "grad_norm": 1.9102897715286655, + "learning_rate": 9.359557690352667e-06, + "loss": 0.7567, + "step": 6134 + }, + { + "epoch": 0.18802868701728576, + "grad_norm": 2.0319670364950895, + "learning_rate": 9.359314640182504e-06, + "loss": 0.7084, + "step": 6135 + }, + { + "epoch": 0.18805933554002696, + "grad_norm": 1.6321504320814058, + "learning_rate": 9.359071547058898e-06, + "loss": 0.7208, + "step": 6136 + }, + { + "epoch": 0.18808998406276817, + "grad_norm": 2.1877596171036355, + "learning_rate": 9.358828410984244e-06, + "loss": 0.6722, + "step": 6137 + }, + { + "epoch": 0.18812063258550937, + "grad_norm": 1.5486497356568654, + "learning_rate": 9.358585231960938e-06, + "loss": 0.6409, + "step": 6138 + }, + { + "epoch": 0.18815128110825058, + "grad_norm": 
1.9047311224034482, + "learning_rate": 9.358342009991377e-06, + "loss": 0.753, + "step": 6139 + }, + { + "epoch": 0.18818192963099178, + "grad_norm": 1.8046635471113661, + "learning_rate": 9.358098745077957e-06, + "loss": 0.6912, + "step": 6140 + }, + { + "epoch": 0.188212578153733, + "grad_norm": 1.7540363410683188, + "learning_rate": 9.357855437223075e-06, + "loss": 0.7383, + "step": 6141 + }, + { + "epoch": 0.1882432266764742, + "grad_norm": 1.7627870802166785, + "learning_rate": 9.357612086429129e-06, + "loss": 0.6523, + "step": 6142 + }, + { + "epoch": 0.1882738751992154, + "grad_norm": 1.8886847121643149, + "learning_rate": 9.357368692698515e-06, + "loss": 0.7444, + "step": 6143 + }, + { + "epoch": 0.1883045237219566, + "grad_norm": 1.7088892468576784, + "learning_rate": 9.357125256033634e-06, + "loss": 0.6785, + "step": 6144 + }, + { + "epoch": 0.1883351722446978, + "grad_norm": 1.718408522359068, + "learning_rate": 9.356881776436881e-06, + "loss": 0.7541, + "step": 6145 + }, + { + "epoch": 0.18836582076743902, + "grad_norm": 1.9688460726780437, + "learning_rate": 9.356638253910659e-06, + "loss": 0.7443, + "step": 6146 + }, + { + "epoch": 0.18839646929018022, + "grad_norm": 1.7967535800114953, + "learning_rate": 9.356394688457364e-06, + "loss": 0.8145, + "step": 6147 + }, + { + "epoch": 0.18842711781292143, + "grad_norm": 1.6436884151941373, + "learning_rate": 9.356151080079399e-06, + "loss": 0.6868, + "step": 6148 + }, + { + "epoch": 0.1884577663356626, + "grad_norm": 1.7316689241750143, + "learning_rate": 9.355907428779163e-06, + "loss": 0.8046, + "step": 6149 + }, + { + "epoch": 0.1884884148584038, + "grad_norm": 0.9439582089663613, + "learning_rate": 9.355663734559055e-06, + "loss": 0.4984, + "step": 6150 + }, + { + "epoch": 0.18851906338114502, + "grad_norm": 1.7469018651059167, + "learning_rate": 9.355419997421478e-06, + "loss": 0.6527, + "step": 6151 + }, + { + "epoch": 0.18854971190388622, + "grad_norm": 0.8912834046802529, + "learning_rate": 9.355176217368833e-06, + "loss": 0.4912, + "step": 6152 + }, + { + "epoch": 0.18858036042662743, + "grad_norm": 0.8426714705971972, + "learning_rate": 9.354932394403524e-06, + "loss": 0.5008, + "step": 6153 + }, + { + "epoch": 0.18861100894936864, + "grad_norm": 1.9443073634136752, + "learning_rate": 9.354688528527952e-06, + "loss": 0.6769, + "step": 6154 + }, + { + "epoch": 0.18864165747210984, + "grad_norm": 2.0449156432800444, + "learning_rate": 9.354444619744519e-06, + "loss": 0.7719, + "step": 6155 + }, + { + "epoch": 0.18867230599485105, + "grad_norm": 1.876589031273853, + "learning_rate": 9.354200668055629e-06, + "loss": 0.7807, + "step": 6156 + }, + { + "epoch": 0.18870295451759225, + "grad_norm": 1.7195601401050298, + "learning_rate": 9.353956673463684e-06, + "loss": 0.6668, + "step": 6157 + }, + { + "epoch": 0.18873360304033346, + "grad_norm": 0.9668967691941002, + "learning_rate": 9.353712635971093e-06, + "loss": 0.4906, + "step": 6158 + }, + { + "epoch": 0.18876425156307466, + "grad_norm": 1.8820092509510584, + "learning_rate": 9.353468555580256e-06, + "loss": 0.6636, + "step": 6159 + }, + { + "epoch": 0.18879490008581587, + "grad_norm": 2.601355231563424, + "learning_rate": 9.353224432293578e-06, + "loss": 0.6785, + "step": 6160 + }, + { + "epoch": 0.18882554860855708, + "grad_norm": 0.8429937029479676, + "learning_rate": 9.352980266113468e-06, + "loss": 0.5069, + "step": 6161 + }, + { + "epoch": 0.18885619713129828, + "grad_norm": 1.8389105157254575, + "learning_rate": 9.352736057042329e-06, + "loss": 0.7641, + "step": 
6162 + }, + { + "epoch": 0.1888868456540395, + "grad_norm": 1.8538665876946128, + "learning_rate": 9.352491805082568e-06, + "loss": 0.7278, + "step": 6163 + }, + { + "epoch": 0.1889174941767807, + "grad_norm": 2.194001904812742, + "learning_rate": 9.352247510236591e-06, + "loss": 0.7429, + "step": 6164 + }, + { + "epoch": 0.18894814269952187, + "grad_norm": 1.7558059018969299, + "learning_rate": 9.352003172506807e-06, + "loss": 0.7544, + "step": 6165 + }, + { + "epoch": 0.18897879122226308, + "grad_norm": 1.7197599158390502, + "learning_rate": 9.351758791895621e-06, + "loss": 0.7103, + "step": 6166 + }, + { + "epoch": 0.18900943974500428, + "grad_norm": 1.9246468788254343, + "learning_rate": 9.351514368405442e-06, + "loss": 0.7761, + "step": 6167 + }, + { + "epoch": 0.1890400882677455, + "grad_norm": 1.628341487261947, + "learning_rate": 9.35126990203868e-06, + "loss": 0.7267, + "step": 6168 + }, + { + "epoch": 0.1890707367904867, + "grad_norm": 1.7065184035349217, + "learning_rate": 9.35102539279774e-06, + "loss": 0.6718, + "step": 6169 + }, + { + "epoch": 0.1891013853132279, + "grad_norm": 1.6568021133601194, + "learning_rate": 9.350780840685036e-06, + "loss": 0.7457, + "step": 6170 + }, + { + "epoch": 0.1891320338359691, + "grad_norm": 1.8194901673301802, + "learning_rate": 9.350536245702975e-06, + "loss": 0.7875, + "step": 6171 + }, + { + "epoch": 0.1891626823587103, + "grad_norm": 1.828642653900024, + "learning_rate": 9.350291607853965e-06, + "loss": 0.8707, + "step": 6172 + }, + { + "epoch": 0.18919333088145152, + "grad_norm": 1.8628553152723695, + "learning_rate": 9.350046927140422e-06, + "loss": 0.7656, + "step": 6173 + }, + { + "epoch": 0.18922397940419272, + "grad_norm": 1.7752964929036508, + "learning_rate": 9.34980220356475e-06, + "loss": 0.7352, + "step": 6174 + }, + { + "epoch": 0.18925462792693393, + "grad_norm": 1.9859700783384453, + "learning_rate": 9.349557437129366e-06, + "loss": 0.739, + "step": 6175 + }, + { + "epoch": 0.18928527644967513, + "grad_norm": 1.3855412841243013, + "learning_rate": 9.34931262783668e-06, + "loss": 0.5031, + "step": 6176 + }, + { + "epoch": 0.18931592497241634, + "grad_norm": 1.6086710973147613, + "learning_rate": 9.349067775689102e-06, + "loss": 0.7677, + "step": 6177 + }, + { + "epoch": 0.18934657349515754, + "grad_norm": 1.718972112039466, + "learning_rate": 9.348822880689049e-06, + "loss": 0.7431, + "step": 6178 + }, + { + "epoch": 0.18937722201789875, + "grad_norm": 1.6106950612528455, + "learning_rate": 9.34857794283893e-06, + "loss": 0.6953, + "step": 6179 + }, + { + "epoch": 0.18940787054063993, + "grad_norm": 1.7659008378294443, + "learning_rate": 9.34833296214116e-06, + "loss": 0.8316, + "step": 6180 + }, + { + "epoch": 0.18943851906338113, + "grad_norm": 1.7018005444621682, + "learning_rate": 9.348087938598153e-06, + "loss": 0.7641, + "step": 6181 + }, + { + "epoch": 0.18946916758612234, + "grad_norm": 1.1643154422012874, + "learning_rate": 9.347842872212323e-06, + "loss": 0.5009, + "step": 6182 + }, + { + "epoch": 0.18949981610886354, + "grad_norm": 1.7396969621866485, + "learning_rate": 9.347597762986085e-06, + "loss": 0.7323, + "step": 6183 + }, + { + "epoch": 0.18953046463160475, + "grad_norm": 1.646568612226513, + "learning_rate": 9.347352610921853e-06, + "loss": 0.6545, + "step": 6184 + }, + { + "epoch": 0.18956111315434596, + "grad_norm": 0.8808753538992078, + "learning_rate": 9.347107416022043e-06, + "loss": 0.4903, + "step": 6185 + }, + { + "epoch": 0.18959176167708716, + "grad_norm": 1.6814908905661965, + "learning_rate": 
9.346862178289073e-06, + "loss": 0.7255, + "step": 6186 + }, + { + "epoch": 0.18962241019982837, + "grad_norm": 1.8926781324860884, + "learning_rate": 9.346616897725357e-06, + "loss": 0.7358, + "step": 6187 + }, + { + "epoch": 0.18965305872256957, + "grad_norm": 1.9972575738341245, + "learning_rate": 9.346371574333312e-06, + "loss": 0.851, + "step": 6188 + }, + { + "epoch": 0.18968370724531078, + "grad_norm": 1.9953282136975272, + "learning_rate": 9.346126208115358e-06, + "loss": 0.7875, + "step": 6189 + }, + { + "epoch": 0.18971435576805198, + "grad_norm": 1.9136719678036673, + "learning_rate": 9.345880799073908e-06, + "loss": 0.7145, + "step": 6190 + }, + { + "epoch": 0.1897450042907932, + "grad_norm": 0.931861939477264, + "learning_rate": 9.345635347211383e-06, + "loss": 0.5106, + "step": 6191 + }, + { + "epoch": 0.1897756528135344, + "grad_norm": 1.5917792329578744, + "learning_rate": 9.345389852530201e-06, + "loss": 0.7632, + "step": 6192 + }, + { + "epoch": 0.1898063013362756, + "grad_norm": 1.7472817583206681, + "learning_rate": 9.345144315032783e-06, + "loss": 0.8247, + "step": 6193 + }, + { + "epoch": 0.1898369498590168, + "grad_norm": 1.8251363474125493, + "learning_rate": 9.344898734721544e-06, + "loss": 0.7404, + "step": 6194 + }, + { + "epoch": 0.189867598381758, + "grad_norm": 1.696202691107418, + "learning_rate": 9.344653111598907e-06, + "loss": 0.7319, + "step": 6195 + }, + { + "epoch": 0.1898982469044992, + "grad_norm": 2.010549900096976, + "learning_rate": 9.344407445667292e-06, + "loss": 0.8267, + "step": 6196 + }, + { + "epoch": 0.1899288954272404, + "grad_norm": 0.8192870111231415, + "learning_rate": 9.344161736929116e-06, + "loss": 0.5017, + "step": 6197 + }, + { + "epoch": 0.1899595439499816, + "grad_norm": 0.9107771458572474, + "learning_rate": 9.343915985386806e-06, + "loss": 0.5153, + "step": 6198 + }, + { + "epoch": 0.1899901924727228, + "grad_norm": 1.7677295137466806, + "learning_rate": 9.34367019104278e-06, + "loss": 0.7234, + "step": 6199 + }, + { + "epoch": 0.190020840995464, + "grad_norm": 0.779491021979682, + "learning_rate": 9.343424353899459e-06, + "loss": 0.4934, + "step": 6200 + }, + { + "epoch": 0.19005148951820522, + "grad_norm": 1.9187869647911355, + "learning_rate": 9.343178473959266e-06, + "loss": 0.7205, + "step": 6201 + }, + { + "epoch": 0.19008213804094642, + "grad_norm": 1.6765333938715308, + "learning_rate": 9.342932551224626e-06, + "loss": 0.7143, + "step": 6202 + }, + { + "epoch": 0.19011278656368763, + "grad_norm": 2.054705058560816, + "learning_rate": 9.34268658569796e-06, + "loss": 0.7427, + "step": 6203 + }, + { + "epoch": 0.19014343508642884, + "grad_norm": 0.8680590196715495, + "learning_rate": 9.34244057738169e-06, + "loss": 0.4977, + "step": 6204 + }, + { + "epoch": 0.19017408360917004, + "grad_norm": 1.708274171053454, + "learning_rate": 9.342194526278243e-06, + "loss": 0.7417, + "step": 6205 + }, + { + "epoch": 0.19020473213191125, + "grad_norm": 1.799838635469959, + "learning_rate": 9.341948432390044e-06, + "loss": 0.6736, + "step": 6206 + }, + { + "epoch": 0.19023538065465245, + "grad_norm": 1.6717666277967047, + "learning_rate": 9.341702295719515e-06, + "loss": 0.7409, + "step": 6207 + }, + { + "epoch": 0.19026602917739366, + "grad_norm": 1.8382838771964893, + "learning_rate": 9.341456116269084e-06, + "loss": 0.7663, + "step": 6208 + }, + { + "epoch": 0.19029667770013486, + "grad_norm": 1.816920907232303, + "learning_rate": 9.341209894041173e-06, + "loss": 0.7654, + "step": 6209 + }, + { + "epoch": 0.19032732622287607, + 
"grad_norm": 1.7277701092692686, + "learning_rate": 9.340963629038208e-06, + "loss": 0.6896, + "step": 6210 + }, + { + "epoch": 0.19035797474561725, + "grad_norm": 1.8391430685658927, + "learning_rate": 9.340717321262622e-06, + "loss": 0.8165, + "step": 6211 + }, + { + "epoch": 0.19038862326835845, + "grad_norm": 1.9397876781098211, + "learning_rate": 9.340470970716836e-06, + "loss": 0.6658, + "step": 6212 + }, + { + "epoch": 0.19041927179109966, + "grad_norm": 1.8282494856823142, + "learning_rate": 9.340224577403278e-06, + "loss": 0.7429, + "step": 6213 + }, + { + "epoch": 0.19044992031384086, + "grad_norm": 1.7105608602451499, + "learning_rate": 9.339978141324378e-06, + "loss": 0.6955, + "step": 6214 + }, + { + "epoch": 0.19048056883658207, + "grad_norm": 1.7738826287896317, + "learning_rate": 9.339731662482564e-06, + "loss": 0.6999, + "step": 6215 + }, + { + "epoch": 0.19051121735932328, + "grad_norm": 2.5007929335233903, + "learning_rate": 9.339485140880261e-06, + "loss": 0.829, + "step": 6216 + }, + { + "epoch": 0.19054186588206448, + "grad_norm": 1.9367657217238041, + "learning_rate": 9.339238576519902e-06, + "loss": 0.6985, + "step": 6217 + }, + { + "epoch": 0.1905725144048057, + "grad_norm": 1.7265391949864866, + "learning_rate": 9.338991969403914e-06, + "loss": 0.7922, + "step": 6218 + }, + { + "epoch": 0.1906031629275469, + "grad_norm": 1.5946656781701134, + "learning_rate": 9.33874531953473e-06, + "loss": 0.6496, + "step": 6219 + }, + { + "epoch": 0.1906338114502881, + "grad_norm": 1.7730188029781184, + "learning_rate": 9.338498626914776e-06, + "loss": 0.7919, + "step": 6220 + }, + { + "epoch": 0.1906644599730293, + "grad_norm": 1.9870790181781746, + "learning_rate": 9.338251891546486e-06, + "loss": 0.7969, + "step": 6221 + }, + { + "epoch": 0.1906951084957705, + "grad_norm": 1.5785812173225584, + "learning_rate": 9.33800511343229e-06, + "loss": 0.6983, + "step": 6222 + }, + { + "epoch": 0.19072575701851172, + "grad_norm": 1.6829722048023845, + "learning_rate": 9.337758292574622e-06, + "loss": 0.7752, + "step": 6223 + }, + { + "epoch": 0.19075640554125292, + "grad_norm": 1.7125624991615183, + "learning_rate": 9.337511428975908e-06, + "loss": 0.7846, + "step": 6224 + }, + { + "epoch": 0.19078705406399413, + "grad_norm": 1.7609618034108836, + "learning_rate": 9.337264522638584e-06, + "loss": 0.8537, + "step": 6225 + }, + { + "epoch": 0.19081770258673533, + "grad_norm": 1.8368499801733593, + "learning_rate": 9.337017573565086e-06, + "loss": 0.768, + "step": 6226 + }, + { + "epoch": 0.1908483511094765, + "grad_norm": 1.9432676881332314, + "learning_rate": 9.336770581757844e-06, + "loss": 0.7559, + "step": 6227 + }, + { + "epoch": 0.19087899963221772, + "grad_norm": 1.9776956816408695, + "learning_rate": 9.336523547219289e-06, + "loss": 0.7813, + "step": 6228 + }, + { + "epoch": 0.19090964815495892, + "grad_norm": 1.8699445138466688, + "learning_rate": 9.33627646995186e-06, + "loss": 0.7922, + "step": 6229 + }, + { + "epoch": 0.19094029667770013, + "grad_norm": 1.1056146436261347, + "learning_rate": 9.336029349957989e-06, + "loss": 0.5206, + "step": 6230 + }, + { + "epoch": 0.19097094520044133, + "grad_norm": 1.8579580393337352, + "learning_rate": 9.335782187240111e-06, + "loss": 0.6694, + "step": 6231 + }, + { + "epoch": 0.19100159372318254, + "grad_norm": 1.737371313969081, + "learning_rate": 9.335534981800662e-06, + "loss": 0.7502, + "step": 6232 + }, + { + "epoch": 0.19103224224592374, + "grad_norm": 1.7728162818200195, + "learning_rate": 9.335287733642078e-06, + "loss": 0.7715, 
+ "step": 6233 + }, + { + "epoch": 0.19106289076866495, + "grad_norm": 2.145221847890105, + "learning_rate": 9.335040442766794e-06, + "loss": 0.6896, + "step": 6234 + }, + { + "epoch": 0.19109353929140616, + "grad_norm": 1.8236999649652217, + "learning_rate": 9.334793109177248e-06, + "loss": 0.7181, + "step": 6235 + }, + { + "epoch": 0.19112418781414736, + "grad_norm": 1.8832138990125484, + "learning_rate": 9.334545732875876e-06, + "loss": 0.702, + "step": 6236 + }, + { + "epoch": 0.19115483633688857, + "grad_norm": 1.8244180710085083, + "learning_rate": 9.334298313865115e-06, + "loss": 0.7763, + "step": 6237 + }, + { + "epoch": 0.19118548485962977, + "grad_norm": 1.7633631005118275, + "learning_rate": 9.334050852147404e-06, + "loss": 0.7092, + "step": 6238 + }, + { + "epoch": 0.19121613338237098, + "grad_norm": 1.7577861780001114, + "learning_rate": 9.333803347725184e-06, + "loss": 0.6978, + "step": 6239 + }, + { + "epoch": 0.19124678190511218, + "grad_norm": 2.1044025619447795, + "learning_rate": 9.333555800600888e-06, + "loss": 0.7637, + "step": 6240 + }, + { + "epoch": 0.1912774304278534, + "grad_norm": 1.648943182493622, + "learning_rate": 9.333308210776959e-06, + "loss": 0.8788, + "step": 6241 + }, + { + "epoch": 0.19130807895059457, + "grad_norm": 1.7245882618941462, + "learning_rate": 9.333060578255833e-06, + "loss": 0.7327, + "step": 6242 + }, + { + "epoch": 0.19133872747333577, + "grad_norm": 1.8344441855829603, + "learning_rate": 9.332812903039954e-06, + "loss": 0.7506, + "step": 6243 + }, + { + "epoch": 0.19136937599607698, + "grad_norm": 1.747890701144689, + "learning_rate": 9.332565185131762e-06, + "loss": 0.7112, + "step": 6244 + }, + { + "epoch": 0.19140002451881818, + "grad_norm": 1.7439334527558499, + "learning_rate": 9.332317424533696e-06, + "loss": 0.6214, + "step": 6245 + }, + { + "epoch": 0.1914306730415594, + "grad_norm": 2.054532761916262, + "learning_rate": 9.332069621248199e-06, + "loss": 0.6803, + "step": 6246 + }, + { + "epoch": 0.1914613215643006, + "grad_norm": 1.7382818191818101, + "learning_rate": 9.33182177527771e-06, + "loss": 0.7851, + "step": 6247 + }, + { + "epoch": 0.1914919700870418, + "grad_norm": 1.665879293483115, + "learning_rate": 9.331573886624672e-06, + "loss": 0.626, + "step": 6248 + }, + { + "epoch": 0.191522618609783, + "grad_norm": 1.9225199334434353, + "learning_rate": 9.33132595529153e-06, + "loss": 0.6566, + "step": 6249 + }, + { + "epoch": 0.1915532671325242, + "grad_norm": 1.8328710915903794, + "learning_rate": 9.331077981280724e-06, + "loss": 0.7981, + "step": 6250 + }, + { + "epoch": 0.19158391565526542, + "grad_norm": 1.8536834779868059, + "learning_rate": 9.330829964594698e-06, + "loss": 0.7098, + "step": 6251 + }, + { + "epoch": 0.19161456417800662, + "grad_norm": 1.7684617232667736, + "learning_rate": 9.330581905235898e-06, + "loss": 0.6886, + "step": 6252 + }, + { + "epoch": 0.19164521270074783, + "grad_norm": 1.1294117032601214, + "learning_rate": 9.330333803206766e-06, + "loss": 0.5055, + "step": 6253 + }, + { + "epoch": 0.19167586122348904, + "grad_norm": 1.9150655787853286, + "learning_rate": 9.330085658509747e-06, + "loss": 0.7152, + "step": 6254 + }, + { + "epoch": 0.19170650974623024, + "grad_norm": 1.9314951354709982, + "learning_rate": 9.329837471147286e-06, + "loss": 0.7192, + "step": 6255 + }, + { + "epoch": 0.19173715826897145, + "grad_norm": 1.6137482424685203, + "learning_rate": 9.329589241121828e-06, + "loss": 0.7262, + "step": 6256 + }, + { + "epoch": 0.19176780679171265, + "grad_norm": 1.7977042906576934, + 
"learning_rate": 9.32934096843582e-06, + "loss": 0.7805, + "step": 6257 + }, + { + "epoch": 0.19179845531445383, + "grad_norm": 1.7006130479280352, + "learning_rate": 9.329092653091708e-06, + "loss": 0.7324, + "step": 6258 + }, + { + "epoch": 0.19182910383719504, + "grad_norm": 1.6501029637133173, + "learning_rate": 9.328844295091938e-06, + "loss": 0.7011, + "step": 6259 + }, + { + "epoch": 0.19185975235993624, + "grad_norm": 0.9525789244555942, + "learning_rate": 9.328595894438958e-06, + "loss": 0.4772, + "step": 6260 + }, + { + "epoch": 0.19189040088267745, + "grad_norm": 1.7469168955446288, + "learning_rate": 9.328347451135213e-06, + "loss": 0.6175, + "step": 6261 + }, + { + "epoch": 0.19192104940541865, + "grad_norm": 1.8783148213829208, + "learning_rate": 9.328098965183157e-06, + "loss": 0.7581, + "step": 6262 + }, + { + "epoch": 0.19195169792815986, + "grad_norm": 1.4962088986137452, + "learning_rate": 9.32785043658523e-06, + "loss": 0.6554, + "step": 6263 + }, + { + "epoch": 0.19198234645090106, + "grad_norm": 1.705812615511825, + "learning_rate": 9.32760186534389e-06, + "loss": 0.8057, + "step": 6264 + }, + { + "epoch": 0.19201299497364227, + "grad_norm": 0.8953881854013317, + "learning_rate": 9.327353251461578e-06, + "loss": 0.4894, + "step": 6265 + }, + { + "epoch": 0.19204364349638348, + "grad_norm": 1.7144414491838276, + "learning_rate": 9.327104594940748e-06, + "loss": 0.7204, + "step": 6266 + }, + { + "epoch": 0.19207429201912468, + "grad_norm": 1.7900711956192217, + "learning_rate": 9.326855895783851e-06, + "loss": 0.7301, + "step": 6267 + }, + { + "epoch": 0.1921049405418659, + "grad_norm": 1.6788678569983464, + "learning_rate": 9.326607153993335e-06, + "loss": 0.6953, + "step": 6268 + }, + { + "epoch": 0.1921355890646071, + "grad_norm": 1.7271933743521204, + "learning_rate": 9.32635836957165e-06, + "loss": 0.6494, + "step": 6269 + }, + { + "epoch": 0.1921662375873483, + "grad_norm": 1.7188696526839624, + "learning_rate": 9.326109542521252e-06, + "loss": 0.6635, + "step": 6270 + }, + { + "epoch": 0.1921968861100895, + "grad_norm": 1.5848764788653016, + "learning_rate": 9.325860672844586e-06, + "loss": 0.6957, + "step": 6271 + }, + { + "epoch": 0.1922275346328307, + "grad_norm": 1.7381068122718748, + "learning_rate": 9.325611760544112e-06, + "loss": 0.7456, + "step": 6272 + }, + { + "epoch": 0.1922581831555719, + "grad_norm": 1.812134388646277, + "learning_rate": 9.325362805622275e-06, + "loss": 0.7479, + "step": 6273 + }, + { + "epoch": 0.1922888316783131, + "grad_norm": 1.7933366588880884, + "learning_rate": 9.325113808081535e-06, + "loss": 0.7643, + "step": 6274 + }, + { + "epoch": 0.1923194802010543, + "grad_norm": 1.6217649310534876, + "learning_rate": 9.32486476792434e-06, + "loss": 0.6646, + "step": 6275 + }, + { + "epoch": 0.1923501287237955, + "grad_norm": 1.862208358016709, + "learning_rate": 9.324615685153145e-06, + "loss": 0.7349, + "step": 6276 + }, + { + "epoch": 0.1923807772465367, + "grad_norm": 1.5827303289399204, + "learning_rate": 9.324366559770406e-06, + "loss": 0.6717, + "step": 6277 + }, + { + "epoch": 0.19241142576927792, + "grad_norm": 1.766435905716033, + "learning_rate": 9.324117391778577e-06, + "loss": 0.7954, + "step": 6278 + }, + { + "epoch": 0.19244207429201912, + "grad_norm": 1.7798089985592347, + "learning_rate": 9.323868181180113e-06, + "loss": 0.7182, + "step": 6279 + }, + { + "epoch": 0.19247272281476033, + "grad_norm": 1.6911886346051506, + "learning_rate": 9.32361892797747e-06, + "loss": 0.8037, + "step": 6280 + }, + { + "epoch": 
0.19250337133750153, + "grad_norm": 1.8319638746275988, + "learning_rate": 9.323369632173103e-06, + "loss": 0.6758, + "step": 6281 + }, + { + "epoch": 0.19253401986024274, + "grad_norm": 1.6615377700976435, + "learning_rate": 9.323120293769468e-06, + "loss": 0.7131, + "step": 6282 + }, + { + "epoch": 0.19256466838298394, + "grad_norm": 1.760448321913327, + "learning_rate": 9.322870912769024e-06, + "loss": 0.7314, + "step": 6283 + }, + { + "epoch": 0.19259531690572515, + "grad_norm": 1.7560068684139238, + "learning_rate": 9.322621489174226e-06, + "loss": 0.8262, + "step": 6284 + }, + { + "epoch": 0.19262596542846636, + "grad_norm": 1.7132032143565334, + "learning_rate": 9.322372022987533e-06, + "loss": 0.7342, + "step": 6285 + }, + { + "epoch": 0.19265661395120756, + "grad_norm": 1.661153784972312, + "learning_rate": 9.322122514211402e-06, + "loss": 0.6669, + "step": 6286 + }, + { + "epoch": 0.19268726247394877, + "grad_norm": 1.569661900256363, + "learning_rate": 9.321872962848292e-06, + "loss": 0.6453, + "step": 6287 + }, + { + "epoch": 0.19271791099668997, + "grad_norm": 1.7824208214441415, + "learning_rate": 9.321623368900664e-06, + "loss": 0.7833, + "step": 6288 + }, + { + "epoch": 0.19274855951943115, + "grad_norm": 1.7166577478727896, + "learning_rate": 9.321373732370973e-06, + "loss": 0.7551, + "step": 6289 + }, + { + "epoch": 0.19277920804217236, + "grad_norm": 1.786585375159658, + "learning_rate": 9.321124053261681e-06, + "loss": 0.68, + "step": 6290 + }, + { + "epoch": 0.19280985656491356, + "grad_norm": 1.6997165846699454, + "learning_rate": 9.32087433157525e-06, + "loss": 0.7169, + "step": 6291 + }, + { + "epoch": 0.19284050508765477, + "grad_norm": 1.7555270906221352, + "learning_rate": 9.320624567314136e-06, + "loss": 0.7426, + "step": 6292 + }, + { + "epoch": 0.19287115361039597, + "grad_norm": 1.6336224759170959, + "learning_rate": 9.320374760480804e-06, + "loss": 0.6859, + "step": 6293 + }, + { + "epoch": 0.19290180213313718, + "grad_norm": 1.6416470622934667, + "learning_rate": 9.320124911077713e-06, + "loss": 0.6494, + "step": 6294 + }, + { + "epoch": 0.19293245065587838, + "grad_norm": 1.7185834304585055, + "learning_rate": 9.319875019107327e-06, + "loss": 0.6622, + "step": 6295 + }, + { + "epoch": 0.1929630991786196, + "grad_norm": 1.7500922169202577, + "learning_rate": 9.319625084572108e-06, + "loss": 0.7817, + "step": 6296 + }, + { + "epoch": 0.1929937477013608, + "grad_norm": 1.8158441629306417, + "learning_rate": 9.319375107474516e-06, + "loss": 0.6961, + "step": 6297 + }, + { + "epoch": 0.193024396224102, + "grad_norm": 1.6320033603963462, + "learning_rate": 9.319125087817017e-06, + "loss": 0.7567, + "step": 6298 + }, + { + "epoch": 0.1930550447468432, + "grad_norm": 1.7988427247454395, + "learning_rate": 9.318875025602072e-06, + "loss": 0.7351, + "step": 6299 + }, + { + "epoch": 0.1930856932695844, + "grad_norm": 1.83273153551287, + "learning_rate": 9.31862492083215e-06, + "loss": 0.7853, + "step": 6300 + }, + { + "epoch": 0.19311634179232562, + "grad_norm": 1.915129323595258, + "learning_rate": 9.318374773509707e-06, + "loss": 0.8119, + "step": 6301 + }, + { + "epoch": 0.19314699031506682, + "grad_norm": 1.7276921069280324, + "learning_rate": 9.318124583637216e-06, + "loss": 0.6791, + "step": 6302 + }, + { + "epoch": 0.19317763883780803, + "grad_norm": 1.8237742593968385, + "learning_rate": 9.317874351217136e-06, + "loss": 0.7344, + "step": 6303 + }, + { + "epoch": 0.1932082873605492, + "grad_norm": 1.7998569739206967, + "learning_rate": 9.317624076251936e-06, + 
"loss": 0.7352, + "step": 6304 + }, + { + "epoch": 0.1932389358832904, + "grad_norm": 1.9396508923388711, + "learning_rate": 9.317373758744082e-06, + "loss": 0.7645, + "step": 6305 + }, + { + "epoch": 0.19326958440603162, + "grad_norm": 1.7923651388599366, + "learning_rate": 9.317123398696039e-06, + "loss": 0.7259, + "step": 6306 + }, + { + "epoch": 0.19330023292877282, + "grad_norm": 1.6143834512400248, + "learning_rate": 9.316872996110276e-06, + "loss": 0.6244, + "step": 6307 + }, + { + "epoch": 0.19333088145151403, + "grad_norm": 1.8740352304881194, + "learning_rate": 9.316622550989259e-06, + "loss": 0.7431, + "step": 6308 + }, + { + "epoch": 0.19336152997425524, + "grad_norm": 1.8253333729673848, + "learning_rate": 9.316372063335453e-06, + "loss": 0.7509, + "step": 6309 + }, + { + "epoch": 0.19339217849699644, + "grad_norm": 0.9660179897407231, + "learning_rate": 9.31612153315133e-06, + "loss": 0.5067, + "step": 6310 + }, + { + "epoch": 0.19342282701973765, + "grad_norm": 1.7930911218649523, + "learning_rate": 9.315870960439357e-06, + "loss": 0.7397, + "step": 6311 + }, + { + "epoch": 0.19345347554247885, + "grad_norm": 1.9523516457412753, + "learning_rate": 9.315620345202004e-06, + "loss": 0.7882, + "step": 6312 + }, + { + "epoch": 0.19348412406522006, + "grad_norm": 1.722995707757921, + "learning_rate": 9.31536968744174e-06, + "loss": 0.7041, + "step": 6313 + }, + { + "epoch": 0.19351477258796126, + "grad_norm": 2.019993213539517, + "learning_rate": 9.31511898716103e-06, + "loss": 0.7814, + "step": 6314 + }, + { + "epoch": 0.19354542111070247, + "grad_norm": 1.5702834994200388, + "learning_rate": 9.314868244362355e-06, + "loss": 0.7051, + "step": 6315 + }, + { + "epoch": 0.19357606963344368, + "grad_norm": 1.895886215825941, + "learning_rate": 9.314617459048175e-06, + "loss": 0.6503, + "step": 6316 + }, + { + "epoch": 0.19360671815618488, + "grad_norm": 1.6849583181843695, + "learning_rate": 9.314366631220965e-06, + "loss": 0.6545, + "step": 6317 + }, + { + "epoch": 0.1936373666789261, + "grad_norm": 2.0319296133536096, + "learning_rate": 9.314115760883199e-06, + "loss": 0.683, + "step": 6318 + }, + { + "epoch": 0.1936680152016673, + "grad_norm": 1.9524610493306447, + "learning_rate": 9.313864848037346e-06, + "loss": 0.8466, + "step": 6319 + }, + { + "epoch": 0.19369866372440847, + "grad_norm": 0.9971938440378584, + "learning_rate": 9.313613892685877e-06, + "loss": 0.4896, + "step": 6320 + }, + { + "epoch": 0.19372931224714968, + "grad_norm": 0.9323517396508137, + "learning_rate": 9.31336289483127e-06, + "loss": 0.4924, + "step": 6321 + }, + { + "epoch": 0.19375996076989088, + "grad_norm": 2.2239064346476956, + "learning_rate": 9.313111854475991e-06, + "loss": 0.7686, + "step": 6322 + }, + { + "epoch": 0.1937906092926321, + "grad_norm": 0.8475196897378168, + "learning_rate": 9.312860771622521e-06, + "loss": 0.4723, + "step": 6323 + }, + { + "epoch": 0.1938212578153733, + "grad_norm": 1.8295248905228436, + "learning_rate": 9.312609646273327e-06, + "loss": 0.7725, + "step": 6324 + }, + { + "epoch": 0.1938519063381145, + "grad_norm": 0.9545950929384336, + "learning_rate": 9.31235847843089e-06, + "loss": 0.4909, + "step": 6325 + }, + { + "epoch": 0.1938825548608557, + "grad_norm": 2.381387366439298, + "learning_rate": 9.312107268097679e-06, + "loss": 0.7182, + "step": 6326 + }, + { + "epoch": 0.1939132033835969, + "grad_norm": 1.8789745317230204, + "learning_rate": 9.311856015276172e-06, + "loss": 0.6829, + "step": 6327 + }, + { + "epoch": 0.19394385190633812, + "grad_norm": 
2.0220694693123824, + "learning_rate": 9.311604719968845e-06, + "loss": 0.7009, + "step": 6328 + }, + { + "epoch": 0.19397450042907932, + "grad_norm": 1.717527390697982, + "learning_rate": 9.311353382178174e-06, + "loss": 0.8135, + "step": 6329 + }, + { + "epoch": 0.19400514895182053, + "grad_norm": 1.764276676715296, + "learning_rate": 9.311102001906634e-06, + "loss": 0.7703, + "step": 6330 + }, + { + "epoch": 0.19403579747456173, + "grad_norm": 1.094451719680199, + "learning_rate": 9.310850579156703e-06, + "loss": 0.5007, + "step": 6331 + }, + { + "epoch": 0.19406644599730294, + "grad_norm": 1.9282270431134914, + "learning_rate": 9.31059911393086e-06, + "loss": 0.7714, + "step": 6332 + }, + { + "epoch": 0.19409709452004414, + "grad_norm": 1.7552488715503436, + "learning_rate": 9.31034760623158e-06, + "loss": 0.7471, + "step": 6333 + }, + { + "epoch": 0.19412774304278535, + "grad_norm": 1.6706135007535394, + "learning_rate": 9.310096056061341e-06, + "loss": 0.6818, + "step": 6334 + }, + { + "epoch": 0.19415839156552653, + "grad_norm": 1.781943392709719, + "learning_rate": 9.309844463422624e-06, + "loss": 0.6843, + "step": 6335 + }, + { + "epoch": 0.19418904008826773, + "grad_norm": 2.125189756977096, + "learning_rate": 9.309592828317906e-06, + "loss": 0.822, + "step": 6336 + }, + { + "epoch": 0.19421968861100894, + "grad_norm": 1.6472293499476545, + "learning_rate": 9.309341150749669e-06, + "loss": 0.7113, + "step": 6337 + }, + { + "epoch": 0.19425033713375014, + "grad_norm": 1.8533282213036208, + "learning_rate": 9.30908943072039e-06, + "loss": 0.7778, + "step": 6338 + }, + { + "epoch": 0.19428098565649135, + "grad_norm": 0.8752585498548058, + "learning_rate": 9.308837668232548e-06, + "loss": 0.4759, + "step": 6339 + }, + { + "epoch": 0.19431163417923256, + "grad_norm": 1.5872553832185043, + "learning_rate": 9.30858586328863e-06, + "loss": 0.6494, + "step": 6340 + }, + { + "epoch": 0.19434228270197376, + "grad_norm": 0.8636186270484072, + "learning_rate": 9.30833401589111e-06, + "loss": 0.5086, + "step": 6341 + }, + { + "epoch": 0.19437293122471497, + "grad_norm": 1.4748506715829157, + "learning_rate": 9.308082126042474e-06, + "loss": 0.712, + "step": 6342 + }, + { + "epoch": 0.19440357974745617, + "grad_norm": 1.5639284169797143, + "learning_rate": 9.307830193745203e-06, + "loss": 0.7216, + "step": 6343 + }, + { + "epoch": 0.19443422827019738, + "grad_norm": 1.8158354173917373, + "learning_rate": 9.307578219001778e-06, + "loss": 0.6831, + "step": 6344 + }, + { + "epoch": 0.19446487679293858, + "grad_norm": 0.8941628428615838, + "learning_rate": 9.307326201814684e-06, + "loss": 0.5059, + "step": 6345 + }, + { + "epoch": 0.1944955253156798, + "grad_norm": 2.006643857250631, + "learning_rate": 9.307074142186401e-06, + "loss": 0.7456, + "step": 6346 + }, + { + "epoch": 0.194526173838421, + "grad_norm": 0.9022065299978345, + "learning_rate": 9.306822040119415e-06, + "loss": 0.4915, + "step": 6347 + }, + { + "epoch": 0.1945568223611622, + "grad_norm": 1.8938974524748047, + "learning_rate": 9.30656989561621e-06, + "loss": 0.6817, + "step": 6348 + }, + { + "epoch": 0.1945874708839034, + "grad_norm": 1.9062982734651666, + "learning_rate": 9.30631770867927e-06, + "loss": 0.6814, + "step": 6349 + }, + { + "epoch": 0.1946181194066446, + "grad_norm": 1.6384872785278868, + "learning_rate": 9.30606547931108e-06, + "loss": 0.6755, + "step": 6350 + }, + { + "epoch": 0.1946487679293858, + "grad_norm": 1.8456460694321861, + "learning_rate": 9.305813207514123e-06, + "loss": 0.7495, + "step": 6351 + }, + { 
+ "epoch": 0.194679416452127, + "grad_norm": 1.6466918891428377, + "learning_rate": 9.305560893290889e-06, + "loss": 0.6413, + "step": 6352 + }, + { + "epoch": 0.1947100649748682, + "grad_norm": 1.9621879104994708, + "learning_rate": 9.30530853664386e-06, + "loss": 0.7568, + "step": 6353 + }, + { + "epoch": 0.1947407134976094, + "grad_norm": 1.8528752650001197, + "learning_rate": 9.305056137575526e-06, + "loss": 0.77, + "step": 6354 + }, + { + "epoch": 0.1947713620203506, + "grad_norm": 1.8075175491493223, + "learning_rate": 9.304803696088372e-06, + "loss": 0.666, + "step": 6355 + }, + { + "epoch": 0.19480201054309182, + "grad_norm": 0.9862874689424715, + "learning_rate": 9.304551212184887e-06, + "loss": 0.4938, + "step": 6356 + }, + { + "epoch": 0.19483265906583302, + "grad_norm": 2.04730105850082, + "learning_rate": 9.304298685867556e-06, + "loss": 0.7185, + "step": 6357 + }, + { + "epoch": 0.19486330758857423, + "grad_norm": 1.6339444630867515, + "learning_rate": 9.304046117138868e-06, + "loss": 0.7361, + "step": 6358 + }, + { + "epoch": 0.19489395611131544, + "grad_norm": 1.7424169545694963, + "learning_rate": 9.303793506001314e-06, + "loss": 0.7292, + "step": 6359 + }, + { + "epoch": 0.19492460463405664, + "grad_norm": 1.760443906398041, + "learning_rate": 9.30354085245738e-06, + "loss": 0.6577, + "step": 6360 + }, + { + "epoch": 0.19495525315679785, + "grad_norm": 0.8202698673575726, + "learning_rate": 9.303288156509557e-06, + "loss": 0.5026, + "step": 6361 + }, + { + "epoch": 0.19498590167953905, + "grad_norm": 1.7549243052749943, + "learning_rate": 9.303035418160337e-06, + "loss": 0.726, + "step": 6362 + }, + { + "epoch": 0.19501655020228026, + "grad_norm": 1.810569480771087, + "learning_rate": 9.302782637412206e-06, + "loss": 0.7336, + "step": 6363 + }, + { + "epoch": 0.19504719872502146, + "grad_norm": 1.7124099203345902, + "learning_rate": 9.302529814267658e-06, + "loss": 0.8383, + "step": 6364 + }, + { + "epoch": 0.19507784724776267, + "grad_norm": 1.6077787784457611, + "learning_rate": 9.302276948729182e-06, + "loss": 0.7487, + "step": 6365 + }, + { + "epoch": 0.19510849577050385, + "grad_norm": 0.8401264426170517, + "learning_rate": 9.30202404079927e-06, + "loss": 0.5006, + "step": 6366 + }, + { + "epoch": 0.19513914429324505, + "grad_norm": 0.8589730147887038, + "learning_rate": 9.301771090480415e-06, + "loss": 0.5236, + "step": 6367 + }, + { + "epoch": 0.19516979281598626, + "grad_norm": 1.7765930487921207, + "learning_rate": 9.301518097775109e-06, + "loss": 0.6955, + "step": 6368 + }, + { + "epoch": 0.19520044133872747, + "grad_norm": 1.735534110391356, + "learning_rate": 9.301265062685845e-06, + "loss": 0.758, + "step": 6369 + }, + { + "epoch": 0.19523108986146867, + "grad_norm": 0.8248828072309699, + "learning_rate": 9.301011985215113e-06, + "loss": 0.512, + "step": 6370 + }, + { + "epoch": 0.19526173838420988, + "grad_norm": 1.9672361560899982, + "learning_rate": 9.300758865365413e-06, + "loss": 0.7277, + "step": 6371 + }, + { + "epoch": 0.19529238690695108, + "grad_norm": 1.6306779234589075, + "learning_rate": 9.300505703139235e-06, + "loss": 0.6772, + "step": 6372 + }, + { + "epoch": 0.1953230354296923, + "grad_norm": 1.864762734901615, + "learning_rate": 9.300252498539073e-06, + "loss": 0.756, + "step": 6373 + }, + { + "epoch": 0.1953536839524335, + "grad_norm": 1.7501456543154288, + "learning_rate": 9.299999251567421e-06, + "loss": 0.6703, + "step": 6374 + }, + { + "epoch": 0.1953843324751747, + "grad_norm": 1.6597740885423367, + "learning_rate": 
9.29974596222678e-06, + "loss": 0.6579, + "step": 6375 + }, + { + "epoch": 0.1954149809979159, + "grad_norm": 1.5655747154471178, + "learning_rate": 9.29949263051964e-06, + "loss": 0.7811, + "step": 6376 + }, + { + "epoch": 0.1954456295206571, + "grad_norm": 1.9854671262289538, + "learning_rate": 9.299239256448497e-06, + "loss": 0.7522, + "step": 6377 + }, + { + "epoch": 0.19547627804339832, + "grad_norm": 1.800175743115053, + "learning_rate": 9.298985840015853e-06, + "loss": 0.7121, + "step": 6378 + }, + { + "epoch": 0.19550692656613952, + "grad_norm": 1.822820974433255, + "learning_rate": 9.2987323812242e-06, + "loss": 0.7287, + "step": 6379 + }, + { + "epoch": 0.19553757508888073, + "grad_norm": 1.9808135704798946, + "learning_rate": 9.298478880076037e-06, + "loss": 0.704, + "step": 6380 + }, + { + "epoch": 0.19556822361162193, + "grad_norm": 1.0236616881091647, + "learning_rate": 9.298225336573863e-06, + "loss": 0.495, + "step": 6381 + }, + { + "epoch": 0.1955988721343631, + "grad_norm": 1.5381678196814188, + "learning_rate": 9.297971750720174e-06, + "loss": 0.6772, + "step": 6382 + }, + { + "epoch": 0.19562952065710432, + "grad_norm": 1.7581341324581536, + "learning_rate": 9.29771812251747e-06, + "loss": 0.7861, + "step": 6383 + }, + { + "epoch": 0.19566016917984552, + "grad_norm": 2.1126053427813383, + "learning_rate": 9.297464451968248e-06, + "loss": 0.8563, + "step": 6384 + }, + { + "epoch": 0.19569081770258673, + "grad_norm": 1.6793710502578625, + "learning_rate": 9.29721073907501e-06, + "loss": 0.7334, + "step": 6385 + }, + { + "epoch": 0.19572146622532793, + "grad_norm": 1.7045491768911236, + "learning_rate": 9.296956983840258e-06, + "loss": 0.7186, + "step": 6386 + }, + { + "epoch": 0.19575211474806914, + "grad_norm": 1.8124514009361392, + "learning_rate": 9.296703186266486e-06, + "loss": 0.6232, + "step": 6387 + }, + { + "epoch": 0.19578276327081034, + "grad_norm": 1.8011281516120192, + "learning_rate": 9.296449346356199e-06, + "loss": 0.7463, + "step": 6388 + }, + { + "epoch": 0.19581341179355155, + "grad_norm": 1.6539545261154758, + "learning_rate": 9.296195464111899e-06, + "loss": 0.793, + "step": 6389 + }, + { + "epoch": 0.19584406031629276, + "grad_norm": 2.0388111166744554, + "learning_rate": 9.295941539536083e-06, + "loss": 0.8293, + "step": 6390 + }, + { + "epoch": 0.19587470883903396, + "grad_norm": 1.7159187476382656, + "learning_rate": 9.295687572631258e-06, + "loss": 0.7513, + "step": 6391 + }, + { + "epoch": 0.19590535736177517, + "grad_norm": 1.128926076394993, + "learning_rate": 9.295433563399922e-06, + "loss": 0.4775, + "step": 6392 + }, + { + "epoch": 0.19593600588451637, + "grad_norm": 1.9295357422202875, + "learning_rate": 9.295179511844583e-06, + "loss": 0.753, + "step": 6393 + }, + { + "epoch": 0.19596665440725758, + "grad_norm": 0.8558218812595351, + "learning_rate": 9.29492541796774e-06, + "loss": 0.5247, + "step": 6394 + }, + { + "epoch": 0.19599730292999878, + "grad_norm": 1.6606672202250554, + "learning_rate": 9.294671281771897e-06, + "loss": 0.6711, + "step": 6395 + }, + { + "epoch": 0.19602795145274, + "grad_norm": 0.8322099642734455, + "learning_rate": 9.29441710325956e-06, + "loss": 0.4845, + "step": 6396 + }, + { + "epoch": 0.19605859997548117, + "grad_norm": 1.7118254061877063, + "learning_rate": 9.294162882433233e-06, + "loss": 0.7163, + "step": 6397 + }, + { + "epoch": 0.19608924849822237, + "grad_norm": 0.8909681006805223, + "learning_rate": 9.29390861929542e-06, + "loss": 0.4928, + "step": 6398 + }, + { + "epoch": 0.19611989702096358, + 
"grad_norm": 0.8714199357061401, + "learning_rate": 9.293654313848626e-06, + "loss": 0.4961, + "step": 6399 + }, + { + "epoch": 0.19615054554370479, + "grad_norm": 0.8280571721808039, + "learning_rate": 9.293399966095358e-06, + "loss": 0.478, + "step": 6400 + }, + { + "epoch": 0.196181194066446, + "grad_norm": 2.0965348394933256, + "learning_rate": 9.293145576038121e-06, + "loss": 0.8056, + "step": 6401 + }, + { + "epoch": 0.1962118425891872, + "grad_norm": 1.7533662415415059, + "learning_rate": 9.292891143679423e-06, + "loss": 0.6583, + "step": 6402 + }, + { + "epoch": 0.1962424911119284, + "grad_norm": 0.9211623166435621, + "learning_rate": 9.29263666902177e-06, + "loss": 0.4939, + "step": 6403 + }, + { + "epoch": 0.1962731396346696, + "grad_norm": 1.7501530267300203, + "learning_rate": 9.29238215206767e-06, + "loss": 0.7559, + "step": 6404 + }, + { + "epoch": 0.1963037881574108, + "grad_norm": 1.9179057291881894, + "learning_rate": 9.29212759281963e-06, + "loss": 0.7998, + "step": 6405 + }, + { + "epoch": 0.19633443668015202, + "grad_norm": 1.616222499695529, + "learning_rate": 9.291872991280158e-06, + "loss": 0.7096, + "step": 6406 + }, + { + "epoch": 0.19636508520289322, + "grad_norm": 1.8004864605700883, + "learning_rate": 9.291618347451763e-06, + "loss": 0.76, + "step": 6407 + }, + { + "epoch": 0.19639573372563443, + "grad_norm": 1.6765962404597687, + "learning_rate": 9.291363661336956e-06, + "loss": 0.7311, + "step": 6408 + }, + { + "epoch": 0.19642638224837564, + "grad_norm": 1.930103969912639, + "learning_rate": 9.291108932938244e-06, + "loss": 0.807, + "step": 6409 + }, + { + "epoch": 0.19645703077111684, + "grad_norm": 0.9458555507543898, + "learning_rate": 9.290854162258138e-06, + "loss": 0.501, + "step": 6410 + }, + { + "epoch": 0.19648767929385805, + "grad_norm": 1.565464912051991, + "learning_rate": 9.290599349299148e-06, + "loss": 0.7449, + "step": 6411 + }, + { + "epoch": 0.19651832781659925, + "grad_norm": 2.7590689188083153, + "learning_rate": 9.290344494063785e-06, + "loss": 0.6159, + "step": 6412 + }, + { + "epoch": 0.19654897633934043, + "grad_norm": 1.6690925000148615, + "learning_rate": 9.290089596554559e-06, + "loss": 0.7026, + "step": 6413 + }, + { + "epoch": 0.19657962486208164, + "grad_norm": 1.6858566873408638, + "learning_rate": 9.289834656773984e-06, + "loss": 0.7569, + "step": 6414 + }, + { + "epoch": 0.19661027338482284, + "grad_norm": 1.882801497813053, + "learning_rate": 9.28957967472457e-06, + "loss": 0.7175, + "step": 6415 + }, + { + "epoch": 0.19664092190756405, + "grad_norm": 1.6654009535420446, + "learning_rate": 9.28932465040883e-06, + "loss": 0.643, + "step": 6416 + }, + { + "epoch": 0.19667157043030525, + "grad_norm": 1.7361756430868456, + "learning_rate": 9.289069583829276e-06, + "loss": 0.7121, + "step": 6417 + }, + { + "epoch": 0.19670221895304646, + "grad_norm": 1.8567293161288434, + "learning_rate": 9.288814474988421e-06, + "loss": 0.7932, + "step": 6418 + }, + { + "epoch": 0.19673286747578766, + "grad_norm": 2.0461178881905546, + "learning_rate": 9.288559323888781e-06, + "loss": 0.7981, + "step": 6419 + }, + { + "epoch": 0.19676351599852887, + "grad_norm": 1.6713250546360372, + "learning_rate": 9.28830413053287e-06, + "loss": 0.75, + "step": 6420 + }, + { + "epoch": 0.19679416452127008, + "grad_norm": 1.8069226254718131, + "learning_rate": 9.2880488949232e-06, + "loss": 0.7023, + "step": 6421 + }, + { + "epoch": 0.19682481304401128, + "grad_norm": 1.8718064009620607, + "learning_rate": 9.287793617062286e-06, + "loss": 0.7201, + "step": 6422 
+ }, + { + "epoch": 0.1968554615667525, + "grad_norm": 1.754981439627596, + "learning_rate": 9.287538296952646e-06, + "loss": 0.7276, + "step": 6423 + }, + { + "epoch": 0.1968861100894937, + "grad_norm": 1.8171629933807478, + "learning_rate": 9.287282934596793e-06, + "loss": 0.6745, + "step": 6424 + }, + { + "epoch": 0.1969167586122349, + "grad_norm": 1.7529090265945033, + "learning_rate": 9.287027529997246e-06, + "loss": 0.6668, + "step": 6425 + }, + { + "epoch": 0.1969474071349761, + "grad_norm": 1.8570710919882267, + "learning_rate": 9.286772083156518e-06, + "loss": 0.7804, + "step": 6426 + }, + { + "epoch": 0.1969780556577173, + "grad_norm": 1.061519683897881, + "learning_rate": 9.286516594077129e-06, + "loss": 0.4917, + "step": 6427 + }, + { + "epoch": 0.1970087041804585, + "grad_norm": 1.7247887543652956, + "learning_rate": 9.286261062761595e-06, + "loss": 0.745, + "step": 6428 + }, + { + "epoch": 0.1970393527031997, + "grad_norm": 1.739831383640912, + "learning_rate": 9.286005489212433e-06, + "loss": 0.6357, + "step": 6429 + }, + { + "epoch": 0.1970700012259409, + "grad_norm": 0.8189964223256679, + "learning_rate": 9.285749873432165e-06, + "loss": 0.4826, + "step": 6430 + }, + { + "epoch": 0.1971006497486821, + "grad_norm": 0.8154709813212014, + "learning_rate": 9.285494215423304e-06, + "loss": 0.4774, + "step": 6431 + }, + { + "epoch": 0.1971312982714233, + "grad_norm": 1.9070571085184571, + "learning_rate": 9.285238515188372e-06, + "loss": 0.7752, + "step": 6432 + }, + { + "epoch": 0.19716194679416452, + "grad_norm": 1.8487594876082918, + "learning_rate": 9.284982772729891e-06, + "loss": 0.7229, + "step": 6433 + }, + { + "epoch": 0.19719259531690572, + "grad_norm": 1.8481317683344325, + "learning_rate": 9.284726988050376e-06, + "loss": 0.7219, + "step": 6434 + }, + { + "epoch": 0.19722324383964693, + "grad_norm": 1.7779272593587068, + "learning_rate": 9.284471161152351e-06, + "loss": 0.7436, + "step": 6435 + }, + { + "epoch": 0.19725389236238813, + "grad_norm": 2.0505315459375706, + "learning_rate": 9.284215292038335e-06, + "loss": 0.8079, + "step": 6436 + }, + { + "epoch": 0.19728454088512934, + "grad_norm": 1.7429384144497582, + "learning_rate": 9.28395938071085e-06, + "loss": 0.7048, + "step": 6437 + }, + { + "epoch": 0.19731518940787054, + "grad_norm": 1.624647163011786, + "learning_rate": 9.283703427172417e-06, + "loss": 0.7496, + "step": 6438 + }, + { + "epoch": 0.19734583793061175, + "grad_norm": 1.7439620519808146, + "learning_rate": 9.28344743142556e-06, + "loss": 0.7428, + "step": 6439 + }, + { + "epoch": 0.19737648645335296, + "grad_norm": 1.7852972437407661, + "learning_rate": 9.283191393472796e-06, + "loss": 0.6996, + "step": 6440 + }, + { + "epoch": 0.19740713497609416, + "grad_norm": 1.9744656220333134, + "learning_rate": 9.282935313316652e-06, + "loss": 0.7742, + "step": 6441 + }, + { + "epoch": 0.19743778349883537, + "grad_norm": 1.483247229928701, + "learning_rate": 9.282679190959652e-06, + "loss": 0.612, + "step": 6442 + }, + { + "epoch": 0.19746843202157657, + "grad_norm": 1.6675391119778633, + "learning_rate": 9.282423026404317e-06, + "loss": 0.7495, + "step": 6443 + }, + { + "epoch": 0.19749908054431775, + "grad_norm": 1.8135359713562058, + "learning_rate": 9.282166819653172e-06, + "loss": 0.754, + "step": 6444 + }, + { + "epoch": 0.19752972906705896, + "grad_norm": 1.928439739319495, + "learning_rate": 9.281910570708744e-06, + "loss": 0.8254, + "step": 6445 + }, + { + "epoch": 0.19756037758980016, + "grad_norm": 2.0237653856231517, + "learning_rate": 
9.281654279573553e-06, + "loss": 0.7563, + "step": 6446 + }, + { + "epoch": 0.19759102611254137, + "grad_norm": 1.6244021995768858, + "learning_rate": 9.281397946250129e-06, + "loss": 0.6221, + "step": 6447 + }, + { + "epoch": 0.19762167463528257, + "grad_norm": 1.7302599867396855, + "learning_rate": 9.281141570740992e-06, + "loss": 0.6719, + "step": 6448 + }, + { + "epoch": 0.19765232315802378, + "grad_norm": 1.665264491967291, + "learning_rate": 9.280885153048676e-06, + "loss": 0.6818, + "step": 6449 + }, + { + "epoch": 0.19768297168076499, + "grad_norm": 1.6526686595066393, + "learning_rate": 9.2806286931757e-06, + "loss": 0.6394, + "step": 6450 + }, + { + "epoch": 0.1977136202035062, + "grad_norm": 1.1901279074542426, + "learning_rate": 9.280372191124596e-06, + "loss": 0.4839, + "step": 6451 + }, + { + "epoch": 0.1977442687262474, + "grad_norm": 1.6398349974418978, + "learning_rate": 9.280115646897888e-06, + "loss": 0.6664, + "step": 6452 + }, + { + "epoch": 0.1977749172489886, + "grad_norm": 1.8621854432078448, + "learning_rate": 9.279859060498107e-06, + "loss": 0.7503, + "step": 6453 + }, + { + "epoch": 0.1978055657717298, + "grad_norm": 1.8853494737509842, + "learning_rate": 9.27960243192778e-06, + "loss": 0.8112, + "step": 6454 + }, + { + "epoch": 0.197836214294471, + "grad_norm": 0.9226379468292342, + "learning_rate": 9.279345761189435e-06, + "loss": 0.5057, + "step": 6455 + }, + { + "epoch": 0.19786686281721222, + "grad_norm": 2.079842136933081, + "learning_rate": 9.2790890482856e-06, + "loss": 0.6976, + "step": 6456 + }, + { + "epoch": 0.19789751133995342, + "grad_norm": 1.857664675351297, + "learning_rate": 9.278832293218807e-06, + "loss": 0.6906, + "step": 6457 + }, + { + "epoch": 0.19792815986269463, + "grad_norm": 1.6184504087951503, + "learning_rate": 9.278575495991583e-06, + "loss": 0.6351, + "step": 6458 + }, + { + "epoch": 0.1979588083854358, + "grad_norm": 1.8201151159168276, + "learning_rate": 9.278318656606463e-06, + "loss": 0.6742, + "step": 6459 + }, + { + "epoch": 0.19798945690817701, + "grad_norm": 0.9587105377194193, + "learning_rate": 9.278061775065972e-06, + "loss": 0.5019, + "step": 6460 + }, + { + "epoch": 0.19802010543091822, + "grad_norm": 1.8631748149411076, + "learning_rate": 9.277804851372643e-06, + "loss": 0.8024, + "step": 6461 + }, + { + "epoch": 0.19805075395365943, + "grad_norm": 1.920193821528314, + "learning_rate": 9.27754788552901e-06, + "loss": 0.6363, + "step": 6462 + }, + { + "epoch": 0.19808140247640063, + "grad_norm": 1.747334374295573, + "learning_rate": 9.277290877537603e-06, + "loss": 0.6503, + "step": 6463 + }, + { + "epoch": 0.19811205099914184, + "grad_norm": 1.6313169339604778, + "learning_rate": 9.277033827400956e-06, + "loss": 0.7799, + "step": 6464 + }, + { + "epoch": 0.19814269952188304, + "grad_norm": 1.5099765896744672, + "learning_rate": 9.276776735121597e-06, + "loss": 0.6853, + "step": 6465 + }, + { + "epoch": 0.19817334804462425, + "grad_norm": 0.9253003484355331, + "learning_rate": 9.276519600702065e-06, + "loss": 0.5119, + "step": 6466 + }, + { + "epoch": 0.19820399656736545, + "grad_norm": 1.7441891825750584, + "learning_rate": 9.276262424144891e-06, + "loss": 0.6892, + "step": 6467 + }, + { + "epoch": 0.19823464509010666, + "grad_norm": 2.0063960022236578, + "learning_rate": 9.27600520545261e-06, + "loss": 0.7979, + "step": 6468 + }, + { + "epoch": 0.19826529361284786, + "grad_norm": 2.042939965439035, + "learning_rate": 9.275747944627753e-06, + "loss": 0.7269, + "step": 6469 + }, + { + "epoch": 0.19829594213558907, + 
"grad_norm": 1.623277014783498, + "learning_rate": 9.275490641672859e-06, + "loss": 0.6874, + "step": 6470 + }, + { + "epoch": 0.19832659065833028, + "grad_norm": 1.7028776537398052, + "learning_rate": 9.275233296590463e-06, + "loss": 0.6713, + "step": 6471 + }, + { + "epoch": 0.19835723918107148, + "grad_norm": 1.7519365216726948, + "learning_rate": 9.274975909383097e-06, + "loss": 0.6892, + "step": 6472 + }, + { + "epoch": 0.1983878877038127, + "grad_norm": 1.7439645745464571, + "learning_rate": 9.274718480053303e-06, + "loss": 0.6694, + "step": 6473 + }, + { + "epoch": 0.1984185362265539, + "grad_norm": 1.6437913530841028, + "learning_rate": 9.27446100860361e-06, + "loss": 0.7306, + "step": 6474 + }, + { + "epoch": 0.19844918474929507, + "grad_norm": 1.5397948793699208, + "learning_rate": 9.274203495036563e-06, + "loss": 0.6551, + "step": 6475 + }, + { + "epoch": 0.19847983327203628, + "grad_norm": 1.6552413665171661, + "learning_rate": 9.273945939354691e-06, + "loss": 0.6808, + "step": 6476 + }, + { + "epoch": 0.19851048179477748, + "grad_norm": 1.613710982392899, + "learning_rate": 9.27368834156054e-06, + "loss": 0.7577, + "step": 6477 + }, + { + "epoch": 0.1985411303175187, + "grad_norm": 1.7828817985915686, + "learning_rate": 9.273430701656642e-06, + "loss": 0.7232, + "step": 6478 + }, + { + "epoch": 0.1985717788402599, + "grad_norm": 1.8919765536997555, + "learning_rate": 9.273173019645539e-06, + "loss": 0.7567, + "step": 6479 + }, + { + "epoch": 0.1986024273630011, + "grad_norm": 1.8547110024103097, + "learning_rate": 9.272915295529768e-06, + "loss": 0.7192, + "step": 6480 + }, + { + "epoch": 0.1986330758857423, + "grad_norm": 1.5853654005797944, + "learning_rate": 9.27265752931187e-06, + "loss": 0.6893, + "step": 6481 + }, + { + "epoch": 0.1986637244084835, + "grad_norm": 0.8978727915525913, + "learning_rate": 9.272399720994384e-06, + "loss": 0.4844, + "step": 6482 + }, + { + "epoch": 0.19869437293122472, + "grad_norm": 1.677358570111396, + "learning_rate": 9.272141870579851e-06, + "loss": 0.7201, + "step": 6483 + }, + { + "epoch": 0.19872502145396592, + "grad_norm": 0.802322392916381, + "learning_rate": 9.27188397807081e-06, + "loss": 0.5101, + "step": 6484 + }, + { + "epoch": 0.19875566997670713, + "grad_norm": 1.6648513207676447, + "learning_rate": 9.271626043469804e-06, + "loss": 0.7304, + "step": 6485 + }, + { + "epoch": 0.19878631849944833, + "grad_norm": 1.897100051267405, + "learning_rate": 9.271368066779373e-06, + "loss": 0.7306, + "step": 6486 + }, + { + "epoch": 0.19881696702218954, + "grad_norm": 1.6580236369402588, + "learning_rate": 9.27111004800206e-06, + "loss": 0.7083, + "step": 6487 + }, + { + "epoch": 0.19884761554493074, + "grad_norm": 1.8969675328327709, + "learning_rate": 9.270851987140405e-06, + "loss": 0.7397, + "step": 6488 + }, + { + "epoch": 0.19887826406767195, + "grad_norm": 1.0039515977968339, + "learning_rate": 9.270593884196956e-06, + "loss": 0.5099, + "step": 6489 + }, + { + "epoch": 0.19890891259041313, + "grad_norm": 1.7616438487630224, + "learning_rate": 9.270335739174251e-06, + "loss": 0.6775, + "step": 6490 + }, + { + "epoch": 0.19893956111315433, + "grad_norm": 1.618981591569874, + "learning_rate": 9.270077552074835e-06, + "loss": 0.6672, + "step": 6491 + }, + { + "epoch": 0.19897020963589554, + "grad_norm": 1.9305968818678658, + "learning_rate": 9.269819322901254e-06, + "loss": 0.7416, + "step": 6492 + }, + { + "epoch": 0.19900085815863675, + "grad_norm": 1.7394713210718828, + "learning_rate": 9.269561051656049e-06, + "loss": 0.7041, + 
"step": 6493 + }, + { + "epoch": 0.19903150668137795, + "grad_norm": 1.8124450454898908, + "learning_rate": 9.269302738341766e-06, + "loss": 0.7073, + "step": 6494 + }, + { + "epoch": 0.19906215520411916, + "grad_norm": 1.6607977556917173, + "learning_rate": 9.269044382960952e-06, + "loss": 0.6831, + "step": 6495 + }, + { + "epoch": 0.19909280372686036, + "grad_norm": 1.967393109529356, + "learning_rate": 9.268785985516152e-06, + "loss": 0.6859, + "step": 6496 + }, + { + "epoch": 0.19912345224960157, + "grad_norm": 1.9391364210005757, + "learning_rate": 9.268527546009911e-06, + "loss": 0.7974, + "step": 6497 + }, + { + "epoch": 0.19915410077234277, + "grad_norm": 1.9084987444008883, + "learning_rate": 9.268269064444775e-06, + "loss": 0.8073, + "step": 6498 + }, + { + "epoch": 0.19918474929508398, + "grad_norm": 1.9457612101765733, + "learning_rate": 9.268010540823294e-06, + "loss": 0.7, + "step": 6499 + }, + { + "epoch": 0.19921539781782518, + "grad_norm": 1.8974887247429777, + "learning_rate": 9.267751975148011e-06, + "loss": 0.7678, + "step": 6500 + }, + { + "epoch": 0.1992460463405664, + "grad_norm": 1.6771723321719774, + "learning_rate": 9.267493367421476e-06, + "loss": 0.7398, + "step": 6501 + }, + { + "epoch": 0.1992766948633076, + "grad_norm": 2.098008878595679, + "learning_rate": 9.267234717646237e-06, + "loss": 0.7484, + "step": 6502 + }, + { + "epoch": 0.1993073433860488, + "grad_norm": 1.8639659613913155, + "learning_rate": 9.266976025824843e-06, + "loss": 0.6622, + "step": 6503 + }, + { + "epoch": 0.19933799190879, + "grad_norm": 1.9640347642358713, + "learning_rate": 9.266717291959843e-06, + "loss": 0.7614, + "step": 6504 + }, + { + "epoch": 0.1993686404315312, + "grad_norm": 1.8265190477469109, + "learning_rate": 9.266458516053785e-06, + "loss": 0.7425, + "step": 6505 + }, + { + "epoch": 0.1993992889542724, + "grad_norm": 2.005666962810202, + "learning_rate": 9.266199698109219e-06, + "loss": 0.7609, + "step": 6506 + }, + { + "epoch": 0.1994299374770136, + "grad_norm": 1.8381972242067026, + "learning_rate": 9.265940838128698e-06, + "loss": 0.7611, + "step": 6507 + }, + { + "epoch": 0.1994605859997548, + "grad_norm": 1.7901035774589307, + "learning_rate": 9.265681936114768e-06, + "loss": 0.7272, + "step": 6508 + }, + { + "epoch": 0.199491234522496, + "grad_norm": 1.8544623277281302, + "learning_rate": 9.265422992069985e-06, + "loss": 0.7246, + "step": 6509 + }, + { + "epoch": 0.1995218830452372, + "grad_norm": 1.7111752379536462, + "learning_rate": 9.265164005996896e-06, + "loss": 0.6636, + "step": 6510 + }, + { + "epoch": 0.19955253156797842, + "grad_norm": 1.6597713140654384, + "learning_rate": 9.264904977898055e-06, + "loss": 0.6615, + "step": 6511 + }, + { + "epoch": 0.19958318009071963, + "grad_norm": 1.548054026993805, + "learning_rate": 9.264645907776013e-06, + "loss": 0.7752, + "step": 6512 + }, + { + "epoch": 0.19961382861346083, + "grad_norm": 1.0261546688141485, + "learning_rate": 9.264386795633327e-06, + "loss": 0.5064, + "step": 6513 + }, + { + "epoch": 0.19964447713620204, + "grad_norm": 1.6736102080162423, + "learning_rate": 9.264127641472544e-06, + "loss": 0.727, + "step": 6514 + }, + { + "epoch": 0.19967512565894324, + "grad_norm": 0.8479667221483225, + "learning_rate": 9.263868445296222e-06, + "loss": 0.5076, + "step": 6515 + }, + { + "epoch": 0.19970577418168445, + "grad_norm": 1.9582333728968309, + "learning_rate": 9.263609207106911e-06, + "loss": 0.7367, + "step": 6516 + }, + { + "epoch": 0.19973642270442565, + "grad_norm": 1.73662935537689, + 
"learning_rate": 9.26334992690717e-06, + "loss": 0.669, + "step": 6517 + }, + { + "epoch": 0.19976707122716686, + "grad_norm": 1.7535092090312512, + "learning_rate": 9.263090604699549e-06, + "loss": 0.7228, + "step": 6518 + }, + { + "epoch": 0.19979771974990806, + "grad_norm": 1.6988943099374463, + "learning_rate": 9.262831240486608e-06, + "loss": 0.6388, + "step": 6519 + }, + { + "epoch": 0.19982836827264927, + "grad_norm": 1.8594355390930355, + "learning_rate": 9.262571834270899e-06, + "loss": 0.6673, + "step": 6520 + }, + { + "epoch": 0.19985901679539045, + "grad_norm": 1.1310620286315272, + "learning_rate": 9.262312386054978e-06, + "loss": 0.516, + "step": 6521 + }, + { + "epoch": 0.19988966531813165, + "grad_norm": 1.8670707272267915, + "learning_rate": 9.262052895841404e-06, + "loss": 0.7726, + "step": 6522 + }, + { + "epoch": 0.19992031384087286, + "grad_norm": 0.8602198891209846, + "learning_rate": 9.261793363632733e-06, + "loss": 0.4943, + "step": 6523 + }, + { + "epoch": 0.19995096236361407, + "grad_norm": 2.0118785024782215, + "learning_rate": 9.26153378943152e-06, + "loss": 0.6839, + "step": 6524 + }, + { + "epoch": 0.19998161088635527, + "grad_norm": 2.1565823948729927, + "learning_rate": 9.261274173240324e-06, + "loss": 0.7537, + "step": 6525 + }, + { + "epoch": 0.20001225940909648, + "grad_norm": 2.1874435413626747, + "learning_rate": 9.261014515061704e-06, + "loss": 0.7277, + "step": 6526 + }, + { + "epoch": 0.20004290793183768, + "grad_norm": 1.7435204967234357, + "learning_rate": 9.260754814898217e-06, + "loss": 0.6857, + "step": 6527 + }, + { + "epoch": 0.2000735564545789, + "grad_norm": 2.1095300163762816, + "learning_rate": 9.260495072752424e-06, + "loss": 0.8037, + "step": 6528 + }, + { + "epoch": 0.2001042049773201, + "grad_norm": 1.6427355688273844, + "learning_rate": 9.260235288626882e-06, + "loss": 0.6177, + "step": 6529 + }, + { + "epoch": 0.2001348535000613, + "grad_norm": 1.8494462473031086, + "learning_rate": 9.259975462524151e-06, + "loss": 0.755, + "step": 6530 + }, + { + "epoch": 0.2001655020228025, + "grad_norm": 2.1838163809606694, + "learning_rate": 9.259715594446794e-06, + "loss": 0.8206, + "step": 6531 + }, + { + "epoch": 0.2001961505455437, + "grad_norm": 1.7601551847355834, + "learning_rate": 9.259455684397367e-06, + "loss": 0.7058, + "step": 6532 + }, + { + "epoch": 0.20022679906828492, + "grad_norm": 1.9787862729957775, + "learning_rate": 9.259195732378436e-06, + "loss": 0.7577, + "step": 6533 + }, + { + "epoch": 0.20025744759102612, + "grad_norm": 1.326563794882844, + "learning_rate": 9.258935738392557e-06, + "loss": 0.4942, + "step": 6534 + }, + { + "epoch": 0.20028809611376733, + "grad_norm": 1.8447456883663989, + "learning_rate": 9.258675702442295e-06, + "loss": 0.686, + "step": 6535 + }, + { + "epoch": 0.20031874463650853, + "grad_norm": 2.019769155067558, + "learning_rate": 9.258415624530213e-06, + "loss": 0.7846, + "step": 6536 + }, + { + "epoch": 0.2003493931592497, + "grad_norm": 1.7716291588635975, + "learning_rate": 9.258155504658871e-06, + "loss": 0.7755, + "step": 6537 + }, + { + "epoch": 0.20038004168199092, + "grad_norm": 1.7013285779860021, + "learning_rate": 9.257895342830834e-06, + "loss": 0.6499, + "step": 6538 + }, + { + "epoch": 0.20041069020473212, + "grad_norm": 1.8281825628933759, + "learning_rate": 9.257635139048664e-06, + "loss": 0.652, + "step": 6539 + }, + { + "epoch": 0.20044133872747333, + "grad_norm": 1.681680862198622, + "learning_rate": 9.257374893314929e-06, + "loss": 0.745, + "step": 6540 + }, + { + "epoch": 
0.20047198725021453, + "grad_norm": 1.750748344736613, + "learning_rate": 9.257114605632184e-06, + "loss": 0.8037, + "step": 6541 + }, + { + "epoch": 0.20050263577295574, + "grad_norm": 2.0194441639472918, + "learning_rate": 9.256854276003004e-06, + "loss": 0.7671, + "step": 6542 + }, + { + "epoch": 0.20053328429569695, + "grad_norm": 1.5491189310099804, + "learning_rate": 9.256593904429948e-06, + "loss": 0.7313, + "step": 6543 + }, + { + "epoch": 0.20056393281843815, + "grad_norm": 2.0387317544915673, + "learning_rate": 9.256333490915583e-06, + "loss": 0.8265, + "step": 6544 + }, + { + "epoch": 0.20059458134117936, + "grad_norm": 1.8472783140684421, + "learning_rate": 9.256073035462476e-06, + "loss": 0.7112, + "step": 6545 + }, + { + "epoch": 0.20062522986392056, + "grad_norm": 1.9559997476873752, + "learning_rate": 9.255812538073192e-06, + "loss": 0.8023, + "step": 6546 + }, + { + "epoch": 0.20065587838666177, + "grad_norm": 1.7751963151330756, + "learning_rate": 9.255551998750298e-06, + "loss": 0.7465, + "step": 6547 + }, + { + "epoch": 0.20068652690940297, + "grad_norm": 1.6285089872398384, + "learning_rate": 9.255291417496361e-06, + "loss": 0.6707, + "step": 6548 + }, + { + "epoch": 0.20071717543214418, + "grad_norm": 2.066887021409831, + "learning_rate": 9.255030794313951e-06, + "loss": 0.7359, + "step": 6549 + }, + { + "epoch": 0.20074782395488538, + "grad_norm": 1.810026201322013, + "learning_rate": 9.254770129205631e-06, + "loss": 0.8098, + "step": 6550 + }, + { + "epoch": 0.2007784724776266, + "grad_norm": 1.32192609427557, + "learning_rate": 9.254509422173974e-06, + "loss": 0.5172, + "step": 6551 + }, + { + "epoch": 0.20080912100036777, + "grad_norm": 1.8083278493132338, + "learning_rate": 9.254248673221546e-06, + "loss": 0.7667, + "step": 6552 + }, + { + "epoch": 0.20083976952310897, + "grad_norm": 2.3592698823056404, + "learning_rate": 9.253987882350919e-06, + "loss": 0.7318, + "step": 6553 + }, + { + "epoch": 0.20087041804585018, + "grad_norm": 1.8364253041244465, + "learning_rate": 9.25372704956466e-06, + "loss": 0.7282, + "step": 6554 + }, + { + "epoch": 0.20090106656859139, + "grad_norm": 1.916035390958038, + "learning_rate": 9.25346617486534e-06, + "loss": 0.7891, + "step": 6555 + }, + { + "epoch": 0.2009317150913326, + "grad_norm": 0.9096382859377576, + "learning_rate": 9.25320525825553e-06, + "loss": 0.5002, + "step": 6556 + }, + { + "epoch": 0.2009623636140738, + "grad_norm": 1.7671200267688203, + "learning_rate": 9.252944299737799e-06, + "loss": 0.6929, + "step": 6557 + }, + { + "epoch": 0.200993012136815, + "grad_norm": 1.8343039517851902, + "learning_rate": 9.25268329931472e-06, + "loss": 0.731, + "step": 6558 + }, + { + "epoch": 0.2010236606595562, + "grad_norm": 1.6883517637639183, + "learning_rate": 9.252422256988864e-06, + "loss": 0.6939, + "step": 6559 + }, + { + "epoch": 0.2010543091822974, + "grad_norm": 1.805050150843471, + "learning_rate": 9.252161172762806e-06, + "loss": 0.6901, + "step": 6560 + }, + { + "epoch": 0.20108495770503862, + "grad_norm": 1.6380221853747419, + "learning_rate": 9.251900046639113e-06, + "loss": 0.679, + "step": 6561 + }, + { + "epoch": 0.20111560622777983, + "grad_norm": 1.8102916102828397, + "learning_rate": 9.251638878620363e-06, + "loss": 0.7885, + "step": 6562 + }, + { + "epoch": 0.20114625475052103, + "grad_norm": 1.8241294067775564, + "learning_rate": 9.251377668709127e-06, + "loss": 0.7478, + "step": 6563 + }, + { + "epoch": 0.20117690327326224, + "grad_norm": 1.6842233813816216, + "learning_rate": 9.251116416907977e-06, + 
"loss": 0.7498, + "step": 6564 + }, + { + "epoch": 0.20120755179600344, + "grad_norm": 1.8100293863430048, + "learning_rate": 9.25085512321949e-06, + "loss": 0.6872, + "step": 6565 + }, + { + "epoch": 0.20123820031874465, + "grad_norm": 1.6472730466486907, + "learning_rate": 9.250593787646243e-06, + "loss": 0.6643, + "step": 6566 + }, + { + "epoch": 0.20126884884148585, + "grad_norm": 1.8889242535858384, + "learning_rate": 9.250332410190805e-06, + "loss": 0.6635, + "step": 6567 + }, + { + "epoch": 0.20129949736422703, + "grad_norm": 1.8184661052451312, + "learning_rate": 9.250070990855755e-06, + "loss": 0.6848, + "step": 6568 + }, + { + "epoch": 0.20133014588696824, + "grad_norm": 1.9989499771384995, + "learning_rate": 9.249809529643668e-06, + "loss": 0.7583, + "step": 6569 + }, + { + "epoch": 0.20136079440970944, + "grad_norm": 1.657809201819308, + "learning_rate": 9.24954802655712e-06, + "loss": 0.7436, + "step": 6570 + }, + { + "epoch": 0.20139144293245065, + "grad_norm": 1.621128316557437, + "learning_rate": 9.249286481598686e-06, + "loss": 0.6899, + "step": 6571 + }, + { + "epoch": 0.20142209145519185, + "grad_norm": 1.7249272348809492, + "learning_rate": 9.24902489477095e-06, + "loss": 0.7572, + "step": 6572 + }, + { + "epoch": 0.20145273997793306, + "grad_norm": 1.7968036147698503, + "learning_rate": 9.248763266076482e-06, + "loss": 0.748, + "step": 6573 + }, + { + "epoch": 0.20148338850067427, + "grad_norm": 1.768502716217813, + "learning_rate": 9.248501595517861e-06, + "loss": 0.6725, + "step": 6574 + }, + { + "epoch": 0.20151403702341547, + "grad_norm": 1.656566743879616, + "learning_rate": 9.248239883097668e-06, + "loss": 0.6612, + "step": 6575 + }, + { + "epoch": 0.20154468554615668, + "grad_norm": 1.6832491164024348, + "learning_rate": 9.247978128818482e-06, + "loss": 0.6928, + "step": 6576 + }, + { + "epoch": 0.20157533406889788, + "grad_norm": 1.6744588640819036, + "learning_rate": 9.24771633268288e-06, + "loss": 0.7005, + "step": 6577 + }, + { + "epoch": 0.2016059825916391, + "grad_norm": 1.8339430330868887, + "learning_rate": 9.24745449469344e-06, + "loss": 0.7301, + "step": 6578 + }, + { + "epoch": 0.2016366311143803, + "grad_norm": 1.7663051297227084, + "learning_rate": 9.247192614852744e-06, + "loss": 0.6911, + "step": 6579 + }, + { + "epoch": 0.2016672796371215, + "grad_norm": 1.6095730052810935, + "learning_rate": 9.246930693163375e-06, + "loss": 0.7305, + "step": 6580 + }, + { + "epoch": 0.2016979281598627, + "grad_norm": 1.0059369530694335, + "learning_rate": 9.246668729627911e-06, + "loss": 0.4961, + "step": 6581 + }, + { + "epoch": 0.2017285766826039, + "grad_norm": 0.9410611562624123, + "learning_rate": 9.246406724248931e-06, + "loss": 0.5049, + "step": 6582 + }, + { + "epoch": 0.2017592252053451, + "grad_norm": 1.6671549903166316, + "learning_rate": 9.246144677029022e-06, + "loss": 0.6655, + "step": 6583 + }, + { + "epoch": 0.2017898737280863, + "grad_norm": 2.0677759648419567, + "learning_rate": 9.245882587970761e-06, + "loss": 0.7799, + "step": 6584 + }, + { + "epoch": 0.2018205222508275, + "grad_norm": 1.8156971920890892, + "learning_rate": 9.245620457076732e-06, + "loss": 0.735, + "step": 6585 + }, + { + "epoch": 0.2018511707735687, + "grad_norm": 1.8282996842307349, + "learning_rate": 9.24535828434952e-06, + "loss": 0.7732, + "step": 6586 + }, + { + "epoch": 0.2018818192963099, + "grad_norm": 1.0399718028761362, + "learning_rate": 9.245096069791706e-06, + "loss": 0.502, + "step": 6587 + }, + { + "epoch": 0.20191246781905112, + "grad_norm": 
1.7518183566681909, + "learning_rate": 9.244833813405875e-06, + "loss": 0.6816, + "step": 6588 + }, + { + "epoch": 0.20194311634179232, + "grad_norm": 1.9230261471242749, + "learning_rate": 9.244571515194609e-06, + "loss": 0.7296, + "step": 6589 + }, + { + "epoch": 0.20197376486453353, + "grad_norm": 1.9921698624294162, + "learning_rate": 9.244309175160495e-06, + "loss": 0.7735, + "step": 6590 + }, + { + "epoch": 0.20200441338727473, + "grad_norm": 1.7383508820920837, + "learning_rate": 9.244046793306116e-06, + "loss": 0.7678, + "step": 6591 + }, + { + "epoch": 0.20203506191001594, + "grad_norm": 0.9111723580628409, + "learning_rate": 9.243784369634059e-06, + "loss": 0.4642, + "step": 6592 + }, + { + "epoch": 0.20206571043275715, + "grad_norm": 1.8201120144928187, + "learning_rate": 9.243521904146908e-06, + "loss": 0.8044, + "step": 6593 + }, + { + "epoch": 0.20209635895549835, + "grad_norm": 1.7402936848523232, + "learning_rate": 9.243259396847251e-06, + "loss": 0.7929, + "step": 6594 + }, + { + "epoch": 0.20212700747823956, + "grad_norm": 1.8216661972513957, + "learning_rate": 9.242996847737672e-06, + "loss": 0.7196, + "step": 6595 + }, + { + "epoch": 0.20215765600098076, + "grad_norm": 1.8601013072774117, + "learning_rate": 9.24273425682076e-06, + "loss": 0.7483, + "step": 6596 + }, + { + "epoch": 0.20218830452372197, + "grad_norm": 1.758711750290856, + "learning_rate": 9.242471624099102e-06, + "loss": 0.7508, + "step": 6597 + }, + { + "epoch": 0.20221895304646317, + "grad_norm": 1.6594963955388595, + "learning_rate": 9.242208949575286e-06, + "loss": 0.6746, + "step": 6598 + }, + { + "epoch": 0.20224960156920435, + "grad_norm": 1.6858120551761677, + "learning_rate": 9.241946233251899e-06, + "loss": 0.6328, + "step": 6599 + }, + { + "epoch": 0.20228025009194556, + "grad_norm": 1.619262767924616, + "learning_rate": 9.241683475131531e-06, + "loss": 0.6804, + "step": 6600 + }, + { + "epoch": 0.20231089861468676, + "grad_norm": 2.067417912480514, + "learning_rate": 9.24142067521677e-06, + "loss": 0.7521, + "step": 6601 + }, + { + "epoch": 0.20234154713742797, + "grad_norm": 1.6015944220427185, + "learning_rate": 9.241157833510206e-06, + "loss": 0.7698, + "step": 6602 + }, + { + "epoch": 0.20237219566016917, + "grad_norm": 1.7107399644773316, + "learning_rate": 9.240894950014429e-06, + "loss": 0.6641, + "step": 6603 + }, + { + "epoch": 0.20240284418291038, + "grad_norm": 1.7653458788290497, + "learning_rate": 9.240632024732027e-06, + "loss": 0.7876, + "step": 6604 + }, + { + "epoch": 0.20243349270565159, + "grad_norm": 1.9352651356098804, + "learning_rate": 9.240369057665595e-06, + "loss": 0.7347, + "step": 6605 + }, + { + "epoch": 0.2024641412283928, + "grad_norm": 1.841141778677358, + "learning_rate": 9.24010604881772e-06, + "loss": 0.6776, + "step": 6606 + }, + { + "epoch": 0.202494789751134, + "grad_norm": 1.9876588152466874, + "learning_rate": 9.239842998190997e-06, + "loss": 0.8427, + "step": 6607 + }, + { + "epoch": 0.2025254382738752, + "grad_norm": 1.7679773685434141, + "learning_rate": 9.239579905788016e-06, + "loss": 0.7741, + "step": 6608 + }, + { + "epoch": 0.2025560867966164, + "grad_norm": 1.2065562400598246, + "learning_rate": 9.239316771611369e-06, + "loss": 0.4845, + "step": 6609 + }, + { + "epoch": 0.2025867353193576, + "grad_norm": 1.7616902080727936, + "learning_rate": 9.239053595663649e-06, + "loss": 0.8193, + "step": 6610 + }, + { + "epoch": 0.20261738384209882, + "grad_norm": 1.8872163038897531, + "learning_rate": 9.23879037794745e-06, + "loss": 0.8159, + "step": 6611 
+ }, + { + "epoch": 0.20264803236484003, + "grad_norm": 0.7982441913334285, + "learning_rate": 9.238527118465364e-06, + "loss": 0.4809, + "step": 6612 + }, + { + "epoch": 0.20267868088758123, + "grad_norm": 1.7478954917779395, + "learning_rate": 9.238263817219986e-06, + "loss": 0.808, + "step": 6613 + }, + { + "epoch": 0.2027093294103224, + "grad_norm": 1.770424173465027, + "learning_rate": 9.23800047421391e-06, + "loss": 0.7679, + "step": 6614 + }, + { + "epoch": 0.20273997793306361, + "grad_norm": 1.9557891975531323, + "learning_rate": 9.237737089449731e-06, + "loss": 0.776, + "step": 6615 + }, + { + "epoch": 0.20277062645580482, + "grad_norm": 1.8451328079787055, + "learning_rate": 9.237473662930045e-06, + "loss": 0.7737, + "step": 6616 + }, + { + "epoch": 0.20280127497854603, + "grad_norm": 1.7156679632689815, + "learning_rate": 9.237210194657447e-06, + "loss": 0.6564, + "step": 6617 + }, + { + "epoch": 0.20283192350128723, + "grad_norm": 1.6644964464131988, + "learning_rate": 9.236946684634531e-06, + "loss": 0.7806, + "step": 6618 + }, + { + "epoch": 0.20286257202402844, + "grad_norm": 1.70294977689974, + "learning_rate": 9.236683132863897e-06, + "loss": 0.6964, + "step": 6619 + }, + { + "epoch": 0.20289322054676964, + "grad_norm": 1.6561775822855274, + "learning_rate": 9.236419539348138e-06, + "loss": 0.6342, + "step": 6620 + }, + { + "epoch": 0.20292386906951085, + "grad_norm": 1.566969488571515, + "learning_rate": 9.236155904089856e-06, + "loss": 0.7468, + "step": 6621 + }, + { + "epoch": 0.20295451759225205, + "grad_norm": 1.8224557885067216, + "learning_rate": 9.235892227091645e-06, + "loss": 0.7336, + "step": 6622 + }, + { + "epoch": 0.20298516611499326, + "grad_norm": 1.5420780528287912, + "learning_rate": 9.235628508356102e-06, + "loss": 0.6374, + "step": 6623 + }, + { + "epoch": 0.20301581463773447, + "grad_norm": 1.5599458786406164, + "learning_rate": 9.235364747885831e-06, + "loss": 0.6835, + "step": 6624 + }, + { + "epoch": 0.20304646316047567, + "grad_norm": 1.6870489407930334, + "learning_rate": 9.235100945683425e-06, + "loss": 0.7792, + "step": 6625 + }, + { + "epoch": 0.20307711168321688, + "grad_norm": 1.7135441503591025, + "learning_rate": 9.234837101751486e-06, + "loss": 0.7138, + "step": 6626 + }, + { + "epoch": 0.20310776020595808, + "grad_norm": 1.652015049017691, + "learning_rate": 9.234573216092615e-06, + "loss": 0.6777, + "step": 6627 + }, + { + "epoch": 0.2031384087286993, + "grad_norm": 1.5962697925543086, + "learning_rate": 9.23430928870941e-06, + "loss": 0.6849, + "step": 6628 + }, + { + "epoch": 0.2031690572514405, + "grad_norm": 1.696705233588403, + "learning_rate": 9.234045319604472e-06, + "loss": 0.6438, + "step": 6629 + }, + { + "epoch": 0.20319970577418167, + "grad_norm": 1.7254547769411297, + "learning_rate": 9.233781308780402e-06, + "loss": 0.5049, + "step": 6630 + }, + { + "epoch": 0.20323035429692288, + "grad_norm": 1.8106271581921312, + "learning_rate": 9.233517256239801e-06, + "loss": 0.7528, + "step": 6631 + }, + { + "epoch": 0.20326100281966408, + "grad_norm": 1.770654445739423, + "learning_rate": 9.233253161985271e-06, + "loss": 0.7694, + "step": 6632 + }, + { + "epoch": 0.2032916513424053, + "grad_norm": 1.908244034338254, + "learning_rate": 9.232989026019417e-06, + "loss": 0.7493, + "step": 6633 + }, + { + "epoch": 0.2033222998651465, + "grad_norm": 2.0669613320381792, + "learning_rate": 9.232724848344836e-06, + "loss": 0.7955, + "step": 6634 + }, + { + "epoch": 0.2033529483878877, + "grad_norm": 1.9512955535582817, + "learning_rate": 
9.232460628964138e-06, + "loss": 0.7618, + "step": 6635 + }, + { + "epoch": 0.2033835969106289, + "grad_norm": 1.66731987792686, + "learning_rate": 9.23219636787992e-06, + "loss": 0.7795, + "step": 6636 + }, + { + "epoch": 0.2034142454333701, + "grad_norm": 1.9752484349690727, + "learning_rate": 9.231932065094787e-06, + "loss": 0.6804, + "step": 6637 + }, + { + "epoch": 0.20344489395611132, + "grad_norm": 1.7984650492034293, + "learning_rate": 9.231667720611345e-06, + "loss": 0.8317, + "step": 6638 + }, + { + "epoch": 0.20347554247885252, + "grad_norm": 1.6866074085358782, + "learning_rate": 9.2314033344322e-06, + "loss": 0.7022, + "step": 6639 + }, + { + "epoch": 0.20350619100159373, + "grad_norm": 1.9260443436024903, + "learning_rate": 9.231138906559956e-06, + "loss": 0.7899, + "step": 6640 + }, + { + "epoch": 0.20353683952433493, + "grad_norm": 1.8732996327300622, + "learning_rate": 9.230874436997215e-06, + "loss": 0.7327, + "step": 6641 + }, + { + "epoch": 0.20356748804707614, + "grad_norm": 1.872417946494757, + "learning_rate": 9.230609925746588e-06, + "loss": 0.7626, + "step": 6642 + }, + { + "epoch": 0.20359813656981735, + "grad_norm": 1.5911728500186832, + "learning_rate": 9.230345372810676e-06, + "loss": 0.5149, + "step": 6643 + }, + { + "epoch": 0.20362878509255855, + "grad_norm": 1.2947621163368583, + "learning_rate": 9.230080778192091e-06, + "loss": 0.4869, + "step": 6644 + }, + { + "epoch": 0.20365943361529976, + "grad_norm": 2.099586371873431, + "learning_rate": 9.229816141893438e-06, + "loss": 0.7174, + "step": 6645 + }, + { + "epoch": 0.20369008213804093, + "grad_norm": 1.9319761996023133, + "learning_rate": 9.229551463917323e-06, + "loss": 0.7798, + "step": 6646 + }, + { + "epoch": 0.20372073066078214, + "grad_norm": 1.912523783487992, + "learning_rate": 9.229286744266356e-06, + "loss": 0.7719, + "step": 6647 + }, + { + "epoch": 0.20375137918352335, + "grad_norm": 2.0253023980915192, + "learning_rate": 9.229021982943144e-06, + "loss": 0.7497, + "step": 6648 + }, + { + "epoch": 0.20378202770626455, + "grad_norm": 1.8800375719252924, + "learning_rate": 9.228757179950298e-06, + "loss": 0.7786, + "step": 6649 + }, + { + "epoch": 0.20381267622900576, + "grad_norm": 1.9702133858665636, + "learning_rate": 9.228492335290423e-06, + "loss": 0.6569, + "step": 6650 + }, + { + "epoch": 0.20384332475174696, + "grad_norm": 1.3578457624577465, + "learning_rate": 9.228227448966132e-06, + "loss": 0.505, + "step": 6651 + }, + { + "epoch": 0.20387397327448817, + "grad_norm": 1.2011443754920823, + "learning_rate": 9.227962520980037e-06, + "loss": 0.5069, + "step": 6652 + }, + { + "epoch": 0.20390462179722937, + "grad_norm": 2.106345887566035, + "learning_rate": 9.227697551334741e-06, + "loss": 0.7053, + "step": 6653 + }, + { + "epoch": 0.20393527031997058, + "grad_norm": 2.114125189602315, + "learning_rate": 9.227432540032862e-06, + "loss": 0.6981, + "step": 6654 + }, + { + "epoch": 0.20396591884271179, + "grad_norm": 1.849583004384926, + "learning_rate": 9.227167487077008e-06, + "loss": 0.7422, + "step": 6655 + }, + { + "epoch": 0.203996567365453, + "grad_norm": 2.0034395382260817, + "learning_rate": 9.226902392469792e-06, + "loss": 0.8278, + "step": 6656 + }, + { + "epoch": 0.2040272158881942, + "grad_norm": 1.0590754775607494, + "learning_rate": 9.226637256213825e-06, + "loss": 0.5105, + "step": 6657 + }, + { + "epoch": 0.2040578644109354, + "grad_norm": 1.8496570912603294, + "learning_rate": 9.226372078311718e-06, + "loss": 0.8226, + "step": 6658 + }, + { + "epoch": 0.2040885129336766, + 
"grad_norm": 1.7876544555006488, + "learning_rate": 9.226106858766086e-06, + "loss": 0.6342, + "step": 6659 + }, + { + "epoch": 0.2041191614564178, + "grad_norm": 1.7938157363536023, + "learning_rate": 9.225841597579543e-06, + "loss": 0.6896, + "step": 6660 + }, + { + "epoch": 0.204149809979159, + "grad_norm": 1.9565169667769466, + "learning_rate": 9.2255762947547e-06, + "loss": 0.7452, + "step": 6661 + }, + { + "epoch": 0.2041804585019002, + "grad_norm": 1.8758852222531457, + "learning_rate": 9.225310950294173e-06, + "loss": 0.6646, + "step": 6662 + }, + { + "epoch": 0.2042111070246414, + "grad_norm": 1.7023424269775942, + "learning_rate": 9.225045564200578e-06, + "loss": 0.7268, + "step": 6663 + }, + { + "epoch": 0.2042417555473826, + "grad_norm": 1.8212406994461379, + "learning_rate": 9.224780136476526e-06, + "loss": 0.6483, + "step": 6664 + }, + { + "epoch": 0.20427240407012381, + "grad_norm": 1.9114192350711876, + "learning_rate": 9.224514667124636e-06, + "loss": 0.7738, + "step": 6665 + }, + { + "epoch": 0.20430305259286502, + "grad_norm": 0.9464330160168086, + "learning_rate": 9.22424915614752e-06, + "loss": 0.4988, + "step": 6666 + }, + { + "epoch": 0.20433370111560623, + "grad_norm": 1.6774413809895843, + "learning_rate": 9.223983603547797e-06, + "loss": 0.76, + "step": 6667 + }, + { + "epoch": 0.20436434963834743, + "grad_norm": 2.1297242980020585, + "learning_rate": 9.223718009328085e-06, + "loss": 0.7458, + "step": 6668 + }, + { + "epoch": 0.20439499816108864, + "grad_norm": 1.8391746031578589, + "learning_rate": 9.223452373490995e-06, + "loss": 0.7715, + "step": 6669 + }, + { + "epoch": 0.20442564668382984, + "grad_norm": 1.7969514684466465, + "learning_rate": 9.22318669603915e-06, + "loss": 0.7615, + "step": 6670 + }, + { + "epoch": 0.20445629520657105, + "grad_norm": 1.8464545630006532, + "learning_rate": 9.222920976975166e-06, + "loss": 0.7668, + "step": 6671 + }, + { + "epoch": 0.20448694372931225, + "grad_norm": 2.0173507697568365, + "learning_rate": 9.222655216301663e-06, + "loss": 0.7951, + "step": 6672 + }, + { + "epoch": 0.20451759225205346, + "grad_norm": 1.8196246664633693, + "learning_rate": 9.222389414021256e-06, + "loss": 0.7519, + "step": 6673 + }, + { + "epoch": 0.20454824077479467, + "grad_norm": 1.9183998986632764, + "learning_rate": 9.222123570136565e-06, + "loss": 0.7243, + "step": 6674 + }, + { + "epoch": 0.20457888929753587, + "grad_norm": 1.7768919606366251, + "learning_rate": 9.22185768465021e-06, + "loss": 0.7392, + "step": 6675 + }, + { + "epoch": 0.20460953782027708, + "grad_norm": 1.7240818509903748, + "learning_rate": 9.221591757564811e-06, + "loss": 0.7269, + "step": 6676 + }, + { + "epoch": 0.20464018634301825, + "grad_norm": 1.8175113927921152, + "learning_rate": 9.221325788882991e-06, + "loss": 0.7112, + "step": 6677 + }, + { + "epoch": 0.20467083486575946, + "grad_norm": 1.8001141674447956, + "learning_rate": 9.221059778607366e-06, + "loss": 0.6908, + "step": 6678 + }, + { + "epoch": 0.20470148338850067, + "grad_norm": 1.721961686047634, + "learning_rate": 9.220793726740558e-06, + "loss": 0.7957, + "step": 6679 + }, + { + "epoch": 0.20473213191124187, + "grad_norm": 1.6468989326694612, + "learning_rate": 9.22052763328519e-06, + "loss": 0.6497, + "step": 6680 + }, + { + "epoch": 0.20476278043398308, + "grad_norm": 0.9140398497616437, + "learning_rate": 9.220261498243886e-06, + "loss": 0.5198, + "step": 6681 + }, + { + "epoch": 0.20479342895672428, + "grad_norm": 1.5069759168311372, + "learning_rate": 9.219995321619263e-06, + "loss": 0.5768, + 
"step": 6682 + }, + { + "epoch": 0.2048240774794655, + "grad_norm": 1.6584602009983955, + "learning_rate": 9.219729103413946e-06, + "loss": 0.6692, + "step": 6683 + }, + { + "epoch": 0.2048547260022067, + "grad_norm": 1.9468948683189988, + "learning_rate": 9.21946284363056e-06, + "loss": 0.7032, + "step": 6684 + }, + { + "epoch": 0.2048853745249479, + "grad_norm": 1.7505433745981749, + "learning_rate": 9.219196542271727e-06, + "loss": 0.6931, + "step": 6685 + }, + { + "epoch": 0.2049160230476891, + "grad_norm": 2.0072518513043325, + "learning_rate": 9.218930199340071e-06, + "loss": 0.606, + "step": 6686 + }, + { + "epoch": 0.2049466715704303, + "grad_norm": 1.7422720055249175, + "learning_rate": 9.218663814838215e-06, + "loss": 0.7615, + "step": 6687 + }, + { + "epoch": 0.20497732009317152, + "grad_norm": 1.9220098459847357, + "learning_rate": 9.218397388768785e-06, + "loss": 0.7736, + "step": 6688 + }, + { + "epoch": 0.20500796861591272, + "grad_norm": 1.6868481486025837, + "learning_rate": 9.218130921134408e-06, + "loss": 0.7595, + "step": 6689 + }, + { + "epoch": 0.20503861713865393, + "grad_norm": 1.8689688465186625, + "learning_rate": 9.217864411937704e-06, + "loss": 0.7504, + "step": 6690 + }, + { + "epoch": 0.20506926566139513, + "grad_norm": 1.4545475313142977, + "learning_rate": 9.217597861181306e-06, + "loss": 0.6051, + "step": 6691 + }, + { + "epoch": 0.2050999141841363, + "grad_norm": 1.6395750549237205, + "learning_rate": 9.217331268867838e-06, + "loss": 0.6541, + "step": 6692 + }, + { + "epoch": 0.20513056270687752, + "grad_norm": 0.8965563947513557, + "learning_rate": 9.217064634999925e-06, + "loss": 0.492, + "step": 6693 + }, + { + "epoch": 0.20516121122961872, + "grad_norm": 1.6538847526067177, + "learning_rate": 9.216797959580193e-06, + "loss": 0.7286, + "step": 6694 + }, + { + "epoch": 0.20519185975235993, + "grad_norm": 1.8670750818848154, + "learning_rate": 9.216531242611275e-06, + "loss": 0.8586, + "step": 6695 + }, + { + "epoch": 0.20522250827510113, + "grad_norm": 1.5827274861026595, + "learning_rate": 9.216264484095794e-06, + "loss": 0.5766, + "step": 6696 + }, + { + "epoch": 0.20525315679784234, + "grad_norm": 2.1214094957196954, + "learning_rate": 9.21599768403638e-06, + "loss": 0.7259, + "step": 6697 + }, + { + "epoch": 0.20528380532058355, + "grad_norm": 1.8194552350215416, + "learning_rate": 9.215730842435663e-06, + "loss": 0.8057, + "step": 6698 + }, + { + "epoch": 0.20531445384332475, + "grad_norm": 2.2368767750227447, + "learning_rate": 9.215463959296272e-06, + "loss": 0.7998, + "step": 6699 + }, + { + "epoch": 0.20534510236606596, + "grad_norm": 1.8227237030594687, + "learning_rate": 9.215197034620835e-06, + "loss": 0.7978, + "step": 6700 + }, + { + "epoch": 0.20537575088880716, + "grad_norm": 1.7993357358745083, + "learning_rate": 9.214930068411982e-06, + "loss": 0.808, + "step": 6701 + }, + { + "epoch": 0.20540639941154837, + "grad_norm": 0.8086682758621407, + "learning_rate": 9.214663060672347e-06, + "loss": 0.4703, + "step": 6702 + }, + { + "epoch": 0.20543704793428957, + "grad_norm": 1.6677295138826123, + "learning_rate": 9.214396011404557e-06, + "loss": 0.711, + "step": 6703 + }, + { + "epoch": 0.20546769645703078, + "grad_norm": 1.6527854358083434, + "learning_rate": 9.214128920611246e-06, + "loss": 0.6581, + "step": 6704 + }, + { + "epoch": 0.20549834497977199, + "grad_norm": 1.6138597213212753, + "learning_rate": 9.213861788295043e-06, + "loss": 0.7504, + "step": 6705 + }, + { + "epoch": 0.2055289935025132, + "grad_norm": 1.9000633820630646, + 
"learning_rate": 9.21359461445858e-06, + "loss": 0.8029, + "step": 6706 + }, + { + "epoch": 0.2055596420252544, + "grad_norm": 0.8079371026457818, + "learning_rate": 9.213327399104495e-06, + "loss": 0.497, + "step": 6707 + }, + { + "epoch": 0.20559029054799557, + "grad_norm": 1.8018031587273264, + "learning_rate": 9.213060142235415e-06, + "loss": 0.8084, + "step": 6708 + }, + { + "epoch": 0.20562093907073678, + "grad_norm": 1.8431215762671034, + "learning_rate": 9.212792843853976e-06, + "loss": 0.7071, + "step": 6709 + }, + { + "epoch": 0.20565158759347799, + "grad_norm": 1.7808608251179903, + "learning_rate": 9.212525503962812e-06, + "loss": 0.7322, + "step": 6710 + }, + { + "epoch": 0.2056822361162192, + "grad_norm": 1.7596583454879335, + "learning_rate": 9.212258122564554e-06, + "loss": 0.7171, + "step": 6711 + }, + { + "epoch": 0.2057128846389604, + "grad_norm": 1.848475545135932, + "learning_rate": 9.21199069966184e-06, + "loss": 0.656, + "step": 6712 + }, + { + "epoch": 0.2057435331617016, + "grad_norm": 1.7322849877973003, + "learning_rate": 9.211723235257306e-06, + "loss": 0.6613, + "step": 6713 + }, + { + "epoch": 0.2057741816844428, + "grad_norm": 1.726106141228765, + "learning_rate": 9.211455729353584e-06, + "loss": 0.5959, + "step": 6714 + }, + { + "epoch": 0.20580483020718401, + "grad_norm": 0.8449438454935616, + "learning_rate": 9.21118818195331e-06, + "loss": 0.4771, + "step": 6715 + }, + { + "epoch": 0.20583547872992522, + "grad_norm": 1.8006532677505842, + "learning_rate": 9.210920593059124e-06, + "loss": 0.7319, + "step": 6716 + }, + { + "epoch": 0.20586612725266643, + "grad_norm": 1.818219088097725, + "learning_rate": 9.210652962673658e-06, + "loss": 0.7147, + "step": 6717 + }, + { + "epoch": 0.20589677577540763, + "grad_norm": 1.769715759308144, + "learning_rate": 9.210385290799551e-06, + "loss": 0.6483, + "step": 6718 + }, + { + "epoch": 0.20592742429814884, + "grad_norm": 1.99237866432386, + "learning_rate": 9.210117577439441e-06, + "loss": 0.8504, + "step": 6719 + }, + { + "epoch": 0.20595807282089004, + "grad_norm": 0.8648996471638114, + "learning_rate": 9.209849822595964e-06, + "loss": 0.5102, + "step": 6720 + }, + { + "epoch": 0.20598872134363125, + "grad_norm": 1.8001993989486489, + "learning_rate": 9.209582026271762e-06, + "loss": 0.7263, + "step": 6721 + }, + { + "epoch": 0.20601936986637245, + "grad_norm": 2.2853154744169792, + "learning_rate": 9.209314188469469e-06, + "loss": 0.7677, + "step": 6722 + }, + { + "epoch": 0.20605001838911363, + "grad_norm": 1.8936703711050131, + "learning_rate": 9.209046309191727e-06, + "loss": 0.8022, + "step": 6723 + }, + { + "epoch": 0.20608066691185484, + "grad_norm": 1.655890791861509, + "learning_rate": 9.208778388441175e-06, + "loss": 0.7126, + "step": 6724 + }, + { + "epoch": 0.20611131543459604, + "grad_norm": 1.6660759053116119, + "learning_rate": 9.208510426220454e-06, + "loss": 0.6042, + "step": 6725 + }, + { + "epoch": 0.20614196395733725, + "grad_norm": 1.7908682415571213, + "learning_rate": 9.2082424225322e-06, + "loss": 0.662, + "step": 6726 + }, + { + "epoch": 0.20617261248007845, + "grad_norm": 0.8416356398587774, + "learning_rate": 9.20797437737906e-06, + "loss": 0.4858, + "step": 6727 + }, + { + "epoch": 0.20620326100281966, + "grad_norm": 1.8428161681178898, + "learning_rate": 9.20770629076367e-06, + "loss": 0.788, + "step": 6728 + }, + { + "epoch": 0.20623390952556087, + "grad_norm": 1.8046199830959653, + "learning_rate": 9.207438162688673e-06, + "loss": 0.7917, + "step": 6729 + }, + { + "epoch": 
0.20626455804830207, + "grad_norm": 1.635368796239476, + "learning_rate": 9.207169993156713e-06, + "loss": 0.8005, + "step": 6730 + }, + { + "epoch": 0.20629520657104328, + "grad_norm": 1.8843761641425258, + "learning_rate": 9.206901782170428e-06, + "loss": 0.7298, + "step": 6731 + }, + { + "epoch": 0.20632585509378448, + "grad_norm": 1.894063755421474, + "learning_rate": 9.206633529732465e-06, + "loss": 0.7891, + "step": 6732 + }, + { + "epoch": 0.2063565036165257, + "grad_norm": 1.7218024300435457, + "learning_rate": 9.206365235845465e-06, + "loss": 0.7171, + "step": 6733 + }, + { + "epoch": 0.2063871521392669, + "grad_norm": 1.7324222501452822, + "learning_rate": 9.206096900512072e-06, + "loss": 0.662, + "step": 6734 + }, + { + "epoch": 0.2064178006620081, + "grad_norm": 1.7759893358881007, + "learning_rate": 9.205828523734931e-06, + "loss": 0.6528, + "step": 6735 + }, + { + "epoch": 0.2064484491847493, + "grad_norm": 1.5790169613134686, + "learning_rate": 9.205560105516684e-06, + "loss": 0.7323, + "step": 6736 + }, + { + "epoch": 0.2064790977074905, + "grad_norm": 1.689437440433285, + "learning_rate": 9.20529164585998e-06, + "loss": 0.7641, + "step": 6737 + }, + { + "epoch": 0.20650974623023172, + "grad_norm": 1.7850496429094946, + "learning_rate": 9.205023144767457e-06, + "loss": 0.7447, + "step": 6738 + }, + { + "epoch": 0.2065403947529729, + "grad_norm": 1.5837632832801825, + "learning_rate": 9.20475460224177e-06, + "loss": 0.7499, + "step": 6739 + }, + { + "epoch": 0.2065710432757141, + "grad_norm": 2.496330439983192, + "learning_rate": 9.204486018285557e-06, + "loss": 0.7428, + "step": 6740 + }, + { + "epoch": 0.2066016917984553, + "grad_norm": 1.6272459903651788, + "learning_rate": 9.204217392901468e-06, + "loss": 0.6738, + "step": 6741 + }, + { + "epoch": 0.2066323403211965, + "grad_norm": 0.91348703353134, + "learning_rate": 9.20394872609215e-06, + "loss": 0.489, + "step": 6742 + }, + { + "epoch": 0.20666298884393772, + "grad_norm": 1.8931154371226169, + "learning_rate": 9.203680017860249e-06, + "loss": 0.7694, + "step": 6743 + }, + { + "epoch": 0.20669363736667892, + "grad_norm": 1.9432522425411105, + "learning_rate": 9.203411268208413e-06, + "loss": 0.6489, + "step": 6744 + }, + { + "epoch": 0.20672428588942013, + "grad_norm": 1.6733472115195271, + "learning_rate": 9.20314247713929e-06, + "loss": 0.6972, + "step": 6745 + }, + { + "epoch": 0.20675493441216133, + "grad_norm": 1.7142606105069549, + "learning_rate": 9.20287364465553e-06, + "loss": 0.7421, + "step": 6746 + }, + { + "epoch": 0.20678558293490254, + "grad_norm": 2.0087471357459714, + "learning_rate": 9.20260477075978e-06, + "loss": 0.7201, + "step": 6747 + }, + { + "epoch": 0.20681623145764375, + "grad_norm": 0.8458847455538521, + "learning_rate": 9.20233585545469e-06, + "loss": 0.4966, + "step": 6748 + }, + { + "epoch": 0.20684687998038495, + "grad_norm": 2.049286359960871, + "learning_rate": 9.20206689874291e-06, + "loss": 0.6957, + "step": 6749 + }, + { + "epoch": 0.20687752850312616, + "grad_norm": 0.7826940363582866, + "learning_rate": 9.201797900627087e-06, + "loss": 0.4764, + "step": 6750 + }, + { + "epoch": 0.20690817702586736, + "grad_norm": 1.6912603616861772, + "learning_rate": 9.201528861109877e-06, + "loss": 0.7015, + "step": 6751 + }, + { + "epoch": 0.20693882554860857, + "grad_norm": 0.8170849642207012, + "learning_rate": 9.201259780193927e-06, + "loss": 0.4846, + "step": 6752 + }, + { + "epoch": 0.20696947407134977, + "grad_norm": 1.827240031504076, + "learning_rate": 9.200990657881891e-06, + "loss": 
0.6992, + "step": 6753 + }, + { + "epoch": 0.20700012259409095, + "grad_norm": 1.8259011429069365, + "learning_rate": 9.200721494176418e-06, + "loss": 0.7607, + "step": 6754 + }, + { + "epoch": 0.20703077111683216, + "grad_norm": 1.9065495493963849, + "learning_rate": 9.200452289080161e-06, + "loss": 0.7467, + "step": 6755 + }, + { + "epoch": 0.20706141963957336, + "grad_norm": 1.7927005656618156, + "learning_rate": 9.200183042595775e-06, + "loss": 0.7667, + "step": 6756 + }, + { + "epoch": 0.20709206816231457, + "grad_norm": 1.8258823910021642, + "learning_rate": 9.199913754725908e-06, + "loss": 0.7707, + "step": 6757 + }, + { + "epoch": 0.20712271668505577, + "grad_norm": 1.5340429475217137, + "learning_rate": 9.199644425473217e-06, + "loss": 0.6963, + "step": 6758 + }, + { + "epoch": 0.20715336520779698, + "grad_norm": 1.8299104565238002, + "learning_rate": 9.199375054840356e-06, + "loss": 0.6688, + "step": 6759 + }, + { + "epoch": 0.20718401373053819, + "grad_norm": 1.8802104902477303, + "learning_rate": 9.199105642829977e-06, + "loss": 0.7738, + "step": 6760 + }, + { + "epoch": 0.2072146622532794, + "grad_norm": 1.7598982859339458, + "learning_rate": 9.198836189444735e-06, + "loss": 0.6863, + "step": 6761 + }, + { + "epoch": 0.2072453107760206, + "grad_norm": 1.8367670019135447, + "learning_rate": 9.198566694687288e-06, + "loss": 0.6863, + "step": 6762 + }, + { + "epoch": 0.2072759592987618, + "grad_norm": 1.9563584116316692, + "learning_rate": 9.198297158560288e-06, + "loss": 0.7123, + "step": 6763 + }, + { + "epoch": 0.207306607821503, + "grad_norm": 1.8992862192274782, + "learning_rate": 9.19802758106639e-06, + "loss": 0.8067, + "step": 6764 + }, + { + "epoch": 0.20733725634424421, + "grad_norm": 1.519697320007608, + "learning_rate": 9.197757962208257e-06, + "loss": 0.6362, + "step": 6765 + }, + { + "epoch": 0.20736790486698542, + "grad_norm": 1.6775450215848355, + "learning_rate": 9.197488301988535e-06, + "loss": 0.7423, + "step": 6766 + }, + { + "epoch": 0.20739855338972663, + "grad_norm": 1.9043297210704055, + "learning_rate": 9.197218600409889e-06, + "loss": 0.7888, + "step": 6767 + }, + { + "epoch": 0.20742920191246783, + "grad_norm": 1.7084663081533433, + "learning_rate": 9.196948857474976e-06, + "loss": 0.6655, + "step": 6768 + }, + { + "epoch": 0.20745985043520904, + "grad_norm": 1.7864470187024433, + "learning_rate": 9.19667907318645e-06, + "loss": 0.7384, + "step": 6769 + }, + { + "epoch": 0.20749049895795021, + "grad_norm": 1.9146610171939578, + "learning_rate": 9.196409247546973e-06, + "loss": 0.7373, + "step": 6770 + }, + { + "epoch": 0.20752114748069142, + "grad_norm": 1.5098805827472352, + "learning_rate": 9.196139380559201e-06, + "loss": 0.6808, + "step": 6771 + }, + { + "epoch": 0.20755179600343263, + "grad_norm": 1.1884687593674135, + "learning_rate": 9.195869472225794e-06, + "loss": 0.5041, + "step": 6772 + }, + { + "epoch": 0.20758244452617383, + "grad_norm": 1.8479144097021498, + "learning_rate": 9.195599522549412e-06, + "loss": 0.7799, + "step": 6773 + }, + { + "epoch": 0.20761309304891504, + "grad_norm": 1.9262281230425602, + "learning_rate": 9.195329531532715e-06, + "loss": 0.8028, + "step": 6774 + }, + { + "epoch": 0.20764374157165624, + "grad_norm": 1.8343948572505522, + "learning_rate": 9.19505949917836e-06, + "loss": 0.8153, + "step": 6775 + }, + { + "epoch": 0.20767439009439745, + "grad_norm": 1.5880062968952529, + "learning_rate": 9.194789425489012e-06, + "loss": 0.6009, + "step": 6776 + }, + { + "epoch": 0.20770503861713865, + "grad_norm": 
1.4759332949912205, + "learning_rate": 9.194519310467332e-06, + "loss": 0.6627, + "step": 6777 + }, + { + "epoch": 0.20773568713987986, + "grad_norm": 2.2566249198500032, + "learning_rate": 9.194249154115978e-06, + "loss": 0.8453, + "step": 6778 + }, + { + "epoch": 0.20776633566262107, + "grad_norm": 1.7110157569887257, + "learning_rate": 9.193978956437615e-06, + "loss": 0.7175, + "step": 6779 + }, + { + "epoch": 0.20779698418536227, + "grad_norm": 1.7255247337861706, + "learning_rate": 9.193708717434904e-06, + "loss": 0.6665, + "step": 6780 + }, + { + "epoch": 0.20782763270810348, + "grad_norm": 1.7307950166748334, + "learning_rate": 9.193438437110508e-06, + "loss": 0.7388, + "step": 6781 + }, + { + "epoch": 0.20785828123084468, + "grad_norm": 1.7659489345844288, + "learning_rate": 9.19316811546709e-06, + "loss": 0.7473, + "step": 6782 + }, + { + "epoch": 0.2078889297535859, + "grad_norm": 2.31007115288661, + "learning_rate": 9.192897752507314e-06, + "loss": 0.7416, + "step": 6783 + }, + { + "epoch": 0.2079195782763271, + "grad_norm": 1.9916791692524527, + "learning_rate": 9.192627348233842e-06, + "loss": 0.7949, + "step": 6784 + }, + { + "epoch": 0.20795022679906827, + "grad_norm": 1.7704382326300976, + "learning_rate": 9.192356902649342e-06, + "loss": 0.696, + "step": 6785 + }, + { + "epoch": 0.20798087532180948, + "grad_norm": 1.7451824999617491, + "learning_rate": 9.192086415756476e-06, + "loss": 0.7713, + "step": 6786 + }, + { + "epoch": 0.20801152384455068, + "grad_norm": 1.6031705686453945, + "learning_rate": 9.19181588755791e-06, + "loss": 0.591, + "step": 6787 + }, + { + "epoch": 0.2080421723672919, + "grad_norm": 1.5919476095006226, + "learning_rate": 9.19154531805631e-06, + "loss": 0.7394, + "step": 6788 + }, + { + "epoch": 0.2080728208900331, + "grad_norm": 1.7753501111179872, + "learning_rate": 9.191274707254342e-06, + "loss": 0.6309, + "step": 6789 + }, + { + "epoch": 0.2081034694127743, + "grad_norm": 1.0507846759425072, + "learning_rate": 9.19100405515467e-06, + "loss": 0.4797, + "step": 6790 + }, + { + "epoch": 0.2081341179355155, + "grad_norm": 1.9062070865198815, + "learning_rate": 9.190733361759964e-06, + "loss": 0.8396, + "step": 6791 + }, + { + "epoch": 0.2081647664582567, + "grad_norm": 1.782948521099564, + "learning_rate": 9.190462627072891e-06, + "loss": 0.6718, + "step": 6792 + }, + { + "epoch": 0.20819541498099792, + "grad_norm": 1.5291121421295177, + "learning_rate": 9.190191851096116e-06, + "loss": 0.628, + "step": 6793 + }, + { + "epoch": 0.20822606350373912, + "grad_norm": 1.784674869657271, + "learning_rate": 9.18992103383231e-06, + "loss": 0.6685, + "step": 6794 + }, + { + "epoch": 0.20825671202648033, + "grad_norm": 1.9825531035681496, + "learning_rate": 9.189650175284139e-06, + "loss": 0.756, + "step": 6795 + }, + { + "epoch": 0.20828736054922153, + "grad_norm": 1.6512585115184524, + "learning_rate": 9.18937927545427e-06, + "loss": 0.6894, + "step": 6796 + }, + { + "epoch": 0.20831800907196274, + "grad_norm": 1.7291805660032682, + "learning_rate": 9.18910833434538e-06, + "loss": 0.7714, + "step": 6797 + }, + { + "epoch": 0.20834865759470395, + "grad_norm": 0.9659235120587193, + "learning_rate": 9.188837351960132e-06, + "loss": 0.5062, + "step": 6798 + }, + { + "epoch": 0.20837930611744515, + "grad_norm": 1.5450964518569246, + "learning_rate": 9.188566328301196e-06, + "loss": 0.6389, + "step": 6799 + }, + { + "epoch": 0.20840995464018636, + "grad_norm": 2.1041982554955165, + "learning_rate": 9.188295263371247e-06, + "loss": 0.8099, + "step": 6800 + }, + { 
+ "epoch": 0.20844060316292753, + "grad_norm": 0.7944831695327185, + "learning_rate": 9.188024157172952e-06, + "loss": 0.4939, + "step": 6801 + }, + { + "epoch": 0.20847125168566874, + "grad_norm": 1.9803244997547809, + "learning_rate": 9.187753009708983e-06, + "loss": 0.6371, + "step": 6802 + }, + { + "epoch": 0.20850190020840995, + "grad_norm": 1.8660461576652703, + "learning_rate": 9.187481820982011e-06, + "loss": 0.7462, + "step": 6803 + }, + { + "epoch": 0.20853254873115115, + "grad_norm": 1.579493382579708, + "learning_rate": 9.187210590994711e-06, + "loss": 0.7788, + "step": 6804 + }, + { + "epoch": 0.20856319725389236, + "grad_norm": 1.7960810579646789, + "learning_rate": 9.186939319749756e-06, + "loss": 0.7927, + "step": 6805 + }, + { + "epoch": 0.20859384577663356, + "grad_norm": 1.8314649508709209, + "learning_rate": 9.186668007249813e-06, + "loss": 0.7748, + "step": 6806 + }, + { + "epoch": 0.20862449429937477, + "grad_norm": 1.8205891989113856, + "learning_rate": 9.18639665349756e-06, + "loss": 0.6856, + "step": 6807 + }, + { + "epoch": 0.20865514282211597, + "grad_norm": 1.957675173616404, + "learning_rate": 9.186125258495668e-06, + "loss": 0.7182, + "step": 6808 + }, + { + "epoch": 0.20868579134485718, + "grad_norm": 0.9703100665088302, + "learning_rate": 9.185853822246814e-06, + "loss": 0.4858, + "step": 6809 + }, + { + "epoch": 0.20871643986759839, + "grad_norm": 0.9158888763485403, + "learning_rate": 9.185582344753673e-06, + "loss": 0.494, + "step": 6810 + }, + { + "epoch": 0.2087470883903396, + "grad_norm": 2.051007840441848, + "learning_rate": 9.185310826018916e-06, + "loss": 0.8195, + "step": 6811 + }, + { + "epoch": 0.2087777369130808, + "grad_norm": 1.7732338693936338, + "learning_rate": 9.185039266045221e-06, + "loss": 0.6585, + "step": 6812 + }, + { + "epoch": 0.208808385435822, + "grad_norm": 1.579575628333002, + "learning_rate": 9.184767664835264e-06, + "loss": 0.7207, + "step": 6813 + }, + { + "epoch": 0.2088390339585632, + "grad_norm": 1.7727019748813604, + "learning_rate": 9.18449602239172e-06, + "loss": 0.697, + "step": 6814 + }, + { + "epoch": 0.20886968248130441, + "grad_norm": 1.7808187399192292, + "learning_rate": 9.184224338717268e-06, + "loss": 0.7662, + "step": 6815 + }, + { + "epoch": 0.2089003310040456, + "grad_norm": 1.6726219273615999, + "learning_rate": 9.18395261381458e-06, + "loss": 0.6156, + "step": 6816 + }, + { + "epoch": 0.2089309795267868, + "grad_norm": 1.8326021329469375, + "learning_rate": 9.183680847686338e-06, + "loss": 0.8114, + "step": 6817 + }, + { + "epoch": 0.208961628049528, + "grad_norm": 1.9288626618344638, + "learning_rate": 9.183409040335218e-06, + "loss": 0.7644, + "step": 6818 + }, + { + "epoch": 0.2089922765722692, + "grad_norm": 1.672073496473314, + "learning_rate": 9.183137191763898e-06, + "loss": 0.6419, + "step": 6819 + }, + { + "epoch": 0.20902292509501041, + "grad_norm": 1.691755527101769, + "learning_rate": 9.182865301975056e-06, + "loss": 0.7427, + "step": 6820 + }, + { + "epoch": 0.20905357361775162, + "grad_norm": 1.765372803180703, + "learning_rate": 9.182593370971373e-06, + "loss": 0.7697, + "step": 6821 + }, + { + "epoch": 0.20908422214049283, + "grad_norm": 2.1117715895021796, + "learning_rate": 9.18232139875553e-06, + "loss": 0.801, + "step": 6822 + }, + { + "epoch": 0.20911487066323403, + "grad_norm": 1.7747514290506967, + "learning_rate": 9.1820493853302e-06, + "loss": 0.6449, + "step": 6823 + }, + { + "epoch": 0.20914551918597524, + "grad_norm": 1.6660686883071856, + "learning_rate": 
9.181777330698069e-06, + "loss": 0.713, + "step": 6824 + }, + { + "epoch": 0.20917616770871644, + "grad_norm": 1.8518806501079634, + "learning_rate": 9.181505234861816e-06, + "loss": 0.7505, + "step": 6825 + }, + { + "epoch": 0.20920681623145765, + "grad_norm": 1.640453133222903, + "learning_rate": 9.181233097824123e-06, + "loss": 0.6494, + "step": 6826 + }, + { + "epoch": 0.20923746475419885, + "grad_norm": 1.7537630114903089, + "learning_rate": 9.18096091958767e-06, + "loss": 0.764, + "step": 6827 + }, + { + "epoch": 0.20926811327694006, + "grad_norm": 1.999009199335041, + "learning_rate": 9.180688700155138e-06, + "loss": 0.7229, + "step": 6828 + }, + { + "epoch": 0.20929876179968127, + "grad_norm": 1.7170214898379217, + "learning_rate": 9.180416439529211e-06, + "loss": 0.7814, + "step": 6829 + }, + { + "epoch": 0.20932941032242247, + "grad_norm": 1.8149656575857185, + "learning_rate": 9.18014413771257e-06, + "loss": 0.7234, + "step": 6830 + }, + { + "epoch": 0.20936005884516368, + "grad_norm": 1.7920228999272243, + "learning_rate": 9.179871794707903e-06, + "loss": 0.826, + "step": 6831 + }, + { + "epoch": 0.20939070736790485, + "grad_norm": 1.944797410749394, + "learning_rate": 9.179599410517887e-06, + "loss": 0.74, + "step": 6832 + }, + { + "epoch": 0.20942135589064606, + "grad_norm": 1.9416265894432077, + "learning_rate": 9.17932698514521e-06, + "loss": 0.8058, + "step": 6833 + }, + { + "epoch": 0.20945200441338727, + "grad_norm": 1.8778135412842543, + "learning_rate": 9.179054518592553e-06, + "loss": 0.7686, + "step": 6834 + }, + { + "epoch": 0.20948265293612847, + "grad_norm": 1.64474573013903, + "learning_rate": 9.178782010862603e-06, + "loss": 0.7435, + "step": 6835 + }, + { + "epoch": 0.20951330145886968, + "grad_norm": 1.8319759640079765, + "learning_rate": 9.178509461958046e-06, + "loss": 0.7605, + "step": 6836 + }, + { + "epoch": 0.20954394998161088, + "grad_norm": 1.7855648579986367, + "learning_rate": 9.178236871881565e-06, + "loss": 0.7925, + "step": 6837 + }, + { + "epoch": 0.2095745985043521, + "grad_norm": 1.6585637434699263, + "learning_rate": 9.177964240635849e-06, + "loss": 0.7004, + "step": 6838 + }, + { + "epoch": 0.2096052470270933, + "grad_norm": 1.86410404390193, + "learning_rate": 9.17769156822358e-06, + "loss": 0.7072, + "step": 6839 + }, + { + "epoch": 0.2096358955498345, + "grad_norm": 1.8530621563971725, + "learning_rate": 9.177418854647447e-06, + "loss": 0.6818, + "step": 6840 + }, + { + "epoch": 0.2096665440725757, + "grad_norm": 1.125894768126668, + "learning_rate": 9.17714609991014e-06, + "loss": 0.5028, + "step": 6841 + }, + { + "epoch": 0.2096971925953169, + "grad_norm": 0.9734690070478185, + "learning_rate": 9.17687330401434e-06, + "loss": 0.4998, + "step": 6842 + }, + { + "epoch": 0.20972784111805812, + "grad_norm": 1.8644130238033845, + "learning_rate": 9.176600466962741e-06, + "loss": 0.8043, + "step": 6843 + }, + { + "epoch": 0.20975848964079932, + "grad_norm": 1.883425353539842, + "learning_rate": 9.176327588758029e-06, + "loss": 0.8088, + "step": 6844 + }, + { + "epoch": 0.20978913816354053, + "grad_norm": 1.656372899019515, + "learning_rate": 9.176054669402892e-06, + "loss": 0.7002, + "step": 6845 + }, + { + "epoch": 0.20981978668628173, + "grad_norm": 1.7263265397225005, + "learning_rate": 9.17578170890002e-06, + "loss": 0.8127, + "step": 6846 + }, + { + "epoch": 0.2098504352090229, + "grad_norm": 1.7121882198460352, + "learning_rate": 9.175508707252102e-06, + "loss": 0.8137, + "step": 6847 + }, + { + "epoch": 0.20988108373176412, + 
"grad_norm": 1.8794673935085513, + "learning_rate": 9.175235664461828e-06, + "loss": 0.7443, + "step": 6848 + }, + { + "epoch": 0.20991173225450532, + "grad_norm": 2.0044817246563564, + "learning_rate": 9.17496258053189e-06, + "loss": 0.7848, + "step": 6849 + }, + { + "epoch": 0.20994238077724653, + "grad_norm": 1.744483937240471, + "learning_rate": 9.174689455464978e-06, + "loss": 0.8035, + "step": 6850 + }, + { + "epoch": 0.20997302929998773, + "grad_norm": 2.1194148158500026, + "learning_rate": 9.174416289263781e-06, + "loss": 0.8102, + "step": 6851 + }, + { + "epoch": 0.21000367782272894, + "grad_norm": 1.936136297460602, + "learning_rate": 9.174143081930993e-06, + "loss": 0.7016, + "step": 6852 + }, + { + "epoch": 0.21003432634547015, + "grad_norm": 1.7685777371932359, + "learning_rate": 9.173869833469304e-06, + "loss": 0.6208, + "step": 6853 + }, + { + "epoch": 0.21006497486821135, + "grad_norm": 1.7870901576654972, + "learning_rate": 9.17359654388141e-06, + "loss": 0.7219, + "step": 6854 + }, + { + "epoch": 0.21009562339095256, + "grad_norm": 1.7709667104532059, + "learning_rate": 9.17332321317e-06, + "loss": 0.7301, + "step": 6855 + }, + { + "epoch": 0.21012627191369376, + "grad_norm": 1.748446950352049, + "learning_rate": 9.173049841337768e-06, + "loss": 0.7382, + "step": 6856 + }, + { + "epoch": 0.21015692043643497, + "grad_norm": 1.8398553345711108, + "learning_rate": 9.172776428387408e-06, + "loss": 0.7559, + "step": 6857 + }, + { + "epoch": 0.21018756895917617, + "grad_norm": 1.7009313786225615, + "learning_rate": 9.172502974321616e-06, + "loss": 0.7497, + "step": 6858 + }, + { + "epoch": 0.21021821748191738, + "grad_norm": 1.8026413552088343, + "learning_rate": 9.172229479143085e-06, + "loss": 0.5226, + "step": 6859 + }, + { + "epoch": 0.21024886600465859, + "grad_norm": 1.9363748529202822, + "learning_rate": 9.171955942854506e-06, + "loss": 0.7579, + "step": 6860 + }, + { + "epoch": 0.2102795145273998, + "grad_norm": 1.8033573832254106, + "learning_rate": 9.17168236545858e-06, + "loss": 0.7517, + "step": 6861 + }, + { + "epoch": 0.210310163050141, + "grad_norm": 1.748957965893536, + "learning_rate": 9.171408746958e-06, + "loss": 0.6844, + "step": 6862 + }, + { + "epoch": 0.21034081157288217, + "grad_norm": 1.8161299308525722, + "learning_rate": 9.171135087355463e-06, + "loss": 0.7819, + "step": 6863 + }, + { + "epoch": 0.21037146009562338, + "grad_norm": 1.7878548993667627, + "learning_rate": 9.170861386653665e-06, + "loss": 0.756, + "step": 6864 + }, + { + "epoch": 0.21040210861836459, + "grad_norm": 1.792135781338691, + "learning_rate": 9.1705876448553e-06, + "loss": 0.7271, + "step": 6865 + }, + { + "epoch": 0.2104327571411058, + "grad_norm": 0.9734903945267865, + "learning_rate": 9.17031386196307e-06, + "loss": 0.5096, + "step": 6866 + }, + { + "epoch": 0.210463405663847, + "grad_norm": 1.9028634184432638, + "learning_rate": 9.170040037979671e-06, + "loss": 0.7614, + "step": 6867 + }, + { + "epoch": 0.2104940541865882, + "grad_norm": 1.694211012977835, + "learning_rate": 9.169766172907799e-06, + "loss": 0.7729, + "step": 6868 + }, + { + "epoch": 0.2105247027093294, + "grad_norm": 2.0870773322495904, + "learning_rate": 9.169492266750154e-06, + "loss": 0.6842, + "step": 6869 + }, + { + "epoch": 0.21055535123207061, + "grad_norm": 1.792154594032373, + "learning_rate": 9.169218319509436e-06, + "loss": 0.722, + "step": 6870 + }, + { + "epoch": 0.21058599975481182, + "grad_norm": 0.9154273263535209, + "learning_rate": 9.168944331188342e-06, + "loss": 0.5163, + "step": 6871 + 
}, + { + "epoch": 0.21061664827755303, + "grad_norm": 1.9743349525453757, + "learning_rate": 9.168670301789574e-06, + "loss": 0.792, + "step": 6872 + }, + { + "epoch": 0.21064729680029423, + "grad_norm": 1.8693132833812818, + "learning_rate": 9.16839623131583e-06, + "loss": 0.7489, + "step": 6873 + }, + { + "epoch": 0.21067794532303544, + "grad_norm": 1.7813273678304533, + "learning_rate": 9.168122119769813e-06, + "loss": 0.8148, + "step": 6874 + }, + { + "epoch": 0.21070859384577664, + "grad_norm": 2.792978920969533, + "learning_rate": 9.167847967154219e-06, + "loss": 0.7076, + "step": 6875 + }, + { + "epoch": 0.21073924236851785, + "grad_norm": 1.5876035303021407, + "learning_rate": 9.167573773471756e-06, + "loss": 0.624, + "step": 6876 + }, + { + "epoch": 0.21076989089125905, + "grad_norm": 1.6921049429493127, + "learning_rate": 9.167299538725121e-06, + "loss": 0.6789, + "step": 6877 + }, + { + "epoch": 0.21080053941400023, + "grad_norm": 1.7170578211337775, + "learning_rate": 9.167025262917018e-06, + "loss": 0.6965, + "step": 6878 + }, + { + "epoch": 0.21083118793674144, + "grad_norm": 1.7578112592531, + "learning_rate": 9.166750946050147e-06, + "loss": 0.6755, + "step": 6879 + }, + { + "epoch": 0.21086183645948264, + "grad_norm": 1.7834347659126346, + "learning_rate": 9.166476588127215e-06, + "loss": 0.7212, + "step": 6880 + }, + { + "epoch": 0.21089248498222385, + "grad_norm": 1.714172527356643, + "learning_rate": 9.166202189150922e-06, + "loss": 0.7632, + "step": 6881 + }, + { + "epoch": 0.21092313350496505, + "grad_norm": 1.8270774505399052, + "learning_rate": 9.165927749123972e-06, + "loss": 0.8293, + "step": 6882 + }, + { + "epoch": 0.21095378202770626, + "grad_norm": 1.7095949547928186, + "learning_rate": 9.165653268049072e-06, + "loss": 0.6976, + "step": 6883 + }, + { + "epoch": 0.21098443055044747, + "grad_norm": 1.6144527886490627, + "learning_rate": 9.165378745928923e-06, + "loss": 0.7105, + "step": 6884 + }, + { + "epoch": 0.21101507907318867, + "grad_norm": 1.788243461813118, + "learning_rate": 9.165104182766233e-06, + "loss": 0.7202, + "step": 6885 + }, + { + "epoch": 0.21104572759592988, + "grad_norm": 1.6479423097165988, + "learning_rate": 9.164829578563705e-06, + "loss": 0.641, + "step": 6886 + }, + { + "epoch": 0.21107637611867108, + "grad_norm": 0.9544702610902126, + "learning_rate": 9.164554933324045e-06, + "loss": 0.5021, + "step": 6887 + }, + { + "epoch": 0.2111070246414123, + "grad_norm": 1.5791800181820352, + "learning_rate": 9.164280247049961e-06, + "loss": 0.6838, + "step": 6888 + }, + { + "epoch": 0.2111376731641535, + "grad_norm": 1.674161460783684, + "learning_rate": 9.164005519744157e-06, + "loss": 0.6221, + "step": 6889 + }, + { + "epoch": 0.2111683216868947, + "grad_norm": 0.7711683694305198, + "learning_rate": 9.163730751409342e-06, + "loss": 0.475, + "step": 6890 + }, + { + "epoch": 0.2111989702096359, + "grad_norm": 1.7167220706392847, + "learning_rate": 9.16345594204822e-06, + "loss": 0.7976, + "step": 6891 + }, + { + "epoch": 0.2112296187323771, + "grad_norm": 1.833129240930157, + "learning_rate": 9.163181091663504e-06, + "loss": 0.7704, + "step": 6892 + }, + { + "epoch": 0.21126026725511832, + "grad_norm": 1.6989457380427906, + "learning_rate": 9.162906200257899e-06, + "loss": 0.6912, + "step": 6893 + }, + { + "epoch": 0.2112909157778595, + "grad_norm": 0.8303846636551513, + "learning_rate": 9.162631267834114e-06, + "loss": 0.4808, + "step": 6894 + }, + { + "epoch": 0.2113215643006007, + "grad_norm": 1.6514146018567908, + "learning_rate": 
9.162356294394858e-06, + "loss": 0.7695, + "step": 6895 + }, + { + "epoch": 0.2113522128233419, + "grad_norm": 1.7210836422496705, + "learning_rate": 9.16208127994284e-06, + "loss": 0.7056, + "step": 6896 + }, + { + "epoch": 0.2113828613460831, + "grad_norm": 1.8070236791121794, + "learning_rate": 9.161806224480772e-06, + "loss": 0.6796, + "step": 6897 + }, + { + "epoch": 0.21141350986882432, + "grad_norm": 1.881684405334881, + "learning_rate": 9.161531128011361e-06, + "loss": 0.7223, + "step": 6898 + }, + { + "epoch": 0.21144415839156552, + "grad_norm": 1.932841210681594, + "learning_rate": 9.161255990537318e-06, + "loss": 0.7558, + "step": 6899 + }, + { + "epoch": 0.21147480691430673, + "grad_norm": 1.909009164221902, + "learning_rate": 9.160980812061357e-06, + "loss": 0.7713, + "step": 6900 + }, + { + "epoch": 0.21150545543704793, + "grad_norm": 1.8644356886033961, + "learning_rate": 9.160705592586187e-06, + "loss": 0.7333, + "step": 6901 + }, + { + "epoch": 0.21153610395978914, + "grad_norm": 1.6675138254542254, + "learning_rate": 9.16043033211452e-06, + "loss": 0.6969, + "step": 6902 + }, + { + "epoch": 0.21156675248253035, + "grad_norm": 1.6892572185055745, + "learning_rate": 9.16015503064907e-06, + "loss": 0.7077, + "step": 6903 + }, + { + "epoch": 0.21159740100527155, + "grad_norm": 2.1049162270148996, + "learning_rate": 9.159879688192546e-06, + "loss": 0.8021, + "step": 6904 + }, + { + "epoch": 0.21162804952801276, + "grad_norm": 0.9823090997930126, + "learning_rate": 9.159604304747662e-06, + "loss": 0.5073, + "step": 6905 + }, + { + "epoch": 0.21165869805075396, + "grad_norm": 1.8230607892727704, + "learning_rate": 9.159328880317136e-06, + "loss": 0.6742, + "step": 6906 + }, + { + "epoch": 0.21168934657349517, + "grad_norm": 0.8581277215546509, + "learning_rate": 9.159053414903676e-06, + "loss": 0.5198, + "step": 6907 + }, + { + "epoch": 0.21171999509623637, + "grad_norm": 1.7963722666636788, + "learning_rate": 9.158777908509999e-06, + "loss": 0.7073, + "step": 6908 + }, + { + "epoch": 0.21175064361897755, + "grad_norm": 1.788095152589813, + "learning_rate": 9.158502361138819e-06, + "loss": 0.7205, + "step": 6909 + }, + { + "epoch": 0.21178129214171876, + "grad_norm": 0.8710616070081827, + "learning_rate": 9.158226772792852e-06, + "loss": 0.4857, + "step": 6910 + }, + { + "epoch": 0.21181194066445996, + "grad_norm": 2.0178951303650083, + "learning_rate": 9.15795114347481e-06, + "loss": 0.7412, + "step": 6911 + }, + { + "epoch": 0.21184258918720117, + "grad_norm": 1.64157528679434, + "learning_rate": 9.157675473187414e-06, + "loss": 0.7283, + "step": 6912 + }, + { + "epoch": 0.21187323770994237, + "grad_norm": 1.7019332214032006, + "learning_rate": 9.157399761933377e-06, + "loss": 0.7259, + "step": 6913 + }, + { + "epoch": 0.21190388623268358, + "grad_norm": 1.6060917912374433, + "learning_rate": 9.157124009715417e-06, + "loss": 0.7573, + "step": 6914 + }, + { + "epoch": 0.21193453475542479, + "grad_norm": 1.9453801078820514, + "learning_rate": 9.156848216536251e-06, + "loss": 0.6643, + "step": 6915 + }, + { + "epoch": 0.211965183278166, + "grad_norm": 1.658835094098403, + "learning_rate": 9.156572382398594e-06, + "loss": 0.7044, + "step": 6916 + }, + { + "epoch": 0.2119958318009072, + "grad_norm": 1.7395985814295267, + "learning_rate": 9.156296507305167e-06, + "loss": 0.6636, + "step": 6917 + }, + { + "epoch": 0.2120264803236484, + "grad_norm": 1.8210205225629972, + "learning_rate": 9.156020591258687e-06, + "loss": 0.7422, + "step": 6918 + }, + { + "epoch": 0.2120571288463896, + 
"grad_norm": 0.8594983365179799, + "learning_rate": 9.155744634261874e-06, + "loss": 0.4893, + "step": 6919 + }, + { + "epoch": 0.21208777736913081, + "grad_norm": 1.8212139680934627, + "learning_rate": 9.155468636317443e-06, + "loss": 0.8511, + "step": 6920 + }, + { + "epoch": 0.21211842589187202, + "grad_norm": 1.6324249670599813, + "learning_rate": 9.15519259742812e-06, + "loss": 0.7161, + "step": 6921 + }, + { + "epoch": 0.21214907441461323, + "grad_norm": 1.8316990862187694, + "learning_rate": 9.15491651759662e-06, + "loss": 0.7616, + "step": 6922 + }, + { + "epoch": 0.21217972293735443, + "grad_norm": 1.7195018363195347, + "learning_rate": 9.154640396825662e-06, + "loss": 0.7164, + "step": 6923 + }, + { + "epoch": 0.21221037146009564, + "grad_norm": 0.8347845723763856, + "learning_rate": 9.15436423511797e-06, + "loss": 0.4913, + "step": 6924 + }, + { + "epoch": 0.21224101998283681, + "grad_norm": 1.6691154470476273, + "learning_rate": 9.154088032476266e-06, + "loss": 0.7715, + "step": 6925 + }, + { + "epoch": 0.21227166850557802, + "grad_norm": 1.8244152941922673, + "learning_rate": 9.153811788903269e-06, + "loss": 0.681, + "step": 6926 + }, + { + "epoch": 0.21230231702831923, + "grad_norm": 2.0724667101050183, + "learning_rate": 9.1535355044017e-06, + "loss": 0.7293, + "step": 6927 + }, + { + "epoch": 0.21233296555106043, + "grad_norm": 2.182190078939684, + "learning_rate": 9.153259178974286e-06, + "loss": 0.6537, + "step": 6928 + }, + { + "epoch": 0.21236361407380164, + "grad_norm": 1.567885108712536, + "learning_rate": 9.152982812623746e-06, + "loss": 0.6684, + "step": 6929 + }, + { + "epoch": 0.21239426259654284, + "grad_norm": 1.8021626820561905, + "learning_rate": 9.152706405352802e-06, + "loss": 0.7286, + "step": 6930 + }, + { + "epoch": 0.21242491111928405, + "grad_norm": 1.6268337808558793, + "learning_rate": 9.15242995716418e-06, + "loss": 0.6745, + "step": 6931 + }, + { + "epoch": 0.21245555964202525, + "grad_norm": 0.8203364787433969, + "learning_rate": 9.152153468060603e-06, + "loss": 0.49, + "step": 6932 + }, + { + "epoch": 0.21248620816476646, + "grad_norm": 1.6681435869876402, + "learning_rate": 9.151876938044795e-06, + "loss": 0.6759, + "step": 6933 + }, + { + "epoch": 0.21251685668750767, + "grad_norm": 1.8097469010537726, + "learning_rate": 9.151600367119482e-06, + "loss": 0.7607, + "step": 6934 + }, + { + "epoch": 0.21254750521024887, + "grad_norm": 1.8253314774917206, + "learning_rate": 9.15132375528739e-06, + "loss": 0.7188, + "step": 6935 + }, + { + "epoch": 0.21257815373299008, + "grad_norm": 0.8256443023293238, + "learning_rate": 9.15104710255124e-06, + "loss": 0.482, + "step": 6936 + }, + { + "epoch": 0.21260880225573128, + "grad_norm": 1.7466118191256845, + "learning_rate": 9.150770408913763e-06, + "loss": 0.7173, + "step": 6937 + }, + { + "epoch": 0.2126394507784725, + "grad_norm": 1.7395428791537741, + "learning_rate": 9.15049367437768e-06, + "loss": 0.7283, + "step": 6938 + }, + { + "epoch": 0.2126700993012137, + "grad_norm": 2.3786985446323947, + "learning_rate": 9.150216898945724e-06, + "loss": 0.7599, + "step": 6939 + }, + { + "epoch": 0.21270074782395487, + "grad_norm": 1.801658470555801, + "learning_rate": 9.149940082620618e-06, + "loss": 0.7653, + "step": 6940 + }, + { + "epoch": 0.21273139634669608, + "grad_norm": 1.9168776006052357, + "learning_rate": 9.149663225405092e-06, + "loss": 0.7769, + "step": 6941 + }, + { + "epoch": 0.21276204486943728, + "grad_norm": 1.6620627990228012, + "learning_rate": 9.14938632730187e-06, + "loss": 0.6881, + 
"step": 6942 + }, + { + "epoch": 0.2127926933921785, + "grad_norm": 1.5318401627367622, + "learning_rate": 9.149109388313684e-06, + "loss": 0.719, + "step": 6943 + }, + { + "epoch": 0.2128233419149197, + "grad_norm": 1.7383589017233156, + "learning_rate": 9.148832408443262e-06, + "loss": 0.7464, + "step": 6944 + }, + { + "epoch": 0.2128539904376609, + "grad_norm": 0.8443623031484068, + "learning_rate": 9.148555387693332e-06, + "loss": 0.4988, + "step": 6945 + }, + { + "epoch": 0.2128846389604021, + "grad_norm": 1.924540246479336, + "learning_rate": 9.148278326066624e-06, + "loss": 0.6775, + "step": 6946 + }, + { + "epoch": 0.2129152874831433, + "grad_norm": 1.9723913781697802, + "learning_rate": 9.14800122356587e-06, + "loss": 0.753, + "step": 6947 + }, + { + "epoch": 0.21294593600588452, + "grad_norm": 0.8551429205466912, + "learning_rate": 9.147724080193798e-06, + "loss": 0.4968, + "step": 6948 + }, + { + "epoch": 0.21297658452862572, + "grad_norm": 1.8131640879754274, + "learning_rate": 9.147446895953138e-06, + "loss": 0.6264, + "step": 6949 + }, + { + "epoch": 0.21300723305136693, + "grad_norm": 0.7812227737499419, + "learning_rate": 9.147169670846623e-06, + "loss": 0.5006, + "step": 6950 + }, + { + "epoch": 0.21303788157410813, + "grad_norm": 1.659525111079974, + "learning_rate": 9.146892404876985e-06, + "loss": 0.7178, + "step": 6951 + }, + { + "epoch": 0.21306853009684934, + "grad_norm": 0.7894846818895113, + "learning_rate": 9.146615098046953e-06, + "loss": 0.4764, + "step": 6952 + }, + { + "epoch": 0.21309917861959055, + "grad_norm": 2.0668705931444187, + "learning_rate": 9.146337750359265e-06, + "loss": 0.7409, + "step": 6953 + }, + { + "epoch": 0.21312982714233175, + "grad_norm": 1.9334955030924266, + "learning_rate": 9.146060361816648e-06, + "loss": 0.7766, + "step": 6954 + }, + { + "epoch": 0.21316047566507296, + "grad_norm": 1.8374477868586896, + "learning_rate": 9.145782932421838e-06, + "loss": 0.775, + "step": 6955 + }, + { + "epoch": 0.21319112418781413, + "grad_norm": 1.9337956402301197, + "learning_rate": 9.145505462177569e-06, + "loss": 0.8049, + "step": 6956 + }, + { + "epoch": 0.21322177271055534, + "grad_norm": 1.9745067624578612, + "learning_rate": 9.145227951086573e-06, + "loss": 0.7013, + "step": 6957 + }, + { + "epoch": 0.21325242123329655, + "grad_norm": 1.8575003603461222, + "learning_rate": 9.144950399151584e-06, + "loss": 0.8499, + "step": 6958 + }, + { + "epoch": 0.21328306975603775, + "grad_norm": 1.753764247161566, + "learning_rate": 9.144672806375341e-06, + "loss": 0.7443, + "step": 6959 + }, + { + "epoch": 0.21331371827877896, + "grad_norm": 2.06861984509508, + "learning_rate": 9.144395172760574e-06, + "loss": 0.7021, + "step": 6960 + }, + { + "epoch": 0.21334436680152016, + "grad_norm": 1.9298406316946488, + "learning_rate": 9.144117498310023e-06, + "loss": 0.8098, + "step": 6961 + }, + { + "epoch": 0.21337501532426137, + "grad_norm": 1.8290123630639186, + "learning_rate": 9.14383978302642e-06, + "loss": 0.697, + "step": 6962 + }, + { + "epoch": 0.21340566384700257, + "grad_norm": 2.0792271236254636, + "learning_rate": 9.143562026912504e-06, + "loss": 0.7984, + "step": 6963 + }, + { + "epoch": 0.21343631236974378, + "grad_norm": 1.7128611880415845, + "learning_rate": 9.143284229971011e-06, + "loss": 0.7134, + "step": 6964 + }, + { + "epoch": 0.21346696089248499, + "grad_norm": 1.9479319944555873, + "learning_rate": 9.143006392204678e-06, + "loss": 0.7019, + "step": 6965 + }, + { + "epoch": 0.2134976094152262, + "grad_norm": 1.7778286482379715, + 
"learning_rate": 9.142728513616245e-06, + "loss": 0.6988, + "step": 6966 + }, + { + "epoch": 0.2135282579379674, + "grad_norm": 1.7407668433087269, + "learning_rate": 9.142450594208447e-06, + "loss": 0.7783, + "step": 6967 + }, + { + "epoch": 0.2135589064607086, + "grad_norm": 1.8564017891483227, + "learning_rate": 9.142172633984024e-06, + "loss": 0.7184, + "step": 6968 + }, + { + "epoch": 0.2135895549834498, + "grad_norm": 2.117485928426193, + "learning_rate": 9.141894632945712e-06, + "loss": 0.779, + "step": 6969 + }, + { + "epoch": 0.21362020350619101, + "grad_norm": 1.671249899640579, + "learning_rate": 9.141616591096255e-06, + "loss": 0.7192, + "step": 6970 + }, + { + "epoch": 0.2136508520289322, + "grad_norm": 1.6754820019153722, + "learning_rate": 9.141338508438388e-06, + "loss": 0.6422, + "step": 6971 + }, + { + "epoch": 0.2136815005516734, + "grad_norm": 2.0037980016868118, + "learning_rate": 9.141060384974854e-06, + "loss": 0.7256, + "step": 6972 + }, + { + "epoch": 0.2137121490744146, + "grad_norm": 1.770454684529185, + "learning_rate": 9.140782220708393e-06, + "loss": 0.7524, + "step": 6973 + }, + { + "epoch": 0.2137427975971558, + "grad_norm": 0.9847829232137651, + "learning_rate": 9.140504015641745e-06, + "loss": 0.498, + "step": 6974 + }, + { + "epoch": 0.21377344611989701, + "grad_norm": 1.8447134369591225, + "learning_rate": 9.140225769777652e-06, + "loss": 0.6444, + "step": 6975 + }, + { + "epoch": 0.21380409464263822, + "grad_norm": 1.7538201932715816, + "learning_rate": 9.139947483118852e-06, + "loss": 0.7449, + "step": 6976 + }, + { + "epoch": 0.21383474316537943, + "grad_norm": 1.5951626062886828, + "learning_rate": 9.139669155668095e-06, + "loss": 0.7437, + "step": 6977 + }, + { + "epoch": 0.21386539168812063, + "grad_norm": 1.858833259075066, + "learning_rate": 9.139390787428115e-06, + "loss": 0.6933, + "step": 6978 + }, + { + "epoch": 0.21389604021086184, + "grad_norm": 1.7212704740693199, + "learning_rate": 9.139112378401659e-06, + "loss": 0.6782, + "step": 6979 + }, + { + "epoch": 0.21392668873360304, + "grad_norm": 1.6663084474576322, + "learning_rate": 9.138833928591471e-06, + "loss": 0.5944, + "step": 6980 + }, + { + "epoch": 0.21395733725634425, + "grad_norm": 0.8699602529563103, + "learning_rate": 9.138555438000291e-06, + "loss": 0.4832, + "step": 6981 + }, + { + "epoch": 0.21398798577908545, + "grad_norm": 1.9034420691868836, + "learning_rate": 9.138276906630868e-06, + "loss": 0.7864, + "step": 6982 + }, + { + "epoch": 0.21401863430182666, + "grad_norm": 1.6268549539385289, + "learning_rate": 9.137998334485944e-06, + "loss": 0.7298, + "step": 6983 + }, + { + "epoch": 0.21404928282456787, + "grad_norm": 1.79039929114554, + "learning_rate": 9.137719721568263e-06, + "loss": 0.7484, + "step": 6984 + }, + { + "epoch": 0.21407993134730907, + "grad_norm": 1.8175667957601191, + "learning_rate": 9.13744106788057e-06, + "loss": 0.7228, + "step": 6985 + }, + { + "epoch": 0.21411057987005028, + "grad_norm": 1.8909405959757197, + "learning_rate": 9.137162373425612e-06, + "loss": 0.7549, + "step": 6986 + }, + { + "epoch": 0.21414122839279146, + "grad_norm": 1.6357334683518054, + "learning_rate": 9.136883638206135e-06, + "loss": 0.6965, + "step": 6987 + }, + { + "epoch": 0.21417187691553266, + "grad_norm": 1.8584534396803898, + "learning_rate": 9.136604862224886e-06, + "loss": 0.7455, + "step": 6988 + }, + { + "epoch": 0.21420252543827387, + "grad_norm": 1.923274238546006, + "learning_rate": 9.136326045484607e-06, + "loss": 0.8737, + "step": 6989 + }, + { + "epoch": 
0.21423317396101507, + "grad_norm": 0.8596550191543165, + "learning_rate": 9.136047187988053e-06, + "loss": 0.5077, + "step": 6990 + }, + { + "epoch": 0.21426382248375628, + "grad_norm": 1.9603129315645629, + "learning_rate": 9.135768289737966e-06, + "loss": 0.7598, + "step": 6991 + }, + { + "epoch": 0.21429447100649748, + "grad_norm": 1.595049213894692, + "learning_rate": 9.135489350737096e-06, + "loss": 0.788, + "step": 6992 + }, + { + "epoch": 0.2143251195292387, + "grad_norm": 1.8386993735326143, + "learning_rate": 9.135210370988192e-06, + "loss": 0.7566, + "step": 6993 + }, + { + "epoch": 0.2143557680519799, + "grad_norm": 1.7735742927597118, + "learning_rate": 9.134931350494001e-06, + "loss": 0.7659, + "step": 6994 + }, + { + "epoch": 0.2143864165747211, + "grad_norm": 1.9014561275349697, + "learning_rate": 9.134652289257275e-06, + "loss": 0.6432, + "step": 6995 + }, + { + "epoch": 0.2144170650974623, + "grad_norm": 1.7715084501512703, + "learning_rate": 9.134373187280761e-06, + "loss": 0.662, + "step": 6996 + }, + { + "epoch": 0.2144477136202035, + "grad_norm": 1.805479575609256, + "learning_rate": 9.134094044567213e-06, + "loss": 0.6815, + "step": 6997 + }, + { + "epoch": 0.21447836214294472, + "grad_norm": 0.8043156297456138, + "learning_rate": 9.133814861119375e-06, + "loss": 0.4906, + "step": 6998 + }, + { + "epoch": 0.21450901066568592, + "grad_norm": 1.6647302086917195, + "learning_rate": 9.133535636940003e-06, + "loss": 0.7153, + "step": 6999 + }, + { + "epoch": 0.21453965918842713, + "grad_norm": 1.7887073532106652, + "learning_rate": 9.133256372031845e-06, + "loss": 0.7204, + "step": 7000 + }, + { + "epoch": 0.21457030771116833, + "grad_norm": 0.8385304043339927, + "learning_rate": 9.132977066397657e-06, + "loss": 0.4754, + "step": 7001 + }, + { + "epoch": 0.2146009562339095, + "grad_norm": 0.8255902657217924, + "learning_rate": 9.132697720040187e-06, + "loss": 0.506, + "step": 7002 + }, + { + "epoch": 0.21463160475665072, + "grad_norm": 1.9232229840657447, + "learning_rate": 9.132418332962189e-06, + "loss": 0.6644, + "step": 7003 + }, + { + "epoch": 0.21466225327939192, + "grad_norm": 1.7802428702456325, + "learning_rate": 9.132138905166417e-06, + "loss": 0.6896, + "step": 7004 + }, + { + "epoch": 0.21469290180213313, + "grad_norm": 1.81263224791839, + "learning_rate": 9.131859436655621e-06, + "loss": 0.7206, + "step": 7005 + }, + { + "epoch": 0.21472355032487433, + "grad_norm": 1.5436475469645543, + "learning_rate": 9.131579927432559e-06, + "loss": 0.6616, + "step": 7006 + }, + { + "epoch": 0.21475419884761554, + "grad_norm": 1.6450575312369302, + "learning_rate": 9.131300377499983e-06, + "loss": 0.6359, + "step": 7007 + }, + { + "epoch": 0.21478484737035675, + "grad_norm": 1.597360617481595, + "learning_rate": 9.131020786860644e-06, + "loss": 0.6772, + "step": 7008 + }, + { + "epoch": 0.21481549589309795, + "grad_norm": 1.8022081875444382, + "learning_rate": 9.130741155517305e-06, + "loss": 0.7597, + "step": 7009 + }, + { + "epoch": 0.21484614441583916, + "grad_norm": 0.8725924806950642, + "learning_rate": 9.130461483472712e-06, + "loss": 0.4877, + "step": 7010 + }, + { + "epoch": 0.21487679293858036, + "grad_norm": 1.8063927046529586, + "learning_rate": 9.130181770729628e-06, + "loss": 0.6423, + "step": 7011 + }, + { + "epoch": 0.21490744146132157, + "grad_norm": 1.827249535151622, + "learning_rate": 9.129902017290806e-06, + "loss": 0.7238, + "step": 7012 + }, + { + "epoch": 0.21493808998406277, + "grad_norm": 2.098724405863711, + "learning_rate": 9.129622223159002e-06, 
+ "loss": 0.7468, + "step": 7013 + }, + { + "epoch": 0.21496873850680398, + "grad_norm": 1.748393074990259, + "learning_rate": 9.129342388336973e-06, + "loss": 0.7185, + "step": 7014 + }, + { + "epoch": 0.21499938702954519, + "grad_norm": 1.7112139822646133, + "learning_rate": 9.129062512827478e-06, + "loss": 0.6547, + "step": 7015 + }, + { + "epoch": 0.2150300355522864, + "grad_norm": 3.18292279914859, + "learning_rate": 9.128782596633275e-06, + "loss": 0.7058, + "step": 7016 + }, + { + "epoch": 0.2150606840750276, + "grad_norm": 0.8129329295633559, + "learning_rate": 9.128502639757117e-06, + "loss": 0.4898, + "step": 7017 + }, + { + "epoch": 0.21509133259776878, + "grad_norm": 1.6599690675297474, + "learning_rate": 9.12822264220177e-06, + "loss": 0.6836, + "step": 7018 + }, + { + "epoch": 0.21512198112050998, + "grad_norm": 1.7744747341751226, + "learning_rate": 9.127942603969987e-06, + "loss": 0.7636, + "step": 7019 + }, + { + "epoch": 0.2151526296432512, + "grad_norm": 1.6705404481426673, + "learning_rate": 9.12766252506453e-06, + "loss": 0.7048, + "step": 7020 + }, + { + "epoch": 0.2151832781659924, + "grad_norm": 1.784120696397485, + "learning_rate": 9.127382405488156e-06, + "loss": 0.7101, + "step": 7021 + }, + { + "epoch": 0.2152139266887336, + "grad_norm": 1.7993686503370603, + "learning_rate": 9.12710224524363e-06, + "loss": 0.6915, + "step": 7022 + }, + { + "epoch": 0.2152445752114748, + "grad_norm": 1.764336306913328, + "learning_rate": 9.126822044333707e-06, + "loss": 0.6505, + "step": 7023 + }, + { + "epoch": 0.215275223734216, + "grad_norm": 1.6860632942826916, + "learning_rate": 9.126541802761153e-06, + "loss": 0.6615, + "step": 7024 + }, + { + "epoch": 0.21530587225695721, + "grad_norm": 1.8189986520425985, + "learning_rate": 9.126261520528725e-06, + "loss": 0.776, + "step": 7025 + }, + { + "epoch": 0.21533652077969842, + "grad_norm": 1.6442997428061474, + "learning_rate": 9.125981197639187e-06, + "loss": 0.7303, + "step": 7026 + }, + { + "epoch": 0.21536716930243963, + "grad_norm": 1.6999955981610342, + "learning_rate": 9.125700834095301e-06, + "loss": 0.8109, + "step": 7027 + }, + { + "epoch": 0.21539781782518083, + "grad_norm": 1.8677500168970826, + "learning_rate": 9.12542042989983e-06, + "loss": 0.7518, + "step": 7028 + }, + { + "epoch": 0.21542846634792204, + "grad_norm": 1.872191154374633, + "learning_rate": 9.125139985055533e-06, + "loss": 0.6508, + "step": 7029 + }, + { + "epoch": 0.21545911487066324, + "grad_norm": 1.6762230645933132, + "learning_rate": 9.124859499565178e-06, + "loss": 0.7309, + "step": 7030 + }, + { + "epoch": 0.21548976339340445, + "grad_norm": 0.8506978456735566, + "learning_rate": 9.124578973431527e-06, + "loss": 0.4638, + "step": 7031 + }, + { + "epoch": 0.21552041191614565, + "grad_norm": 1.7361269736824667, + "learning_rate": 9.124298406657345e-06, + "loss": 0.7812, + "step": 7032 + }, + { + "epoch": 0.21555106043888683, + "grad_norm": 1.634276017816826, + "learning_rate": 9.124017799245396e-06, + "loss": 0.74, + "step": 7033 + }, + { + "epoch": 0.21558170896162804, + "grad_norm": 1.8648427853950407, + "learning_rate": 9.123737151198442e-06, + "loss": 0.6514, + "step": 7034 + }, + { + "epoch": 0.21561235748436924, + "grad_norm": 1.722044296469583, + "learning_rate": 9.12345646251925e-06, + "loss": 0.621, + "step": 7035 + }, + { + "epoch": 0.21564300600711045, + "grad_norm": 1.6884829917505568, + "learning_rate": 9.12317573321059e-06, + "loss": 0.6873, + "step": 7036 + }, + { + "epoch": 0.21567365452985165, + "grad_norm": 
1.9897700076369427, + "learning_rate": 9.122894963275221e-06, + "loss": 0.8689, + "step": 7037 + }, + { + "epoch": 0.21570430305259286, + "grad_norm": 1.8172517524263982, + "learning_rate": 9.122614152715917e-06, + "loss": 0.7379, + "step": 7038 + }, + { + "epoch": 0.21573495157533407, + "grad_norm": 1.8900692846755418, + "learning_rate": 9.122333301535438e-06, + "loss": 0.8025, + "step": 7039 + }, + { + "epoch": 0.21576560009807527, + "grad_norm": 0.8624868582150736, + "learning_rate": 9.122052409736554e-06, + "loss": 0.488, + "step": 7040 + }, + { + "epoch": 0.21579624862081648, + "grad_norm": 1.738261637091519, + "learning_rate": 9.121771477322035e-06, + "loss": 0.7093, + "step": 7041 + }, + { + "epoch": 0.21582689714355768, + "grad_norm": 1.8656479654735891, + "learning_rate": 9.121490504294645e-06, + "loss": 0.8324, + "step": 7042 + }, + { + "epoch": 0.2158575456662989, + "grad_norm": 2.1669598402821184, + "learning_rate": 9.121209490657156e-06, + "loss": 0.7224, + "step": 7043 + }, + { + "epoch": 0.2158881941890401, + "grad_norm": 2.0877385774822286, + "learning_rate": 9.120928436412334e-06, + "loss": 0.7274, + "step": 7044 + }, + { + "epoch": 0.2159188427117813, + "grad_norm": 1.7418619965732447, + "learning_rate": 9.120647341562952e-06, + "loss": 0.6997, + "step": 7045 + }, + { + "epoch": 0.2159494912345225, + "grad_norm": 0.8188971671813499, + "learning_rate": 9.120366206111777e-06, + "loss": 0.4785, + "step": 7046 + }, + { + "epoch": 0.2159801397572637, + "grad_norm": 1.4934270705168542, + "learning_rate": 9.120085030061577e-06, + "loss": 0.6084, + "step": 7047 + }, + { + "epoch": 0.21601078828000492, + "grad_norm": 1.5601515778332495, + "learning_rate": 9.119803813415126e-06, + "loss": 0.7146, + "step": 7048 + }, + { + "epoch": 0.2160414368027461, + "grad_norm": 1.708619498438601, + "learning_rate": 9.119522556175196e-06, + "loss": 0.7821, + "step": 7049 + }, + { + "epoch": 0.2160720853254873, + "grad_norm": 0.8219705883500764, + "learning_rate": 9.119241258344554e-06, + "loss": 0.5102, + "step": 7050 + }, + { + "epoch": 0.2161027338482285, + "grad_norm": 1.7481020773937648, + "learning_rate": 9.118959919925977e-06, + "loss": 0.687, + "step": 7051 + }, + { + "epoch": 0.2161333823709697, + "grad_norm": 1.8334622480544054, + "learning_rate": 9.11867854092223e-06, + "loss": 0.8046, + "step": 7052 + }, + { + "epoch": 0.21616403089371092, + "grad_norm": 1.6530030925327817, + "learning_rate": 9.118397121336092e-06, + "loss": 0.6857, + "step": 7053 + }, + { + "epoch": 0.21619467941645212, + "grad_norm": 1.7008933610733916, + "learning_rate": 9.118115661170333e-06, + "loss": 0.7062, + "step": 7054 + }, + { + "epoch": 0.21622532793919333, + "grad_norm": 0.7967563821403555, + "learning_rate": 9.117834160427726e-06, + "loss": 0.4804, + "step": 7055 + }, + { + "epoch": 0.21625597646193453, + "grad_norm": 1.7910497957750997, + "learning_rate": 9.117552619111046e-06, + "loss": 0.8931, + "step": 7056 + }, + { + "epoch": 0.21628662498467574, + "grad_norm": 1.5147829238799952, + "learning_rate": 9.117271037223065e-06, + "loss": 0.6528, + "step": 7057 + }, + { + "epoch": 0.21631727350741695, + "grad_norm": 1.728459307467497, + "learning_rate": 9.116989414766561e-06, + "loss": 0.709, + "step": 7058 + }, + { + "epoch": 0.21634792203015815, + "grad_norm": 1.79336557276472, + "learning_rate": 9.116707751744307e-06, + "loss": 0.7659, + "step": 7059 + }, + { + "epoch": 0.21637857055289936, + "grad_norm": 1.9691897384826764, + "learning_rate": 9.116426048159077e-06, + "loss": 0.713, + "step": 7060 + }, 
+ { + "epoch": 0.21640921907564056, + "grad_norm": 1.5997935082473445, + "learning_rate": 9.116144304013648e-06, + "loss": 0.6656, + "step": 7061 + }, + { + "epoch": 0.21643986759838177, + "grad_norm": 1.7042856765830088, + "learning_rate": 9.115862519310797e-06, + "loss": 0.7825, + "step": 7062 + }, + { + "epoch": 0.21647051612112297, + "grad_norm": 1.8974012093032582, + "learning_rate": 9.115580694053298e-06, + "loss": 0.729, + "step": 7063 + }, + { + "epoch": 0.21650116464386415, + "grad_norm": 1.5229856527185082, + "learning_rate": 9.11529882824393e-06, + "loss": 0.7275, + "step": 7064 + }, + { + "epoch": 0.21653181316660536, + "grad_norm": 1.8244652489474025, + "learning_rate": 9.115016921885471e-06, + "loss": 0.7218, + "step": 7065 + }, + { + "epoch": 0.21656246168934656, + "grad_norm": 1.5023927868695335, + "learning_rate": 9.114734974980697e-06, + "loss": 0.6806, + "step": 7066 + }, + { + "epoch": 0.21659311021208777, + "grad_norm": 1.7266523186297662, + "learning_rate": 9.114452987532387e-06, + "loss": 0.6992, + "step": 7067 + }, + { + "epoch": 0.21662375873482898, + "grad_norm": 1.9458815186348506, + "learning_rate": 9.114170959543317e-06, + "loss": 0.7006, + "step": 7068 + }, + { + "epoch": 0.21665440725757018, + "grad_norm": 0.850397067808177, + "learning_rate": 9.11388889101627e-06, + "loss": 0.4816, + "step": 7069 + }, + { + "epoch": 0.2166850557803114, + "grad_norm": 1.8129581366849368, + "learning_rate": 9.11360678195402e-06, + "loss": 0.7751, + "step": 7070 + }, + { + "epoch": 0.2167157043030526, + "grad_norm": 1.7804917087480037, + "learning_rate": 9.113324632359352e-06, + "loss": 0.6106, + "step": 7071 + }, + { + "epoch": 0.2167463528257938, + "grad_norm": 1.7868080034841054, + "learning_rate": 9.113042442235045e-06, + "loss": 0.7226, + "step": 7072 + }, + { + "epoch": 0.216777001348535, + "grad_norm": 1.6521230715269768, + "learning_rate": 9.112760211583878e-06, + "loss": 0.6204, + "step": 7073 + }, + { + "epoch": 0.2168076498712762, + "grad_norm": 0.8209854826762336, + "learning_rate": 9.112477940408631e-06, + "loss": 0.5079, + "step": 7074 + }, + { + "epoch": 0.21683829839401741, + "grad_norm": 1.7349357934519176, + "learning_rate": 9.112195628712086e-06, + "loss": 0.7549, + "step": 7075 + }, + { + "epoch": 0.21686894691675862, + "grad_norm": 1.733846921448947, + "learning_rate": 9.111913276497026e-06, + "loss": 0.7849, + "step": 7076 + }, + { + "epoch": 0.21689959543949983, + "grad_norm": 1.6691899940676953, + "learning_rate": 9.111630883766233e-06, + "loss": 0.6442, + "step": 7077 + }, + { + "epoch": 0.21693024396224103, + "grad_norm": 1.7138772532214286, + "learning_rate": 9.111348450522491e-06, + "loss": 0.6531, + "step": 7078 + }, + { + "epoch": 0.21696089248498224, + "grad_norm": 2.1407485039041636, + "learning_rate": 9.111065976768578e-06, + "loss": 0.7851, + "step": 7079 + }, + { + "epoch": 0.21699154100772342, + "grad_norm": 1.857524654077512, + "learning_rate": 9.11078346250728e-06, + "loss": 0.7032, + "step": 7080 + }, + { + "epoch": 0.21702218953046462, + "grad_norm": 1.8001895156653005, + "learning_rate": 9.110500907741383e-06, + "loss": 0.6799, + "step": 7081 + }, + { + "epoch": 0.21705283805320583, + "grad_norm": 0.8314110419405524, + "learning_rate": 9.110218312473667e-06, + "loss": 0.4768, + "step": 7082 + }, + { + "epoch": 0.21708348657594703, + "grad_norm": 2.451298643712965, + "learning_rate": 9.109935676706918e-06, + "loss": 0.7256, + "step": 7083 + }, + { + "epoch": 0.21711413509868824, + "grad_norm": 1.8993601257018564, + "learning_rate": 
9.109653000443921e-06, + "loss": 0.7641, + "step": 7084 + }, + { + "epoch": 0.21714478362142944, + "grad_norm": 1.7145148245858406, + "learning_rate": 9.109370283687462e-06, + "loss": 0.6789, + "step": 7085 + }, + { + "epoch": 0.21717543214417065, + "grad_norm": 1.9836327630937687, + "learning_rate": 9.109087526440328e-06, + "loss": 0.6377, + "step": 7086 + }, + { + "epoch": 0.21720608066691185, + "grad_norm": 1.8761217470782527, + "learning_rate": 9.108804728705302e-06, + "loss": 0.833, + "step": 7087 + }, + { + "epoch": 0.21723672918965306, + "grad_norm": 1.8424136422740092, + "learning_rate": 9.108521890485172e-06, + "loss": 0.7587, + "step": 7088 + }, + { + "epoch": 0.21726737771239427, + "grad_norm": 1.666166729724519, + "learning_rate": 9.108239011782722e-06, + "loss": 0.7655, + "step": 7089 + }, + { + "epoch": 0.21729802623513547, + "grad_norm": 1.839023541702506, + "learning_rate": 9.107956092600745e-06, + "loss": 0.6438, + "step": 7090 + }, + { + "epoch": 0.21732867475787668, + "grad_norm": 0.8504605381907147, + "learning_rate": 9.107673132942025e-06, + "loss": 0.4951, + "step": 7091 + }, + { + "epoch": 0.21735932328061788, + "grad_norm": 0.9066900424999271, + "learning_rate": 9.10739013280935e-06, + "loss": 0.5163, + "step": 7092 + }, + { + "epoch": 0.2173899718033591, + "grad_norm": 1.9552861736759144, + "learning_rate": 9.10710709220551e-06, + "loss": 0.8272, + "step": 7093 + }, + { + "epoch": 0.2174206203261003, + "grad_norm": 0.8191989015384193, + "learning_rate": 9.10682401113329e-06, + "loss": 0.4971, + "step": 7094 + }, + { + "epoch": 0.21745126884884147, + "grad_norm": 1.6699209127753465, + "learning_rate": 9.106540889595485e-06, + "loss": 0.6154, + "step": 7095 + }, + { + "epoch": 0.21748191737158268, + "grad_norm": 1.8522933356871243, + "learning_rate": 9.106257727594883e-06, + "loss": 0.7287, + "step": 7096 + }, + { + "epoch": 0.21751256589432388, + "grad_norm": 1.7324377290465198, + "learning_rate": 9.105974525134272e-06, + "loss": 0.7382, + "step": 7097 + }, + { + "epoch": 0.2175432144170651, + "grad_norm": 1.985336576738761, + "learning_rate": 9.105691282216442e-06, + "loss": 0.7701, + "step": 7098 + }, + { + "epoch": 0.2175738629398063, + "grad_norm": 1.7359304731852356, + "learning_rate": 9.105407998844186e-06, + "loss": 0.7413, + "step": 7099 + }, + { + "epoch": 0.2176045114625475, + "grad_norm": 1.5447268371008172, + "learning_rate": 9.105124675020294e-06, + "loss": 0.6839, + "step": 7100 + }, + { + "epoch": 0.2176351599852887, + "grad_norm": 1.8473447659477975, + "learning_rate": 9.104841310747559e-06, + "loss": 0.7534, + "step": 7101 + }, + { + "epoch": 0.2176658085080299, + "grad_norm": 0.9785913230250081, + "learning_rate": 9.104557906028773e-06, + "loss": 0.4797, + "step": 7102 + }, + { + "epoch": 0.21769645703077112, + "grad_norm": 1.7789952146970998, + "learning_rate": 9.104274460866726e-06, + "loss": 0.6797, + "step": 7103 + }, + { + "epoch": 0.21772710555351232, + "grad_norm": 0.8878850837012774, + "learning_rate": 9.103990975264214e-06, + "loss": 0.5123, + "step": 7104 + }, + { + "epoch": 0.21775775407625353, + "grad_norm": 1.6209520818787677, + "learning_rate": 9.103707449224028e-06, + "loss": 0.6494, + "step": 7105 + }, + { + "epoch": 0.21778840259899473, + "grad_norm": 1.7358920015548536, + "learning_rate": 9.103423882748963e-06, + "loss": 0.74, + "step": 7106 + }, + { + "epoch": 0.21781905112173594, + "grad_norm": 1.6912836227239112, + "learning_rate": 9.103140275841812e-06, + "loss": 0.6389, + "step": 7107 + }, + { + "epoch": 0.21784969964447715, 
+ "grad_norm": 1.8788804583675693, + "learning_rate": 9.10285662850537e-06, + "loss": 0.7635, + "step": 7108 + }, + { + "epoch": 0.21788034816721835, + "grad_norm": 1.6454567445055497, + "learning_rate": 9.102572940742433e-06, + "loss": 0.6553, + "step": 7109 + }, + { + "epoch": 0.21791099668995956, + "grad_norm": 1.8076331137805495, + "learning_rate": 9.102289212555795e-06, + "loss": 0.7861, + "step": 7110 + }, + { + "epoch": 0.21794164521270074, + "grad_norm": 1.7422786965801842, + "learning_rate": 9.102005443948252e-06, + "loss": 0.7382, + "step": 7111 + }, + { + "epoch": 0.21797229373544194, + "grad_norm": 1.076544475564577, + "learning_rate": 9.1017216349226e-06, + "loss": 0.4892, + "step": 7112 + }, + { + "epoch": 0.21800294225818315, + "grad_norm": 1.6538280281415974, + "learning_rate": 9.101437785481633e-06, + "loss": 0.6501, + "step": 7113 + }, + { + "epoch": 0.21803359078092435, + "grad_norm": 2.1269975455692993, + "learning_rate": 9.101153895628152e-06, + "loss": 0.8212, + "step": 7114 + }, + { + "epoch": 0.21806423930366556, + "grad_norm": 1.5462189900265397, + "learning_rate": 9.10086996536495e-06, + "loss": 0.7037, + "step": 7115 + }, + { + "epoch": 0.21809488782640676, + "grad_norm": 1.8154367217653815, + "learning_rate": 9.10058599469483e-06, + "loss": 0.7465, + "step": 7116 + }, + { + "epoch": 0.21812553634914797, + "grad_norm": 1.7653320686205385, + "learning_rate": 9.100301983620587e-06, + "loss": 0.766, + "step": 7117 + }, + { + "epoch": 0.21815618487188917, + "grad_norm": 1.6784367383421954, + "learning_rate": 9.100017932145017e-06, + "loss": 0.7138, + "step": 7118 + }, + { + "epoch": 0.21818683339463038, + "grad_norm": 1.8617341804643892, + "learning_rate": 9.099733840270923e-06, + "loss": 0.6513, + "step": 7119 + }, + { + "epoch": 0.2182174819173716, + "grad_norm": 1.7198980581363201, + "learning_rate": 9.099449708001102e-06, + "loss": 0.6759, + "step": 7120 + }, + { + "epoch": 0.2182481304401128, + "grad_norm": 1.5751393625097927, + "learning_rate": 9.099165535338355e-06, + "loss": 0.7908, + "step": 7121 + }, + { + "epoch": 0.218278778962854, + "grad_norm": 1.751582865864355, + "learning_rate": 9.09888132228548e-06, + "loss": 0.6562, + "step": 7122 + }, + { + "epoch": 0.2183094274855952, + "grad_norm": 1.62409236227919, + "learning_rate": 9.098597068845279e-06, + "loss": 0.7783, + "step": 7123 + }, + { + "epoch": 0.2183400760083364, + "grad_norm": 1.5965814800743048, + "learning_rate": 9.098312775020552e-06, + "loss": 0.6204, + "step": 7124 + }, + { + "epoch": 0.21837072453107761, + "grad_norm": 1.6214402375000039, + "learning_rate": 9.098028440814101e-06, + "loss": 0.6427, + "step": 7125 + }, + { + "epoch": 0.2184013730538188, + "grad_norm": 1.6471174298633096, + "learning_rate": 9.097744066228728e-06, + "loss": 0.597, + "step": 7126 + }, + { + "epoch": 0.21843202157656, + "grad_norm": 1.7051990411057367, + "learning_rate": 9.097459651267233e-06, + "loss": 0.7877, + "step": 7127 + }, + { + "epoch": 0.2184626700993012, + "grad_norm": 2.008263002779389, + "learning_rate": 9.09717519593242e-06, + "loss": 0.7684, + "step": 7128 + }, + { + "epoch": 0.2184933186220424, + "grad_norm": 2.101900627539187, + "learning_rate": 9.096890700227093e-06, + "loss": 0.787, + "step": 7129 + }, + { + "epoch": 0.21852396714478362, + "grad_norm": 1.8417664227620556, + "learning_rate": 9.096606164154052e-06, + "loss": 0.6023, + "step": 7130 + }, + { + "epoch": 0.21855461566752482, + "grad_norm": 1.9804543929299612, + "learning_rate": 9.096321587716101e-06, + "loss": 0.7846, + "step": 7131 
+ }, + { + "epoch": 0.21858526419026603, + "grad_norm": 1.9966369782621776, + "learning_rate": 9.096036970916048e-06, + "loss": 0.7447, + "step": 7132 + }, + { + "epoch": 0.21861591271300723, + "grad_norm": 1.9167500033888802, + "learning_rate": 9.095752313756695e-06, + "loss": 0.7933, + "step": 7133 + }, + { + "epoch": 0.21864656123574844, + "grad_norm": 1.5971198593046203, + "learning_rate": 9.095467616240844e-06, + "loss": 0.7363, + "step": 7134 + }, + { + "epoch": 0.21867720975848964, + "grad_norm": 1.8182875227065476, + "learning_rate": 9.095182878371304e-06, + "loss": 0.6705, + "step": 7135 + }, + { + "epoch": 0.21870785828123085, + "grad_norm": 2.032758601275622, + "learning_rate": 9.09489810015088e-06, + "loss": 0.788, + "step": 7136 + }, + { + "epoch": 0.21873850680397205, + "grad_norm": 1.800561875782836, + "learning_rate": 9.094613281582376e-06, + "loss": 0.7474, + "step": 7137 + }, + { + "epoch": 0.21876915532671326, + "grad_norm": 1.726116553720526, + "learning_rate": 9.0943284226686e-06, + "loss": 0.7179, + "step": 7138 + }, + { + "epoch": 0.21879980384945447, + "grad_norm": 1.7429955680533564, + "learning_rate": 9.094043523412359e-06, + "loss": 0.695, + "step": 7139 + }, + { + "epoch": 0.21883045237219567, + "grad_norm": 2.235209876078084, + "learning_rate": 9.093758583816459e-06, + "loss": 0.6729, + "step": 7140 + }, + { + "epoch": 0.21886110089493688, + "grad_norm": 1.7081785188254075, + "learning_rate": 9.09347360388371e-06, + "loss": 0.7927, + "step": 7141 + }, + { + "epoch": 0.21889174941767806, + "grad_norm": 1.685805615242057, + "learning_rate": 9.093188583616917e-06, + "loss": 0.7145, + "step": 7142 + }, + { + "epoch": 0.21892239794041926, + "grad_norm": 1.8288839656481648, + "learning_rate": 9.092903523018888e-06, + "loss": 0.7068, + "step": 7143 + }, + { + "epoch": 0.21895304646316047, + "grad_norm": 0.9958586841237852, + "learning_rate": 9.092618422092434e-06, + "loss": 0.4855, + "step": 7144 + }, + { + "epoch": 0.21898369498590167, + "grad_norm": 1.6517927178381622, + "learning_rate": 9.092333280840365e-06, + "loss": 0.7049, + "step": 7145 + }, + { + "epoch": 0.21901434350864288, + "grad_norm": 1.899153315400226, + "learning_rate": 9.092048099265489e-06, + "loss": 0.7608, + "step": 7146 + }, + { + "epoch": 0.21904499203138408, + "grad_norm": 1.9422444919516944, + "learning_rate": 9.091762877370616e-06, + "loss": 0.7484, + "step": 7147 + }, + { + "epoch": 0.2190756405541253, + "grad_norm": 1.9905349317510126, + "learning_rate": 9.091477615158555e-06, + "loss": 0.7789, + "step": 7148 + }, + { + "epoch": 0.2191062890768665, + "grad_norm": 1.4513958240139753, + "learning_rate": 9.09119231263212e-06, + "loss": 0.6376, + "step": 7149 + }, + { + "epoch": 0.2191369375996077, + "grad_norm": 1.8774005521308978, + "learning_rate": 9.09090696979412e-06, + "loss": 0.7463, + "step": 7150 + }, + { + "epoch": 0.2191675861223489, + "grad_norm": 1.9056489875393248, + "learning_rate": 9.090621586647365e-06, + "loss": 0.7718, + "step": 7151 + }, + { + "epoch": 0.2191982346450901, + "grad_norm": 1.9179650116424083, + "learning_rate": 9.09033616319467e-06, + "loss": 0.7196, + "step": 7152 + }, + { + "epoch": 0.21922888316783132, + "grad_norm": 1.8266027743953452, + "learning_rate": 9.090050699438848e-06, + "loss": 0.7897, + "step": 7153 + }, + { + "epoch": 0.21925953169057252, + "grad_norm": 1.711201598071196, + "learning_rate": 9.089765195382708e-06, + "loss": 0.6653, + "step": 7154 + }, + { + "epoch": 0.21929018021331373, + "grad_norm": 1.770993527223573, + "learning_rate": 
9.089479651029065e-06, + "loss": 0.7374, + "step": 7155 + }, + { + "epoch": 0.21932082873605493, + "grad_norm": 1.7401481026511076, + "learning_rate": 9.089194066380735e-06, + "loss": 0.6571, + "step": 7156 + }, + { + "epoch": 0.2193514772587961, + "grad_norm": 1.6986952667070658, + "learning_rate": 9.088908441440527e-06, + "loss": 0.6751, + "step": 7157 + }, + { + "epoch": 0.21938212578153732, + "grad_norm": 1.9708440101748483, + "learning_rate": 9.088622776211257e-06, + "loss": 0.6809, + "step": 7158 + }, + { + "epoch": 0.21941277430427852, + "grad_norm": 1.909115477459308, + "learning_rate": 9.088337070695743e-06, + "loss": 0.791, + "step": 7159 + }, + { + "epoch": 0.21944342282701973, + "grad_norm": 1.8287194599251584, + "learning_rate": 9.088051324896798e-06, + "loss": 0.7394, + "step": 7160 + }, + { + "epoch": 0.21947407134976094, + "grad_norm": 1.6171295116437245, + "learning_rate": 9.087765538817237e-06, + "loss": 0.6184, + "step": 7161 + }, + { + "epoch": 0.21950471987250214, + "grad_norm": 1.662653801356007, + "learning_rate": 9.087479712459876e-06, + "loss": 0.7169, + "step": 7162 + }, + { + "epoch": 0.21953536839524335, + "grad_norm": 1.084999562415439, + "learning_rate": 9.08719384582753e-06, + "loss": 0.5126, + "step": 7163 + }, + { + "epoch": 0.21956601691798455, + "grad_norm": 1.3373762577302382, + "learning_rate": 9.08690793892302e-06, + "loss": 0.4807, + "step": 7164 + }, + { + "epoch": 0.21959666544072576, + "grad_norm": 1.727804775885603, + "learning_rate": 9.08662199174916e-06, + "loss": 0.7004, + "step": 7165 + }, + { + "epoch": 0.21962731396346696, + "grad_norm": 2.786002722833153, + "learning_rate": 9.086336004308767e-06, + "loss": 0.8123, + "step": 7166 + }, + { + "epoch": 0.21965796248620817, + "grad_norm": 1.9817181146050258, + "learning_rate": 9.08604997660466e-06, + "loss": 0.7922, + "step": 7167 + }, + { + "epoch": 0.21968861100894937, + "grad_norm": 1.6964585462054103, + "learning_rate": 9.085763908639657e-06, + "loss": 0.6965, + "step": 7168 + }, + { + "epoch": 0.21971925953169058, + "grad_norm": 1.8904201213455456, + "learning_rate": 9.085477800416575e-06, + "loss": 0.7365, + "step": 7169 + }, + { + "epoch": 0.2197499080544318, + "grad_norm": 1.6859618539335506, + "learning_rate": 9.085191651938238e-06, + "loss": 0.6414, + "step": 7170 + }, + { + "epoch": 0.219780556577173, + "grad_norm": 1.6895164518384773, + "learning_rate": 9.08490546320746e-06, + "loss": 0.6566, + "step": 7171 + }, + { + "epoch": 0.2198112050999142, + "grad_norm": 1.8492999181450736, + "learning_rate": 9.084619234227066e-06, + "loss": 0.7021, + "step": 7172 + }, + { + "epoch": 0.21984185362265538, + "grad_norm": 2.1256985186760797, + "learning_rate": 9.084332964999871e-06, + "loss": 0.6651, + "step": 7173 + }, + { + "epoch": 0.21987250214539658, + "grad_norm": 1.9187385668497148, + "learning_rate": 9.0840466555287e-06, + "loss": 0.7223, + "step": 7174 + }, + { + "epoch": 0.2199031506681378, + "grad_norm": 1.7608639076065302, + "learning_rate": 9.083760305816372e-06, + "loss": 0.6647, + "step": 7175 + }, + { + "epoch": 0.219933799190879, + "grad_norm": 1.676761760457416, + "learning_rate": 9.083473915865707e-06, + "loss": 0.7336, + "step": 7176 + }, + { + "epoch": 0.2199644477136202, + "grad_norm": 1.7133592716971309, + "learning_rate": 9.083187485679531e-06, + "loss": 0.7099, + "step": 7177 + }, + { + "epoch": 0.2199950962363614, + "grad_norm": 1.870922696327476, + "learning_rate": 9.082901015260664e-06, + "loss": 0.6777, + "step": 7178 + }, + { + "epoch": 0.2200257447591026, + 
"grad_norm": 1.5546845266727736, + "learning_rate": 9.082614504611928e-06, + "loss": 0.668, + "step": 7179 + }, + { + "epoch": 0.22005639328184382, + "grad_norm": 1.713011783793158, + "learning_rate": 9.082327953736146e-06, + "loss": 0.7066, + "step": 7180 + }, + { + "epoch": 0.22008704180458502, + "grad_norm": 1.6066551286274326, + "learning_rate": 9.082041362636142e-06, + "loss": 0.5397, + "step": 7181 + }, + { + "epoch": 0.22011769032732623, + "grad_norm": 1.5453266571898603, + "learning_rate": 9.08175473131474e-06, + "loss": 0.6993, + "step": 7182 + }, + { + "epoch": 0.22014833885006743, + "grad_norm": 1.71013112284106, + "learning_rate": 9.081468059774766e-06, + "loss": 0.6777, + "step": 7183 + }, + { + "epoch": 0.22017898737280864, + "grad_norm": 1.6482812045842195, + "learning_rate": 9.081181348019041e-06, + "loss": 0.5292, + "step": 7184 + }, + { + "epoch": 0.22020963589554984, + "grad_norm": 1.904771103490963, + "learning_rate": 9.080894596050393e-06, + "loss": 0.6782, + "step": 7185 + }, + { + "epoch": 0.22024028441829105, + "grad_norm": 1.627708923244157, + "learning_rate": 9.080607803871646e-06, + "loss": 0.7441, + "step": 7186 + }, + { + "epoch": 0.22027093294103225, + "grad_norm": 1.7762294669889718, + "learning_rate": 9.080320971485628e-06, + "loss": 0.7483, + "step": 7187 + }, + { + "epoch": 0.22030158146377343, + "grad_norm": 1.507482443965449, + "learning_rate": 9.080034098895161e-06, + "loss": 0.7359, + "step": 7188 + }, + { + "epoch": 0.22033222998651464, + "grad_norm": 2.0317459520731287, + "learning_rate": 9.079747186103077e-06, + "loss": 0.748, + "step": 7189 + }, + { + "epoch": 0.22036287850925584, + "grad_norm": 0.9792320388775644, + "learning_rate": 9.079460233112196e-06, + "loss": 0.508, + "step": 7190 + }, + { + "epoch": 0.22039352703199705, + "grad_norm": 1.7278607940431043, + "learning_rate": 9.079173239925352e-06, + "loss": 0.5999, + "step": 7191 + }, + { + "epoch": 0.22042417555473826, + "grad_norm": 1.835907348189121, + "learning_rate": 9.078886206545371e-06, + "loss": 0.7881, + "step": 7192 + }, + { + "epoch": 0.22045482407747946, + "grad_norm": 1.6527005580010437, + "learning_rate": 9.07859913297508e-06, + "loss": 0.5725, + "step": 7193 + }, + { + "epoch": 0.22048547260022067, + "grad_norm": 1.699597321390202, + "learning_rate": 9.078312019217308e-06, + "loss": 0.7704, + "step": 7194 + }, + { + "epoch": 0.22051612112296187, + "grad_norm": 1.6779269244522512, + "learning_rate": 9.078024865274883e-06, + "loss": 0.6607, + "step": 7195 + }, + { + "epoch": 0.22054676964570308, + "grad_norm": 2.028041284409162, + "learning_rate": 9.077737671150637e-06, + "loss": 0.7893, + "step": 7196 + }, + { + "epoch": 0.22057741816844428, + "grad_norm": 1.94729211953956, + "learning_rate": 9.077450436847397e-06, + "loss": 0.7558, + "step": 7197 + }, + { + "epoch": 0.2206080666911855, + "grad_norm": 1.0180382802404044, + "learning_rate": 9.077163162367997e-06, + "loss": 0.5135, + "step": 7198 + }, + { + "epoch": 0.2206387152139267, + "grad_norm": 0.913026134720954, + "learning_rate": 9.076875847715262e-06, + "loss": 0.4957, + "step": 7199 + }, + { + "epoch": 0.2206693637366679, + "grad_norm": 1.760425771437767, + "learning_rate": 9.076588492892029e-06, + "loss": 0.6693, + "step": 7200 + }, + { + "epoch": 0.2207000122594091, + "grad_norm": 0.8013422642168897, + "learning_rate": 9.076301097901126e-06, + "loss": 0.4988, + "step": 7201 + }, + { + "epoch": 0.2207306607821503, + "grad_norm": 1.6329299314610557, + "learning_rate": 9.076013662745385e-06, + "loss": 0.7046, + "step": 
7202 + }, + { + "epoch": 0.22076130930489152, + "grad_norm": 1.7879319706179868, + "learning_rate": 9.075726187427639e-06, + "loss": 0.6744, + "step": 7203 + }, + { + "epoch": 0.2207919578276327, + "grad_norm": 1.7132276097212145, + "learning_rate": 9.075438671950719e-06, + "loss": 0.6471, + "step": 7204 + }, + { + "epoch": 0.2208226063503739, + "grad_norm": 1.7184992747332986, + "learning_rate": 9.07515111631746e-06, + "loss": 0.7373, + "step": 7205 + }, + { + "epoch": 0.2208532548731151, + "grad_norm": 0.969503889052093, + "learning_rate": 9.074863520530691e-06, + "loss": 0.5158, + "step": 7206 + }, + { + "epoch": 0.2208839033958563, + "grad_norm": 0.9398062549341248, + "learning_rate": 9.074575884593252e-06, + "loss": 0.477, + "step": 7207 + }, + { + "epoch": 0.22091455191859752, + "grad_norm": 1.5958201379735826, + "learning_rate": 9.074288208507976e-06, + "loss": 0.6675, + "step": 7208 + }, + { + "epoch": 0.22094520044133872, + "grad_norm": 1.7966319584668742, + "learning_rate": 9.074000492277695e-06, + "loss": 0.7831, + "step": 7209 + }, + { + "epoch": 0.22097584896407993, + "grad_norm": 0.8206600430566764, + "learning_rate": 9.073712735905242e-06, + "loss": 0.5053, + "step": 7210 + }, + { + "epoch": 0.22100649748682114, + "grad_norm": 0.8930602852992586, + "learning_rate": 9.073424939393458e-06, + "loss": 0.4875, + "step": 7211 + }, + { + "epoch": 0.22103714600956234, + "grad_norm": 1.723512288130864, + "learning_rate": 9.073137102745174e-06, + "loss": 0.7268, + "step": 7212 + }, + { + "epoch": 0.22106779453230355, + "grad_norm": 1.8763337498577557, + "learning_rate": 9.07284922596323e-06, + "loss": 0.7159, + "step": 7213 + }, + { + "epoch": 0.22109844305504475, + "grad_norm": 0.86149875332806, + "learning_rate": 9.072561309050459e-06, + "loss": 0.4959, + "step": 7214 + }, + { + "epoch": 0.22112909157778596, + "grad_norm": 1.8653608484237292, + "learning_rate": 9.072273352009699e-06, + "loss": 0.6249, + "step": 7215 + }, + { + "epoch": 0.22115974010052716, + "grad_norm": 1.6854613793544393, + "learning_rate": 9.071985354843789e-06, + "loss": 0.6698, + "step": 7216 + }, + { + "epoch": 0.22119038862326837, + "grad_norm": 2.0369295662963127, + "learning_rate": 9.071697317555564e-06, + "loss": 0.7596, + "step": 7217 + }, + { + "epoch": 0.22122103714600957, + "grad_norm": 1.8714142746098434, + "learning_rate": 9.071409240147865e-06, + "loss": 0.6767, + "step": 7218 + }, + { + "epoch": 0.22125168566875075, + "grad_norm": 1.6978276700971047, + "learning_rate": 9.071121122623528e-06, + "loss": 0.6708, + "step": 7219 + }, + { + "epoch": 0.22128233419149196, + "grad_norm": 1.825811090882698, + "learning_rate": 9.070832964985393e-06, + "loss": 0.8332, + "step": 7220 + }, + { + "epoch": 0.22131298271423316, + "grad_norm": 2.073185129672899, + "learning_rate": 9.0705447672363e-06, + "loss": 0.6504, + "step": 7221 + }, + { + "epoch": 0.22134363123697437, + "grad_norm": 1.6584040856201727, + "learning_rate": 9.070256529379087e-06, + "loss": 0.7122, + "step": 7222 + }, + { + "epoch": 0.22137427975971558, + "grad_norm": 1.646583631819351, + "learning_rate": 9.069968251416596e-06, + "loss": 0.7556, + "step": 7223 + }, + { + "epoch": 0.22140492828245678, + "grad_norm": 1.6415409284982136, + "learning_rate": 9.069679933351666e-06, + "loss": 0.7305, + "step": 7224 + }, + { + "epoch": 0.221435576805198, + "grad_norm": 1.9851587703552713, + "learning_rate": 9.069391575187137e-06, + "loss": 0.7011, + "step": 7225 + }, + { + "epoch": 0.2214662253279392, + "grad_norm": 1.8465499341180633, + "learning_rate": 
9.069103176925853e-06, + "loss": 0.6967, + "step": 7226 + }, + { + "epoch": 0.2214968738506804, + "grad_norm": 1.6861340738754969, + "learning_rate": 9.068814738570655e-06, + "loss": 0.8042, + "step": 7227 + }, + { + "epoch": 0.2215275223734216, + "grad_norm": 0.920022790084979, + "learning_rate": 9.068526260124383e-06, + "loss": 0.5039, + "step": 7228 + }, + { + "epoch": 0.2215581708961628, + "grad_norm": 1.6930808249761977, + "learning_rate": 9.06823774158988e-06, + "loss": 0.7368, + "step": 7229 + }, + { + "epoch": 0.22158881941890402, + "grad_norm": 1.7099261459256894, + "learning_rate": 9.067949182969993e-06, + "loss": 0.8041, + "step": 7230 + }, + { + "epoch": 0.22161946794164522, + "grad_norm": 1.6232175587205757, + "learning_rate": 9.067660584267559e-06, + "loss": 0.6318, + "step": 7231 + }, + { + "epoch": 0.22165011646438643, + "grad_norm": 1.6960210191641032, + "learning_rate": 9.067371945485426e-06, + "loss": 0.8125, + "step": 7232 + }, + { + "epoch": 0.22168076498712763, + "grad_norm": 0.7805752960541723, + "learning_rate": 9.067083266626436e-06, + "loss": 0.4608, + "step": 7233 + }, + { + "epoch": 0.22171141350986884, + "grad_norm": 1.8640614787455065, + "learning_rate": 9.066794547693433e-06, + "loss": 0.7562, + "step": 7234 + }, + { + "epoch": 0.22174206203261002, + "grad_norm": 1.695900219607567, + "learning_rate": 9.066505788689264e-06, + "loss": 0.6987, + "step": 7235 + }, + { + "epoch": 0.22177271055535122, + "grad_norm": 1.5979812633744346, + "learning_rate": 9.066216989616772e-06, + "loss": 0.6635, + "step": 7236 + }, + { + "epoch": 0.22180335907809243, + "grad_norm": 2.1146072001813874, + "learning_rate": 9.065928150478804e-06, + "loss": 0.7272, + "step": 7237 + }, + { + "epoch": 0.22183400760083363, + "grad_norm": 1.6866397295740716, + "learning_rate": 9.065639271278205e-06, + "loss": 0.7063, + "step": 7238 + }, + { + "epoch": 0.22186465612357484, + "grad_norm": 1.73297591248751, + "learning_rate": 9.065350352017822e-06, + "loss": 0.8331, + "step": 7239 + }, + { + "epoch": 0.22189530464631604, + "grad_norm": 0.7848873585771222, + "learning_rate": 9.065061392700504e-06, + "loss": 0.4836, + "step": 7240 + }, + { + "epoch": 0.22192595316905725, + "grad_norm": 1.7234138887695196, + "learning_rate": 9.064772393329094e-06, + "loss": 0.7538, + "step": 7241 + }, + { + "epoch": 0.22195660169179846, + "grad_norm": 1.6892772306129742, + "learning_rate": 9.064483353906443e-06, + "loss": 0.7004, + "step": 7242 + }, + { + "epoch": 0.22198725021453966, + "grad_norm": 1.9072274486215854, + "learning_rate": 9.064194274435396e-06, + "loss": 0.7109, + "step": 7243 + }, + { + "epoch": 0.22201789873728087, + "grad_norm": 1.8406695342281725, + "learning_rate": 9.063905154918804e-06, + "loss": 0.7185, + "step": 7244 + }, + { + "epoch": 0.22204854726002207, + "grad_norm": 0.8284376035524147, + "learning_rate": 9.063615995359514e-06, + "loss": 0.4964, + "step": 7245 + }, + { + "epoch": 0.22207919578276328, + "grad_norm": 1.790291978774024, + "learning_rate": 9.063326795760377e-06, + "loss": 0.7357, + "step": 7246 + }, + { + "epoch": 0.22210984430550448, + "grad_norm": 1.860853383299195, + "learning_rate": 9.06303755612424e-06, + "loss": 0.6909, + "step": 7247 + }, + { + "epoch": 0.2221404928282457, + "grad_norm": 2.0761117729083844, + "learning_rate": 9.062748276453956e-06, + "loss": 0.7171, + "step": 7248 + }, + { + "epoch": 0.2221711413509869, + "grad_norm": 1.6656554006915596, + "learning_rate": 9.062458956752374e-06, + "loss": 0.694, + "step": 7249 + }, + { + "epoch": 
0.22220178987372807, + "grad_norm": 1.8381050145345985, + "learning_rate": 9.062169597022343e-06, + "loss": 0.8065, + "step": 7250 + }, + { + "epoch": 0.22223243839646928, + "grad_norm": 1.6774585281370433, + "learning_rate": 9.061880197266715e-06, + "loss": 0.6964, + "step": 7251 + }, + { + "epoch": 0.22226308691921048, + "grad_norm": 1.941513060290912, + "learning_rate": 9.061590757488343e-06, + "loss": 0.7205, + "step": 7252 + }, + { + "epoch": 0.2222937354419517, + "grad_norm": 1.6065259079363639, + "learning_rate": 9.061301277690079e-06, + "loss": 0.6258, + "step": 7253 + }, + { + "epoch": 0.2223243839646929, + "grad_norm": 1.811983273380415, + "learning_rate": 9.061011757874773e-06, + "loss": 0.8115, + "step": 7254 + }, + { + "epoch": 0.2223550324874341, + "grad_norm": 1.8618329061269305, + "learning_rate": 9.06072219804528e-06, + "loss": 0.6889, + "step": 7255 + }, + { + "epoch": 0.2223856810101753, + "grad_norm": 1.6307342060928272, + "learning_rate": 9.060432598204452e-06, + "loss": 0.6928, + "step": 7256 + }, + { + "epoch": 0.2224163295329165, + "grad_norm": 1.682667239113293, + "learning_rate": 9.060142958355143e-06, + "loss": 0.7426, + "step": 7257 + }, + { + "epoch": 0.22244697805565772, + "grad_norm": 1.7203704057104983, + "learning_rate": 9.059853278500206e-06, + "loss": 0.6835, + "step": 7258 + }, + { + "epoch": 0.22247762657839892, + "grad_norm": 1.8120804140796578, + "learning_rate": 9.059563558642495e-06, + "loss": 0.7622, + "step": 7259 + }, + { + "epoch": 0.22250827510114013, + "grad_norm": 1.8514417277572586, + "learning_rate": 9.059273798784867e-06, + "loss": 0.7653, + "step": 7260 + }, + { + "epoch": 0.22253892362388134, + "grad_norm": 1.8013851247477497, + "learning_rate": 9.058983998930176e-06, + "loss": 0.727, + "step": 7261 + }, + { + "epoch": 0.22256957214662254, + "grad_norm": 1.5693566106691212, + "learning_rate": 9.058694159081275e-06, + "loss": 0.6653, + "step": 7262 + }, + { + "epoch": 0.22260022066936375, + "grad_norm": 1.8292601227007605, + "learning_rate": 9.058404279241024e-06, + "loss": 0.8285, + "step": 7263 + }, + { + "epoch": 0.22263086919210495, + "grad_norm": 1.8295029240946261, + "learning_rate": 9.058114359412277e-06, + "loss": 0.7143, + "step": 7264 + }, + { + "epoch": 0.22266151771484616, + "grad_norm": 1.6594400053516851, + "learning_rate": 9.057824399597892e-06, + "loss": 0.7377, + "step": 7265 + }, + { + "epoch": 0.22269216623758734, + "grad_norm": 1.7200358872977202, + "learning_rate": 9.057534399800722e-06, + "loss": 0.762, + "step": 7266 + }, + { + "epoch": 0.22272281476032854, + "grad_norm": 1.9787723609032866, + "learning_rate": 9.05724436002363e-06, + "loss": 0.7061, + "step": 7267 + }, + { + "epoch": 0.22275346328306975, + "grad_norm": 1.6440623020067011, + "learning_rate": 9.05695428026947e-06, + "loss": 0.6879, + "step": 7268 + }, + { + "epoch": 0.22278411180581095, + "grad_norm": 1.8384179820800728, + "learning_rate": 9.0566641605411e-06, + "loss": 0.7911, + "step": 7269 + }, + { + "epoch": 0.22281476032855216, + "grad_norm": 1.5847521152861799, + "learning_rate": 9.05637400084138e-06, + "loss": 0.7171, + "step": 7270 + }, + { + "epoch": 0.22284540885129336, + "grad_norm": 1.6926432437048657, + "learning_rate": 9.056083801173172e-06, + "loss": 0.7123, + "step": 7271 + }, + { + "epoch": 0.22287605737403457, + "grad_norm": 1.5667543768580676, + "learning_rate": 9.055793561539332e-06, + "loss": 0.6684, + "step": 7272 + }, + { + "epoch": 0.22290670589677578, + "grad_norm": 1.712712810243882, + "learning_rate": 9.05550328194272e-06, + 
"loss": 0.7779, + "step": 7273 + }, + { + "epoch": 0.22293735441951698, + "grad_norm": 1.8953906881466893, + "learning_rate": 9.055212962386196e-06, + "loss": 0.8389, + "step": 7274 + }, + { + "epoch": 0.2229680029422582, + "grad_norm": 1.7091018896868844, + "learning_rate": 9.054922602872621e-06, + "loss": 0.8096, + "step": 7275 + }, + { + "epoch": 0.2229986514649994, + "grad_norm": 1.843104408403717, + "learning_rate": 9.054632203404856e-06, + "loss": 0.7079, + "step": 7276 + }, + { + "epoch": 0.2230292999877406, + "grad_norm": 1.9492363288829004, + "learning_rate": 9.054341763985764e-06, + "loss": 0.6595, + "step": 7277 + }, + { + "epoch": 0.2230599485104818, + "grad_norm": 0.8839794415471485, + "learning_rate": 9.054051284618205e-06, + "loss": 0.4715, + "step": 7278 + }, + { + "epoch": 0.223090597033223, + "grad_norm": 1.9182775873334696, + "learning_rate": 9.053760765305039e-06, + "loss": 0.8098, + "step": 7279 + }, + { + "epoch": 0.22312124555596421, + "grad_norm": 1.744755994479875, + "learning_rate": 9.053470206049133e-06, + "loss": 0.7154, + "step": 7280 + }, + { + "epoch": 0.2231518940787054, + "grad_norm": 1.674311190560118, + "learning_rate": 9.053179606853346e-06, + "loss": 0.61, + "step": 7281 + }, + { + "epoch": 0.2231825426014466, + "grad_norm": 0.7752217457177947, + "learning_rate": 9.052888967720546e-06, + "loss": 0.4793, + "step": 7282 + }, + { + "epoch": 0.2232131911241878, + "grad_norm": 1.692762397168351, + "learning_rate": 9.052598288653592e-06, + "loss": 0.7314, + "step": 7283 + }, + { + "epoch": 0.223243839646929, + "grad_norm": 1.94534398039471, + "learning_rate": 9.052307569655351e-06, + "loss": 0.6985, + "step": 7284 + }, + { + "epoch": 0.22327448816967022, + "grad_norm": 1.8133342534150314, + "learning_rate": 9.052016810728686e-06, + "loss": 0.7568, + "step": 7285 + }, + { + "epoch": 0.22330513669241142, + "grad_norm": 1.6322749827820264, + "learning_rate": 9.051726011876464e-06, + "loss": 0.6493, + "step": 7286 + }, + { + "epoch": 0.22333578521515263, + "grad_norm": 1.6971951179425881, + "learning_rate": 9.051435173101549e-06, + "loss": 0.5797, + "step": 7287 + }, + { + "epoch": 0.22336643373789383, + "grad_norm": 1.750093496055058, + "learning_rate": 9.051144294406803e-06, + "loss": 0.687, + "step": 7288 + }, + { + "epoch": 0.22339708226063504, + "grad_norm": 2.0514547466399335, + "learning_rate": 9.0508533757951e-06, + "loss": 0.801, + "step": 7289 + }, + { + "epoch": 0.22342773078337624, + "grad_norm": 1.9362584166232653, + "learning_rate": 9.050562417269301e-06, + "loss": 0.7886, + "step": 7290 + }, + { + "epoch": 0.22345837930611745, + "grad_norm": 0.9641870557316453, + "learning_rate": 9.050271418832272e-06, + "loss": 0.4719, + "step": 7291 + }, + { + "epoch": 0.22348902782885866, + "grad_norm": 1.7908365366408745, + "learning_rate": 9.049980380486887e-06, + "loss": 0.6364, + "step": 7292 + }, + { + "epoch": 0.22351967635159986, + "grad_norm": 2.309166068038384, + "learning_rate": 9.049689302236005e-06, + "loss": 0.838, + "step": 7293 + }, + { + "epoch": 0.22355032487434107, + "grad_norm": 1.7595374235578616, + "learning_rate": 9.049398184082499e-06, + "loss": 0.7315, + "step": 7294 + }, + { + "epoch": 0.22358097339708227, + "grad_norm": 0.7909986982947056, + "learning_rate": 9.049107026029236e-06, + "loss": 0.4785, + "step": 7295 + }, + { + "epoch": 0.22361162191982348, + "grad_norm": 1.9542372159774366, + "learning_rate": 9.048815828079087e-06, + "loss": 0.7365, + "step": 7296 + }, + { + "epoch": 0.22364227044256466, + "grad_norm": 1.769232272303367, + 
"learning_rate": 9.04852459023492e-06, + "loss": 0.7041, + "step": 7297 + }, + { + "epoch": 0.22367291896530586, + "grad_norm": 1.9858459225642504, + "learning_rate": 9.048233312499604e-06, + "loss": 0.7583, + "step": 7298 + }, + { + "epoch": 0.22370356748804707, + "grad_norm": 1.855506082236666, + "learning_rate": 9.047941994876008e-06, + "loss": 0.7912, + "step": 7299 + }, + { + "epoch": 0.22373421601078827, + "grad_norm": 0.8135987569853617, + "learning_rate": 9.047650637367005e-06, + "loss": 0.4939, + "step": 7300 + }, + { + "epoch": 0.22376486453352948, + "grad_norm": 1.854998989699253, + "learning_rate": 9.047359239975464e-06, + "loss": 0.6514, + "step": 7301 + }, + { + "epoch": 0.22379551305627068, + "grad_norm": 1.9938552471042168, + "learning_rate": 9.047067802704259e-06, + "loss": 0.783, + "step": 7302 + }, + { + "epoch": 0.2238261615790119, + "grad_norm": 1.8556695419391374, + "learning_rate": 9.046776325556257e-06, + "loss": 0.8172, + "step": 7303 + }, + { + "epoch": 0.2238568101017531, + "grad_norm": 1.576687555616452, + "learning_rate": 9.046484808534333e-06, + "loss": 0.6548, + "step": 7304 + }, + { + "epoch": 0.2238874586244943, + "grad_norm": 1.725530857246149, + "learning_rate": 9.04619325164136e-06, + "loss": 0.7868, + "step": 7305 + }, + { + "epoch": 0.2239181071472355, + "grad_norm": 1.721247726874564, + "learning_rate": 9.045901654880207e-06, + "loss": 0.642, + "step": 7306 + }, + { + "epoch": 0.2239487556699767, + "grad_norm": 1.7356774106853285, + "learning_rate": 9.045610018253752e-06, + "loss": 0.744, + "step": 7307 + }, + { + "epoch": 0.22397940419271792, + "grad_norm": 0.9350285207837065, + "learning_rate": 9.045318341764866e-06, + "loss": 0.498, + "step": 7308 + }, + { + "epoch": 0.22401005271545912, + "grad_norm": 0.8978290677889298, + "learning_rate": 9.045026625416423e-06, + "loss": 0.4991, + "step": 7309 + }, + { + "epoch": 0.22404070123820033, + "grad_norm": 0.7697166999009274, + "learning_rate": 9.044734869211298e-06, + "loss": 0.4766, + "step": 7310 + }, + { + "epoch": 0.22407134976094154, + "grad_norm": 1.8507168670535858, + "learning_rate": 9.044443073152364e-06, + "loss": 0.5882, + "step": 7311 + }, + { + "epoch": 0.2241019982836827, + "grad_norm": 1.6977939144912715, + "learning_rate": 9.0441512372425e-06, + "loss": 0.772, + "step": 7312 + }, + { + "epoch": 0.22413264680642392, + "grad_norm": 1.8434480849639878, + "learning_rate": 9.043859361484578e-06, + "loss": 0.7969, + "step": 7313 + }, + { + "epoch": 0.22416329532916512, + "grad_norm": 1.6835559164067668, + "learning_rate": 9.043567445881475e-06, + "loss": 0.7406, + "step": 7314 + }, + { + "epoch": 0.22419394385190633, + "grad_norm": 1.8271958142514924, + "learning_rate": 9.043275490436068e-06, + "loss": 0.6769, + "step": 7315 + }, + { + "epoch": 0.22422459237464754, + "grad_norm": 1.8695538517436159, + "learning_rate": 9.042983495151232e-06, + "loss": 0.7777, + "step": 7316 + }, + { + "epoch": 0.22425524089738874, + "grad_norm": 1.3777433253693159, + "learning_rate": 9.042691460029846e-06, + "loss": 0.4916, + "step": 7317 + }, + { + "epoch": 0.22428588942012995, + "grad_norm": 1.07567908918064, + "learning_rate": 9.042399385074785e-06, + "loss": 0.4892, + "step": 7318 + }, + { + "epoch": 0.22431653794287115, + "grad_norm": 1.747189176709027, + "learning_rate": 9.042107270288932e-06, + "loss": 0.7829, + "step": 7319 + }, + { + "epoch": 0.22434718646561236, + "grad_norm": 1.8890013866472137, + "learning_rate": 9.04181511567516e-06, + "loss": 0.732, + "step": 7320 + }, + { + "epoch": 
0.22437783498835356, + "grad_norm": 1.1313582660041663, + "learning_rate": 9.041522921236347e-06, + "loss": 0.4958, + "step": 7321 + }, + { + "epoch": 0.22440848351109477, + "grad_norm": 1.7950409630297854, + "learning_rate": 9.041230686975377e-06, + "loss": 0.7532, + "step": 7322 + }, + { + "epoch": 0.22443913203383598, + "grad_norm": 2.092643128188025, + "learning_rate": 9.040938412895127e-06, + "loss": 0.7904, + "step": 7323 + }, + { + "epoch": 0.22446978055657718, + "grad_norm": 1.9710779262755875, + "learning_rate": 9.040646098998477e-06, + "loss": 0.7464, + "step": 7324 + }, + { + "epoch": 0.2245004290793184, + "grad_norm": 1.611054358256088, + "learning_rate": 9.040353745288307e-06, + "loss": 0.6535, + "step": 7325 + }, + { + "epoch": 0.2245310776020596, + "grad_norm": 1.792171168963958, + "learning_rate": 9.040061351767498e-06, + "loss": 0.7703, + "step": 7326 + }, + { + "epoch": 0.2245617261248008, + "grad_norm": 1.9831924321629775, + "learning_rate": 9.039768918438931e-06, + "loss": 0.8186, + "step": 7327 + }, + { + "epoch": 0.22459237464754198, + "grad_norm": 1.7673798241862637, + "learning_rate": 9.039476445305486e-06, + "loss": 0.7066, + "step": 7328 + }, + { + "epoch": 0.22462302317028318, + "grad_norm": 1.9960963603509412, + "learning_rate": 9.039183932370046e-06, + "loss": 0.9158, + "step": 7329 + }, + { + "epoch": 0.2246536716930244, + "grad_norm": 1.8738636121410894, + "learning_rate": 9.038891379635494e-06, + "loss": 0.7513, + "step": 7330 + }, + { + "epoch": 0.2246843202157656, + "grad_norm": 2.566215284877947, + "learning_rate": 9.038598787104714e-06, + "loss": 0.6784, + "step": 7331 + }, + { + "epoch": 0.2247149687385068, + "grad_norm": 1.1123051777531872, + "learning_rate": 9.038306154780585e-06, + "loss": 0.5105, + "step": 7332 + }, + { + "epoch": 0.224745617261248, + "grad_norm": 2.031834277190326, + "learning_rate": 9.03801348266599e-06, + "loss": 0.7472, + "step": 7333 + }, + { + "epoch": 0.2247762657839892, + "grad_norm": 2.0722590369538665, + "learning_rate": 9.037720770763818e-06, + "loss": 0.822, + "step": 7334 + }, + { + "epoch": 0.22480691430673042, + "grad_norm": 0.8583338555831637, + "learning_rate": 9.037428019076948e-06, + "loss": 0.4817, + "step": 7335 + }, + { + "epoch": 0.22483756282947162, + "grad_norm": 1.7474456867133734, + "learning_rate": 9.037135227608269e-06, + "loss": 0.6867, + "step": 7336 + }, + { + "epoch": 0.22486821135221283, + "grad_norm": 1.8147732856327528, + "learning_rate": 9.036842396360661e-06, + "loss": 0.7229, + "step": 7337 + }, + { + "epoch": 0.22489885987495403, + "grad_norm": 1.6997150323684835, + "learning_rate": 9.036549525337015e-06, + "loss": 0.7886, + "step": 7338 + }, + { + "epoch": 0.22492950839769524, + "grad_norm": 0.8310282445576195, + "learning_rate": 9.036256614540211e-06, + "loss": 0.4874, + "step": 7339 + }, + { + "epoch": 0.22496015692043644, + "grad_norm": 1.7843050198678156, + "learning_rate": 9.03596366397314e-06, + "loss": 0.7817, + "step": 7340 + }, + { + "epoch": 0.22499080544317765, + "grad_norm": 1.7150573420508477, + "learning_rate": 9.035670673638684e-06, + "loss": 0.6944, + "step": 7341 + }, + { + "epoch": 0.22502145396591886, + "grad_norm": 1.832059321671727, + "learning_rate": 9.035377643539735e-06, + "loss": 0.6548, + "step": 7342 + }, + { + "epoch": 0.22505210248866003, + "grad_norm": 0.8830273463733054, + "learning_rate": 9.035084573679176e-06, + "loss": 0.5083, + "step": 7343 + }, + { + "epoch": 0.22508275101140124, + "grad_norm": 1.8531448266959554, + "learning_rate": 9.034791464059896e-06, + 
"loss": 0.69, + "step": 7344 + }, + { + "epoch": 0.22511339953414244, + "grad_norm": 1.510718998315085, + "learning_rate": 9.034498314684784e-06, + "loss": 0.7419, + "step": 7345 + }, + { + "epoch": 0.22514404805688365, + "grad_norm": 1.3946141886417553, + "learning_rate": 9.034205125556728e-06, + "loss": 0.5987, + "step": 7346 + }, + { + "epoch": 0.22517469657962486, + "grad_norm": 1.9465184293911604, + "learning_rate": 9.033911896678617e-06, + "loss": 0.7444, + "step": 7347 + }, + { + "epoch": 0.22520534510236606, + "grad_norm": 1.7593388433218584, + "learning_rate": 9.033618628053338e-06, + "loss": 0.714, + "step": 7348 + }, + { + "epoch": 0.22523599362510727, + "grad_norm": 1.8702907577029988, + "learning_rate": 9.033325319683786e-06, + "loss": 0.6347, + "step": 7349 + }, + { + "epoch": 0.22526664214784847, + "grad_norm": 1.6817512704654256, + "learning_rate": 9.033031971572845e-06, + "loss": 0.688, + "step": 7350 + }, + { + "epoch": 0.22529729067058968, + "grad_norm": 1.902151016438782, + "learning_rate": 9.032738583723407e-06, + "loss": 0.7309, + "step": 7351 + }, + { + "epoch": 0.22532793919333088, + "grad_norm": 1.6661332109399751, + "learning_rate": 9.032445156138367e-06, + "loss": 0.6891, + "step": 7352 + }, + { + "epoch": 0.2253585877160721, + "grad_norm": 1.6184671368626498, + "learning_rate": 9.032151688820612e-06, + "loss": 0.7682, + "step": 7353 + }, + { + "epoch": 0.2253892362388133, + "grad_norm": 1.9749647320498323, + "learning_rate": 9.031858181773034e-06, + "loss": 0.706, + "step": 7354 + }, + { + "epoch": 0.2254198847615545, + "grad_norm": 1.9275089190934822, + "learning_rate": 9.031564634998527e-06, + "loss": 0.7226, + "step": 7355 + }, + { + "epoch": 0.2254505332842957, + "grad_norm": 1.9793737959946218, + "learning_rate": 9.031271048499982e-06, + "loss": 0.5854, + "step": 7356 + }, + { + "epoch": 0.2254811818070369, + "grad_norm": 0.9576483630309244, + "learning_rate": 9.030977422280291e-06, + "loss": 0.4736, + "step": 7357 + }, + { + "epoch": 0.22551183032977812, + "grad_norm": 1.668753277176103, + "learning_rate": 9.030683756342348e-06, + "loss": 0.6467, + "step": 7358 + }, + { + "epoch": 0.2255424788525193, + "grad_norm": 1.5900423856917376, + "learning_rate": 9.030390050689047e-06, + "loss": 0.6059, + "step": 7359 + }, + { + "epoch": 0.2255731273752605, + "grad_norm": 1.8217902539437194, + "learning_rate": 9.030096305323281e-06, + "loss": 0.7737, + "step": 7360 + }, + { + "epoch": 0.2256037758980017, + "grad_norm": 1.828066462481493, + "learning_rate": 9.029802520247946e-06, + "loss": 0.7215, + "step": 7361 + }, + { + "epoch": 0.2256344244207429, + "grad_norm": 2.0514331879784007, + "learning_rate": 9.029508695465935e-06, + "loss": 0.7073, + "step": 7362 + }, + { + "epoch": 0.22566507294348412, + "grad_norm": 1.5675773699407725, + "learning_rate": 9.029214830980145e-06, + "loss": 0.6185, + "step": 7363 + }, + { + "epoch": 0.22569572146622532, + "grad_norm": 1.997871703883092, + "learning_rate": 9.028920926793468e-06, + "loss": 0.7496, + "step": 7364 + }, + { + "epoch": 0.22572636998896653, + "grad_norm": 2.0435382235068094, + "learning_rate": 9.028626982908805e-06, + "loss": 0.7148, + "step": 7365 + }, + { + "epoch": 0.22575701851170774, + "grad_norm": 0.8501281561335114, + "learning_rate": 9.028332999329048e-06, + "loss": 0.5057, + "step": 7366 + }, + { + "epoch": 0.22578766703444894, + "grad_norm": 1.817556536236826, + "learning_rate": 9.028038976057097e-06, + "loss": 0.697, + "step": 7367 + }, + { + "epoch": 0.22581831555719015, + "grad_norm": 
2.0380122607537836, + "learning_rate": 9.027744913095844e-06, + "loss": 0.8457, + "step": 7368 + }, + { + "epoch": 0.22584896407993135, + "grad_norm": 1.8283837409249746, + "learning_rate": 9.027450810448193e-06, + "loss": 0.7151, + "step": 7369 + }, + { + "epoch": 0.22587961260267256, + "grad_norm": 0.7917315226602317, + "learning_rate": 9.027156668117036e-06, + "loss": 0.486, + "step": 7370 + }, + { + "epoch": 0.22591026112541376, + "grad_norm": 1.6645484543561386, + "learning_rate": 9.026862486105277e-06, + "loss": 0.625, + "step": 7371 + }, + { + "epoch": 0.22594090964815497, + "grad_norm": 1.6687522843917661, + "learning_rate": 9.026568264415809e-06, + "loss": 0.7009, + "step": 7372 + }, + { + "epoch": 0.22597155817089618, + "grad_norm": 1.6409136856848014, + "learning_rate": 9.026274003051535e-06, + "loss": 0.5715, + "step": 7373 + }, + { + "epoch": 0.22600220669363735, + "grad_norm": 0.777723288139691, + "learning_rate": 9.025979702015352e-06, + "loss": 0.4977, + "step": 7374 + }, + { + "epoch": 0.22603285521637856, + "grad_norm": 1.9571108868832459, + "learning_rate": 9.025685361310162e-06, + "loss": 0.6856, + "step": 7375 + }, + { + "epoch": 0.22606350373911976, + "grad_norm": 0.7927736366851938, + "learning_rate": 9.025390980938864e-06, + "loss": 0.4967, + "step": 7376 + }, + { + "epoch": 0.22609415226186097, + "grad_norm": 1.794310704206597, + "learning_rate": 9.025096560904359e-06, + "loss": 0.6879, + "step": 7377 + }, + { + "epoch": 0.22612480078460218, + "grad_norm": 1.7813731692340908, + "learning_rate": 9.024802101209547e-06, + "loss": 0.718, + "step": 7378 + }, + { + "epoch": 0.22615544930734338, + "grad_norm": 1.5826376723103295, + "learning_rate": 9.02450760185733e-06, + "loss": 0.675, + "step": 7379 + }, + { + "epoch": 0.2261860978300846, + "grad_norm": 1.8878406309992657, + "learning_rate": 9.02421306285061e-06, + "loss": 0.8221, + "step": 7380 + }, + { + "epoch": 0.2262167463528258, + "grad_norm": 1.7495265822532284, + "learning_rate": 9.023918484192289e-06, + "loss": 0.7302, + "step": 7381 + }, + { + "epoch": 0.226247394875567, + "grad_norm": 0.7624243008558022, + "learning_rate": 9.023623865885272e-06, + "loss": 0.4642, + "step": 7382 + }, + { + "epoch": 0.2262780433983082, + "grad_norm": 1.871343029624403, + "learning_rate": 9.023329207932456e-06, + "loss": 0.8199, + "step": 7383 + }, + { + "epoch": 0.2263086919210494, + "grad_norm": 1.6834516697789979, + "learning_rate": 9.02303451033675e-06, + "loss": 0.7489, + "step": 7384 + }, + { + "epoch": 0.22633934044379062, + "grad_norm": 1.6333539874420007, + "learning_rate": 9.022739773101055e-06, + "loss": 0.685, + "step": 7385 + }, + { + "epoch": 0.22636998896653182, + "grad_norm": 1.7539694441849316, + "learning_rate": 9.022444996228276e-06, + "loss": 0.7607, + "step": 7386 + }, + { + "epoch": 0.22640063748927303, + "grad_norm": 0.7961571609692681, + "learning_rate": 9.022150179721316e-06, + "loss": 0.4352, + "step": 7387 + }, + { + "epoch": 0.22643128601201423, + "grad_norm": 0.802640093236222, + "learning_rate": 9.021855323583082e-06, + "loss": 0.4839, + "step": 7388 + }, + { + "epoch": 0.22646193453475544, + "grad_norm": 1.7105033586217593, + "learning_rate": 9.02156042781648e-06, + "loss": 0.7056, + "step": 7389 + }, + { + "epoch": 0.22649258305749662, + "grad_norm": 1.9136666270502205, + "learning_rate": 9.021265492424412e-06, + "loss": 0.7275, + "step": 7390 + }, + { + "epoch": 0.22652323158023782, + "grad_norm": 1.7041922550928055, + "learning_rate": 9.020970517409786e-06, + "loss": 0.7735, + "step": 7391 + }, 
+ { + "epoch": 0.22655388010297903, + "grad_norm": 0.8485304101015206, + "learning_rate": 9.020675502775511e-06, + "loss": 0.5029, + "step": 7392 + }, + { + "epoch": 0.22658452862572023, + "grad_norm": 0.8088453756981732, + "learning_rate": 9.020380448524489e-06, + "loss": 0.4776, + "step": 7393 + }, + { + "epoch": 0.22661517714846144, + "grad_norm": 0.8024676258309783, + "learning_rate": 9.020085354659631e-06, + "loss": 0.4712, + "step": 7394 + }, + { + "epoch": 0.22664582567120264, + "grad_norm": 1.4995694471123937, + "learning_rate": 9.019790221183844e-06, + "loss": 0.5836, + "step": 7395 + }, + { + "epoch": 0.22667647419394385, + "grad_norm": 1.6948246389061454, + "learning_rate": 9.019495048100035e-06, + "loss": 0.7179, + "step": 7396 + }, + { + "epoch": 0.22670712271668506, + "grad_norm": 1.9245791337139233, + "learning_rate": 9.019199835411112e-06, + "loss": 0.7472, + "step": 7397 + }, + { + "epoch": 0.22673777123942626, + "grad_norm": 3.5822502543234176, + "learning_rate": 9.018904583119987e-06, + "loss": 0.8344, + "step": 7398 + }, + { + "epoch": 0.22676841976216747, + "grad_norm": 1.7580857804421093, + "learning_rate": 9.018609291229565e-06, + "loss": 0.8005, + "step": 7399 + }, + { + "epoch": 0.22679906828490867, + "grad_norm": 1.6496621593178746, + "learning_rate": 9.018313959742756e-06, + "loss": 0.7068, + "step": 7400 + }, + { + "epoch": 0.22682971680764988, + "grad_norm": 2.075945366568093, + "learning_rate": 9.018018588662474e-06, + "loss": 0.7193, + "step": 7401 + }, + { + "epoch": 0.22686036533039108, + "grad_norm": 1.623383656882588, + "learning_rate": 9.017723177991627e-06, + "loss": 0.66, + "step": 7402 + }, + { + "epoch": 0.2268910138531323, + "grad_norm": 1.747881194577664, + "learning_rate": 9.017427727733124e-06, + "loss": 0.8069, + "step": 7403 + }, + { + "epoch": 0.2269216623758735, + "grad_norm": 1.8240325967915605, + "learning_rate": 9.017132237889877e-06, + "loss": 0.7584, + "step": 7404 + }, + { + "epoch": 0.22695231089861467, + "grad_norm": 1.9222153165228208, + "learning_rate": 9.0168367084648e-06, + "loss": 0.5589, + "step": 7405 + }, + { + "epoch": 0.22698295942135588, + "grad_norm": 1.8479223361928179, + "learning_rate": 9.016541139460803e-06, + "loss": 0.7356, + "step": 7406 + }, + { + "epoch": 0.22701360794409708, + "grad_norm": 1.8507739521306228, + "learning_rate": 9.016245530880798e-06, + "loss": 0.7899, + "step": 7407 + }, + { + "epoch": 0.2270442564668383, + "grad_norm": 1.8247152266754691, + "learning_rate": 9.015949882727697e-06, + "loss": 0.7397, + "step": 7408 + }, + { + "epoch": 0.2270749049895795, + "grad_norm": 1.6347478447719714, + "learning_rate": 9.015654195004416e-06, + "loss": 0.6529, + "step": 7409 + }, + { + "epoch": 0.2271055535123207, + "grad_norm": 1.8143628064711597, + "learning_rate": 9.015358467713865e-06, + "loss": 0.7606, + "step": 7410 + }, + { + "epoch": 0.2271362020350619, + "grad_norm": 1.9008817899677717, + "learning_rate": 9.015062700858963e-06, + "loss": 0.7889, + "step": 7411 + }, + { + "epoch": 0.2271668505578031, + "grad_norm": 1.9551459968481706, + "learning_rate": 9.014766894442619e-06, + "loss": 0.7561, + "step": 7412 + }, + { + "epoch": 0.22719749908054432, + "grad_norm": 8.280010845250105, + "learning_rate": 9.01447104846775e-06, + "loss": 0.7261, + "step": 7413 + }, + { + "epoch": 0.22722814760328552, + "grad_norm": 1.666390036090097, + "learning_rate": 9.01417516293727e-06, + "loss": 0.6849, + "step": 7414 + }, + { + "epoch": 0.22725879612602673, + "grad_norm": 1.7069703800267022, + "learning_rate": 
9.013879237854095e-06, + "loss": 0.7156, + "step": 7415 + }, + { + "epoch": 0.22728944464876794, + "grad_norm": 1.0276684048388376, + "learning_rate": 9.013583273221141e-06, + "loss": 0.4881, + "step": 7416 + }, + { + "epoch": 0.22732009317150914, + "grad_norm": 1.9532530118977538, + "learning_rate": 9.013287269041322e-06, + "loss": 0.7511, + "step": 7417 + }, + { + "epoch": 0.22735074169425035, + "grad_norm": 0.8767870989202853, + "learning_rate": 9.01299122531756e-06, + "loss": 0.4998, + "step": 7418 + }, + { + "epoch": 0.22738139021699155, + "grad_norm": 1.5413280291963574, + "learning_rate": 9.012695142052767e-06, + "loss": 0.714, + "step": 7419 + }, + { + "epoch": 0.22741203873973276, + "grad_norm": 1.9515530230582279, + "learning_rate": 9.012399019249863e-06, + "loss": 0.8569, + "step": 7420 + }, + { + "epoch": 0.22744268726247394, + "grad_norm": 1.9637964653811855, + "learning_rate": 9.012102856911764e-06, + "loss": 0.7652, + "step": 7421 + }, + { + "epoch": 0.22747333578521514, + "grad_norm": 1.6251994772882068, + "learning_rate": 9.011806655041389e-06, + "loss": 0.6493, + "step": 7422 + }, + { + "epoch": 0.22750398430795635, + "grad_norm": 1.7584835449139846, + "learning_rate": 9.011510413641658e-06, + "loss": 0.7391, + "step": 7423 + }, + { + "epoch": 0.22753463283069755, + "grad_norm": 1.193536966249129, + "learning_rate": 9.011214132715486e-06, + "loss": 0.5114, + "step": 7424 + }, + { + "epoch": 0.22756528135343876, + "grad_norm": 1.8433746696958389, + "learning_rate": 9.010917812265796e-06, + "loss": 0.777, + "step": 7425 + }, + { + "epoch": 0.22759592987617996, + "grad_norm": 0.8952373482459111, + "learning_rate": 9.010621452295508e-06, + "loss": 0.4856, + "step": 7426 + }, + { + "epoch": 0.22762657839892117, + "grad_norm": 1.9047347509121975, + "learning_rate": 9.010325052807538e-06, + "loss": 0.7104, + "step": 7427 + }, + { + "epoch": 0.22765722692166238, + "grad_norm": 1.8036684180237428, + "learning_rate": 9.01002861380481e-06, + "loss": 0.6428, + "step": 7428 + }, + { + "epoch": 0.22768787544440358, + "grad_norm": 1.8736651102821884, + "learning_rate": 9.009732135290246e-06, + "loss": 0.8524, + "step": 7429 + }, + { + "epoch": 0.2277185239671448, + "grad_norm": 1.7388362790713772, + "learning_rate": 9.009435617266764e-06, + "loss": 0.7415, + "step": 7430 + }, + { + "epoch": 0.227749172489886, + "grad_norm": 0.8877390085200039, + "learning_rate": 9.009139059737286e-06, + "loss": 0.4759, + "step": 7431 + }, + { + "epoch": 0.2277798210126272, + "grad_norm": 14.954790981825134, + "learning_rate": 9.008842462704737e-06, + "loss": 0.7493, + "step": 7432 + }, + { + "epoch": 0.2278104695353684, + "grad_norm": 2.0943493907391035, + "learning_rate": 9.008545826172037e-06, + "loss": 0.7329, + "step": 7433 + }, + { + "epoch": 0.2278411180581096, + "grad_norm": 1.4954035016810665, + "learning_rate": 9.00824915014211e-06, + "loss": 0.7357, + "step": 7434 + }, + { + "epoch": 0.22787176658085082, + "grad_norm": 1.7214370374846975, + "learning_rate": 9.007952434617877e-06, + "loss": 0.7177, + "step": 7435 + }, + { + "epoch": 0.227902415103592, + "grad_norm": 1.7574668705275223, + "learning_rate": 9.007655679602262e-06, + "loss": 0.6682, + "step": 7436 + }, + { + "epoch": 0.2279330636263332, + "grad_norm": 1.7612996818782602, + "learning_rate": 9.007358885098192e-06, + "loss": 0.7437, + "step": 7437 + }, + { + "epoch": 0.2279637121490744, + "grad_norm": 2.4597486346858846, + "learning_rate": 9.00706205110859e-06, + "loss": 0.8342, + "step": 7438 + }, + { + "epoch": 0.2279943606718156, + 
"grad_norm": 1.8370819024449374, + "learning_rate": 9.00676517763638e-06, + "loss": 0.7323, + "step": 7439 + }, + { + "epoch": 0.22802500919455682, + "grad_norm": 2.178312696415652, + "learning_rate": 9.006468264684487e-06, + "loss": 0.7408, + "step": 7440 + }, + { + "epoch": 0.22805565771729802, + "grad_norm": 5.212624397393382, + "learning_rate": 9.006171312255837e-06, + "loss": 0.472, + "step": 7441 + }, + { + "epoch": 0.22808630624003923, + "grad_norm": 2.0145862253622995, + "learning_rate": 9.005874320353356e-06, + "loss": 0.7408, + "step": 7442 + }, + { + "epoch": 0.22811695476278043, + "grad_norm": 1.8770751094409104, + "learning_rate": 9.005577288979972e-06, + "loss": 0.7109, + "step": 7443 + }, + { + "epoch": 0.22814760328552164, + "grad_norm": 2.0961657821552815, + "learning_rate": 9.00528021813861e-06, + "loss": 0.7234, + "step": 7444 + }, + { + "epoch": 0.22817825180826284, + "grad_norm": 1.7507077764924595, + "learning_rate": 9.004983107832195e-06, + "loss": 0.7509, + "step": 7445 + }, + { + "epoch": 0.22820890033100405, + "grad_norm": 0.8613965285498534, + "learning_rate": 9.004685958063657e-06, + "loss": 0.4956, + "step": 7446 + }, + { + "epoch": 0.22823954885374526, + "grad_norm": 1.9155073400943763, + "learning_rate": 9.004388768835926e-06, + "loss": 0.735, + "step": 7447 + }, + { + "epoch": 0.22827019737648646, + "grad_norm": 1.6998283076178544, + "learning_rate": 9.004091540151926e-06, + "loss": 0.8113, + "step": 7448 + }, + { + "epoch": 0.22830084589922767, + "grad_norm": 1.8834941399780225, + "learning_rate": 9.003794272014587e-06, + "loss": 0.7887, + "step": 7449 + }, + { + "epoch": 0.22833149442196887, + "grad_norm": 1.507108782329804, + "learning_rate": 9.003496964426842e-06, + "loss": 0.748, + "step": 7450 + }, + { + "epoch": 0.22836214294471008, + "grad_norm": 0.9364477450422084, + "learning_rate": 9.003199617391613e-06, + "loss": 0.4992, + "step": 7451 + }, + { + "epoch": 0.22839279146745126, + "grad_norm": 1.7000564450478652, + "learning_rate": 9.002902230911836e-06, + "loss": 0.7032, + "step": 7452 + }, + { + "epoch": 0.22842343999019246, + "grad_norm": 1.7884451837221182, + "learning_rate": 9.002604804990438e-06, + "loss": 0.6919, + "step": 7453 + }, + { + "epoch": 0.22845408851293367, + "grad_norm": 1.6042107853892984, + "learning_rate": 9.002307339630352e-06, + "loss": 0.6095, + "step": 7454 + }, + { + "epoch": 0.22848473703567487, + "grad_norm": 1.8887033623732594, + "learning_rate": 9.002009834834506e-06, + "loss": 0.7595, + "step": 7455 + }, + { + "epoch": 0.22851538555841608, + "grad_norm": 1.655610188022896, + "learning_rate": 9.001712290605835e-06, + "loss": 0.7315, + "step": 7456 + }, + { + "epoch": 0.22854603408115728, + "grad_norm": 1.8086791405464122, + "learning_rate": 9.001414706947269e-06, + "loss": 0.7216, + "step": 7457 + }, + { + "epoch": 0.2285766826038985, + "grad_norm": 1.5450312558067283, + "learning_rate": 9.00111708386174e-06, + "loss": 0.6683, + "step": 7458 + }, + { + "epoch": 0.2286073311266397, + "grad_norm": 1.7208542260947313, + "learning_rate": 9.000819421352178e-06, + "loss": 0.7694, + "step": 7459 + }, + { + "epoch": 0.2286379796493809, + "grad_norm": 1.8644302399254309, + "learning_rate": 9.000521719421522e-06, + "loss": 0.8704, + "step": 7460 + }, + { + "epoch": 0.2286686281721221, + "grad_norm": 1.5038753697887308, + "learning_rate": 9.0002239780727e-06, + "loss": 0.7747, + "step": 7461 + }, + { + "epoch": 0.2286992766948633, + "grad_norm": 1.0666509893018903, + "learning_rate": 8.999926197308649e-06, + "loss": 0.4868, + 
"step": 7462 + }, + { + "epoch": 0.22872992521760452, + "grad_norm": 1.0247455413604094, + "learning_rate": 8.999628377132298e-06, + "loss": 0.4924, + "step": 7463 + }, + { + "epoch": 0.22876057374034572, + "grad_norm": 1.843593633534192, + "learning_rate": 8.99933051754659e-06, + "loss": 0.6761, + "step": 7464 + }, + { + "epoch": 0.22879122226308693, + "grad_norm": 1.7182781691720073, + "learning_rate": 8.999032618554453e-06, + "loss": 0.7166, + "step": 7465 + }, + { + "epoch": 0.22882187078582814, + "grad_norm": 1.787841100919115, + "learning_rate": 8.998734680158824e-06, + "loss": 0.7153, + "step": 7466 + }, + { + "epoch": 0.2288525193085693, + "grad_norm": 1.7082010428636345, + "learning_rate": 8.99843670236264e-06, + "loss": 0.772, + "step": 7467 + }, + { + "epoch": 0.22888316783131052, + "grad_norm": 0.8244874567176131, + "learning_rate": 8.998138685168836e-06, + "loss": 0.4714, + "step": 7468 + }, + { + "epoch": 0.22891381635405172, + "grad_norm": 1.8263201508846025, + "learning_rate": 8.997840628580348e-06, + "loss": 0.7136, + "step": 7469 + }, + { + "epoch": 0.22894446487679293, + "grad_norm": 1.7596123318926005, + "learning_rate": 8.997542532600114e-06, + "loss": 0.7617, + "step": 7470 + }, + { + "epoch": 0.22897511339953414, + "grad_norm": 1.7551753521978672, + "learning_rate": 8.99724439723107e-06, + "loss": 0.692, + "step": 7471 + }, + { + "epoch": 0.22900576192227534, + "grad_norm": 1.694845811996046, + "learning_rate": 8.996946222476156e-06, + "loss": 0.6949, + "step": 7472 + }, + { + "epoch": 0.22903641044501655, + "grad_norm": 0.8467714911133734, + "learning_rate": 8.996648008338307e-06, + "loss": 0.4856, + "step": 7473 + }, + { + "epoch": 0.22906705896775775, + "grad_norm": 1.7778127535596824, + "learning_rate": 8.996349754820461e-06, + "loss": 0.7064, + "step": 7474 + }, + { + "epoch": 0.22909770749049896, + "grad_norm": 2.0033991635493917, + "learning_rate": 8.996051461925562e-06, + "loss": 0.7542, + "step": 7475 + }, + { + "epoch": 0.22912835601324016, + "grad_norm": 1.9709314328654575, + "learning_rate": 8.995753129656542e-06, + "loss": 0.6982, + "step": 7476 + }, + { + "epoch": 0.22915900453598137, + "grad_norm": 0.8252666583493803, + "learning_rate": 8.995454758016345e-06, + "loss": 0.4723, + "step": 7477 + }, + { + "epoch": 0.22918965305872258, + "grad_norm": 1.9813354997784174, + "learning_rate": 8.99515634700791e-06, + "loss": 0.7071, + "step": 7478 + }, + { + "epoch": 0.22922030158146378, + "grad_norm": 1.6849049077805172, + "learning_rate": 8.994857896634178e-06, + "loss": 0.6074, + "step": 7479 + }, + { + "epoch": 0.229250950104205, + "grad_norm": 2.196193512333205, + "learning_rate": 8.994559406898088e-06, + "loss": 0.7266, + "step": 7480 + }, + { + "epoch": 0.2292815986269462, + "grad_norm": 1.8813318480097532, + "learning_rate": 8.994260877802585e-06, + "loss": 0.7322, + "step": 7481 + }, + { + "epoch": 0.2293122471496874, + "grad_norm": 1.7712517010034177, + "learning_rate": 8.993962309350605e-06, + "loss": 0.7146, + "step": 7482 + }, + { + "epoch": 0.22934289567242858, + "grad_norm": 1.7289664484823624, + "learning_rate": 8.993663701545091e-06, + "loss": 0.6691, + "step": 7483 + }, + { + "epoch": 0.22937354419516978, + "grad_norm": 1.7906940687381714, + "learning_rate": 8.993365054388989e-06, + "loss": 0.7471, + "step": 7484 + }, + { + "epoch": 0.229404192717911, + "grad_norm": 1.9006139544616678, + "learning_rate": 8.99306636788524e-06, + "loss": 0.7302, + "step": 7485 + }, + { + "epoch": 0.2294348412406522, + "grad_norm": 1.6955503786258137, + 
"learning_rate": 8.992767642036786e-06, + "loss": 0.7446, + "step": 7486 + }, + { + "epoch": 0.2294654897633934, + "grad_norm": 2.0155398193817193, + "learning_rate": 8.992468876846569e-06, + "loss": 0.7509, + "step": 7487 + }, + { + "epoch": 0.2294961382861346, + "grad_norm": 1.8141700135574812, + "learning_rate": 8.992170072317536e-06, + "loss": 0.7919, + "step": 7488 + }, + { + "epoch": 0.2295267868088758, + "grad_norm": 0.8954147384386071, + "learning_rate": 8.99187122845263e-06, + "loss": 0.4872, + "step": 7489 + }, + { + "epoch": 0.22955743533161702, + "grad_norm": 3.0591132656343683, + "learning_rate": 8.991572345254796e-06, + "loss": 0.7492, + "step": 7490 + }, + { + "epoch": 0.22958808385435822, + "grad_norm": 1.75964852023931, + "learning_rate": 8.991273422726975e-06, + "loss": 0.7529, + "step": 7491 + }, + { + "epoch": 0.22961873237709943, + "grad_norm": 1.6415432372541978, + "learning_rate": 8.990974460872119e-06, + "loss": 0.6724, + "step": 7492 + }, + { + "epoch": 0.22964938089984063, + "grad_norm": 1.8217824858949387, + "learning_rate": 8.99067545969317e-06, + "loss": 0.6932, + "step": 7493 + }, + { + "epoch": 0.22968002942258184, + "grad_norm": 1.921852064878497, + "learning_rate": 8.990376419193074e-06, + "loss": 0.7288, + "step": 7494 + }, + { + "epoch": 0.22971067794532304, + "grad_norm": 2.841297330763125, + "learning_rate": 8.990077339374778e-06, + "loss": 0.7059, + "step": 7495 + }, + { + "epoch": 0.22974132646806425, + "grad_norm": 1.6546936695217285, + "learning_rate": 8.98977822024123e-06, + "loss": 0.7801, + "step": 7496 + }, + { + "epoch": 0.22977197499080546, + "grad_norm": 1.7606417330754707, + "learning_rate": 8.989479061795377e-06, + "loss": 0.7375, + "step": 7497 + }, + { + "epoch": 0.22980262351354663, + "grad_norm": 1.7194699053042206, + "learning_rate": 8.989179864040166e-06, + "loss": 0.7622, + "step": 7498 + }, + { + "epoch": 0.22983327203628784, + "grad_norm": 1.6245526873394294, + "learning_rate": 8.988880626978543e-06, + "loss": 0.6872, + "step": 7499 + }, + { + "epoch": 0.22986392055902904, + "grad_norm": 1.6924912966326289, + "learning_rate": 8.98858135061346e-06, + "loss": 0.692, + "step": 7500 + }, + { + "epoch": 0.22989456908177025, + "grad_norm": 1.9957486329048024, + "learning_rate": 8.988282034947864e-06, + "loss": 0.7681, + "step": 7501 + }, + { + "epoch": 0.22992521760451146, + "grad_norm": 0.9252861823909375, + "learning_rate": 8.987982679984704e-06, + "loss": 0.4994, + "step": 7502 + }, + { + "epoch": 0.22995586612725266, + "grad_norm": 0.8678482992723914, + "learning_rate": 8.987683285726931e-06, + "loss": 0.4711, + "step": 7503 + }, + { + "epoch": 0.22998651464999387, + "grad_norm": 1.7501575892422234, + "learning_rate": 8.987383852177497e-06, + "loss": 0.716, + "step": 7504 + }, + { + "epoch": 0.23001716317273507, + "grad_norm": 1.8016928085381196, + "learning_rate": 8.987084379339345e-06, + "loss": 0.6688, + "step": 7505 + }, + { + "epoch": 0.23004781169547628, + "grad_norm": 1.6985989350950663, + "learning_rate": 8.986784867215433e-06, + "loss": 0.7438, + "step": 7506 + }, + { + "epoch": 0.23007846021821748, + "grad_norm": 2.127681637577671, + "learning_rate": 8.98648531580871e-06, + "loss": 0.7182, + "step": 7507 + }, + { + "epoch": 0.2301091087409587, + "grad_norm": 2.18185517017369, + "learning_rate": 8.986185725122125e-06, + "loss": 0.7612, + "step": 7508 + }, + { + "epoch": 0.2301397572636999, + "grad_norm": 1.5793089803452287, + "learning_rate": 8.985886095158634e-06, + "loss": 0.7266, + "step": 7509 + }, + { + "epoch": 
0.2301704057864411, + "grad_norm": 1.8095469874716552, + "learning_rate": 8.985586425921187e-06, + "loss": 0.7354, + "step": 7510 + }, + { + "epoch": 0.2302010543091823, + "grad_norm": 1.6706981755411754, + "learning_rate": 8.985286717412737e-06, + "loss": 0.6761, + "step": 7511 + }, + { + "epoch": 0.2302317028319235, + "grad_norm": 1.8890402789646192, + "learning_rate": 8.984986969636238e-06, + "loss": 0.6421, + "step": 7512 + }, + { + "epoch": 0.23026235135466472, + "grad_norm": 2.0693819838685994, + "learning_rate": 8.984687182594642e-06, + "loss": 0.7658, + "step": 7513 + }, + { + "epoch": 0.2302929998774059, + "grad_norm": 1.625527153752677, + "learning_rate": 8.984387356290905e-06, + "loss": 0.7613, + "step": 7514 + }, + { + "epoch": 0.2303236484001471, + "grad_norm": 2.6043289378331544, + "learning_rate": 8.984087490727978e-06, + "loss": 0.6605, + "step": 7515 + }, + { + "epoch": 0.2303542969228883, + "grad_norm": 6.298558726223969, + "learning_rate": 8.983787585908819e-06, + "loss": 0.6626, + "step": 7516 + }, + { + "epoch": 0.2303849454456295, + "grad_norm": 1.8515298411973133, + "learning_rate": 8.98348764183638e-06, + "loss": 0.8638, + "step": 7517 + }, + { + "epoch": 0.23041559396837072, + "grad_norm": 1.774102655813211, + "learning_rate": 8.983187658513618e-06, + "loss": 0.7633, + "step": 7518 + }, + { + "epoch": 0.23044624249111192, + "grad_norm": 1.669396571255921, + "learning_rate": 8.982887635943492e-06, + "loss": 0.7042, + "step": 7519 + }, + { + "epoch": 0.23047689101385313, + "grad_norm": 2.069236874875307, + "learning_rate": 8.982587574128953e-06, + "loss": 0.6617, + "step": 7520 + }, + { + "epoch": 0.23050753953659434, + "grad_norm": 2.410635943727434, + "learning_rate": 8.98228747307296e-06, + "loss": 0.6365, + "step": 7521 + }, + { + "epoch": 0.23053818805933554, + "grad_norm": 1.3873627979353875, + "learning_rate": 8.981987332778468e-06, + "loss": 0.4951, + "step": 7522 + }, + { + "epoch": 0.23056883658207675, + "grad_norm": 1.9003040406383271, + "learning_rate": 8.981687153248438e-06, + "loss": 0.7235, + "step": 7523 + }, + { + "epoch": 0.23059948510481795, + "grad_norm": 1.6373793818074986, + "learning_rate": 8.981386934485825e-06, + "loss": 0.7211, + "step": 7524 + }, + { + "epoch": 0.23063013362755916, + "grad_norm": 1.923457128328767, + "learning_rate": 8.98108667649359e-06, + "loss": 0.782, + "step": 7525 + }, + { + "epoch": 0.23066078215030036, + "grad_norm": 1.7933320142471163, + "learning_rate": 8.980786379274685e-06, + "loss": 0.7545, + "step": 7526 + }, + { + "epoch": 0.23069143067304157, + "grad_norm": 1.6792982021292944, + "learning_rate": 8.980486042832076e-06, + "loss": 0.6556, + "step": 7527 + }, + { + "epoch": 0.23072207919578278, + "grad_norm": 1.7858221799060996, + "learning_rate": 8.98018566716872e-06, + "loss": 0.6768, + "step": 7528 + }, + { + "epoch": 0.23075272771852395, + "grad_norm": 1.6938421614527146, + "learning_rate": 8.979885252287575e-06, + "loss": 0.6814, + "step": 7529 + }, + { + "epoch": 0.23078337624126516, + "grad_norm": 1.829965827366096, + "learning_rate": 8.9795847981916e-06, + "loss": 0.7425, + "step": 7530 + }, + { + "epoch": 0.23081402476400636, + "grad_norm": 2.0432045282159956, + "learning_rate": 8.979284304883762e-06, + "loss": 0.7242, + "step": 7531 + }, + { + "epoch": 0.23084467328674757, + "grad_norm": 1.1797534635867108, + "learning_rate": 8.978983772367015e-06, + "loss": 0.4544, + "step": 7532 + }, + { + "epoch": 0.23087532180948878, + "grad_norm": 1.8884169626491174, + "learning_rate": 8.978683200644325e-06, + 
"loss": 0.6565, + "step": 7533 + }, + { + "epoch": 0.23090597033222998, + "grad_norm": 2.110229918195631, + "learning_rate": 8.97838258971865e-06, + "loss": 0.7244, + "step": 7534 + }, + { + "epoch": 0.2309366188549712, + "grad_norm": 1.939517964162014, + "learning_rate": 8.978081939592953e-06, + "loss": 0.7806, + "step": 7535 + }, + { + "epoch": 0.2309672673777124, + "grad_norm": 1.7969708224235488, + "learning_rate": 8.9777812502702e-06, + "loss": 0.7306, + "step": 7536 + }, + { + "epoch": 0.2309979159004536, + "grad_norm": 1.5289061207756527, + "learning_rate": 8.977480521753346e-06, + "loss": 0.7225, + "step": 7537 + }, + { + "epoch": 0.2310285644231948, + "grad_norm": 1.6995852082185836, + "learning_rate": 8.977179754045362e-06, + "loss": 0.756, + "step": 7538 + }, + { + "epoch": 0.231059212945936, + "grad_norm": 1.939708011984613, + "learning_rate": 8.976878947149206e-06, + "loss": 0.7069, + "step": 7539 + }, + { + "epoch": 0.23108986146867722, + "grad_norm": 1.6838916396280597, + "learning_rate": 8.976578101067845e-06, + "loss": 0.6767, + "step": 7540 + }, + { + "epoch": 0.23112050999141842, + "grad_norm": 1.9164396589816814, + "learning_rate": 8.976277215804243e-06, + "loss": 0.6278, + "step": 7541 + }, + { + "epoch": 0.23115115851415963, + "grad_norm": 1.6401061252744498, + "learning_rate": 8.975976291361364e-06, + "loss": 0.7089, + "step": 7542 + }, + { + "epoch": 0.23118180703690083, + "grad_norm": 1.8646374504445185, + "learning_rate": 8.975675327742173e-06, + "loss": 0.6912, + "step": 7543 + }, + { + "epoch": 0.23121245555964204, + "grad_norm": 1.7705732353886598, + "learning_rate": 8.975374324949638e-06, + "loss": 0.6047, + "step": 7544 + }, + { + "epoch": 0.23124310408238322, + "grad_norm": 1.7862955786341959, + "learning_rate": 8.975073282986719e-06, + "loss": 0.7255, + "step": 7545 + }, + { + "epoch": 0.23127375260512442, + "grad_norm": 1.9322278533110826, + "learning_rate": 8.974772201856387e-06, + "loss": 0.7503, + "step": 7546 + }, + { + "epoch": 0.23130440112786563, + "grad_norm": 2.1054682081265206, + "learning_rate": 8.974471081561608e-06, + "loss": 0.7686, + "step": 7547 + }, + { + "epoch": 0.23133504965060683, + "grad_norm": 1.7701146853511955, + "learning_rate": 8.97416992210535e-06, + "loss": 0.7805, + "step": 7548 + }, + { + "epoch": 0.23136569817334804, + "grad_norm": 1.0096927310578072, + "learning_rate": 8.973868723490578e-06, + "loss": 0.4792, + "step": 7549 + }, + { + "epoch": 0.23139634669608924, + "grad_norm": 1.876229081204876, + "learning_rate": 8.97356748572026e-06, + "loss": 0.7572, + "step": 7550 + }, + { + "epoch": 0.23142699521883045, + "grad_norm": 0.8353699041934703, + "learning_rate": 8.973266208797365e-06, + "loss": 0.4711, + "step": 7551 + }, + { + "epoch": 0.23145764374157166, + "grad_norm": 1.7067112297601839, + "learning_rate": 8.972964892724862e-06, + "loss": 0.7048, + "step": 7552 + }, + { + "epoch": 0.23148829226431286, + "grad_norm": 1.896070265325058, + "learning_rate": 8.97266353750572e-06, + "loss": 0.7838, + "step": 7553 + }, + { + "epoch": 0.23151894078705407, + "grad_norm": 1.7488896196308947, + "learning_rate": 8.972362143142905e-06, + "loss": 0.72, + "step": 7554 + }, + { + "epoch": 0.23154958930979527, + "grad_norm": 0.9340185410490639, + "learning_rate": 8.972060709639393e-06, + "loss": 0.4886, + "step": 7555 + }, + { + "epoch": 0.23158023783253648, + "grad_norm": 2.1615749384447205, + "learning_rate": 8.971759236998147e-06, + "loss": 0.7464, + "step": 7556 + }, + { + "epoch": 0.23161088635527768, + "grad_norm": 
1.6164091281709878, + "learning_rate": 8.971457725222143e-06, + "loss": 0.6487, + "step": 7557 + }, + { + "epoch": 0.2316415348780189, + "grad_norm": 1.7045289462627713, + "learning_rate": 8.971156174314349e-06, + "loss": 0.6863, + "step": 7558 + }, + { + "epoch": 0.2316721834007601, + "grad_norm": 0.7978432337021354, + "learning_rate": 8.970854584277738e-06, + "loss": 0.4854, + "step": 7559 + }, + { + "epoch": 0.23170283192350127, + "grad_norm": 1.9476787890902527, + "learning_rate": 8.970552955115282e-06, + "loss": 0.6827, + "step": 7560 + }, + { + "epoch": 0.23173348044624248, + "grad_norm": 2.908039161772329, + "learning_rate": 8.970251286829949e-06, + "loss": 0.7291, + "step": 7561 + }, + { + "epoch": 0.23176412896898368, + "grad_norm": 1.8141063983572638, + "learning_rate": 8.969949579424715e-06, + "loss": 0.6734, + "step": 7562 + }, + { + "epoch": 0.2317947774917249, + "grad_norm": 1.7020352415385274, + "learning_rate": 8.969647832902552e-06, + "loss": 0.6375, + "step": 7563 + }, + { + "epoch": 0.2318254260144661, + "grad_norm": 1.575911656924497, + "learning_rate": 8.969346047266436e-06, + "loss": 0.7297, + "step": 7564 + }, + { + "epoch": 0.2318560745372073, + "grad_norm": 1.7736144420679192, + "learning_rate": 8.969044222519333e-06, + "loss": 0.7047, + "step": 7565 + }, + { + "epoch": 0.2318867230599485, + "grad_norm": 1.7738402192818903, + "learning_rate": 8.968742358664227e-06, + "loss": 0.6938, + "step": 7566 + }, + { + "epoch": 0.2319173715826897, + "grad_norm": 1.6458049912492878, + "learning_rate": 8.968440455704085e-06, + "loss": 0.7039, + "step": 7567 + }, + { + "epoch": 0.23194802010543092, + "grad_norm": 1.6370343295205776, + "learning_rate": 8.968138513641882e-06, + "loss": 0.6626, + "step": 7568 + }, + { + "epoch": 0.23197866862817212, + "grad_norm": 0.9918232195452558, + "learning_rate": 8.967836532480595e-06, + "loss": 0.5005, + "step": 7569 + }, + { + "epoch": 0.23200931715091333, + "grad_norm": 1.662871176285762, + "learning_rate": 8.967534512223202e-06, + "loss": 0.7317, + "step": 7570 + }, + { + "epoch": 0.23203996567365454, + "grad_norm": 1.6044991197277594, + "learning_rate": 8.967232452872676e-06, + "loss": 0.6881, + "step": 7571 + }, + { + "epoch": 0.23207061419639574, + "grad_norm": 1.836797828022533, + "learning_rate": 8.966930354431991e-06, + "loss": 0.7897, + "step": 7572 + }, + { + "epoch": 0.23210126271913695, + "grad_norm": 1.8830910678513548, + "learning_rate": 8.966628216904128e-06, + "loss": 0.7331, + "step": 7573 + }, + { + "epoch": 0.23213191124187815, + "grad_norm": 1.9089680201699766, + "learning_rate": 8.966326040292062e-06, + "loss": 0.6994, + "step": 7574 + }, + { + "epoch": 0.23216255976461936, + "grad_norm": 0.8583866179976062, + "learning_rate": 8.966023824598771e-06, + "loss": 0.4979, + "step": 7575 + }, + { + "epoch": 0.23219320828736054, + "grad_norm": 0.867356929758002, + "learning_rate": 8.965721569827233e-06, + "loss": 0.4903, + "step": 7576 + }, + { + "epoch": 0.23222385681010174, + "grad_norm": 1.612645069173317, + "learning_rate": 8.965419275980425e-06, + "loss": 0.6986, + "step": 7577 + }, + { + "epoch": 0.23225450533284295, + "grad_norm": 1.628677197750012, + "learning_rate": 8.965116943061325e-06, + "loss": 0.698, + "step": 7578 + }, + { + "epoch": 0.23228515385558415, + "grad_norm": 1.7838882034967591, + "learning_rate": 8.964814571072916e-06, + "loss": 0.7434, + "step": 7579 + }, + { + "epoch": 0.23231580237832536, + "grad_norm": 1.702504542508258, + "learning_rate": 8.964512160018173e-06, + "loss": 0.6747, + "step": 7580 + 
}, + { + "epoch": 0.23234645090106656, + "grad_norm": 1.6568920766573112, + "learning_rate": 8.964209709900078e-06, + "loss": 0.7267, + "step": 7581 + }, + { + "epoch": 0.23237709942380777, + "grad_norm": 1.8092752405549115, + "learning_rate": 8.963907220721609e-06, + "loss": 0.7332, + "step": 7582 + }, + { + "epoch": 0.23240774794654898, + "grad_norm": 1.5672172665030553, + "learning_rate": 8.963604692485748e-06, + "loss": 0.6846, + "step": 7583 + }, + { + "epoch": 0.23243839646929018, + "grad_norm": 1.7086188888654417, + "learning_rate": 8.963302125195476e-06, + "loss": 0.6528, + "step": 7584 + }, + { + "epoch": 0.2324690449920314, + "grad_norm": 1.6079717527876258, + "learning_rate": 8.962999518853775e-06, + "loss": 0.7355, + "step": 7585 + }, + { + "epoch": 0.2324996935147726, + "grad_norm": 1.7276753270745715, + "learning_rate": 8.962696873463625e-06, + "loss": 0.7641, + "step": 7586 + }, + { + "epoch": 0.2325303420375138, + "grad_norm": 1.6498272681210686, + "learning_rate": 8.96239418902801e-06, + "loss": 0.7649, + "step": 7587 + }, + { + "epoch": 0.232560990560255, + "grad_norm": 1.8364928970930117, + "learning_rate": 8.962091465549912e-06, + "loss": 0.6775, + "step": 7588 + }, + { + "epoch": 0.2325916390829962, + "grad_norm": 1.1572847845209135, + "learning_rate": 8.96178870303231e-06, + "loss": 0.5134, + "step": 7589 + }, + { + "epoch": 0.23262228760573742, + "grad_norm": 2.0189906185947093, + "learning_rate": 8.961485901478193e-06, + "loss": 0.7617, + "step": 7590 + }, + { + "epoch": 0.2326529361284786, + "grad_norm": 1.6402398372137055, + "learning_rate": 8.96118306089054e-06, + "loss": 0.7167, + "step": 7591 + }, + { + "epoch": 0.2326835846512198, + "grad_norm": 1.8736637123742865, + "learning_rate": 8.960880181272338e-06, + "loss": 0.7802, + "step": 7592 + }, + { + "epoch": 0.232714233173961, + "grad_norm": 1.6049395407181197, + "learning_rate": 8.960577262626569e-06, + "loss": 0.6851, + "step": 7593 + }, + { + "epoch": 0.2327448816967022, + "grad_norm": 1.6519044109740757, + "learning_rate": 8.96027430495622e-06, + "loss": 0.7102, + "step": 7594 + }, + { + "epoch": 0.23277553021944342, + "grad_norm": 1.6639307203723237, + "learning_rate": 8.959971308264275e-06, + "loss": 0.6726, + "step": 7595 + }, + { + "epoch": 0.23280617874218462, + "grad_norm": 1.729208943423311, + "learning_rate": 8.959668272553717e-06, + "loss": 0.6493, + "step": 7596 + }, + { + "epoch": 0.23283682726492583, + "grad_norm": 1.6406832082277325, + "learning_rate": 8.959365197827537e-06, + "loss": 0.6626, + "step": 7597 + }, + { + "epoch": 0.23286747578766703, + "grad_norm": 0.9575963044106042, + "learning_rate": 8.95906208408872e-06, + "loss": 0.4698, + "step": 7598 + }, + { + "epoch": 0.23289812431040824, + "grad_norm": 1.7825757963172488, + "learning_rate": 8.958758931340247e-06, + "loss": 0.7761, + "step": 7599 + }, + { + "epoch": 0.23292877283314944, + "grad_norm": 1.7391421963944567, + "learning_rate": 8.958455739585113e-06, + "loss": 0.638, + "step": 7600 + }, + { + "epoch": 0.23295942135589065, + "grad_norm": 1.781329755595209, + "learning_rate": 8.958152508826299e-06, + "loss": 0.7412, + "step": 7601 + }, + { + "epoch": 0.23299006987863186, + "grad_norm": 1.9969283786633611, + "learning_rate": 8.957849239066797e-06, + "loss": 0.6599, + "step": 7602 + }, + { + "epoch": 0.23302071840137306, + "grad_norm": 1.6778111382729026, + "learning_rate": 8.957545930309595e-06, + "loss": 0.7544, + "step": 7603 + }, + { + "epoch": 0.23305136692411427, + "grad_norm": 1.6836716291115903, + "learning_rate": 
8.95724258255768e-06, + "loss": 0.6952, + "step": 7604 + }, + { + "epoch": 0.23308201544685547, + "grad_norm": 1.78819494266077, + "learning_rate": 8.95693919581404e-06, + "loss": 0.7136, + "step": 7605 + }, + { + "epoch": 0.23311266396959668, + "grad_norm": 1.6522825278435986, + "learning_rate": 8.956635770081665e-06, + "loss": 0.7087, + "step": 7606 + }, + { + "epoch": 0.23314331249233786, + "grad_norm": 1.7395466937216928, + "learning_rate": 8.956332305363546e-06, + "loss": 0.7918, + "step": 7607 + }, + { + "epoch": 0.23317396101507906, + "grad_norm": 1.8860723232184553, + "learning_rate": 8.956028801662675e-06, + "loss": 0.8237, + "step": 7608 + }, + { + "epoch": 0.23320460953782027, + "grad_norm": 1.0214457244449628, + "learning_rate": 8.955725258982038e-06, + "loss": 0.4872, + "step": 7609 + }, + { + "epoch": 0.23323525806056147, + "grad_norm": 1.579610240989424, + "learning_rate": 8.955421677324628e-06, + "loss": 0.7179, + "step": 7610 + }, + { + "epoch": 0.23326590658330268, + "grad_norm": 1.8802734127542773, + "learning_rate": 8.955118056693436e-06, + "loss": 0.7391, + "step": 7611 + }, + { + "epoch": 0.23329655510604388, + "grad_norm": 1.5691456544340552, + "learning_rate": 8.954814397091454e-06, + "loss": 0.7359, + "step": 7612 + }, + { + "epoch": 0.2333272036287851, + "grad_norm": 1.6521592874675894, + "learning_rate": 8.954510698521674e-06, + "loss": 0.6812, + "step": 7613 + }, + { + "epoch": 0.2333578521515263, + "grad_norm": 2.0053041513757917, + "learning_rate": 8.954206960987088e-06, + "loss": 0.7228, + "step": 7614 + }, + { + "epoch": 0.2333885006742675, + "grad_norm": 0.8242552751665423, + "learning_rate": 8.953903184490688e-06, + "loss": 0.4896, + "step": 7615 + }, + { + "epoch": 0.2334191491970087, + "grad_norm": 1.9316993776889095, + "learning_rate": 8.953599369035471e-06, + "loss": 0.8169, + "step": 7616 + }, + { + "epoch": 0.2334497977197499, + "grad_norm": 1.9330443913863042, + "learning_rate": 8.953295514624428e-06, + "loss": 0.6484, + "step": 7617 + }, + { + "epoch": 0.23348044624249112, + "grad_norm": 0.7911193561115963, + "learning_rate": 8.95299162126055e-06, + "loss": 0.4783, + "step": 7618 + }, + { + "epoch": 0.23351109476523232, + "grad_norm": 1.8367052117669442, + "learning_rate": 8.952687688946836e-06, + "loss": 0.7801, + "step": 7619 + }, + { + "epoch": 0.23354174328797353, + "grad_norm": 1.853510076427441, + "learning_rate": 8.952383717686277e-06, + "loss": 0.6952, + "step": 7620 + }, + { + "epoch": 0.23357239181071474, + "grad_norm": 1.7691408780981297, + "learning_rate": 8.952079707481872e-06, + "loss": 0.6251, + "step": 7621 + }, + { + "epoch": 0.2336030403334559, + "grad_norm": 1.826736157978531, + "learning_rate": 8.951775658336612e-06, + "loss": 0.6483, + "step": 7622 + }, + { + "epoch": 0.23363368885619712, + "grad_norm": 1.7652453373049493, + "learning_rate": 8.951471570253498e-06, + "loss": 0.6971, + "step": 7623 + }, + { + "epoch": 0.23366433737893832, + "grad_norm": 1.6667493750662232, + "learning_rate": 8.951167443235522e-06, + "loss": 0.7103, + "step": 7624 + }, + { + "epoch": 0.23369498590167953, + "grad_norm": 1.8868819058054231, + "learning_rate": 8.950863277285683e-06, + "loss": 0.7262, + "step": 7625 + }, + { + "epoch": 0.23372563442442074, + "grad_norm": 1.611723642105559, + "learning_rate": 8.950559072406977e-06, + "loss": 0.6967, + "step": 7626 + }, + { + "epoch": 0.23375628294716194, + "grad_norm": 1.684290012431321, + "learning_rate": 8.950254828602402e-06, + "loss": 0.6687, + "step": 7627 + }, + { + "epoch": 0.23378693146990315, 
+ "grad_norm": 1.5924256348355144, + "learning_rate": 8.949950545874954e-06, + "loss": 0.6492, + "step": 7628 + }, + { + "epoch": 0.23381757999264435, + "grad_norm": 1.6740443025210414, + "learning_rate": 8.949646224227635e-06, + "loss": 0.6973, + "step": 7629 + }, + { + "epoch": 0.23384822851538556, + "grad_norm": 1.578995138800541, + "learning_rate": 8.94934186366344e-06, + "loss": 0.6999, + "step": 7630 + }, + { + "epoch": 0.23387887703812676, + "grad_norm": 1.6466023709891493, + "learning_rate": 8.94903746418537e-06, + "loss": 0.7719, + "step": 7631 + }, + { + "epoch": 0.23390952556086797, + "grad_norm": 1.9466302874170074, + "learning_rate": 8.94873302579642e-06, + "loss": 0.719, + "step": 7632 + }, + { + "epoch": 0.23394017408360918, + "grad_norm": 1.8084180059487753, + "learning_rate": 8.948428548499597e-06, + "loss": 0.7347, + "step": 7633 + }, + { + "epoch": 0.23397082260635038, + "grad_norm": 1.6691352786277696, + "learning_rate": 8.948124032297897e-06, + "loss": 0.7207, + "step": 7634 + }, + { + "epoch": 0.2340014711290916, + "grad_norm": 1.8770732247099984, + "learning_rate": 8.94781947719432e-06, + "loss": 0.653, + "step": 7635 + }, + { + "epoch": 0.2340321196518328, + "grad_norm": 1.8529762926924658, + "learning_rate": 8.947514883191868e-06, + "loss": 0.6224, + "step": 7636 + }, + { + "epoch": 0.234062768174574, + "grad_norm": 1.7994089457576308, + "learning_rate": 8.94721025029354e-06, + "loss": 0.6456, + "step": 7637 + }, + { + "epoch": 0.23409341669731518, + "grad_norm": 1.8712725308140705, + "learning_rate": 8.94690557850234e-06, + "loss": 0.8002, + "step": 7638 + }, + { + "epoch": 0.23412406522005638, + "grad_norm": 1.6856377576080406, + "learning_rate": 8.946600867821272e-06, + "loss": 0.8136, + "step": 7639 + }, + { + "epoch": 0.2341547137427976, + "grad_norm": 1.0146678289724234, + "learning_rate": 8.946296118253333e-06, + "loss": 0.524, + "step": 7640 + }, + { + "epoch": 0.2341853622655388, + "grad_norm": 1.9968928386960065, + "learning_rate": 8.945991329801528e-06, + "loss": 0.8366, + "step": 7641 + }, + { + "epoch": 0.23421601078828, + "grad_norm": 1.9798797179462617, + "learning_rate": 8.945686502468865e-06, + "loss": 0.7774, + "step": 7642 + }, + { + "epoch": 0.2342466593110212, + "grad_norm": 1.6199651867519216, + "learning_rate": 8.94538163625834e-06, + "loss": 0.7109, + "step": 7643 + }, + { + "epoch": 0.2342773078337624, + "grad_norm": 1.948200069456506, + "learning_rate": 8.945076731172961e-06, + "loss": 0.6386, + "step": 7644 + }, + { + "epoch": 0.23430795635650362, + "grad_norm": 1.747499563514388, + "learning_rate": 8.944771787215731e-06, + "loss": 0.7014, + "step": 7645 + }, + { + "epoch": 0.23433860487924482, + "grad_norm": 1.590395960708311, + "learning_rate": 8.944466804389657e-06, + "loss": 0.7619, + "step": 7646 + }, + { + "epoch": 0.23436925340198603, + "grad_norm": 1.635001843040509, + "learning_rate": 8.94416178269774e-06, + "loss": 0.7489, + "step": 7647 + }, + { + "epoch": 0.23439990192472723, + "grad_norm": 1.9074463812519815, + "learning_rate": 8.94385672214299e-06, + "loss": 0.7481, + "step": 7648 + }, + { + "epoch": 0.23443055044746844, + "grad_norm": 1.8291644815883157, + "learning_rate": 8.94355162272841e-06, + "loss": 0.7337, + "step": 7649 + }, + { + "epoch": 0.23446119897020964, + "grad_norm": 1.7424164376184228, + "learning_rate": 8.943246484457006e-06, + "loss": 0.7021, + "step": 7650 + }, + { + "epoch": 0.23449184749295085, + "grad_norm": 1.5969401686526663, + "learning_rate": 8.942941307331786e-06, + "loss": 0.5715, + "step": 7651 
+ }, + { + "epoch": 0.23452249601569206, + "grad_norm": 1.592333479139271, + "learning_rate": 8.942636091355756e-06, + "loss": 0.69, + "step": 7652 + }, + { + "epoch": 0.23455314453843326, + "grad_norm": 1.8008072124399817, + "learning_rate": 8.942330836531925e-06, + "loss": 0.7417, + "step": 7653 + }, + { + "epoch": 0.23458379306117444, + "grad_norm": 1.779479684626396, + "learning_rate": 8.9420255428633e-06, + "loss": 0.6338, + "step": 7654 + }, + { + "epoch": 0.23461444158391564, + "grad_norm": 1.8329589778008166, + "learning_rate": 8.941720210352886e-06, + "loss": 0.6465, + "step": 7655 + }, + { + "epoch": 0.23464509010665685, + "grad_norm": 1.8863133849047626, + "learning_rate": 8.941414839003695e-06, + "loss": 0.7276, + "step": 7656 + }, + { + "epoch": 0.23467573862939806, + "grad_norm": 1.6675075214387458, + "learning_rate": 8.941109428818737e-06, + "loss": 0.7628, + "step": 7657 + }, + { + "epoch": 0.23470638715213926, + "grad_norm": 1.782800168723298, + "learning_rate": 8.940803979801019e-06, + "loss": 0.6977, + "step": 7658 + }, + { + "epoch": 0.23473703567488047, + "grad_norm": 1.738538691306624, + "learning_rate": 8.940498491953549e-06, + "loss": 0.7852, + "step": 7659 + }, + { + "epoch": 0.23476768419762167, + "grad_norm": 1.8110590899161632, + "learning_rate": 8.940192965279342e-06, + "loss": 0.6588, + "step": 7660 + }, + { + "epoch": 0.23479833272036288, + "grad_norm": 2.055493794757792, + "learning_rate": 8.939887399781404e-06, + "loss": 0.7044, + "step": 7661 + }, + { + "epoch": 0.23482898124310408, + "grad_norm": 1.6673216614239454, + "learning_rate": 8.939581795462747e-06, + "loss": 0.7189, + "step": 7662 + }, + { + "epoch": 0.2348596297658453, + "grad_norm": 1.9265972140294088, + "learning_rate": 8.939276152326384e-06, + "loss": 0.6983, + "step": 7663 + }, + { + "epoch": 0.2348902782885865, + "grad_norm": 1.0506054302455627, + "learning_rate": 8.938970470375324e-06, + "loss": 0.5189, + "step": 7664 + }, + { + "epoch": 0.2349209268113277, + "grad_norm": 1.8440251483779808, + "learning_rate": 8.93866474961258e-06, + "loss": 0.7599, + "step": 7665 + }, + { + "epoch": 0.2349515753340689, + "grad_norm": 1.831378521414513, + "learning_rate": 8.938358990041164e-06, + "loss": 0.704, + "step": 7666 + }, + { + "epoch": 0.2349822238568101, + "grad_norm": 1.640873006908131, + "learning_rate": 8.938053191664091e-06, + "loss": 0.5419, + "step": 7667 + }, + { + "epoch": 0.23501287237955132, + "grad_norm": 0.7371241951570491, + "learning_rate": 8.937747354484372e-06, + "loss": 0.4889, + "step": 7668 + }, + { + "epoch": 0.2350435209022925, + "grad_norm": 1.9684696278966671, + "learning_rate": 8.93744147850502e-06, + "loss": 0.7216, + "step": 7669 + }, + { + "epoch": 0.2350741694250337, + "grad_norm": 0.8427777607165567, + "learning_rate": 8.93713556372905e-06, + "loss": 0.4974, + "step": 7670 + }, + { + "epoch": 0.2351048179477749, + "grad_norm": 0.8116486844698865, + "learning_rate": 8.936829610159477e-06, + "loss": 0.5104, + "step": 7671 + }, + { + "epoch": 0.2351354664705161, + "grad_norm": 0.8206953230417033, + "learning_rate": 8.936523617799312e-06, + "loss": 0.4874, + "step": 7672 + }, + { + "epoch": 0.23516611499325732, + "grad_norm": 1.696604298854288, + "learning_rate": 8.936217586651574e-06, + "loss": 0.7463, + "step": 7673 + }, + { + "epoch": 0.23519676351599852, + "grad_norm": 1.5066380400410475, + "learning_rate": 8.935911516719278e-06, + "loss": 0.6646, + "step": 7674 + }, + { + "epoch": 0.23522741203873973, + "grad_norm": 1.7407584241579148, + "learning_rate": 
8.935605408005437e-06, + "loss": 0.71, + "step": 7675 + }, + { + "epoch": 0.23525806056148094, + "grad_norm": 1.6184661504130098, + "learning_rate": 8.93529926051307e-06, + "loss": 0.7367, + "step": 7676 + }, + { + "epoch": 0.23528870908422214, + "grad_norm": 1.5548904371032741, + "learning_rate": 8.934993074245193e-06, + "loss": 0.6325, + "step": 7677 + }, + { + "epoch": 0.23531935760696335, + "grad_norm": 1.9500527842827877, + "learning_rate": 8.93468684920482e-06, + "loss": 0.6927, + "step": 7678 + }, + { + "epoch": 0.23535000612970455, + "grad_norm": 1.8328778678153692, + "learning_rate": 8.934380585394972e-06, + "loss": 0.764, + "step": 7679 + }, + { + "epoch": 0.23538065465244576, + "grad_norm": 1.9062895052822235, + "learning_rate": 8.934074282818667e-06, + "loss": 0.7926, + "step": 7680 + }, + { + "epoch": 0.23541130317518696, + "grad_norm": 1.670828988176945, + "learning_rate": 8.93376794147892e-06, + "loss": 0.6507, + "step": 7681 + }, + { + "epoch": 0.23544195169792817, + "grad_norm": 1.732739622716536, + "learning_rate": 8.933461561378752e-06, + "loss": 0.6146, + "step": 7682 + }, + { + "epoch": 0.23547260022066938, + "grad_norm": 2.6639783067607334, + "learning_rate": 8.933155142521179e-06, + "loss": 0.8019, + "step": 7683 + }, + { + "epoch": 0.23550324874341058, + "grad_norm": 1.7403360157248073, + "learning_rate": 8.932848684909223e-06, + "loss": 0.593, + "step": 7684 + }, + { + "epoch": 0.23553389726615176, + "grad_norm": 1.7583003778467219, + "learning_rate": 8.932542188545903e-06, + "loss": 0.6918, + "step": 7685 + }, + { + "epoch": 0.23556454578889297, + "grad_norm": 1.5843903324709525, + "learning_rate": 8.93223565343424e-06, + "loss": 0.6555, + "step": 7686 + }, + { + "epoch": 0.23559519431163417, + "grad_norm": 1.4120719653405565, + "learning_rate": 8.93192907957725e-06, + "loss": 0.5926, + "step": 7687 + }, + { + "epoch": 0.23562584283437538, + "grad_norm": 1.742945045627312, + "learning_rate": 8.931622466977959e-06, + "loss": 0.7611, + "step": 7688 + }, + { + "epoch": 0.23565649135711658, + "grad_norm": 1.5584956419584408, + "learning_rate": 8.931315815639385e-06, + "loss": 0.7149, + "step": 7689 + }, + { + "epoch": 0.2356871398798578, + "grad_norm": 1.6996532169347929, + "learning_rate": 8.93100912556455e-06, + "loss": 0.7452, + "step": 7690 + }, + { + "epoch": 0.235717788402599, + "grad_norm": 0.9642606794397475, + "learning_rate": 8.930702396756476e-06, + "loss": 0.4779, + "step": 7691 + }, + { + "epoch": 0.2357484369253402, + "grad_norm": 1.7164147390189795, + "learning_rate": 8.930395629218187e-06, + "loss": 0.8456, + "step": 7692 + }, + { + "epoch": 0.2357790854480814, + "grad_norm": 0.8702406767091625, + "learning_rate": 8.930088822952703e-06, + "loss": 0.5081, + "step": 7693 + }, + { + "epoch": 0.2358097339708226, + "grad_norm": 2.1646014876968316, + "learning_rate": 8.92978197796305e-06, + "loss": 0.761, + "step": 7694 + }, + { + "epoch": 0.23584038249356382, + "grad_norm": 1.8102837803778582, + "learning_rate": 8.92947509425225e-06, + "loss": 0.6535, + "step": 7695 + }, + { + "epoch": 0.23587103101630502, + "grad_norm": 0.8538073300852668, + "learning_rate": 8.929168171823323e-06, + "loss": 0.4668, + "step": 7696 + }, + { + "epoch": 0.23590167953904623, + "grad_norm": 1.9374820873371938, + "learning_rate": 8.928861210679298e-06, + "loss": 0.6964, + "step": 7697 + }, + { + "epoch": 0.23593232806178743, + "grad_norm": 1.5567819546350503, + "learning_rate": 8.928554210823201e-06, + "loss": 0.7598, + "step": 7698 + }, + { + "epoch": 0.23596297658452864, + 
"grad_norm": 1.8562901502638587, + "learning_rate": 8.92824717225805e-06, + "loss": 0.8606, + "step": 7699 + }, + { + "epoch": 0.23599362510726982, + "grad_norm": 1.7576003936927231, + "learning_rate": 8.927940094986879e-06, + "loss": 0.7068, + "step": 7700 + }, + { + "epoch": 0.23602427363001102, + "grad_norm": 2.0832948440911543, + "learning_rate": 8.927632979012707e-06, + "loss": 0.737, + "step": 7701 + }, + { + "epoch": 0.23605492215275223, + "grad_norm": 0.9096206353652991, + "learning_rate": 8.927325824338561e-06, + "loss": 0.4892, + "step": 7702 + }, + { + "epoch": 0.23608557067549343, + "grad_norm": 0.8707309748090155, + "learning_rate": 8.92701863096747e-06, + "loss": 0.4968, + "step": 7703 + }, + { + "epoch": 0.23611621919823464, + "grad_norm": 1.6815161753100512, + "learning_rate": 8.92671139890246e-06, + "loss": 0.6785, + "step": 7704 + }, + { + "epoch": 0.23614686772097584, + "grad_norm": 1.802833484487542, + "learning_rate": 8.926404128146558e-06, + "loss": 0.6896, + "step": 7705 + }, + { + "epoch": 0.23617751624371705, + "grad_norm": 1.9411281926845294, + "learning_rate": 8.92609681870279e-06, + "loss": 0.7166, + "step": 7706 + }, + { + "epoch": 0.23620816476645826, + "grad_norm": 1.7945542410233093, + "learning_rate": 8.925789470574187e-06, + "loss": 0.7189, + "step": 7707 + }, + { + "epoch": 0.23623881328919946, + "grad_norm": 1.858206251178851, + "learning_rate": 8.925482083763776e-06, + "loss": 0.7617, + "step": 7708 + }, + { + "epoch": 0.23626946181194067, + "grad_norm": 1.751603668384798, + "learning_rate": 8.925174658274585e-06, + "loss": 0.6429, + "step": 7709 + }, + { + "epoch": 0.23630011033468187, + "grad_norm": 1.9114732412774307, + "learning_rate": 8.924867194109643e-06, + "loss": 0.6649, + "step": 7710 + }, + { + "epoch": 0.23633075885742308, + "grad_norm": 1.8999283674723222, + "learning_rate": 8.924559691271983e-06, + "loss": 0.753, + "step": 7711 + }, + { + "epoch": 0.23636140738016428, + "grad_norm": 1.8963224921579456, + "learning_rate": 8.92425214976463e-06, + "loss": 0.7483, + "step": 7712 + }, + { + "epoch": 0.2363920559029055, + "grad_norm": 1.0604017303329731, + "learning_rate": 8.923944569590617e-06, + "loss": 0.4651, + "step": 7713 + }, + { + "epoch": 0.2364227044256467, + "grad_norm": 1.8553114568844618, + "learning_rate": 8.923636950752974e-06, + "loss": 0.6099, + "step": 7714 + }, + { + "epoch": 0.2364533529483879, + "grad_norm": 1.8783757922649418, + "learning_rate": 8.923329293254732e-06, + "loss": 0.776, + "step": 7715 + }, + { + "epoch": 0.23648400147112908, + "grad_norm": 1.6683564069484162, + "learning_rate": 8.923021597098924e-06, + "loss": 0.7114, + "step": 7716 + }, + { + "epoch": 0.23651464999387029, + "grad_norm": 1.9318708701491314, + "learning_rate": 8.922713862288579e-06, + "loss": 0.7827, + "step": 7717 + }, + { + "epoch": 0.2365452985166115, + "grad_norm": 1.7768425446118754, + "learning_rate": 8.922406088826732e-06, + "loss": 0.6442, + "step": 7718 + }, + { + "epoch": 0.2365759470393527, + "grad_norm": 0.812698622686433, + "learning_rate": 8.922098276716413e-06, + "loss": 0.4758, + "step": 7719 + }, + { + "epoch": 0.2366065955620939, + "grad_norm": 1.7386721828399025, + "learning_rate": 8.921790425960658e-06, + "loss": 0.6626, + "step": 7720 + }, + { + "epoch": 0.2366372440848351, + "grad_norm": 1.6327342077592342, + "learning_rate": 8.921482536562495e-06, + "loss": 0.6793, + "step": 7721 + }, + { + "epoch": 0.2366678926075763, + "grad_norm": 0.8289692835134876, + "learning_rate": 8.921174608524964e-06, + "loss": 0.4881, + 
"step": 7722 + }, + { + "epoch": 0.23669854113031752, + "grad_norm": 0.8442198183112527, + "learning_rate": 8.920866641851094e-06, + "loss": 0.49, + "step": 7723 + }, + { + "epoch": 0.23672918965305872, + "grad_norm": 1.6471567781074699, + "learning_rate": 8.920558636543924e-06, + "loss": 0.7015, + "step": 7724 + }, + { + "epoch": 0.23675983817579993, + "grad_norm": 1.7588154607227846, + "learning_rate": 8.920250592606486e-06, + "loss": 0.7191, + "step": 7725 + }, + { + "epoch": 0.23679048669854114, + "grad_norm": 1.8463325520208898, + "learning_rate": 8.919942510041817e-06, + "loss": 0.6594, + "step": 7726 + }, + { + "epoch": 0.23682113522128234, + "grad_norm": 1.9203458674140317, + "learning_rate": 8.91963438885295e-06, + "loss": 0.8239, + "step": 7727 + }, + { + "epoch": 0.23685178374402355, + "grad_norm": 1.8639526368684949, + "learning_rate": 8.919326229042922e-06, + "loss": 0.7076, + "step": 7728 + }, + { + "epoch": 0.23688243226676475, + "grad_norm": 1.683014989982066, + "learning_rate": 8.91901803061477e-06, + "loss": 0.7377, + "step": 7729 + }, + { + "epoch": 0.23691308078950596, + "grad_norm": 1.0121222792290445, + "learning_rate": 8.918709793571532e-06, + "loss": 0.4854, + "step": 7730 + }, + { + "epoch": 0.23694372931224714, + "grad_norm": 1.5720955434036572, + "learning_rate": 8.918401517916243e-06, + "loss": 0.6552, + "step": 7731 + }, + { + "epoch": 0.23697437783498834, + "grad_norm": 1.8923546204338402, + "learning_rate": 8.918093203651941e-06, + "loss": 0.7846, + "step": 7732 + }, + { + "epoch": 0.23700502635772955, + "grad_norm": 1.8070491371219708, + "learning_rate": 8.917784850781665e-06, + "loss": 0.7135, + "step": 7733 + }, + { + "epoch": 0.23703567488047075, + "grad_norm": 1.9305613993925337, + "learning_rate": 8.917476459308452e-06, + "loss": 0.738, + "step": 7734 + }, + { + "epoch": 0.23706632340321196, + "grad_norm": 2.1199751202606927, + "learning_rate": 8.917168029235341e-06, + "loss": 0.7293, + "step": 7735 + }, + { + "epoch": 0.23709697192595316, + "grad_norm": 2.009717889052549, + "learning_rate": 8.916859560565372e-06, + "loss": 0.7346, + "step": 7736 + }, + { + "epoch": 0.23712762044869437, + "grad_norm": 1.6638217424881092, + "learning_rate": 8.916551053301582e-06, + "loss": 0.7008, + "step": 7737 + }, + { + "epoch": 0.23715826897143558, + "grad_norm": 1.8683930786677798, + "learning_rate": 8.916242507447013e-06, + "loss": 0.6732, + "step": 7738 + }, + { + "epoch": 0.23718891749417678, + "grad_norm": 1.636206494526885, + "learning_rate": 8.915933923004705e-06, + "loss": 0.7599, + "step": 7739 + }, + { + "epoch": 0.237219566016918, + "grad_norm": 1.7704878483930417, + "learning_rate": 8.915625299977699e-06, + "loss": 0.7773, + "step": 7740 + }, + { + "epoch": 0.2372502145396592, + "grad_norm": 1.7243760600470035, + "learning_rate": 8.915316638369033e-06, + "loss": 0.6613, + "step": 7741 + }, + { + "epoch": 0.2372808630624004, + "grad_norm": 1.0224894869075867, + "learning_rate": 8.915007938181752e-06, + "loss": 0.4785, + "step": 7742 + }, + { + "epoch": 0.2373115115851416, + "grad_norm": 1.5473652045769617, + "learning_rate": 8.914699199418895e-06, + "loss": 0.8099, + "step": 7743 + }, + { + "epoch": 0.2373421601078828, + "grad_norm": 0.8100567039332447, + "learning_rate": 8.914390422083506e-06, + "loss": 0.5065, + "step": 7744 + }, + { + "epoch": 0.23737280863062402, + "grad_norm": 0.7928590739030718, + "learning_rate": 8.914081606178627e-06, + "loss": 0.5099, + "step": 7745 + }, + { + "epoch": 0.23740345715336522, + "grad_norm": 1.7817029351787674, + 
"learning_rate": 8.9137727517073e-06, + "loss": 0.67, + "step": 7746 + }, + { + "epoch": 0.2374341056761064, + "grad_norm": 1.7602618477865122, + "learning_rate": 8.913463858672566e-06, + "loss": 0.8256, + "step": 7747 + }, + { + "epoch": 0.2374647541988476, + "grad_norm": 1.8261798033353691, + "learning_rate": 8.913154927077475e-06, + "loss": 0.6459, + "step": 7748 + }, + { + "epoch": 0.2374954027215888, + "grad_norm": 1.7738577980219237, + "learning_rate": 8.912845956925064e-06, + "loss": 0.6483, + "step": 7749 + }, + { + "epoch": 0.23752605124433002, + "grad_norm": 1.7154861831026667, + "learning_rate": 8.912536948218385e-06, + "loss": 0.732, + "step": 7750 + }, + { + "epoch": 0.23755669976707122, + "grad_norm": 1.2193151747549482, + "learning_rate": 8.912227900960475e-06, + "loss": 0.4766, + "step": 7751 + }, + { + "epoch": 0.23758734828981243, + "grad_norm": 0.9817058219741279, + "learning_rate": 8.911918815154384e-06, + "loss": 0.4651, + "step": 7752 + }, + { + "epoch": 0.23761799681255363, + "grad_norm": 1.7756326857794975, + "learning_rate": 8.911609690803154e-06, + "loss": 0.614, + "step": 7753 + }, + { + "epoch": 0.23764864533529484, + "grad_norm": 1.9623380806559432, + "learning_rate": 8.911300527909836e-06, + "loss": 0.8314, + "step": 7754 + }, + { + "epoch": 0.23767929385803604, + "grad_norm": 1.7489377747401385, + "learning_rate": 8.91099132647747e-06, + "loss": 0.8006, + "step": 7755 + }, + { + "epoch": 0.23770994238077725, + "grad_norm": 1.0541862207621748, + "learning_rate": 8.910682086509108e-06, + "loss": 0.4841, + "step": 7756 + }, + { + "epoch": 0.23774059090351846, + "grad_norm": 1.814463794138197, + "learning_rate": 8.910372808007795e-06, + "loss": 0.7458, + "step": 7757 + }, + { + "epoch": 0.23777123942625966, + "grad_norm": 1.7803199338398377, + "learning_rate": 8.910063490976576e-06, + "loss": 0.695, + "step": 7758 + }, + { + "epoch": 0.23780188794900087, + "grad_norm": 1.751552136373409, + "learning_rate": 8.909754135418503e-06, + "loss": 0.7143, + "step": 7759 + }, + { + "epoch": 0.23783253647174207, + "grad_norm": 1.8904096540345037, + "learning_rate": 8.909444741336622e-06, + "loss": 0.6794, + "step": 7760 + }, + { + "epoch": 0.23786318499448328, + "grad_norm": 1.735454488223871, + "learning_rate": 8.90913530873398e-06, + "loss": 0.6768, + "step": 7761 + }, + { + "epoch": 0.23789383351722446, + "grad_norm": 1.6400239327380448, + "learning_rate": 8.90882583761363e-06, + "loss": 0.6031, + "step": 7762 + }, + { + "epoch": 0.23792448203996566, + "grad_norm": 1.5818392763277358, + "learning_rate": 8.908516327978618e-06, + "loss": 0.6604, + "step": 7763 + }, + { + "epoch": 0.23795513056270687, + "grad_norm": 0.9446032921123202, + "learning_rate": 8.908206779831995e-06, + "loss": 0.5088, + "step": 7764 + }, + { + "epoch": 0.23798577908544807, + "grad_norm": 1.7193947748074185, + "learning_rate": 8.907897193176809e-06, + "loss": 0.747, + "step": 7765 + }, + { + "epoch": 0.23801642760818928, + "grad_norm": 1.6172587982581252, + "learning_rate": 8.907587568016112e-06, + "loss": 0.66, + "step": 7766 + }, + { + "epoch": 0.23804707613093049, + "grad_norm": 1.626763979876953, + "learning_rate": 8.907277904352955e-06, + "loss": 0.7223, + "step": 7767 + }, + { + "epoch": 0.2380777246536717, + "grad_norm": 1.564173368246776, + "learning_rate": 8.906968202190392e-06, + "loss": 0.6429, + "step": 7768 + }, + { + "epoch": 0.2381083731764129, + "grad_norm": 2.0383714879359314, + "learning_rate": 8.906658461531469e-06, + "loss": 0.8008, + "step": 7769 + }, + { + "epoch": 
0.2381390216991541, + "grad_norm": 1.7031136454295268, + "learning_rate": 8.90634868237924e-06, + "loss": 0.6945, + "step": 7770 + }, + { + "epoch": 0.2381696702218953, + "grad_norm": 1.8424948245155568, + "learning_rate": 8.90603886473676e-06, + "loss": 0.7812, + "step": 7771 + }, + { + "epoch": 0.2382003187446365, + "grad_norm": 0.8552369073149164, + "learning_rate": 8.905729008607079e-06, + "loss": 0.5058, + "step": 7772 + }, + { + "epoch": 0.23823096726737772, + "grad_norm": 1.5228152888007875, + "learning_rate": 8.905419113993252e-06, + "loss": 0.7617, + "step": 7773 + }, + { + "epoch": 0.23826161579011892, + "grad_norm": 1.724880651896165, + "learning_rate": 8.905109180898328e-06, + "loss": 0.6506, + "step": 7774 + }, + { + "epoch": 0.23829226431286013, + "grad_norm": 1.6742191512321756, + "learning_rate": 8.904799209325367e-06, + "loss": 0.7808, + "step": 7775 + }, + { + "epoch": 0.23832291283560134, + "grad_norm": 1.8843000686428966, + "learning_rate": 8.904489199277419e-06, + "loss": 0.6877, + "step": 7776 + }, + { + "epoch": 0.23835356135834254, + "grad_norm": 0.8044872518611815, + "learning_rate": 8.904179150757539e-06, + "loss": 0.4646, + "step": 7777 + }, + { + "epoch": 0.23838420988108372, + "grad_norm": 1.6293429417426863, + "learning_rate": 8.903869063768784e-06, + "loss": 0.7264, + "step": 7778 + }, + { + "epoch": 0.23841485840382493, + "grad_norm": 1.7203025012583149, + "learning_rate": 8.903558938314209e-06, + "loss": 0.6805, + "step": 7779 + }, + { + "epoch": 0.23844550692656613, + "grad_norm": 1.8434264276517203, + "learning_rate": 8.90324877439687e-06, + "loss": 0.7676, + "step": 7780 + }, + { + "epoch": 0.23847615544930734, + "grad_norm": 1.7951983597594712, + "learning_rate": 8.90293857201982e-06, + "loss": 0.6452, + "step": 7781 + }, + { + "epoch": 0.23850680397204854, + "grad_norm": 0.8617413344309398, + "learning_rate": 8.902628331186117e-06, + "loss": 0.4758, + "step": 7782 + }, + { + "epoch": 0.23853745249478975, + "grad_norm": 1.9832389627133489, + "learning_rate": 8.902318051898819e-06, + "loss": 0.76, + "step": 7783 + }, + { + "epoch": 0.23856810101753095, + "grad_norm": 0.80067759881273, + "learning_rate": 8.902007734160985e-06, + "loss": 0.4975, + "step": 7784 + }, + { + "epoch": 0.23859874954027216, + "grad_norm": 0.8210850861920713, + "learning_rate": 8.90169737797567e-06, + "loss": 0.506, + "step": 7785 + }, + { + "epoch": 0.23862939806301336, + "grad_norm": 1.6896925971208805, + "learning_rate": 8.90138698334593e-06, + "loss": 0.6996, + "step": 7786 + }, + { + "epoch": 0.23866004658575457, + "grad_norm": 1.8566627747655944, + "learning_rate": 8.901076550274827e-06, + "loss": 0.6785, + "step": 7787 + }, + { + "epoch": 0.23869069510849578, + "grad_norm": 1.5646094810561018, + "learning_rate": 8.900766078765417e-06, + "loss": 0.7604, + "step": 7788 + }, + { + "epoch": 0.23872134363123698, + "grad_norm": 1.8747368730946852, + "learning_rate": 8.900455568820763e-06, + "loss": 0.7281, + "step": 7789 + }, + { + "epoch": 0.2387519921539782, + "grad_norm": 1.7655831894629719, + "learning_rate": 8.900145020443922e-06, + "loss": 0.7499, + "step": 7790 + }, + { + "epoch": 0.2387826406767194, + "grad_norm": 1.6500747684941672, + "learning_rate": 8.899834433637955e-06, + "loss": 0.7344, + "step": 7791 + }, + { + "epoch": 0.2388132891994606, + "grad_norm": 1.8874999626104683, + "learning_rate": 8.89952380840592e-06, + "loss": 0.7561, + "step": 7792 + }, + { + "epoch": 0.23884393772220178, + "grad_norm": 1.690011992263975, + "learning_rate": 8.89921314475088e-06, + 
"loss": 0.7485, + "step": 7793 + }, + { + "epoch": 0.23887458624494298, + "grad_norm": 1.6780046046852015, + "learning_rate": 8.898902442675894e-06, + "loss": 0.7004, + "step": 7794 + }, + { + "epoch": 0.2389052347676842, + "grad_norm": 1.5655683228504527, + "learning_rate": 8.898591702184027e-06, + "loss": 0.7356, + "step": 7795 + }, + { + "epoch": 0.2389358832904254, + "grad_norm": 1.7234040222531823, + "learning_rate": 8.898280923278336e-06, + "loss": 0.7335, + "step": 7796 + }, + { + "epoch": 0.2389665318131666, + "grad_norm": 1.2201884227964348, + "learning_rate": 8.897970105961887e-06, + "loss": 0.4918, + "step": 7797 + }, + { + "epoch": 0.2389971803359078, + "grad_norm": 1.8627245429946804, + "learning_rate": 8.897659250237742e-06, + "loss": 0.7146, + "step": 7798 + }, + { + "epoch": 0.239027828858649, + "grad_norm": 0.939068980722789, + "learning_rate": 8.897348356108961e-06, + "loss": 0.4915, + "step": 7799 + }, + { + "epoch": 0.23905847738139022, + "grad_norm": 0.8450397510716464, + "learning_rate": 8.897037423578611e-06, + "loss": 0.4839, + "step": 7800 + }, + { + "epoch": 0.23908912590413142, + "grad_norm": 1.8638648718005149, + "learning_rate": 8.896726452649754e-06, + "loss": 0.7189, + "step": 7801 + }, + { + "epoch": 0.23911977442687263, + "grad_norm": 1.855705492825267, + "learning_rate": 8.896415443325453e-06, + "loss": 0.8377, + "step": 7802 + }, + { + "epoch": 0.23915042294961383, + "grad_norm": 1.0923725573692054, + "learning_rate": 8.896104395608775e-06, + "loss": 0.4737, + "step": 7803 + }, + { + "epoch": 0.23918107147235504, + "grad_norm": 1.8565798840141345, + "learning_rate": 8.895793309502782e-06, + "loss": 0.7343, + "step": 7804 + }, + { + "epoch": 0.23921171999509624, + "grad_norm": 1.9133433427813424, + "learning_rate": 8.895482185010543e-06, + "loss": 0.7276, + "step": 7805 + }, + { + "epoch": 0.23924236851783745, + "grad_norm": 1.817799765955026, + "learning_rate": 8.89517102213512e-06, + "loss": 0.6891, + "step": 7806 + }, + { + "epoch": 0.23927301704057866, + "grad_norm": 1.7168970759075994, + "learning_rate": 8.89485982087958e-06, + "loss": 0.6142, + "step": 7807 + }, + { + "epoch": 0.23930366556331986, + "grad_norm": 1.8293020677994256, + "learning_rate": 8.89454858124699e-06, + "loss": 0.6046, + "step": 7808 + }, + { + "epoch": 0.23933431408606104, + "grad_norm": 1.9894603287776573, + "learning_rate": 8.894237303240417e-06, + "loss": 0.6931, + "step": 7809 + }, + { + "epoch": 0.23936496260880225, + "grad_norm": 1.6300331163375192, + "learning_rate": 8.893925986862928e-06, + "loss": 0.8164, + "step": 7810 + }, + { + "epoch": 0.23939561113154345, + "grad_norm": 1.7850451146772353, + "learning_rate": 8.893614632117589e-06, + "loss": 0.6814, + "step": 7811 + }, + { + "epoch": 0.23942625965428466, + "grad_norm": 1.695893789697916, + "learning_rate": 8.893303239007468e-06, + "loss": 0.6797, + "step": 7812 + }, + { + "epoch": 0.23945690817702586, + "grad_norm": 1.1171840220799176, + "learning_rate": 8.892991807535635e-06, + "loss": 0.5008, + "step": 7813 + }, + { + "epoch": 0.23948755669976707, + "grad_norm": 1.8464594406225916, + "learning_rate": 8.892680337705157e-06, + "loss": 0.6274, + "step": 7814 + }, + { + "epoch": 0.23951820522250827, + "grad_norm": 1.7892305372291746, + "learning_rate": 8.892368829519105e-06, + "loss": 0.7202, + "step": 7815 + }, + { + "epoch": 0.23954885374524948, + "grad_norm": 1.6283624259571976, + "learning_rate": 8.892057282980545e-06, + "loss": 0.6837, + "step": 7816 + }, + { + "epoch": 0.23957950226799068, + "grad_norm": 
1.817690690752984, + "learning_rate": 8.89174569809255e-06, + "loss": 0.7503, + "step": 7817 + }, + { + "epoch": 0.2396101507907319, + "grad_norm": 1.6022716742614256, + "learning_rate": 8.891434074858189e-06, + "loss": 0.6576, + "step": 7818 + }, + { + "epoch": 0.2396407993134731, + "grad_norm": 1.5680596041034172, + "learning_rate": 8.891122413280533e-06, + "loss": 0.6963, + "step": 7819 + }, + { + "epoch": 0.2396714478362143, + "grad_norm": 1.9980706162956552, + "learning_rate": 8.890810713362651e-06, + "loss": 0.7831, + "step": 7820 + }, + { + "epoch": 0.2397020963589555, + "grad_norm": 1.7786711669178648, + "learning_rate": 8.890498975107616e-06, + "loss": 0.7817, + "step": 7821 + }, + { + "epoch": 0.2397327448816967, + "grad_norm": 1.6610092867710793, + "learning_rate": 8.890187198518498e-06, + "loss": 0.7122, + "step": 7822 + }, + { + "epoch": 0.23976339340443792, + "grad_norm": 1.5506579463527974, + "learning_rate": 8.889875383598372e-06, + "loss": 0.6899, + "step": 7823 + }, + { + "epoch": 0.2397940419271791, + "grad_norm": 1.763635427727064, + "learning_rate": 8.889563530350307e-06, + "loss": 0.7287, + "step": 7824 + }, + { + "epoch": 0.2398246904499203, + "grad_norm": 1.9187885180923616, + "learning_rate": 8.88925163877738e-06, + "loss": 0.6304, + "step": 7825 + }, + { + "epoch": 0.2398553389726615, + "grad_norm": 1.7204413915372483, + "learning_rate": 8.888939708882658e-06, + "loss": 0.6928, + "step": 7826 + }, + { + "epoch": 0.2398859874954027, + "grad_norm": 1.5702213218851686, + "learning_rate": 8.888627740669221e-06, + "loss": 0.7462, + "step": 7827 + }, + { + "epoch": 0.23991663601814392, + "grad_norm": 1.6509912314653472, + "learning_rate": 8.888315734140139e-06, + "loss": 0.6602, + "step": 7828 + }, + { + "epoch": 0.23994728454088513, + "grad_norm": 1.9325560620131828, + "learning_rate": 8.888003689298487e-06, + "loss": 0.7406, + "step": 7829 + }, + { + "epoch": 0.23997793306362633, + "grad_norm": 1.8283687714257353, + "learning_rate": 8.88769160614734e-06, + "loss": 0.7756, + "step": 7830 + }, + { + "epoch": 0.24000858158636754, + "grad_norm": 1.5516082006176717, + "learning_rate": 8.887379484689772e-06, + "loss": 0.6712, + "step": 7831 + }, + { + "epoch": 0.24003923010910874, + "grad_norm": 1.7500147357311229, + "learning_rate": 8.88706732492886e-06, + "loss": 0.7499, + "step": 7832 + }, + { + "epoch": 0.24006987863184995, + "grad_norm": 1.71307354829612, + "learning_rate": 8.88675512686768e-06, + "loss": 0.673, + "step": 7833 + }, + { + "epoch": 0.24010052715459115, + "grad_norm": 2.277100882436193, + "learning_rate": 8.886442890509305e-06, + "loss": 0.7328, + "step": 7834 + }, + { + "epoch": 0.24013117567733236, + "grad_norm": 2.070272278905906, + "learning_rate": 8.886130615856815e-06, + "loss": 0.7488, + "step": 7835 + }, + { + "epoch": 0.24016182420007356, + "grad_norm": 1.7164242234766085, + "learning_rate": 8.885818302913286e-06, + "loss": 0.8083, + "step": 7836 + }, + { + "epoch": 0.24019247272281477, + "grad_norm": 1.8848594948548638, + "learning_rate": 8.885505951681795e-06, + "loss": 0.7542, + "step": 7837 + }, + { + "epoch": 0.24022312124555598, + "grad_norm": 1.769131764010906, + "learning_rate": 8.88519356216542e-06, + "loss": 0.7526, + "step": 7838 + }, + { + "epoch": 0.24025376976829718, + "grad_norm": 1.016611439653614, + "learning_rate": 8.884881134367239e-06, + "loss": 0.5263, + "step": 7839 + }, + { + "epoch": 0.24028441829103836, + "grad_norm": 0.9023337185112525, + "learning_rate": 8.884568668290329e-06, + "loss": 0.5105, + "step": 7840 + }, + { + 
"epoch": 0.24031506681377957, + "grad_norm": 1.4470845040720175, + "learning_rate": 8.88425616393777e-06, + "loss": 0.6868, + "step": 7841 + }, + { + "epoch": 0.24034571533652077, + "grad_norm": 1.7310700664674477, + "learning_rate": 8.883943621312644e-06, + "loss": 0.6584, + "step": 7842 + }, + { + "epoch": 0.24037636385926198, + "grad_norm": 0.8474592760561769, + "learning_rate": 8.883631040418024e-06, + "loss": 0.4957, + "step": 7843 + }, + { + "epoch": 0.24040701238200318, + "grad_norm": 1.8261389178865841, + "learning_rate": 8.883318421256994e-06, + "loss": 0.7148, + "step": 7844 + }, + { + "epoch": 0.2404376609047444, + "grad_norm": 2.0343844127992883, + "learning_rate": 8.883005763832636e-06, + "loss": 0.7226, + "step": 7845 + }, + { + "epoch": 0.2404683094274856, + "grad_norm": 1.7029269806639353, + "learning_rate": 8.882693068148027e-06, + "loss": 0.634, + "step": 7846 + }, + { + "epoch": 0.2404989579502268, + "grad_norm": 0.997129631383225, + "learning_rate": 8.882380334206252e-06, + "loss": 0.4902, + "step": 7847 + }, + { + "epoch": 0.240529606472968, + "grad_norm": 1.9133890535069569, + "learning_rate": 8.882067562010388e-06, + "loss": 0.7176, + "step": 7848 + }, + { + "epoch": 0.2405602549957092, + "grad_norm": 1.5777816003382643, + "learning_rate": 8.881754751563521e-06, + "loss": 0.6384, + "step": 7849 + }, + { + "epoch": 0.24059090351845042, + "grad_norm": 1.8341848968958792, + "learning_rate": 8.88144190286873e-06, + "loss": 0.7134, + "step": 7850 + }, + { + "epoch": 0.24062155204119162, + "grad_norm": 0.8598740767073961, + "learning_rate": 8.881129015929098e-06, + "loss": 0.5051, + "step": 7851 + }, + { + "epoch": 0.24065220056393283, + "grad_norm": 0.8387238763836885, + "learning_rate": 8.88081609074771e-06, + "loss": 0.4981, + "step": 7852 + }, + { + "epoch": 0.24068284908667403, + "grad_norm": 1.8751068563543256, + "learning_rate": 8.880503127327648e-06, + "loss": 0.7005, + "step": 7853 + }, + { + "epoch": 0.24071349760941524, + "grad_norm": 2.2731829507235917, + "learning_rate": 8.880190125671998e-06, + "loss": 0.6619, + "step": 7854 + }, + { + "epoch": 0.24074414613215642, + "grad_norm": 2.4172658806212053, + "learning_rate": 8.879877085783838e-06, + "loss": 0.6399, + "step": 7855 + }, + { + "epoch": 0.24077479465489762, + "grad_norm": 1.613196206762624, + "learning_rate": 8.879564007666257e-06, + "loss": 0.6739, + "step": 7856 + }, + { + "epoch": 0.24080544317763883, + "grad_norm": 1.8298006620795249, + "learning_rate": 8.879250891322341e-06, + "loss": 0.7757, + "step": 7857 + }, + { + "epoch": 0.24083609170038003, + "grad_norm": 1.8714250616231585, + "learning_rate": 8.878937736755172e-06, + "loss": 0.6646, + "step": 7858 + }, + { + "epoch": 0.24086674022312124, + "grad_norm": 1.9646990401945852, + "learning_rate": 8.878624543967837e-06, + "loss": 0.6951, + "step": 7859 + }, + { + "epoch": 0.24089738874586245, + "grad_norm": 1.0158829730380827, + "learning_rate": 8.878311312963423e-06, + "loss": 0.5027, + "step": 7860 + }, + { + "epoch": 0.24092803726860365, + "grad_norm": 2.1640075016597184, + "learning_rate": 8.877998043745015e-06, + "loss": 0.7997, + "step": 7861 + }, + { + "epoch": 0.24095868579134486, + "grad_norm": 1.8494285883265047, + "learning_rate": 8.8776847363157e-06, + "loss": 0.7806, + "step": 7862 + }, + { + "epoch": 0.24098933431408606, + "grad_norm": 1.9014564901310709, + "learning_rate": 8.877371390678565e-06, + "loss": 0.6619, + "step": 7863 + }, + { + "epoch": 0.24101998283682727, + "grad_norm": 1.929085063021647, + "learning_rate": 
8.877058006836698e-06, + "loss": 0.7892, + "step": 7864 + }, + { + "epoch": 0.24105063135956847, + "grad_norm": 0.8348605419152214, + "learning_rate": 8.876744584793186e-06, + "loss": 0.4929, + "step": 7865 + }, + { + "epoch": 0.24108127988230968, + "grad_norm": 1.8276383761524555, + "learning_rate": 8.876431124551118e-06, + "loss": 0.7648, + "step": 7866 + }, + { + "epoch": 0.24111192840505088, + "grad_norm": 1.6684262531011036, + "learning_rate": 8.876117626113583e-06, + "loss": 0.7008, + "step": 7867 + }, + { + "epoch": 0.2411425769277921, + "grad_norm": 1.7786788022804683, + "learning_rate": 8.875804089483669e-06, + "loss": 0.7525, + "step": 7868 + }, + { + "epoch": 0.2411732254505333, + "grad_norm": 1.692470469315091, + "learning_rate": 8.875490514664464e-06, + "loss": 0.7435, + "step": 7869 + }, + { + "epoch": 0.2412038739732745, + "grad_norm": 1.781572198587456, + "learning_rate": 8.875176901659061e-06, + "loss": 0.6964, + "step": 7870 + }, + { + "epoch": 0.24123452249601568, + "grad_norm": 0.8394319839569279, + "learning_rate": 8.874863250470547e-06, + "loss": 0.4794, + "step": 7871 + }, + { + "epoch": 0.24126517101875689, + "grad_norm": 0.8326407440573272, + "learning_rate": 8.874549561102014e-06, + "loss": 0.4883, + "step": 7872 + }, + { + "epoch": 0.2412958195414981, + "grad_norm": 0.8110578139773392, + "learning_rate": 8.874235833556554e-06, + "loss": 0.4905, + "step": 7873 + }, + { + "epoch": 0.2413264680642393, + "grad_norm": 0.7699510437699241, + "learning_rate": 8.873922067837258e-06, + "loss": 0.4891, + "step": 7874 + }, + { + "epoch": 0.2413571165869805, + "grad_norm": 1.732705879069986, + "learning_rate": 8.873608263947216e-06, + "loss": 0.6793, + "step": 7875 + }, + { + "epoch": 0.2413877651097217, + "grad_norm": 1.7728664317097447, + "learning_rate": 8.87329442188952e-06, + "loss": 0.7424, + "step": 7876 + }, + { + "epoch": 0.2414184136324629, + "grad_norm": 1.7974740663131266, + "learning_rate": 8.872980541667261e-06, + "loss": 0.6243, + "step": 7877 + }, + { + "epoch": 0.24144906215520412, + "grad_norm": 1.7367487637775703, + "learning_rate": 8.872666623283539e-06, + "loss": 0.7239, + "step": 7878 + }, + { + "epoch": 0.24147971067794533, + "grad_norm": 1.9819925940793799, + "learning_rate": 8.872352666741438e-06, + "loss": 0.7482, + "step": 7879 + }, + { + "epoch": 0.24151035920068653, + "grad_norm": 1.5995496097065853, + "learning_rate": 8.872038672044056e-06, + "loss": 0.713, + "step": 7880 + }, + { + "epoch": 0.24154100772342774, + "grad_norm": 1.697325928587626, + "learning_rate": 8.871724639194487e-06, + "loss": 0.69, + "step": 7881 + }, + { + "epoch": 0.24157165624616894, + "grad_norm": 2.8622825655431825, + "learning_rate": 8.871410568195824e-06, + "loss": 0.7376, + "step": 7882 + }, + { + "epoch": 0.24160230476891015, + "grad_norm": 1.1143513284773956, + "learning_rate": 8.871096459051162e-06, + "loss": 0.4792, + "step": 7883 + }, + { + "epoch": 0.24163295329165135, + "grad_norm": 1.6492496337040676, + "learning_rate": 8.870782311763596e-06, + "loss": 0.7216, + "step": 7884 + }, + { + "epoch": 0.24166360181439256, + "grad_norm": 1.9849541790029894, + "learning_rate": 8.870468126336221e-06, + "loss": 0.8293, + "step": 7885 + }, + { + "epoch": 0.24169425033713374, + "grad_norm": 1.4657863937573028, + "learning_rate": 8.870153902772133e-06, + "loss": 0.6287, + "step": 7886 + }, + { + "epoch": 0.24172489885987494, + "grad_norm": 0.8450692424063045, + "learning_rate": 8.86983964107443e-06, + "loss": 0.4926, + "step": 7887 + }, + { + "epoch": 0.24175554738261615, + 
"grad_norm": 1.9018189929033904, + "learning_rate": 8.869525341246209e-06, + "loss": 0.7063, + "step": 7888 + }, + { + "epoch": 0.24178619590535735, + "grad_norm": 1.9016768654526015, + "learning_rate": 8.86921100329056e-06, + "loss": 0.6228, + "step": 7889 + }, + { + "epoch": 0.24181684442809856, + "grad_norm": 1.9438583313361206, + "learning_rate": 8.868896627210587e-06, + "loss": 0.7319, + "step": 7890 + }, + { + "epoch": 0.24184749295083977, + "grad_norm": 1.7150827718158979, + "learning_rate": 8.868582213009387e-06, + "loss": 0.7499, + "step": 7891 + }, + { + "epoch": 0.24187814147358097, + "grad_norm": 1.71619071071877, + "learning_rate": 8.868267760690055e-06, + "loss": 0.7152, + "step": 7892 + }, + { + "epoch": 0.24190878999632218, + "grad_norm": 1.582325649848641, + "learning_rate": 8.867953270255691e-06, + "loss": 0.6989, + "step": 7893 + }, + { + "epoch": 0.24193943851906338, + "grad_norm": 1.7790915843505803, + "learning_rate": 8.867638741709395e-06, + "loss": 0.6965, + "step": 7894 + }, + { + "epoch": 0.2419700870418046, + "grad_norm": 2.0084940052048963, + "learning_rate": 8.867324175054264e-06, + "loss": 0.8288, + "step": 7895 + }, + { + "epoch": 0.2420007355645458, + "grad_norm": 1.6150350809914615, + "learning_rate": 8.867009570293398e-06, + "loss": 0.6682, + "step": 7896 + }, + { + "epoch": 0.242031384087287, + "grad_norm": 1.5189101090453774, + "learning_rate": 8.866694927429897e-06, + "loss": 0.6734, + "step": 7897 + }, + { + "epoch": 0.2420620326100282, + "grad_norm": 1.8662751154272903, + "learning_rate": 8.866380246466863e-06, + "loss": 0.7168, + "step": 7898 + }, + { + "epoch": 0.2420926811327694, + "grad_norm": 1.6382462651814118, + "learning_rate": 8.866065527407393e-06, + "loss": 0.7702, + "step": 7899 + }, + { + "epoch": 0.24212332965551062, + "grad_norm": 1.7452699154071067, + "learning_rate": 8.865750770254593e-06, + "loss": 0.7858, + "step": 7900 + }, + { + "epoch": 0.24215397817825182, + "grad_norm": 1.6812833672820375, + "learning_rate": 8.865435975011559e-06, + "loss": 0.7259, + "step": 7901 + }, + { + "epoch": 0.242184626700993, + "grad_norm": 3.0514133224559705, + "learning_rate": 8.865121141681397e-06, + "loss": 0.6732, + "step": 7902 + }, + { + "epoch": 0.2422152752237342, + "grad_norm": 1.0512363200192612, + "learning_rate": 8.864806270267207e-06, + "loss": 0.4818, + "step": 7903 + }, + { + "epoch": 0.2422459237464754, + "grad_norm": 1.7852234434614633, + "learning_rate": 8.864491360772091e-06, + "loss": 0.7533, + "step": 7904 + }, + { + "epoch": 0.24227657226921662, + "grad_norm": 1.7916077888775972, + "learning_rate": 8.864176413199155e-06, + "loss": 0.6799, + "step": 7905 + }, + { + "epoch": 0.24230722079195782, + "grad_norm": 2.0300871694580587, + "learning_rate": 8.8638614275515e-06, + "loss": 0.7369, + "step": 7906 + }, + { + "epoch": 0.24233786931469903, + "grad_norm": 1.7170755965624647, + "learning_rate": 8.863546403832227e-06, + "loss": 0.7278, + "step": 7907 + }, + { + "epoch": 0.24236851783744023, + "grad_norm": 0.819830672078477, + "learning_rate": 8.863231342044445e-06, + "loss": 0.5003, + "step": 7908 + }, + { + "epoch": 0.24239916636018144, + "grad_norm": 0.8364532256824666, + "learning_rate": 8.862916242191255e-06, + "loss": 0.4841, + "step": 7909 + }, + { + "epoch": 0.24242981488292265, + "grad_norm": 1.7578393747575654, + "learning_rate": 8.862601104275763e-06, + "loss": 0.6936, + "step": 7910 + }, + { + "epoch": 0.24246046340566385, + "grad_norm": 1.7762418587193167, + "learning_rate": 8.862285928301075e-06, + "loss": 0.6933, + 
"step": 7911 + }, + { + "epoch": 0.24249111192840506, + "grad_norm": 1.761090166571021, + "learning_rate": 8.861970714270294e-06, + "loss": 0.7842, + "step": 7912 + }, + { + "epoch": 0.24252176045114626, + "grad_norm": 1.8159695344446134, + "learning_rate": 8.86165546218653e-06, + "loss": 0.6971, + "step": 7913 + }, + { + "epoch": 0.24255240897388747, + "grad_norm": 1.7283024370403324, + "learning_rate": 8.861340172052883e-06, + "loss": 0.7427, + "step": 7914 + }, + { + "epoch": 0.24258305749662867, + "grad_norm": 0.9039604794215287, + "learning_rate": 8.861024843872465e-06, + "loss": 0.4859, + "step": 7915 + }, + { + "epoch": 0.24261370601936988, + "grad_norm": 1.821807833475637, + "learning_rate": 8.860709477648383e-06, + "loss": 0.7522, + "step": 7916 + }, + { + "epoch": 0.24264435454211106, + "grad_norm": 1.7586021314455558, + "learning_rate": 8.86039407338374e-06, + "loss": 0.7633, + "step": 7917 + }, + { + "epoch": 0.24267500306485226, + "grad_norm": 1.6895845103492972, + "learning_rate": 8.86007863108165e-06, + "loss": 0.727, + "step": 7918 + }, + { + "epoch": 0.24270565158759347, + "grad_norm": 1.6648954930509212, + "learning_rate": 8.859763150745215e-06, + "loss": 0.6608, + "step": 7919 + }, + { + "epoch": 0.24273630011033467, + "grad_norm": 1.667247073294282, + "learning_rate": 8.859447632377547e-06, + "loss": 0.665, + "step": 7920 + }, + { + "epoch": 0.24276694863307588, + "grad_norm": 1.5882611743008441, + "learning_rate": 8.859132075981753e-06, + "loss": 0.7691, + "step": 7921 + }, + { + "epoch": 0.24279759715581709, + "grad_norm": 1.9102321203489965, + "learning_rate": 8.858816481560944e-06, + "loss": 0.7833, + "step": 7922 + }, + { + "epoch": 0.2428282456785583, + "grad_norm": 1.6407518556080884, + "learning_rate": 8.85850084911823e-06, + "loss": 0.6771, + "step": 7923 + }, + { + "epoch": 0.2428588942012995, + "grad_norm": 0.8688635241076963, + "learning_rate": 8.858185178656718e-06, + "loss": 0.4833, + "step": 7924 + }, + { + "epoch": 0.2428895427240407, + "grad_norm": 1.758428013458981, + "learning_rate": 8.857869470179521e-06, + "loss": 0.735, + "step": 7925 + }, + { + "epoch": 0.2429201912467819, + "grad_norm": 1.7977999541590386, + "learning_rate": 8.857553723689749e-06, + "loss": 0.7479, + "step": 7926 + }, + { + "epoch": 0.2429508397695231, + "grad_norm": 2.016725538020713, + "learning_rate": 8.857237939190513e-06, + "loss": 0.6758, + "step": 7927 + }, + { + "epoch": 0.24298148829226432, + "grad_norm": 1.5889254889381998, + "learning_rate": 8.856922116684924e-06, + "loss": 0.7572, + "step": 7928 + }, + { + "epoch": 0.24301213681500552, + "grad_norm": 1.8131000625135516, + "learning_rate": 8.856606256176096e-06, + "loss": 0.7827, + "step": 7929 + }, + { + "epoch": 0.24304278533774673, + "grad_norm": 1.6569666352402759, + "learning_rate": 8.856290357667139e-06, + "loss": 0.6865, + "step": 7930 + }, + { + "epoch": 0.24307343386048794, + "grad_norm": 1.743622062966663, + "learning_rate": 8.855974421161167e-06, + "loss": 0.7272, + "step": 7931 + }, + { + "epoch": 0.24310408238322914, + "grad_norm": 1.609455778011754, + "learning_rate": 8.855658446661292e-06, + "loss": 0.6809, + "step": 7932 + }, + { + "epoch": 0.24313473090597032, + "grad_norm": 1.6810408254929528, + "learning_rate": 8.855342434170628e-06, + "loss": 0.7862, + "step": 7933 + }, + { + "epoch": 0.24316537942871153, + "grad_norm": 1.6012521897452296, + "learning_rate": 8.855026383692288e-06, + "loss": 0.7082, + "step": 7934 + }, + { + "epoch": 0.24319602795145273, + "grad_norm": 1.8672998505509404, + 
"learning_rate": 8.854710295229387e-06, + "loss": 0.6886, + "step": 7935 + }, + { + "epoch": 0.24322667647419394, + "grad_norm": 1.8158193456295963, + "learning_rate": 8.854394168785038e-06, + "loss": 0.6343, + "step": 7936 + }, + { + "epoch": 0.24325732499693514, + "grad_norm": 1.6972089384070814, + "learning_rate": 8.85407800436236e-06, + "loss": 0.6361, + "step": 7937 + }, + { + "epoch": 0.24328797351967635, + "grad_norm": 1.5075051695058392, + "learning_rate": 8.853761801964462e-06, + "loss": 0.6221, + "step": 7938 + }, + { + "epoch": 0.24331862204241755, + "grad_norm": 1.7937796998335398, + "learning_rate": 8.853445561594466e-06, + "loss": 0.8186, + "step": 7939 + }, + { + "epoch": 0.24334927056515876, + "grad_norm": 1.9903859329842168, + "learning_rate": 8.853129283255484e-06, + "loss": 0.7557, + "step": 7940 + }, + { + "epoch": 0.24337991908789997, + "grad_norm": 1.6242730011914777, + "learning_rate": 8.852812966950633e-06, + "loss": 0.6766, + "step": 7941 + }, + { + "epoch": 0.24341056761064117, + "grad_norm": 1.7467649291914307, + "learning_rate": 8.852496612683028e-06, + "loss": 0.7877, + "step": 7942 + }, + { + "epoch": 0.24344121613338238, + "grad_norm": 1.5682288436298346, + "learning_rate": 8.852180220455791e-06, + "loss": 0.6587, + "step": 7943 + }, + { + "epoch": 0.24347186465612358, + "grad_norm": 0.9022637479873721, + "learning_rate": 8.851863790272036e-06, + "loss": 0.4681, + "step": 7944 + }, + { + "epoch": 0.2435025131788648, + "grad_norm": 1.6652819201563889, + "learning_rate": 8.851547322134882e-06, + "loss": 0.7412, + "step": 7945 + }, + { + "epoch": 0.243533161701606, + "grad_norm": 1.6819204143776718, + "learning_rate": 8.851230816047445e-06, + "loss": 0.6458, + "step": 7946 + }, + { + "epoch": 0.2435638102243472, + "grad_norm": 1.6116868381353353, + "learning_rate": 8.850914272012846e-06, + "loss": 0.6711, + "step": 7947 + }, + { + "epoch": 0.24359445874708838, + "grad_norm": 1.7549628908498993, + "learning_rate": 8.850597690034204e-06, + "loss": 0.731, + "step": 7948 + }, + { + "epoch": 0.24362510726982958, + "grad_norm": 1.930822748038165, + "learning_rate": 8.850281070114637e-06, + "loss": 0.6981, + "step": 7949 + }, + { + "epoch": 0.2436557557925708, + "grad_norm": 1.7812360286481155, + "learning_rate": 8.849964412257268e-06, + "loss": 0.6983, + "step": 7950 + }, + { + "epoch": 0.243686404315312, + "grad_norm": 1.782003441249021, + "learning_rate": 8.84964771646521e-06, + "loss": 0.6882, + "step": 7951 + }, + { + "epoch": 0.2437170528380532, + "grad_norm": 1.8801309821663963, + "learning_rate": 8.849330982741594e-06, + "loss": 0.7032, + "step": 7952 + }, + { + "epoch": 0.2437477013607944, + "grad_norm": 1.5651267523258867, + "learning_rate": 8.849014211089529e-06, + "loss": 0.7275, + "step": 7953 + }, + { + "epoch": 0.2437783498835356, + "grad_norm": 1.6992505650689416, + "learning_rate": 8.848697401512146e-06, + "loss": 0.7433, + "step": 7954 + }, + { + "epoch": 0.24380899840627682, + "grad_norm": 0.9214129986837052, + "learning_rate": 8.848380554012562e-06, + "loss": 0.4966, + "step": 7955 + }, + { + "epoch": 0.24383964692901802, + "grad_norm": 1.6905878852720009, + "learning_rate": 8.848063668593898e-06, + "loss": 0.6133, + "step": 7956 + }, + { + "epoch": 0.24387029545175923, + "grad_norm": 1.6600213680779792, + "learning_rate": 8.84774674525928e-06, + "loss": 0.7046, + "step": 7957 + }, + { + "epoch": 0.24390094397450043, + "grad_norm": 1.6933700875566493, + "learning_rate": 8.847429784011828e-06, + "loss": 0.644, + "step": 7958 + }, + { + "epoch": 
0.24393159249724164, + "grad_norm": 1.7396747136362185, + "learning_rate": 8.847112784854666e-06, + "loss": 0.7091, + "step": 7959 + }, + { + "epoch": 0.24396224101998285, + "grad_norm": 1.8684029324543772, + "learning_rate": 8.846795747790918e-06, + "loss": 0.8563, + "step": 7960 + }, + { + "epoch": 0.24399288954272405, + "grad_norm": 1.782190523627196, + "learning_rate": 8.846478672823707e-06, + "loss": 0.6396, + "step": 7961 + }, + { + "epoch": 0.24402353806546526, + "grad_norm": 1.88665669311605, + "learning_rate": 8.846161559956156e-06, + "loss": 0.6891, + "step": 7962 + }, + { + "epoch": 0.24405418658820646, + "grad_norm": 0.8549543866694338, + "learning_rate": 8.845844409191393e-06, + "loss": 0.4972, + "step": 7963 + }, + { + "epoch": 0.24408483511094764, + "grad_norm": 0.8115505413127505, + "learning_rate": 8.84552722053254e-06, + "loss": 0.4874, + "step": 7964 + }, + { + "epoch": 0.24411548363368885, + "grad_norm": 1.8782718269229484, + "learning_rate": 8.845209993982724e-06, + "loss": 0.8132, + "step": 7965 + }, + { + "epoch": 0.24414613215643005, + "grad_norm": 1.654585731009314, + "learning_rate": 8.84489272954507e-06, + "loss": 0.7092, + "step": 7966 + }, + { + "epoch": 0.24417678067917126, + "grad_norm": 0.7681090078381487, + "learning_rate": 8.844575427222703e-06, + "loss": 0.4723, + "step": 7967 + }, + { + "epoch": 0.24420742920191246, + "grad_norm": 1.6850436745072663, + "learning_rate": 8.844258087018754e-06, + "loss": 0.6517, + "step": 7968 + }, + { + "epoch": 0.24423807772465367, + "grad_norm": 1.6648204215851632, + "learning_rate": 8.843940708936342e-06, + "loss": 0.7072, + "step": 7969 + }, + { + "epoch": 0.24426872624739487, + "grad_norm": 2.17486121220585, + "learning_rate": 8.843623292978602e-06, + "loss": 0.7681, + "step": 7970 + }, + { + "epoch": 0.24429937477013608, + "grad_norm": 1.850345111509204, + "learning_rate": 8.843305839148657e-06, + "loss": 0.6737, + "step": 7971 + }, + { + "epoch": 0.24433002329287729, + "grad_norm": 2.0668904582274354, + "learning_rate": 8.842988347449636e-06, + "loss": 0.8028, + "step": 7972 + }, + { + "epoch": 0.2443606718156185, + "grad_norm": 1.796834015294838, + "learning_rate": 8.842670817884669e-06, + "loss": 0.7576, + "step": 7973 + }, + { + "epoch": 0.2443913203383597, + "grad_norm": 1.826894411582645, + "learning_rate": 8.842353250456882e-06, + "loss": 0.7397, + "step": 7974 + }, + { + "epoch": 0.2444219688611009, + "grad_norm": 1.7887689438772474, + "learning_rate": 8.842035645169404e-06, + "loss": 0.7118, + "step": 7975 + }, + { + "epoch": 0.2444526173838421, + "grad_norm": 1.5909419256103294, + "learning_rate": 8.841718002025367e-06, + "loss": 0.7121, + "step": 7976 + }, + { + "epoch": 0.2444832659065833, + "grad_norm": 1.6220988984450107, + "learning_rate": 8.841400321027899e-06, + "loss": 0.7054, + "step": 7977 + }, + { + "epoch": 0.24451391442932452, + "grad_norm": 1.6853923608883696, + "learning_rate": 8.841082602180134e-06, + "loss": 0.6402, + "step": 7978 + }, + { + "epoch": 0.2445445629520657, + "grad_norm": 1.0078356248100935, + "learning_rate": 8.840764845485196e-06, + "loss": 0.4977, + "step": 7979 + }, + { + "epoch": 0.2445752114748069, + "grad_norm": 1.7691628351092485, + "learning_rate": 8.840447050946219e-06, + "loss": 0.7434, + "step": 7980 + }, + { + "epoch": 0.2446058599975481, + "grad_norm": 1.6779778562897696, + "learning_rate": 8.840129218566335e-06, + "loss": 0.7703, + "step": 7981 + }, + { + "epoch": 0.24463650852028931, + "grad_norm": 0.8354567872897779, + "learning_rate": 8.839811348348677e-06, + 
"loss": 0.4926, + "step": 7982 + }, + { + "epoch": 0.24466715704303052, + "grad_norm": 0.8557073698910221, + "learning_rate": 8.839493440296376e-06, + "loss": 0.4778, + "step": 7983 + }, + { + "epoch": 0.24469780556577173, + "grad_norm": 1.6078060754676384, + "learning_rate": 8.83917549441256e-06, + "loss": 0.6879, + "step": 7984 + }, + { + "epoch": 0.24472845408851293, + "grad_norm": 0.8542456549668138, + "learning_rate": 8.838857510700369e-06, + "loss": 0.5034, + "step": 7985 + }, + { + "epoch": 0.24475910261125414, + "grad_norm": 1.7917961988324247, + "learning_rate": 8.83853948916293e-06, + "loss": 0.6808, + "step": 7986 + }, + { + "epoch": 0.24478975113399534, + "grad_norm": 1.871489170515039, + "learning_rate": 8.838221429803381e-06, + "loss": 0.7326, + "step": 7987 + }, + { + "epoch": 0.24482039965673655, + "grad_norm": 1.6702175590870718, + "learning_rate": 8.837903332624855e-06, + "loss": 0.6812, + "step": 7988 + }, + { + "epoch": 0.24485104817947775, + "grad_norm": 1.8134931694739158, + "learning_rate": 8.837585197630483e-06, + "loss": 0.7379, + "step": 7989 + }, + { + "epoch": 0.24488169670221896, + "grad_norm": 2.0082388044993724, + "learning_rate": 8.837267024823404e-06, + "loss": 0.713, + "step": 7990 + }, + { + "epoch": 0.24491234522496017, + "grad_norm": 1.8410771407823308, + "learning_rate": 8.836948814206751e-06, + "loss": 0.6607, + "step": 7991 + }, + { + "epoch": 0.24494299374770137, + "grad_norm": 1.745148739359042, + "learning_rate": 8.83663056578366e-06, + "loss": 0.7057, + "step": 7992 + }, + { + "epoch": 0.24497364227044258, + "grad_norm": 1.6580785340925759, + "learning_rate": 8.836312279557264e-06, + "loss": 0.663, + "step": 7993 + }, + { + "epoch": 0.24500429079318378, + "grad_norm": 1.976297761336539, + "learning_rate": 8.835993955530704e-06, + "loss": 0.7288, + "step": 7994 + }, + { + "epoch": 0.24503493931592496, + "grad_norm": 1.5964860229734923, + "learning_rate": 8.835675593707113e-06, + "loss": 0.7018, + "step": 7995 + }, + { + "epoch": 0.24506558783866617, + "grad_norm": 1.7531683886770775, + "learning_rate": 8.83535719408963e-06, + "loss": 0.7296, + "step": 7996 + }, + { + "epoch": 0.24509623636140737, + "grad_norm": 1.8877884594947731, + "learning_rate": 8.83503875668139e-06, + "loss": 0.6852, + "step": 7997 + }, + { + "epoch": 0.24512688488414858, + "grad_norm": 1.9427233805460684, + "learning_rate": 8.834720281485533e-06, + "loss": 0.7654, + "step": 7998 + }, + { + "epoch": 0.24515753340688978, + "grad_norm": 1.7302660609281746, + "learning_rate": 8.834401768505194e-06, + "loss": 0.7062, + "step": 7999 + }, + { + "epoch": 0.245188181929631, + "grad_norm": 1.7159207587006038, + "learning_rate": 8.834083217743516e-06, + "loss": 0.6646, + "step": 8000 + }, + { + "epoch": 0.2452188304523722, + "grad_norm": 1.828903659312408, + "learning_rate": 8.833764629203634e-06, + "loss": 0.7411, + "step": 8001 + }, + { + "epoch": 0.2452494789751134, + "grad_norm": 1.6804320267053996, + "learning_rate": 8.833446002888689e-06, + "loss": 0.6629, + "step": 8002 + }, + { + "epoch": 0.2452801274978546, + "grad_norm": 1.8777956177191655, + "learning_rate": 8.833127338801818e-06, + "loss": 0.7866, + "step": 8003 + }, + { + "epoch": 0.2453107760205958, + "grad_norm": 1.8918128045691969, + "learning_rate": 8.832808636946162e-06, + "loss": 0.6645, + "step": 8004 + }, + { + "epoch": 0.24534142454333702, + "grad_norm": 1.6312506889632612, + "learning_rate": 8.832489897324863e-06, + "loss": 0.6705, + "step": 8005 + }, + { + "epoch": 0.24537207306607822, + "grad_norm": 
1.7893780704653304, + "learning_rate": 8.83217111994106e-06, + "loss": 0.7855, + "step": 8006 + }, + { + "epoch": 0.24540272158881943, + "grad_norm": 1.50274221587021, + "learning_rate": 8.831852304797896e-06, + "loss": 0.5396, + "step": 8007 + }, + { + "epoch": 0.24543337011156063, + "grad_norm": 1.6768894084621193, + "learning_rate": 8.831533451898508e-06, + "loss": 0.7364, + "step": 8008 + }, + { + "epoch": 0.24546401863430184, + "grad_norm": 1.925880737520859, + "learning_rate": 8.831214561246044e-06, + "loss": 0.7163, + "step": 8009 + }, + { + "epoch": 0.24549466715704302, + "grad_norm": 1.7615683259058121, + "learning_rate": 8.830895632843641e-06, + "loss": 0.6969, + "step": 8010 + }, + { + "epoch": 0.24552531567978422, + "grad_norm": 1.5489840283278447, + "learning_rate": 8.830576666694443e-06, + "loss": 0.6547, + "step": 8011 + }, + { + "epoch": 0.24555596420252543, + "grad_norm": 1.7814653414553923, + "learning_rate": 8.830257662801594e-06, + "loss": 0.6992, + "step": 8012 + }, + { + "epoch": 0.24558661272526663, + "grad_norm": 2.154353675445491, + "learning_rate": 8.829938621168234e-06, + "loss": 0.7685, + "step": 8013 + }, + { + "epoch": 0.24561726124800784, + "grad_norm": 1.085678926980741, + "learning_rate": 8.829619541797511e-06, + "loss": 0.4932, + "step": 8014 + }, + { + "epoch": 0.24564790977074905, + "grad_norm": 1.6221629001502402, + "learning_rate": 8.829300424692566e-06, + "loss": 0.7352, + "step": 8015 + }, + { + "epoch": 0.24567855829349025, + "grad_norm": 1.5428521078971296, + "learning_rate": 8.828981269856543e-06, + "loss": 0.6513, + "step": 8016 + }, + { + "epoch": 0.24570920681623146, + "grad_norm": 1.715657416815919, + "learning_rate": 8.828662077292588e-06, + "loss": 0.7309, + "step": 8017 + }, + { + "epoch": 0.24573985533897266, + "grad_norm": 1.7274204644539477, + "learning_rate": 8.828342847003848e-06, + "loss": 0.8097, + "step": 8018 + }, + { + "epoch": 0.24577050386171387, + "grad_norm": 1.7367096721031674, + "learning_rate": 8.828023578993466e-06, + "loss": 0.7066, + "step": 8019 + }, + { + "epoch": 0.24580115238445507, + "grad_norm": 2.0295865126118504, + "learning_rate": 8.827704273264588e-06, + "loss": 0.6796, + "step": 8020 + }, + { + "epoch": 0.24583180090719628, + "grad_norm": 1.7204259765651797, + "learning_rate": 8.82738492982036e-06, + "loss": 0.6363, + "step": 8021 + }, + { + "epoch": 0.24586244942993749, + "grad_norm": 1.6646845919950077, + "learning_rate": 8.82706554866393e-06, + "loss": 0.7221, + "step": 8022 + }, + { + "epoch": 0.2458930979526787, + "grad_norm": 1.5413961423025662, + "learning_rate": 8.826746129798442e-06, + "loss": 0.6591, + "step": 8023 + }, + { + "epoch": 0.2459237464754199, + "grad_norm": 1.74270464815785, + "learning_rate": 8.826426673227047e-06, + "loss": 0.7214, + "step": 8024 + }, + { + "epoch": 0.2459543949981611, + "grad_norm": 1.578001911740241, + "learning_rate": 8.826107178952889e-06, + "loss": 0.7302, + "step": 8025 + }, + { + "epoch": 0.24598504352090228, + "grad_norm": 1.8379219412436318, + "learning_rate": 8.825787646979119e-06, + "loss": 0.7209, + "step": 8026 + }, + { + "epoch": 0.24601569204364349, + "grad_norm": 1.7168398724106444, + "learning_rate": 8.825468077308885e-06, + "loss": 0.7645, + "step": 8027 + }, + { + "epoch": 0.2460463405663847, + "grad_norm": 1.0633223877679365, + "learning_rate": 8.825148469945335e-06, + "loss": 0.4798, + "step": 8028 + }, + { + "epoch": 0.2460769890891259, + "grad_norm": 1.908272408255897, + "learning_rate": 8.824828824891618e-06, + "loss": 0.73, + "step": 8029 + }, + 
{ + "epoch": 0.2461076376118671, + "grad_norm": 1.9130739822265423, + "learning_rate": 8.824509142150885e-06, + "loss": 0.7059, + "step": 8030 + }, + { + "epoch": 0.2461382861346083, + "grad_norm": 1.8946705854652655, + "learning_rate": 8.824189421726284e-06, + "loss": 0.7046, + "step": 8031 + }, + { + "epoch": 0.24616893465734951, + "grad_norm": 1.780757457810362, + "learning_rate": 8.823869663620967e-06, + "loss": 0.7575, + "step": 8032 + }, + { + "epoch": 0.24619958318009072, + "grad_norm": 1.5833455739258302, + "learning_rate": 8.823549867838082e-06, + "loss": 0.7024, + "step": 8033 + }, + { + "epoch": 0.24623023170283193, + "grad_norm": 1.6035741215604673, + "learning_rate": 8.823230034380784e-06, + "loss": 0.6304, + "step": 8034 + }, + { + "epoch": 0.24626088022557313, + "grad_norm": 1.8593356877728557, + "learning_rate": 8.822910163252221e-06, + "loss": 0.7271, + "step": 8035 + }, + { + "epoch": 0.24629152874831434, + "grad_norm": 1.7501212004565672, + "learning_rate": 8.822590254455547e-06, + "loss": 0.7989, + "step": 8036 + }, + { + "epoch": 0.24632217727105554, + "grad_norm": 1.597393767964925, + "learning_rate": 8.822270307993912e-06, + "loss": 0.5857, + "step": 8037 + }, + { + "epoch": 0.24635282579379675, + "grad_norm": 1.9412929215282402, + "learning_rate": 8.82195032387047e-06, + "loss": 0.7697, + "step": 8038 + }, + { + "epoch": 0.24638347431653795, + "grad_norm": 1.6334101462723745, + "learning_rate": 8.821630302088374e-06, + "loss": 0.6273, + "step": 8039 + }, + { + "epoch": 0.24641412283927916, + "grad_norm": 1.6608878080382592, + "learning_rate": 8.821310242650776e-06, + "loss": 0.6628, + "step": 8040 + }, + { + "epoch": 0.24644477136202034, + "grad_norm": 1.6253787309443626, + "learning_rate": 8.82099014556083e-06, + "loss": 0.7116, + "step": 8041 + }, + { + "epoch": 0.24647541988476154, + "grad_norm": 1.924294584305308, + "learning_rate": 8.820670010821693e-06, + "loss": 0.7398, + "step": 8042 + }, + { + "epoch": 0.24650606840750275, + "grad_norm": 1.7848870937413674, + "learning_rate": 8.820349838436515e-06, + "loss": 0.7588, + "step": 8043 + }, + { + "epoch": 0.24653671693024395, + "grad_norm": 1.1218684962649488, + "learning_rate": 8.820029628408453e-06, + "loss": 0.4891, + "step": 8044 + }, + { + "epoch": 0.24656736545298516, + "grad_norm": 1.9468870280657689, + "learning_rate": 8.819709380740662e-06, + "loss": 0.7178, + "step": 8045 + }, + { + "epoch": 0.24659801397572637, + "grad_norm": 2.1336816009410384, + "learning_rate": 8.819389095436295e-06, + "loss": 0.7512, + "step": 8046 + }, + { + "epoch": 0.24662866249846757, + "grad_norm": 1.6905705883273028, + "learning_rate": 8.819068772498514e-06, + "loss": 0.7063, + "step": 8047 + }, + { + "epoch": 0.24665931102120878, + "grad_norm": 1.6237807074468171, + "learning_rate": 8.818748411930468e-06, + "loss": 0.704, + "step": 8048 + }, + { + "epoch": 0.24668995954394998, + "grad_norm": 1.8139199245143738, + "learning_rate": 8.818428013735319e-06, + "loss": 0.7588, + "step": 8049 + }, + { + "epoch": 0.2467206080666912, + "grad_norm": 1.6398101270657128, + "learning_rate": 8.81810757791622e-06, + "loss": 0.7238, + "step": 8050 + }, + { + "epoch": 0.2467512565894324, + "grad_norm": 1.7200380688321009, + "learning_rate": 8.81778710447633e-06, + "loss": 0.7312, + "step": 8051 + }, + { + "epoch": 0.2467819051121736, + "grad_norm": 1.7154601116773878, + "learning_rate": 8.817466593418808e-06, + "loss": 0.6267, + "step": 8052 + }, + { + "epoch": 0.2468125536349148, + "grad_norm": 2.020308154116351, + "learning_rate": 
8.817146044746809e-06, + "loss": 0.7205, + "step": 8053 + }, + { + "epoch": 0.246843202157656, + "grad_norm": 0.9549264731421447, + "learning_rate": 8.816825458463496e-06, + "loss": 0.4814, + "step": 8054 + }, + { + "epoch": 0.24687385068039722, + "grad_norm": 1.7975205159350511, + "learning_rate": 8.816504834572024e-06, + "loss": 0.726, + "step": 8055 + }, + { + "epoch": 0.24690449920313842, + "grad_norm": 0.824066236344961, + "learning_rate": 8.816184173075553e-06, + "loss": 0.5032, + "step": 8056 + }, + { + "epoch": 0.2469351477258796, + "grad_norm": 1.7930192610336615, + "learning_rate": 8.815863473977244e-06, + "loss": 0.7163, + "step": 8057 + }, + { + "epoch": 0.2469657962486208, + "grad_norm": 1.9451412750348747, + "learning_rate": 8.815542737280254e-06, + "loss": 0.842, + "step": 8058 + }, + { + "epoch": 0.246996444771362, + "grad_norm": 1.906914849331054, + "learning_rate": 8.815221962987747e-06, + "loss": 0.7484, + "step": 8059 + }, + { + "epoch": 0.24702709329410322, + "grad_norm": 1.7759992635799693, + "learning_rate": 8.814901151102882e-06, + "loss": 0.7076, + "step": 8060 + }, + { + "epoch": 0.24705774181684442, + "grad_norm": 1.7504604877509267, + "learning_rate": 8.814580301628818e-06, + "loss": 0.7225, + "step": 8061 + }, + { + "epoch": 0.24708839033958563, + "grad_norm": 1.6880965837909796, + "learning_rate": 8.81425941456872e-06, + "loss": 0.7485, + "step": 8062 + }, + { + "epoch": 0.24711903886232683, + "grad_norm": 0.8180802853882532, + "learning_rate": 8.813938489925747e-06, + "loss": 0.4822, + "step": 8063 + }, + { + "epoch": 0.24714968738506804, + "grad_norm": 1.8349195427446265, + "learning_rate": 8.813617527703062e-06, + "loss": 0.6452, + "step": 8064 + }, + { + "epoch": 0.24718033590780925, + "grad_norm": 2.0834182069815443, + "learning_rate": 8.813296527903828e-06, + "loss": 0.7657, + "step": 8065 + }, + { + "epoch": 0.24721098443055045, + "grad_norm": 1.833911492211358, + "learning_rate": 8.812975490531208e-06, + "loss": 0.6459, + "step": 8066 + }, + { + "epoch": 0.24724163295329166, + "grad_norm": 1.6210590788830959, + "learning_rate": 8.812654415588366e-06, + "loss": 0.7199, + "step": 8067 + }, + { + "epoch": 0.24727228147603286, + "grad_norm": 1.7729579535881799, + "learning_rate": 8.812333303078462e-06, + "loss": 0.6381, + "step": 8068 + }, + { + "epoch": 0.24730292999877407, + "grad_norm": 1.787819578109249, + "learning_rate": 8.812012153004665e-06, + "loss": 0.724, + "step": 8069 + }, + { + "epoch": 0.24733357852151527, + "grad_norm": 1.8636070855956623, + "learning_rate": 8.811690965370135e-06, + "loss": 0.8098, + "step": 8070 + }, + { + "epoch": 0.24736422704425648, + "grad_norm": 0.8757204077897007, + "learning_rate": 8.81136974017804e-06, + "loss": 0.503, + "step": 8071 + }, + { + "epoch": 0.24739487556699766, + "grad_norm": 0.8296041602401055, + "learning_rate": 8.811048477431543e-06, + "loss": 0.4728, + "step": 8072 + }, + { + "epoch": 0.24742552408973886, + "grad_norm": 1.7371236791374447, + "learning_rate": 8.810727177133811e-06, + "loss": 0.7976, + "step": 8073 + }, + { + "epoch": 0.24745617261248007, + "grad_norm": 1.84858500111401, + "learning_rate": 8.810405839288008e-06, + "loss": 0.6569, + "step": 8074 + }, + { + "epoch": 0.24748682113522127, + "grad_norm": 1.634642631384614, + "learning_rate": 8.810084463897302e-06, + "loss": 0.6719, + "step": 8075 + }, + { + "epoch": 0.24751746965796248, + "grad_norm": 1.7618898404482233, + "learning_rate": 8.80976305096486e-06, + "loss": 0.6692, + "step": 8076 + }, + { + "epoch": 0.24754811818070369, + 
"grad_norm": 1.7048278322314527, + "learning_rate": 8.809441600493846e-06, + "loss": 0.6902, + "step": 8077 + }, + { + "epoch": 0.2475787667034449, + "grad_norm": 0.9194366410593979, + "learning_rate": 8.80912011248743e-06, + "loss": 0.4912, + "step": 8078 + }, + { + "epoch": 0.2476094152261861, + "grad_norm": 1.6605733817072015, + "learning_rate": 8.80879858694878e-06, + "loss": 0.6199, + "step": 8079 + }, + { + "epoch": 0.2476400637489273, + "grad_norm": 1.646999820046466, + "learning_rate": 8.808477023881061e-06, + "loss": 0.6749, + "step": 8080 + }, + { + "epoch": 0.2476707122716685, + "grad_norm": 0.8039230149470897, + "learning_rate": 8.808155423287444e-06, + "loss": 0.4741, + "step": 8081 + }, + { + "epoch": 0.24770136079440971, + "grad_norm": 1.8606754712434794, + "learning_rate": 8.807833785171098e-06, + "loss": 0.7595, + "step": 8082 + }, + { + "epoch": 0.24773200931715092, + "grad_norm": 0.8108455884815702, + "learning_rate": 8.807512109535192e-06, + "loss": 0.4653, + "step": 8083 + }, + { + "epoch": 0.24776265783989213, + "grad_norm": 1.6108879801395906, + "learning_rate": 8.807190396382893e-06, + "loss": 0.667, + "step": 8084 + }, + { + "epoch": 0.24779330636263333, + "grad_norm": 1.8486720364561011, + "learning_rate": 8.806868645717374e-06, + "loss": 0.6752, + "step": 8085 + }, + { + "epoch": 0.24782395488537454, + "grad_norm": 1.7396994392325402, + "learning_rate": 8.806546857541804e-06, + "loss": 0.7266, + "step": 8086 + }, + { + "epoch": 0.24785460340811574, + "grad_norm": 1.8732019164657232, + "learning_rate": 8.806225031859354e-06, + "loss": 0.7456, + "step": 8087 + }, + { + "epoch": 0.24788525193085692, + "grad_norm": 1.7488859775991634, + "learning_rate": 8.805903168673196e-06, + "loss": 0.7313, + "step": 8088 + }, + { + "epoch": 0.24791590045359813, + "grad_norm": 1.6303549400081747, + "learning_rate": 8.805581267986499e-06, + "loss": 0.6999, + "step": 8089 + }, + { + "epoch": 0.24794654897633933, + "grad_norm": 1.5845065237565525, + "learning_rate": 8.805259329802435e-06, + "loss": 0.6666, + "step": 8090 + }, + { + "epoch": 0.24797719749908054, + "grad_norm": 1.641344033965189, + "learning_rate": 8.80493735412418e-06, + "loss": 0.7153, + "step": 8091 + }, + { + "epoch": 0.24800784602182174, + "grad_norm": 1.9342264404231848, + "learning_rate": 8.804615340954901e-06, + "loss": 0.7707, + "step": 8092 + }, + { + "epoch": 0.24803849454456295, + "grad_norm": 1.6237903789331474, + "learning_rate": 8.804293290297777e-06, + "loss": 0.6972, + "step": 8093 + }, + { + "epoch": 0.24806914306730415, + "grad_norm": 1.7733389906725845, + "learning_rate": 8.803971202155975e-06, + "loss": 0.6776, + "step": 8094 + }, + { + "epoch": 0.24809979159004536, + "grad_norm": 1.6779057403607223, + "learning_rate": 8.803649076532672e-06, + "loss": 0.7671, + "step": 8095 + }, + { + "epoch": 0.24813044011278657, + "grad_norm": 1.624910838015011, + "learning_rate": 8.80332691343104e-06, + "loss": 0.7072, + "step": 8096 + }, + { + "epoch": 0.24816108863552777, + "grad_norm": 1.6593531141744151, + "learning_rate": 8.803004712854258e-06, + "loss": 0.6506, + "step": 8097 + }, + { + "epoch": 0.24819173715826898, + "grad_norm": 1.7800167468958394, + "learning_rate": 8.802682474805495e-06, + "loss": 0.7219, + "step": 8098 + }, + { + "epoch": 0.24822238568101018, + "grad_norm": 1.6643312557521797, + "learning_rate": 8.80236019928793e-06, + "loss": 0.7537, + "step": 8099 + }, + { + "epoch": 0.2482530342037514, + "grad_norm": 1.8112247012058549, + "learning_rate": 8.802037886304736e-06, + "loss": 0.6496, + 
"step": 8100 + }, + { + "epoch": 0.2482836827264926, + "grad_norm": 1.7057185025596442, + "learning_rate": 8.80171553585909e-06, + "loss": 0.683, + "step": 8101 + }, + { + "epoch": 0.2483143312492338, + "grad_norm": 1.6293941995008292, + "learning_rate": 8.80139314795417e-06, + "loss": 0.7514, + "step": 8102 + }, + { + "epoch": 0.24834497977197498, + "grad_norm": 1.5232707284630618, + "learning_rate": 8.801070722593147e-06, + "loss": 0.7463, + "step": 8103 + }, + { + "epoch": 0.24837562829471618, + "grad_norm": 1.5865403100536668, + "learning_rate": 8.800748259779206e-06, + "loss": 0.6583, + "step": 8104 + }, + { + "epoch": 0.2484062768174574, + "grad_norm": 1.6668793419158758, + "learning_rate": 8.800425759515517e-06, + "loss": 0.7488, + "step": 8105 + }, + { + "epoch": 0.2484369253401986, + "grad_norm": 1.7115498811486738, + "learning_rate": 8.800103221805261e-06, + "loss": 0.683, + "step": 8106 + }, + { + "epoch": 0.2484675738629398, + "grad_norm": 1.0063081100720115, + "learning_rate": 8.799780646651617e-06, + "loss": 0.4699, + "step": 8107 + }, + { + "epoch": 0.248498222385681, + "grad_norm": 2.1437879455006073, + "learning_rate": 8.799458034057761e-06, + "loss": 0.8035, + "step": 8108 + }, + { + "epoch": 0.2485288709084222, + "grad_norm": 1.8654765602390526, + "learning_rate": 8.799135384026874e-06, + "loss": 0.7081, + "step": 8109 + }, + { + "epoch": 0.24855951943116342, + "grad_norm": 1.604386897150601, + "learning_rate": 8.798812696562132e-06, + "loss": 0.7015, + "step": 8110 + }, + { + "epoch": 0.24859016795390462, + "grad_norm": 1.7495979755025188, + "learning_rate": 8.798489971666717e-06, + "loss": 0.7362, + "step": 8111 + }, + { + "epoch": 0.24862081647664583, + "grad_norm": 1.890923968420445, + "learning_rate": 8.798167209343811e-06, + "loss": 0.7201, + "step": 8112 + }, + { + "epoch": 0.24865146499938703, + "grad_norm": 1.8272539958772693, + "learning_rate": 8.79784440959659e-06, + "loss": 0.6812, + "step": 8113 + }, + { + "epoch": 0.24868211352212824, + "grad_norm": 1.8362982460947905, + "learning_rate": 8.797521572428234e-06, + "loss": 0.7149, + "step": 8114 + }, + { + "epoch": 0.24871276204486945, + "grad_norm": 1.5871885591816028, + "learning_rate": 8.79719869784193e-06, + "loss": 0.6701, + "step": 8115 + }, + { + "epoch": 0.24874341056761065, + "grad_norm": 1.5177388331232216, + "learning_rate": 8.796875785840853e-06, + "loss": 0.6792, + "step": 8116 + }, + { + "epoch": 0.24877405909035186, + "grad_norm": 1.6861062136228848, + "learning_rate": 8.796552836428188e-06, + "loss": 0.6554, + "step": 8117 + }, + { + "epoch": 0.24880470761309306, + "grad_norm": 1.486216567048406, + "learning_rate": 8.796229849607116e-06, + "loss": 0.6737, + "step": 8118 + }, + { + "epoch": 0.24883535613583424, + "grad_norm": 1.6080475721668437, + "learning_rate": 8.795906825380821e-06, + "loss": 0.6267, + "step": 8119 + }, + { + "epoch": 0.24886600465857545, + "grad_norm": 1.7221002485390313, + "learning_rate": 8.795583763752486e-06, + "loss": 0.7256, + "step": 8120 + }, + { + "epoch": 0.24889665318131665, + "grad_norm": 1.6005269719873279, + "learning_rate": 8.795260664725291e-06, + "loss": 0.6601, + "step": 8121 + }, + { + "epoch": 0.24892730170405786, + "grad_norm": 1.688612334546882, + "learning_rate": 8.794937528302422e-06, + "loss": 0.6979, + "step": 8122 + }, + { + "epoch": 0.24895795022679906, + "grad_norm": 1.6995791968154887, + "learning_rate": 8.794614354487063e-06, + "loss": 0.6512, + "step": 8123 + }, + { + "epoch": 0.24898859874954027, + "grad_norm": 1.7375649941180298, + 
"learning_rate": 8.794291143282398e-06, + "loss": 0.6611, + "step": 8124 + }, + { + "epoch": 0.24901924727228147, + "grad_norm": 1.818864755727158, + "learning_rate": 8.793967894691612e-06, + "loss": 0.698, + "step": 8125 + }, + { + "epoch": 0.24904989579502268, + "grad_norm": 1.7101611371028524, + "learning_rate": 8.793644608717888e-06, + "loss": 0.6335, + "step": 8126 + }, + { + "epoch": 0.24908054431776389, + "grad_norm": 1.7119103396908064, + "learning_rate": 8.793321285364416e-06, + "loss": 0.6923, + "step": 8127 + }, + { + "epoch": 0.2491111928405051, + "grad_norm": 1.7560670780132992, + "learning_rate": 8.792997924634376e-06, + "loss": 0.6371, + "step": 8128 + }, + { + "epoch": 0.2491418413632463, + "grad_norm": 1.6482877191684315, + "learning_rate": 8.792674526530957e-06, + "loss": 0.637, + "step": 8129 + }, + { + "epoch": 0.2491724898859875, + "grad_norm": 2.1016266214028443, + "learning_rate": 8.792351091057348e-06, + "loss": 0.8843, + "step": 8130 + }, + { + "epoch": 0.2492031384087287, + "grad_norm": 1.6273333789583677, + "learning_rate": 8.792027618216731e-06, + "loss": 0.6632, + "step": 8131 + }, + { + "epoch": 0.24923378693146991, + "grad_norm": 1.785935000285777, + "learning_rate": 8.791704108012295e-06, + "loss": 0.7459, + "step": 8132 + }, + { + "epoch": 0.24926443545421112, + "grad_norm": 1.8436676070897169, + "learning_rate": 8.791380560447231e-06, + "loss": 0.7509, + "step": 8133 + }, + { + "epoch": 0.2492950839769523, + "grad_norm": 2.0695827643833584, + "learning_rate": 8.791056975524722e-06, + "loss": 0.7703, + "step": 8134 + }, + { + "epoch": 0.2493257324996935, + "grad_norm": 1.7651411067896026, + "learning_rate": 8.79073335324796e-06, + "loss": 0.7208, + "step": 8135 + }, + { + "epoch": 0.2493563810224347, + "grad_norm": 1.0489514162792806, + "learning_rate": 8.790409693620132e-06, + "loss": 0.4844, + "step": 8136 + }, + { + "epoch": 0.24938702954517591, + "grad_norm": 1.5950323131073934, + "learning_rate": 8.790085996644426e-06, + "loss": 0.7027, + "step": 8137 + }, + { + "epoch": 0.24941767806791712, + "grad_norm": 1.669429751117589, + "learning_rate": 8.789762262324035e-06, + "loss": 0.7274, + "step": 8138 + }, + { + "epoch": 0.24944832659065833, + "grad_norm": 1.7399541810651085, + "learning_rate": 8.789438490662146e-06, + "loss": 0.683, + "step": 8139 + }, + { + "epoch": 0.24947897511339953, + "grad_norm": 0.8158320755806553, + "learning_rate": 8.78911468166195e-06, + "loss": 0.4722, + "step": 8140 + }, + { + "epoch": 0.24950962363614074, + "grad_norm": 0.7660723634566271, + "learning_rate": 8.788790835326637e-06, + "loss": 0.4821, + "step": 8141 + }, + { + "epoch": 0.24954027215888194, + "grad_norm": 1.7908154717399816, + "learning_rate": 8.7884669516594e-06, + "loss": 0.6775, + "step": 8142 + }, + { + "epoch": 0.24957092068162315, + "grad_norm": 1.6755110832429814, + "learning_rate": 8.788143030663427e-06, + "loss": 0.7292, + "step": 8143 + }, + { + "epoch": 0.24960156920436435, + "grad_norm": 1.606974487648688, + "learning_rate": 8.787819072341914e-06, + "loss": 0.6495, + "step": 8144 + }, + { + "epoch": 0.24963221772710556, + "grad_norm": 2.226780894722849, + "learning_rate": 8.787495076698049e-06, + "loss": 0.6802, + "step": 8145 + }, + { + "epoch": 0.24966286624984677, + "grad_norm": 0.9354319240718054, + "learning_rate": 8.787171043735025e-06, + "loss": 0.474, + "step": 8146 + }, + { + "epoch": 0.24969351477258797, + "grad_norm": 1.9008648492028457, + "learning_rate": 8.786846973456036e-06, + "loss": 0.7644, + "step": 8147 + }, + { + "epoch": 
0.24972416329532918, + "grad_norm": 1.9671117277050176, + "learning_rate": 8.786522865864275e-06, + "loss": 0.8012, + "step": 8148 + }, + { + "epoch": 0.24975481181807038, + "grad_norm": 1.8203806563774312, + "learning_rate": 8.786198720962937e-06, + "loss": 0.7702, + "step": 8149 + }, + { + "epoch": 0.24978546034081156, + "grad_norm": 1.743996173674167, + "learning_rate": 8.785874538755212e-06, + "loss": 0.7765, + "step": 8150 + }, + { + "epoch": 0.24981610886355277, + "grad_norm": 1.5812807604108041, + "learning_rate": 8.785550319244298e-06, + "loss": 0.64, + "step": 8151 + }, + { + "epoch": 0.24984675738629397, + "grad_norm": 2.0274197904731794, + "learning_rate": 8.785226062433387e-06, + "loss": 0.6955, + "step": 8152 + }, + { + "epoch": 0.24987740590903518, + "grad_norm": 1.7121658790914607, + "learning_rate": 8.784901768325676e-06, + "loss": 0.7778, + "step": 8153 + }, + { + "epoch": 0.24990805443177638, + "grad_norm": 0.9731024984712795, + "learning_rate": 8.784577436924359e-06, + "loss": 0.4603, + "step": 8154 + }, + { + "epoch": 0.2499387029545176, + "grad_norm": 1.9066827604145415, + "learning_rate": 8.784253068232634e-06, + "loss": 0.7787, + "step": 8155 + }, + { + "epoch": 0.2499693514772588, + "grad_norm": 1.6848128605170574, + "learning_rate": 8.783928662253693e-06, + "loss": 0.7419, + "step": 8156 + }, + { + "epoch": 0.25, + "grad_norm": 1.7387493538107448, + "learning_rate": 8.783604218990735e-06, + "loss": 0.7048, + "step": 8157 + }, + { + "epoch": 0.2500306485227412, + "grad_norm": 1.611657850609936, + "learning_rate": 8.783279738446957e-06, + "loss": 0.6691, + "step": 8158 + }, + { + "epoch": 0.2500612970454824, + "grad_norm": 1.7170447168162335, + "learning_rate": 8.782955220625556e-06, + "loss": 0.7266, + "step": 8159 + }, + { + "epoch": 0.2500919455682236, + "grad_norm": 1.706139617991794, + "learning_rate": 8.78263066552973e-06, + "loss": 0.7313, + "step": 8160 + }, + { + "epoch": 0.2501225940909648, + "grad_norm": 1.7930833030209823, + "learning_rate": 8.782306073162674e-06, + "loss": 0.6645, + "step": 8161 + }, + { + "epoch": 0.25015324261370603, + "grad_norm": 1.8582458425705777, + "learning_rate": 8.78198144352759e-06, + "loss": 0.679, + "step": 8162 + }, + { + "epoch": 0.25018389113644723, + "grad_norm": 1.888517741309403, + "learning_rate": 8.781656776627674e-06, + "loss": 0.7583, + "step": 8163 + }, + { + "epoch": 0.25021453965918844, + "grad_norm": 2.085155758029387, + "learning_rate": 8.78133207246613e-06, + "loss": 0.8652, + "step": 8164 + }, + { + "epoch": 0.25024518818192965, + "grad_norm": 1.670592341576558, + "learning_rate": 8.78100733104615e-06, + "loss": 0.6159, + "step": 8165 + }, + { + "epoch": 0.25027583670467085, + "grad_norm": 1.6629622438669016, + "learning_rate": 8.780682552370937e-06, + "loss": 0.7593, + "step": 8166 + }, + { + "epoch": 0.25030648522741206, + "grad_norm": 1.0174974009887146, + "learning_rate": 8.780357736443693e-06, + "loss": 0.4958, + "step": 8167 + }, + { + "epoch": 0.25033713375015326, + "grad_norm": 1.7628717827119185, + "learning_rate": 8.780032883267617e-06, + "loss": 0.7557, + "step": 8168 + }, + { + "epoch": 0.25036778227289447, + "grad_norm": 0.8084029416295773, + "learning_rate": 8.779707992845909e-06, + "loss": 0.4752, + "step": 8169 + }, + { + "epoch": 0.2503984307956357, + "grad_norm": 0.7979079743431448, + "learning_rate": 8.779383065181772e-06, + "loss": 0.5014, + "step": 8170 + }, + { + "epoch": 0.2504290793183769, + "grad_norm": 1.4804405823958047, + "learning_rate": 8.779058100278407e-06, + "loss": 0.6498, + 
"step": 8171 + }, + { + "epoch": 0.2504597278411181, + "grad_norm": 1.6783741844980617, + "learning_rate": 8.778733098139014e-06, + "loss": 0.7676, + "step": 8172 + }, + { + "epoch": 0.25049037636385924, + "grad_norm": 1.6078346803198134, + "learning_rate": 8.778408058766796e-06, + "loss": 0.6572, + "step": 8173 + }, + { + "epoch": 0.25052102488660044, + "grad_norm": 0.974346512454923, + "learning_rate": 8.778082982164959e-06, + "loss": 0.4752, + "step": 8174 + }, + { + "epoch": 0.25055167340934165, + "grad_norm": 1.7907482833604942, + "learning_rate": 8.777757868336703e-06, + "loss": 0.7784, + "step": 8175 + }, + { + "epoch": 0.25058232193208285, + "grad_norm": 1.838933913203009, + "learning_rate": 8.777432717285232e-06, + "loss": 0.6834, + "step": 8176 + }, + { + "epoch": 0.25061297045482406, + "grad_norm": 1.9728130276071572, + "learning_rate": 8.777107529013751e-06, + "loss": 0.7776, + "step": 8177 + }, + { + "epoch": 0.25064361897756526, + "grad_norm": 1.757887785278414, + "learning_rate": 8.776782303525462e-06, + "loss": 0.667, + "step": 8178 + }, + { + "epoch": 0.25067426750030647, + "grad_norm": 1.7018872744337081, + "learning_rate": 8.776457040823572e-06, + "loss": 0.7176, + "step": 8179 + }, + { + "epoch": 0.2507049160230477, + "grad_norm": 1.8967408895938391, + "learning_rate": 8.776131740911283e-06, + "loss": 0.7582, + "step": 8180 + }, + { + "epoch": 0.2507355645457889, + "grad_norm": 1.6178065143960134, + "learning_rate": 8.775806403791802e-06, + "loss": 0.6689, + "step": 8181 + }, + { + "epoch": 0.2507662130685301, + "grad_norm": 0.8463481179078944, + "learning_rate": 8.775481029468334e-06, + "loss": 0.4747, + "step": 8182 + }, + { + "epoch": 0.2507968615912713, + "grad_norm": 1.805977631354502, + "learning_rate": 8.775155617944087e-06, + "loss": 0.6889, + "step": 8183 + }, + { + "epoch": 0.2508275101140125, + "grad_norm": 1.8703418288419706, + "learning_rate": 8.774830169222263e-06, + "loss": 0.7077, + "step": 8184 + }, + { + "epoch": 0.2508581586367537, + "grad_norm": 1.7707433979291214, + "learning_rate": 8.774504683306076e-06, + "loss": 0.7087, + "step": 8185 + }, + { + "epoch": 0.2508888071594949, + "grad_norm": 2.013909643949386, + "learning_rate": 8.774179160198725e-06, + "loss": 0.7301, + "step": 8186 + }, + { + "epoch": 0.2509194556822361, + "grad_norm": 2.1668065318121674, + "learning_rate": 8.773853599903422e-06, + "loss": 0.7039, + "step": 8187 + }, + { + "epoch": 0.2509501042049773, + "grad_norm": 0.9274470922401972, + "learning_rate": 8.773528002423373e-06, + "loss": 0.4854, + "step": 8188 + }, + { + "epoch": 0.2509807527277185, + "grad_norm": 1.6340687693893292, + "learning_rate": 8.773202367761788e-06, + "loss": 0.8318, + "step": 8189 + }, + { + "epoch": 0.25101140125045973, + "grad_norm": 1.6539129392252416, + "learning_rate": 8.772876695921874e-06, + "loss": 0.813, + "step": 8190 + }, + { + "epoch": 0.25104204977320094, + "grad_norm": 1.8687013059307644, + "learning_rate": 8.772550986906843e-06, + "loss": 0.6456, + "step": 8191 + }, + { + "epoch": 0.25107269829594214, + "grad_norm": 2.192829487211569, + "learning_rate": 8.7722252407199e-06, + "loss": 0.6596, + "step": 8192 + }, + { + "epoch": 0.25110334681868335, + "grad_norm": 0.8504424880010482, + "learning_rate": 8.771899457364256e-06, + "loss": 0.4762, + "step": 8193 + }, + { + "epoch": 0.25113399534142455, + "grad_norm": 2.037314422551662, + "learning_rate": 8.771573636843123e-06, + "loss": 0.7456, + "step": 8194 + }, + { + "epoch": 0.25116464386416576, + "grad_norm": 1.643845185104976, + 
"learning_rate": 8.771247779159708e-06, + "loss": 0.7417, + "step": 8195 + }, + { + "epoch": 0.25119529238690697, + "grad_norm": 1.8064172119136421, + "learning_rate": 8.770921884317225e-06, + "loss": 0.7511, + "step": 8196 + }, + { + "epoch": 0.25122594090964817, + "grad_norm": 1.9012556350400156, + "learning_rate": 8.770595952318885e-06, + "loss": 0.7458, + "step": 8197 + }, + { + "epoch": 0.2512565894323894, + "grad_norm": 1.812873854619686, + "learning_rate": 8.770269983167896e-06, + "loss": 0.7851, + "step": 8198 + }, + { + "epoch": 0.2512872379551306, + "grad_norm": 1.9789564468626701, + "learning_rate": 8.769943976867473e-06, + "loss": 0.6821, + "step": 8199 + }, + { + "epoch": 0.2513178864778718, + "grad_norm": 2.069635542925194, + "learning_rate": 8.76961793342083e-06, + "loss": 0.7192, + "step": 8200 + }, + { + "epoch": 0.251348535000613, + "grad_norm": 1.921008008944625, + "learning_rate": 8.769291852831172e-06, + "loss": 0.7794, + "step": 8201 + }, + { + "epoch": 0.2513791835233542, + "grad_norm": 0.8282896500383892, + "learning_rate": 8.76896573510172e-06, + "loss": 0.4611, + "step": 8202 + }, + { + "epoch": 0.2514098320460954, + "grad_norm": 0.8573182664049317, + "learning_rate": 8.768639580235685e-06, + "loss": 0.4802, + "step": 8203 + }, + { + "epoch": 0.25144048056883656, + "grad_norm": 0.8091192037748193, + "learning_rate": 8.768313388236278e-06, + "loss": 0.4983, + "step": 8204 + }, + { + "epoch": 0.25147112909157776, + "grad_norm": 1.9615884055017265, + "learning_rate": 8.767987159106717e-06, + "loss": 0.8018, + "step": 8205 + }, + { + "epoch": 0.25150177761431897, + "grad_norm": 1.7776841390516476, + "learning_rate": 8.767660892850214e-06, + "loss": 0.7857, + "step": 8206 + }, + { + "epoch": 0.25153242613706017, + "grad_norm": 1.5838204722958238, + "learning_rate": 8.767334589469982e-06, + "loss": 0.57, + "step": 8207 + }, + { + "epoch": 0.2515630746598014, + "grad_norm": 1.48235702700454, + "learning_rate": 8.76700824896924e-06, + "loss": 0.6656, + "step": 8208 + }, + { + "epoch": 0.2515937231825426, + "grad_norm": 1.8555142208456803, + "learning_rate": 8.766681871351202e-06, + "loss": 0.7462, + "step": 8209 + }, + { + "epoch": 0.2516243717052838, + "grad_norm": 1.6489534878269785, + "learning_rate": 8.766355456619085e-06, + "loss": 0.7315, + "step": 8210 + }, + { + "epoch": 0.251655020228025, + "grad_norm": 1.7664394913818218, + "learning_rate": 8.766029004776102e-06, + "loss": 0.741, + "step": 8211 + }, + { + "epoch": 0.2516856687507662, + "grad_norm": 2.0911168673781604, + "learning_rate": 8.765702515825472e-06, + "loss": 0.6413, + "step": 8212 + }, + { + "epoch": 0.2517163172735074, + "grad_norm": 1.7440782396855483, + "learning_rate": 8.765375989770412e-06, + "loss": 0.6666, + "step": 8213 + }, + { + "epoch": 0.2517469657962486, + "grad_norm": 1.1988887003975797, + "learning_rate": 8.765049426614138e-06, + "loss": 0.4807, + "step": 8214 + }, + { + "epoch": 0.2517776143189898, + "grad_norm": 1.926882277046341, + "learning_rate": 8.764722826359871e-06, + "loss": 0.6693, + "step": 8215 + }, + { + "epoch": 0.251808262841731, + "grad_norm": 1.9948593652432978, + "learning_rate": 8.764396189010824e-06, + "loss": 0.7532, + "step": 8216 + }, + { + "epoch": 0.25183891136447223, + "grad_norm": 1.8323598933230683, + "learning_rate": 8.76406951457022e-06, + "loss": 0.8217, + "step": 8217 + }, + { + "epoch": 0.25186955988721343, + "grad_norm": 1.5370188421474722, + "learning_rate": 8.763742803041275e-06, + "loss": 0.694, + "step": 8218 + }, + { + "epoch": 0.25190020840995464, 
+ "grad_norm": 1.5846346750336235, + "learning_rate": 8.76341605442721e-06, + "loss": 0.6859, + "step": 8219 + }, + { + "epoch": 0.25193085693269585, + "grad_norm": 1.6804769798747972, + "learning_rate": 8.763089268731244e-06, + "loss": 0.7771, + "step": 8220 + }, + { + "epoch": 0.25196150545543705, + "grad_norm": 1.7029969325987473, + "learning_rate": 8.762762445956595e-06, + "loss": 0.6863, + "step": 8221 + }, + { + "epoch": 0.25199215397817826, + "grad_norm": 1.6443582541998143, + "learning_rate": 8.762435586106486e-06, + "loss": 0.6981, + "step": 8222 + }, + { + "epoch": 0.25202280250091946, + "grad_norm": 2.1359923616377188, + "learning_rate": 8.762108689184136e-06, + "loss": 0.7147, + "step": 8223 + }, + { + "epoch": 0.25205345102366067, + "grad_norm": 1.5194244211318049, + "learning_rate": 8.761781755192767e-06, + "loss": 0.5966, + "step": 8224 + }, + { + "epoch": 0.2520840995464019, + "grad_norm": 1.6034737395569219, + "learning_rate": 8.7614547841356e-06, + "loss": 0.6927, + "step": 8225 + }, + { + "epoch": 0.2521147480691431, + "grad_norm": 1.8938661517901905, + "learning_rate": 8.761127776015857e-06, + "loss": 0.6931, + "step": 8226 + }, + { + "epoch": 0.2521453965918843, + "grad_norm": 1.731872524263541, + "learning_rate": 8.760800730836758e-06, + "loss": 0.6708, + "step": 8227 + }, + { + "epoch": 0.2521760451146255, + "grad_norm": 0.976621854872663, + "learning_rate": 8.760473648601528e-06, + "loss": 0.4888, + "step": 8228 + }, + { + "epoch": 0.2522066936373667, + "grad_norm": 1.7767663852087168, + "learning_rate": 8.76014652931339e-06, + "loss": 0.6859, + "step": 8229 + }, + { + "epoch": 0.2522373421601079, + "grad_norm": 0.913138626010244, + "learning_rate": 8.759819372975565e-06, + "loss": 0.4955, + "step": 8230 + }, + { + "epoch": 0.2522679906828491, + "grad_norm": 1.837701732190321, + "learning_rate": 8.759492179591278e-06, + "loss": 0.7655, + "step": 8231 + }, + { + "epoch": 0.2522986392055903, + "grad_norm": 1.7501831626599966, + "learning_rate": 8.759164949163752e-06, + "loss": 0.746, + "step": 8232 + }, + { + "epoch": 0.2523292877283315, + "grad_norm": 1.6850112439157803, + "learning_rate": 8.758837681696213e-06, + "loss": 0.7269, + "step": 8233 + }, + { + "epoch": 0.2523599362510727, + "grad_norm": 1.866159913242569, + "learning_rate": 8.758510377191884e-06, + "loss": 0.69, + "step": 8234 + }, + { + "epoch": 0.2523905847738139, + "grad_norm": 1.6624840662951734, + "learning_rate": 8.75818303565399e-06, + "loss": 0.6927, + "step": 8235 + }, + { + "epoch": 0.2524212332965551, + "grad_norm": 1.7397181416295182, + "learning_rate": 8.757855657085758e-06, + "loss": 0.7279, + "step": 8236 + }, + { + "epoch": 0.2524518818192963, + "grad_norm": 1.5935995844899413, + "learning_rate": 8.757528241490413e-06, + "loss": 0.6828, + "step": 8237 + }, + { + "epoch": 0.2524825303420375, + "grad_norm": 1.7677658219263013, + "learning_rate": 8.75720078887118e-06, + "loss": 0.7094, + "step": 8238 + }, + { + "epoch": 0.2525131788647787, + "grad_norm": 1.5714384426715315, + "learning_rate": 8.756873299231287e-06, + "loss": 0.6689, + "step": 8239 + }, + { + "epoch": 0.2525438273875199, + "grad_norm": 1.900585319850464, + "learning_rate": 8.756545772573962e-06, + "loss": 0.8531, + "step": 8240 + }, + { + "epoch": 0.2525744759102611, + "grad_norm": 2.14097767970716, + "learning_rate": 8.756218208902426e-06, + "loss": 0.69, + "step": 8241 + }, + { + "epoch": 0.2526051244330023, + "grad_norm": 1.636633298585525, + "learning_rate": 8.755890608219914e-06, + "loss": 0.7415, + "step": 8242 + }, + { 
+ "epoch": 0.2526357729557435, + "grad_norm": 1.7753144181186924, + "learning_rate": 8.75556297052965e-06, + "loss": 0.7016, + "step": 8243 + }, + { + "epoch": 0.2526664214784847, + "grad_norm": 1.8722813045013813, + "learning_rate": 8.755235295834862e-06, + "loss": 0.6017, + "step": 8244 + }, + { + "epoch": 0.25269707000122593, + "grad_norm": 1.7157896936352532, + "learning_rate": 8.754907584138781e-06, + "loss": 0.6938, + "step": 8245 + }, + { + "epoch": 0.25272771852396714, + "grad_norm": 1.7328921438625293, + "learning_rate": 8.754579835444634e-06, + "loss": 0.6427, + "step": 8246 + }, + { + "epoch": 0.25275836704670834, + "grad_norm": 1.822997615673366, + "learning_rate": 8.754252049755654e-06, + "loss": 0.6055, + "step": 8247 + }, + { + "epoch": 0.25278901556944955, + "grad_norm": 1.5288033741886486, + "learning_rate": 8.753924227075064e-06, + "loss": 0.708, + "step": 8248 + }, + { + "epoch": 0.25281966409219075, + "grad_norm": 1.7466755472021698, + "learning_rate": 8.7535963674061e-06, + "loss": 0.6839, + "step": 8249 + }, + { + "epoch": 0.25285031261493196, + "grad_norm": 1.641636708630159, + "learning_rate": 8.753268470751991e-06, + "loss": 0.6614, + "step": 8250 + }, + { + "epoch": 0.25288096113767317, + "grad_norm": 1.7379526611913296, + "learning_rate": 8.752940537115969e-06, + "loss": 0.7069, + "step": 8251 + }, + { + "epoch": 0.25291160966041437, + "grad_norm": 1.7091435084163253, + "learning_rate": 8.752612566501259e-06, + "loss": 0.6555, + "step": 8252 + }, + { + "epoch": 0.2529422581831556, + "grad_norm": 1.776480587558469, + "learning_rate": 8.752284558911101e-06, + "loss": 0.78, + "step": 8253 + }, + { + "epoch": 0.2529729067058968, + "grad_norm": 1.647085658533588, + "learning_rate": 8.751956514348722e-06, + "loss": 0.6941, + "step": 8254 + }, + { + "epoch": 0.253003555228638, + "grad_norm": 1.7103061597925557, + "learning_rate": 8.751628432817355e-06, + "loss": 0.7701, + "step": 8255 + }, + { + "epoch": 0.2530342037513792, + "grad_norm": 2.265872227185117, + "learning_rate": 8.751300314320234e-06, + "loss": 0.6755, + "step": 8256 + }, + { + "epoch": 0.2530648522741204, + "grad_norm": 1.7044844841331945, + "learning_rate": 8.750972158860592e-06, + "loss": 0.7091, + "step": 8257 + }, + { + "epoch": 0.2530955007968616, + "grad_norm": 1.722608895529567, + "learning_rate": 8.75064396644166e-06, + "loss": 0.6429, + "step": 8258 + }, + { + "epoch": 0.2531261493196028, + "grad_norm": 1.6472284562349635, + "learning_rate": 8.750315737066674e-06, + "loss": 0.745, + "step": 8259 + }, + { + "epoch": 0.253156797842344, + "grad_norm": 1.3344208663472525, + "learning_rate": 8.749987470738867e-06, + "loss": 0.5095, + "step": 8260 + }, + { + "epoch": 0.2531874463650852, + "grad_norm": 1.794578519586371, + "learning_rate": 8.749659167461475e-06, + "loss": 0.6777, + "step": 8261 + }, + { + "epoch": 0.25321809488782643, + "grad_norm": 2.048429465100954, + "learning_rate": 8.749330827237731e-06, + "loss": 0.681, + "step": 8262 + }, + { + "epoch": 0.25324874341056763, + "grad_norm": 0.8246517294855131, + "learning_rate": 8.749002450070871e-06, + "loss": 0.5008, + "step": 8263 + }, + { + "epoch": 0.25327939193330884, + "grad_norm": 1.7215280094815173, + "learning_rate": 8.748674035964132e-06, + "loss": 0.7888, + "step": 8264 + }, + { + "epoch": 0.25331004045605005, + "grad_norm": 1.8364643278916226, + "learning_rate": 8.748345584920748e-06, + "loss": 0.7279, + "step": 8265 + }, + { + "epoch": 0.2533406889787912, + "grad_norm": 1.6511052126261205, + "learning_rate": 8.748017096943956e-06, + 
"loss": 0.73, + "step": 8266 + }, + { + "epoch": 0.2533713375015324, + "grad_norm": 1.8686740114582991, + "learning_rate": 8.74768857203699e-06, + "loss": 0.723, + "step": 8267 + }, + { + "epoch": 0.2534019860242736, + "grad_norm": 1.7294498260635622, + "learning_rate": 8.747360010203092e-06, + "loss": 0.7136, + "step": 8268 + }, + { + "epoch": 0.2534326345470148, + "grad_norm": 1.8350705722785912, + "learning_rate": 8.747031411445496e-06, + "loss": 0.7161, + "step": 8269 + }, + { + "epoch": 0.253463283069756, + "grad_norm": 1.6817595411799233, + "learning_rate": 8.746702775767442e-06, + "loss": 0.6531, + "step": 8270 + }, + { + "epoch": 0.2534939315924972, + "grad_norm": 1.8885827598490665, + "learning_rate": 8.746374103172166e-06, + "loss": 0.7254, + "step": 8271 + }, + { + "epoch": 0.25352458011523843, + "grad_norm": 1.1156748165963646, + "learning_rate": 8.746045393662908e-06, + "loss": 0.4927, + "step": 8272 + }, + { + "epoch": 0.25355522863797963, + "grad_norm": 1.6128572040630464, + "learning_rate": 8.745716647242905e-06, + "loss": 0.6681, + "step": 8273 + }, + { + "epoch": 0.25358587716072084, + "grad_norm": 1.4940177758009865, + "learning_rate": 8.7453878639154e-06, + "loss": 0.624, + "step": 8274 + }, + { + "epoch": 0.25361652568346205, + "grad_norm": 1.7229532937936725, + "learning_rate": 8.745059043683629e-06, + "loss": 0.7201, + "step": 8275 + }, + { + "epoch": 0.25364717420620325, + "grad_norm": 1.734836621776242, + "learning_rate": 8.744730186550831e-06, + "loss": 0.7491, + "step": 8276 + }, + { + "epoch": 0.25367782272894446, + "grad_norm": 1.7770292859507255, + "learning_rate": 8.74440129252025e-06, + "loss": 0.6832, + "step": 8277 + }, + { + "epoch": 0.25370847125168566, + "grad_norm": 1.947432529096878, + "learning_rate": 8.744072361595124e-06, + "loss": 0.7898, + "step": 8278 + }, + { + "epoch": 0.25373911977442687, + "grad_norm": 1.8571941385540038, + "learning_rate": 8.743743393778697e-06, + "loss": 0.7425, + "step": 8279 + }, + { + "epoch": 0.2537697682971681, + "grad_norm": 1.6851886819254365, + "learning_rate": 8.743414389074208e-06, + "loss": 0.697, + "step": 8280 + }, + { + "epoch": 0.2538004168199093, + "grad_norm": 1.8395055556305475, + "learning_rate": 8.743085347484899e-06, + "loss": 0.6782, + "step": 8281 + }, + { + "epoch": 0.2538310653426505, + "grad_norm": 1.7953066331554905, + "learning_rate": 8.742756269014012e-06, + "loss": 0.7665, + "step": 8282 + }, + { + "epoch": 0.2538617138653917, + "grad_norm": 1.7350825275581094, + "learning_rate": 8.74242715366479e-06, + "loss": 0.7984, + "step": 8283 + }, + { + "epoch": 0.2538923623881329, + "grad_norm": 1.9248612823342641, + "learning_rate": 8.742098001440474e-06, + "loss": 0.8518, + "step": 8284 + }, + { + "epoch": 0.2539230109108741, + "grad_norm": 1.8742549623983331, + "learning_rate": 8.741768812344311e-06, + "loss": 0.8276, + "step": 8285 + }, + { + "epoch": 0.2539536594336153, + "grad_norm": 1.6578467902490612, + "learning_rate": 8.741439586379543e-06, + "loss": 0.6042, + "step": 8286 + }, + { + "epoch": 0.2539843079563565, + "grad_norm": 1.7502781604046858, + "learning_rate": 8.74111032354941e-06, + "loss": 0.7668, + "step": 8287 + }, + { + "epoch": 0.2540149564790977, + "grad_norm": 1.6029650239218078, + "learning_rate": 8.740781023857163e-06, + "loss": 0.5953, + "step": 8288 + }, + { + "epoch": 0.2540456050018389, + "grad_norm": 1.757026709465014, + "learning_rate": 8.740451687306043e-06, + "loss": 0.6633, + "step": 8289 + }, + { + "epoch": 0.25407625352458013, + "grad_norm": 1.6723836608121532, + 
"learning_rate": 8.740122313899295e-06, + "loss": 0.6962, + "step": 8290 + }, + { + "epoch": 0.25410690204732134, + "grad_norm": 1.6419029238388363, + "learning_rate": 8.739792903640166e-06, + "loss": 0.6772, + "step": 8291 + }, + { + "epoch": 0.25413755057006254, + "grad_norm": 1.7828527402774517, + "learning_rate": 8.7394634565319e-06, + "loss": 0.6509, + "step": 8292 + }, + { + "epoch": 0.25416819909280375, + "grad_norm": 1.9158920541299576, + "learning_rate": 8.739133972577744e-06, + "loss": 0.7664, + "step": 8293 + }, + { + "epoch": 0.25419884761554495, + "grad_norm": 1.6339000001963637, + "learning_rate": 8.738804451780943e-06, + "loss": 0.6162, + "step": 8294 + }, + { + "epoch": 0.25422949613828616, + "grad_norm": 1.8233820564685999, + "learning_rate": 8.738474894144747e-06, + "loss": 0.7016, + "step": 8295 + }, + { + "epoch": 0.25426014466102737, + "grad_norm": 1.6539776428525337, + "learning_rate": 8.7381452996724e-06, + "loss": 0.5503, + "step": 8296 + }, + { + "epoch": 0.2542907931837685, + "grad_norm": 1.731269014378508, + "learning_rate": 8.737815668367152e-06, + "loss": 0.7178, + "step": 8297 + }, + { + "epoch": 0.2543214417065097, + "grad_norm": 1.723799860252847, + "learning_rate": 8.737486000232247e-06, + "loss": 0.6867, + "step": 8298 + }, + { + "epoch": 0.2543520902292509, + "grad_norm": 1.8229923255071994, + "learning_rate": 8.737156295270938e-06, + "loss": 0.6226, + "step": 8299 + }, + { + "epoch": 0.25438273875199213, + "grad_norm": 2.23370665616045, + "learning_rate": 8.736826553486473e-06, + "loss": 0.6859, + "step": 8300 + }, + { + "epoch": 0.25441338727473334, + "grad_norm": 1.0144723484981943, + "learning_rate": 8.736496774882099e-06, + "loss": 0.4914, + "step": 8301 + }, + { + "epoch": 0.25444403579747454, + "grad_norm": 1.7596120261317694, + "learning_rate": 8.736166959461065e-06, + "loss": 0.6908, + "step": 8302 + }, + { + "epoch": 0.25447468432021575, + "grad_norm": 1.6681673037284852, + "learning_rate": 8.735837107226624e-06, + "loss": 0.7938, + "step": 8303 + }, + { + "epoch": 0.25450533284295696, + "grad_norm": 1.9536644188452459, + "learning_rate": 8.735507218182023e-06, + "loss": 0.8104, + "step": 8304 + }, + { + "epoch": 0.25453598136569816, + "grad_norm": 2.0851369005629445, + "learning_rate": 8.735177292330514e-06, + "loss": 0.7633, + "step": 8305 + }, + { + "epoch": 0.25456662988843937, + "grad_norm": 0.8316338286235772, + "learning_rate": 8.734847329675349e-06, + "loss": 0.4758, + "step": 8306 + }, + { + "epoch": 0.25459727841118057, + "grad_norm": 1.9066066874540006, + "learning_rate": 8.734517330219775e-06, + "loss": 0.7462, + "step": 8307 + }, + { + "epoch": 0.2546279269339218, + "grad_norm": 1.7128679333296792, + "learning_rate": 8.734187293967046e-06, + "loss": 0.6289, + "step": 8308 + }, + { + "epoch": 0.254658575456663, + "grad_norm": 0.8111693474772278, + "learning_rate": 8.733857220920416e-06, + "loss": 0.4985, + "step": 8309 + }, + { + "epoch": 0.2546892239794042, + "grad_norm": 1.9152715503715343, + "learning_rate": 8.733527111083136e-06, + "loss": 0.6873, + "step": 8310 + }, + { + "epoch": 0.2547198725021454, + "grad_norm": 1.7940728422080654, + "learning_rate": 8.733196964458457e-06, + "loss": 0.7284, + "step": 8311 + }, + { + "epoch": 0.2547505210248866, + "grad_norm": 1.8553497153925342, + "learning_rate": 8.732866781049632e-06, + "loss": 0.7085, + "step": 8312 + }, + { + "epoch": 0.2547811695476278, + "grad_norm": 1.6516916871140994, + "learning_rate": 8.732536560859917e-06, + "loss": 0.7796, + "step": 8313 + }, + { + "epoch": 
0.254811818070369, + "grad_norm": 1.646349480443621, + "learning_rate": 8.732206303892564e-06, + "loss": 0.7516, + "step": 8314 + }, + { + "epoch": 0.2548424665931102, + "grad_norm": 1.8598157789135503, + "learning_rate": 8.731876010150827e-06, + "loss": 0.8129, + "step": 8315 + }, + { + "epoch": 0.2548731151158514, + "grad_norm": 0.7933146506037795, + "learning_rate": 8.731545679637962e-06, + "loss": 0.4705, + "step": 8316 + }, + { + "epoch": 0.25490376363859263, + "grad_norm": 2.0118974653611907, + "learning_rate": 8.731215312357221e-06, + "loss": 0.7507, + "step": 8317 + }, + { + "epoch": 0.25493441216133383, + "grad_norm": 1.95910520741383, + "learning_rate": 8.730884908311862e-06, + "loss": 0.8248, + "step": 8318 + }, + { + "epoch": 0.25496506068407504, + "grad_norm": 1.6848872809108226, + "learning_rate": 8.730554467505139e-06, + "loss": 0.6736, + "step": 8319 + }, + { + "epoch": 0.25499570920681625, + "grad_norm": 1.5453417656516217, + "learning_rate": 8.730223989940307e-06, + "loss": 0.6274, + "step": 8320 + }, + { + "epoch": 0.25502635772955745, + "grad_norm": 1.5746022282973824, + "learning_rate": 8.729893475620626e-06, + "loss": 0.7231, + "step": 8321 + }, + { + "epoch": 0.25505700625229866, + "grad_norm": 1.4355065081186082, + "learning_rate": 8.729562924549348e-06, + "loss": 0.5973, + "step": 8322 + }, + { + "epoch": 0.25508765477503986, + "grad_norm": 1.8074943767672662, + "learning_rate": 8.729232336729734e-06, + "loss": 0.7404, + "step": 8323 + }, + { + "epoch": 0.25511830329778107, + "grad_norm": 1.7692299665931723, + "learning_rate": 8.728901712165039e-06, + "loss": 0.7513, + "step": 8324 + }, + { + "epoch": 0.2551489518205223, + "grad_norm": 1.7560312540894252, + "learning_rate": 8.728571050858522e-06, + "loss": 0.751, + "step": 8325 + }, + { + "epoch": 0.2551796003432635, + "grad_norm": 1.6045489504995958, + "learning_rate": 8.72824035281344e-06, + "loss": 0.712, + "step": 8326 + }, + { + "epoch": 0.2552102488660047, + "grad_norm": 1.5826984275734102, + "learning_rate": 8.727909618033051e-06, + "loss": 0.6891, + "step": 8327 + }, + { + "epoch": 0.25524089738874584, + "grad_norm": 0.9087535493071844, + "learning_rate": 8.727578846520615e-06, + "loss": 0.5023, + "step": 8328 + }, + { + "epoch": 0.25527154591148704, + "grad_norm": 0.7977649531790899, + "learning_rate": 8.727248038279392e-06, + "loss": 0.5029, + "step": 8329 + }, + { + "epoch": 0.25530219443422825, + "grad_norm": 2.166310332558424, + "learning_rate": 8.72691719331264e-06, + "loss": 0.8332, + "step": 8330 + }, + { + "epoch": 0.25533284295696945, + "grad_norm": 1.759692088934856, + "learning_rate": 8.72658631162362e-06, + "loss": 0.7507, + "step": 8331 + }, + { + "epoch": 0.25536349147971066, + "grad_norm": 1.6367398163481908, + "learning_rate": 8.72625539321559e-06, + "loss": 0.7159, + "step": 8332 + }, + { + "epoch": 0.25539414000245186, + "grad_norm": 1.9151824130894286, + "learning_rate": 8.725924438091813e-06, + "loss": 0.6953, + "step": 8333 + }, + { + "epoch": 0.25542478852519307, + "grad_norm": 0.8597640278463627, + "learning_rate": 8.72559344625555e-06, + "loss": 0.4669, + "step": 8334 + }, + { + "epoch": 0.2554554370479343, + "grad_norm": 1.6546234705621516, + "learning_rate": 8.72526241771006e-06, + "loss": 0.7102, + "step": 8335 + }, + { + "epoch": 0.2554860855706755, + "grad_norm": 1.705046322015247, + "learning_rate": 8.724931352458605e-06, + "loss": 0.6572, + "step": 8336 + }, + { + "epoch": 0.2555167340934167, + "grad_norm": 0.8556264499014846, + "learning_rate": 8.72460025050445e-06, + 
"loss": 0.4818, + "step": 8337 + }, + { + "epoch": 0.2555473826161579, + "grad_norm": 1.9989113330714605, + "learning_rate": 8.724269111850857e-06, + "loss": 0.7341, + "step": 8338 + }, + { + "epoch": 0.2555780311388991, + "grad_norm": 1.5481197218282226, + "learning_rate": 8.723937936501086e-06, + "loss": 0.7037, + "step": 8339 + }, + { + "epoch": 0.2556086796616403, + "grad_norm": 1.8759560569021076, + "learning_rate": 8.723606724458402e-06, + "loss": 0.6535, + "step": 8340 + }, + { + "epoch": 0.2556393281843815, + "grad_norm": 0.8425278117290913, + "learning_rate": 8.72327547572607e-06, + "loss": 0.4829, + "step": 8341 + }, + { + "epoch": 0.2556699767071227, + "grad_norm": 2.020639733341731, + "learning_rate": 8.72294419030735e-06, + "loss": 0.7256, + "step": 8342 + }, + { + "epoch": 0.2557006252298639, + "grad_norm": 1.6610525492974524, + "learning_rate": 8.72261286820551e-06, + "loss": 0.6732, + "step": 8343 + }, + { + "epoch": 0.2557312737526051, + "grad_norm": 2.0221452712459276, + "learning_rate": 8.72228150942381e-06, + "loss": 0.7789, + "step": 8344 + }, + { + "epoch": 0.25576192227534633, + "grad_norm": 1.7813775382748243, + "learning_rate": 8.72195011396552e-06, + "loss": 0.7362, + "step": 8345 + }, + { + "epoch": 0.25579257079808754, + "grad_norm": 1.7663585805781763, + "learning_rate": 8.721618681833903e-06, + "loss": 0.7699, + "step": 8346 + }, + { + "epoch": 0.25582321932082874, + "grad_norm": 1.6518378176899262, + "learning_rate": 8.721287213032225e-06, + "loss": 0.7187, + "step": 8347 + }, + { + "epoch": 0.25585386784356995, + "grad_norm": 1.9312860893602808, + "learning_rate": 8.720955707563752e-06, + "loss": 0.7198, + "step": 8348 + }, + { + "epoch": 0.25588451636631115, + "grad_norm": 1.5491731922888994, + "learning_rate": 8.72062416543175e-06, + "loss": 0.6844, + "step": 8349 + }, + { + "epoch": 0.25591516488905236, + "grad_norm": 1.5836333638695335, + "learning_rate": 8.720292586639485e-06, + "loss": 0.639, + "step": 8350 + }, + { + "epoch": 0.25594581341179357, + "grad_norm": 1.6161671644672757, + "learning_rate": 8.719960971190227e-06, + "loss": 0.7637, + "step": 8351 + }, + { + "epoch": 0.25597646193453477, + "grad_norm": 1.8297514160265647, + "learning_rate": 8.719629319087242e-06, + "loss": 0.7231, + "step": 8352 + }, + { + "epoch": 0.256007110457276, + "grad_norm": 1.7585163469256149, + "learning_rate": 8.719297630333796e-06, + "loss": 0.7422, + "step": 8353 + }, + { + "epoch": 0.2560377589800172, + "grad_norm": 1.6530969914452474, + "learning_rate": 8.71896590493316e-06, + "loss": 0.7474, + "step": 8354 + }, + { + "epoch": 0.2560684075027584, + "grad_norm": 1.6547793943024607, + "learning_rate": 8.718634142888601e-06, + "loss": 0.7818, + "step": 8355 + }, + { + "epoch": 0.2560990560254996, + "grad_norm": 1.7148493476266373, + "learning_rate": 8.718302344203388e-06, + "loss": 0.6701, + "step": 8356 + }, + { + "epoch": 0.2561297045482408, + "grad_norm": 0.8488275056588236, + "learning_rate": 8.717970508880791e-06, + "loss": 0.4966, + "step": 8357 + }, + { + "epoch": 0.256160353070982, + "grad_norm": 1.5752875622996636, + "learning_rate": 8.71763863692408e-06, + "loss": 0.7094, + "step": 8358 + }, + { + "epoch": 0.25619100159372316, + "grad_norm": 1.7607109785691897, + "learning_rate": 8.717306728336523e-06, + "loss": 0.7121, + "step": 8359 + }, + { + "epoch": 0.25622165011646436, + "grad_norm": 1.8844511059619384, + "learning_rate": 8.716974783121393e-06, + "loss": 0.7355, + "step": 8360 + }, + { + "epoch": 0.25625229863920557, + "grad_norm": 1.6737969043364742, 
+ "learning_rate": 8.716642801281959e-06, + "loss": 0.8603, + "step": 8361 + }, + { + "epoch": 0.2562829471619468, + "grad_norm": 1.7270632077697883, + "learning_rate": 8.716310782821493e-06, + "loss": 0.7059, + "step": 8362 + }, + { + "epoch": 0.256313595684688, + "grad_norm": 1.632963320385186, + "learning_rate": 8.715978727743263e-06, + "loss": 0.6565, + "step": 8363 + }, + { + "epoch": 0.2563442442074292, + "grad_norm": 1.8928079839220038, + "learning_rate": 8.715646636050548e-06, + "loss": 0.7782, + "step": 8364 + }, + { + "epoch": 0.2563748927301704, + "grad_norm": 1.5248743970718015, + "learning_rate": 8.715314507746613e-06, + "loss": 0.6248, + "step": 8365 + }, + { + "epoch": 0.2564055412529116, + "grad_norm": 1.4626851328737283, + "learning_rate": 8.714982342834735e-06, + "loss": 0.5851, + "step": 8366 + }, + { + "epoch": 0.2564361897756528, + "grad_norm": 1.899507200576532, + "learning_rate": 8.714650141318185e-06, + "loss": 0.7447, + "step": 8367 + }, + { + "epoch": 0.256466838298394, + "grad_norm": 1.595971339481739, + "learning_rate": 8.714317903200238e-06, + "loss": 0.7067, + "step": 8368 + }, + { + "epoch": 0.2564974868211352, + "grad_norm": 1.6141286466020368, + "learning_rate": 8.713985628484165e-06, + "loss": 0.7525, + "step": 8369 + }, + { + "epoch": 0.2565281353438764, + "grad_norm": 0.8928952972274125, + "learning_rate": 8.713653317173241e-06, + "loss": 0.4856, + "step": 8370 + }, + { + "epoch": 0.2565587838666176, + "grad_norm": 1.628387386351008, + "learning_rate": 8.713320969270742e-06, + "loss": 0.7011, + "step": 8371 + }, + { + "epoch": 0.25658943238935883, + "grad_norm": 1.7262389381856573, + "learning_rate": 8.71298858477994e-06, + "loss": 0.6405, + "step": 8372 + }, + { + "epoch": 0.25662008091210003, + "grad_norm": 0.7475825246800886, + "learning_rate": 8.712656163704111e-06, + "loss": 0.4744, + "step": 8373 + }, + { + "epoch": 0.25665072943484124, + "grad_norm": 1.6988921699195267, + "learning_rate": 8.712323706046533e-06, + "loss": 0.738, + "step": 8374 + }, + { + "epoch": 0.25668137795758245, + "grad_norm": 1.7186923814668655, + "learning_rate": 8.71199121181048e-06, + "loss": 0.7245, + "step": 8375 + }, + { + "epoch": 0.25671202648032365, + "grad_norm": 1.7616682378150483, + "learning_rate": 8.711658680999226e-06, + "loss": 0.633, + "step": 8376 + }, + { + "epoch": 0.25674267500306486, + "grad_norm": 1.6646229979507021, + "learning_rate": 8.71132611361605e-06, + "loss": 0.7871, + "step": 8377 + }, + { + "epoch": 0.25677332352580606, + "grad_norm": 0.8566501211132116, + "learning_rate": 8.710993509664226e-06, + "loss": 0.498, + "step": 8378 + }, + { + "epoch": 0.25680397204854727, + "grad_norm": 1.902028600784724, + "learning_rate": 8.710660869147038e-06, + "loss": 0.7252, + "step": 8379 + }, + { + "epoch": 0.2568346205712885, + "grad_norm": 1.628030777225232, + "learning_rate": 8.710328192067757e-06, + "loss": 0.7226, + "step": 8380 + }, + { + "epoch": 0.2568652690940297, + "grad_norm": 1.6795883045808453, + "learning_rate": 8.709995478429661e-06, + "loss": 0.76, + "step": 8381 + }, + { + "epoch": 0.2568959176167709, + "grad_norm": 1.744381732612409, + "learning_rate": 8.709662728236033e-06, + "loss": 0.7082, + "step": 8382 + }, + { + "epoch": 0.2569265661395121, + "grad_norm": 1.7195527509856674, + "learning_rate": 8.709329941490147e-06, + "loss": 0.7453, + "step": 8383 + }, + { + "epoch": 0.2569572146622533, + "grad_norm": 1.9695978369579445, + "learning_rate": 8.708997118195287e-06, + "loss": 0.6672, + "step": 8384 + }, + { + "epoch": 
0.2569878631849945, + "grad_norm": 1.9737477454884977, + "learning_rate": 8.708664258354727e-06, + "loss": 0.7991, + "step": 8385 + }, + { + "epoch": 0.2570185117077357, + "grad_norm": 1.7263626474072786, + "learning_rate": 8.708331361971748e-06, + "loss": 0.7323, + "step": 8386 + }, + { + "epoch": 0.2570491602304769, + "grad_norm": 1.6415251651808465, + "learning_rate": 8.707998429049633e-06, + "loss": 0.6733, + "step": 8387 + }, + { + "epoch": 0.2570798087532181, + "grad_norm": 1.705268797026797, + "learning_rate": 8.707665459591662e-06, + "loss": 0.71, + "step": 8388 + }, + { + "epoch": 0.2571104572759593, + "grad_norm": 1.6168413916741675, + "learning_rate": 8.707332453601112e-06, + "loss": 0.661, + "step": 8389 + }, + { + "epoch": 0.2571411057987005, + "grad_norm": 0.7957108621982146, + "learning_rate": 8.706999411081268e-06, + "loss": 0.4685, + "step": 8390 + }, + { + "epoch": 0.2571717543214417, + "grad_norm": 1.6289105976152514, + "learning_rate": 8.706666332035409e-06, + "loss": 0.76, + "step": 8391 + }, + { + "epoch": 0.2572024028441829, + "grad_norm": 1.7697569943672944, + "learning_rate": 8.70633321646682e-06, + "loss": 0.7895, + "step": 8392 + }, + { + "epoch": 0.2572330513669241, + "grad_norm": 1.5554597884148278, + "learning_rate": 8.70600006437878e-06, + "loss": 0.6851, + "step": 8393 + }, + { + "epoch": 0.2572636998896653, + "grad_norm": 1.492122360799134, + "learning_rate": 8.705666875774575e-06, + "loss": 0.6699, + "step": 8394 + }, + { + "epoch": 0.2572943484124065, + "grad_norm": 1.6648421365489796, + "learning_rate": 8.705333650657486e-06, + "loss": 0.7094, + "step": 8395 + }, + { + "epoch": 0.2573249969351477, + "grad_norm": 1.8745776757613501, + "learning_rate": 8.705000389030795e-06, + "loss": 0.6561, + "step": 8396 + }, + { + "epoch": 0.2573556454578889, + "grad_norm": 1.6478844441586078, + "learning_rate": 8.704667090897787e-06, + "loss": 0.7007, + "step": 8397 + }, + { + "epoch": 0.2573862939806301, + "grad_norm": 0.8660134124910732, + "learning_rate": 8.704333756261748e-06, + "loss": 0.4991, + "step": 8398 + }, + { + "epoch": 0.2574169425033713, + "grad_norm": 1.7642672283527596, + "learning_rate": 8.704000385125959e-06, + "loss": 0.7039, + "step": 8399 + }, + { + "epoch": 0.25744759102611253, + "grad_norm": 1.8390203778092833, + "learning_rate": 8.703666977493707e-06, + "loss": 0.7797, + "step": 8400 + }, + { + "epoch": 0.25747823954885374, + "grad_norm": 1.8512791971932985, + "learning_rate": 8.703333533368279e-06, + "loss": 0.7402, + "step": 8401 + }, + { + "epoch": 0.25750888807159494, + "grad_norm": 1.6336932925181702, + "learning_rate": 8.703000052752954e-06, + "loss": 0.7674, + "step": 8402 + }, + { + "epoch": 0.25753953659433615, + "grad_norm": 1.6435016101171762, + "learning_rate": 8.702666535651026e-06, + "loss": 0.6615, + "step": 8403 + }, + { + "epoch": 0.25757018511707735, + "grad_norm": 1.6897857501187918, + "learning_rate": 8.702332982065775e-06, + "loss": 0.6853, + "step": 8404 + }, + { + "epoch": 0.25760083363981856, + "grad_norm": 2.044840043285815, + "learning_rate": 8.701999392000491e-06, + "loss": 0.8007, + "step": 8405 + }, + { + "epoch": 0.25763148216255977, + "grad_norm": 1.5896832195814568, + "learning_rate": 8.701665765458458e-06, + "loss": 0.69, + "step": 8406 + }, + { + "epoch": 0.25766213068530097, + "grad_norm": 1.6560088341296983, + "learning_rate": 8.701332102442967e-06, + "loss": 0.6806, + "step": 8407 + }, + { + "epoch": 0.2576927792080422, + "grad_norm": 1.821710846250818, + "learning_rate": 8.700998402957303e-06, + "loss": 
0.7182, + "step": 8408 + }, + { + "epoch": 0.2577234277307834, + "grad_norm": 1.8763975626845038, + "learning_rate": 8.700664667004754e-06, + "loss": 0.6723, + "step": 8409 + }, + { + "epoch": 0.2577540762535246, + "grad_norm": 1.803636368854129, + "learning_rate": 8.700330894588612e-06, + "loss": 0.6775, + "step": 8410 + }, + { + "epoch": 0.2577847247762658, + "grad_norm": 1.7692628988179118, + "learning_rate": 8.69999708571216e-06, + "loss": 0.822, + "step": 8411 + }, + { + "epoch": 0.257815373299007, + "grad_norm": 1.7870267297821676, + "learning_rate": 8.69966324037869e-06, + "loss": 0.6762, + "step": 8412 + }, + { + "epoch": 0.2578460218217482, + "grad_norm": 1.8151291462373427, + "learning_rate": 8.699329358591492e-06, + "loss": 0.6969, + "step": 8413 + }, + { + "epoch": 0.2578766703444894, + "grad_norm": 0.894432608241415, + "learning_rate": 8.698995440353856e-06, + "loss": 0.4893, + "step": 8414 + }, + { + "epoch": 0.2579073188672306, + "grad_norm": 0.8304190327532127, + "learning_rate": 8.698661485669072e-06, + "loss": 0.4756, + "step": 8415 + }, + { + "epoch": 0.2579379673899718, + "grad_norm": 0.7896765247088179, + "learning_rate": 8.698327494540428e-06, + "loss": 0.4865, + "step": 8416 + }, + { + "epoch": 0.25796861591271303, + "grad_norm": 0.8026917293556042, + "learning_rate": 8.69799346697122e-06, + "loss": 0.4861, + "step": 8417 + }, + { + "epoch": 0.25799926443545423, + "grad_norm": 1.7824994794959894, + "learning_rate": 8.697659402964733e-06, + "loss": 0.733, + "step": 8418 + }, + { + "epoch": 0.25802991295819544, + "grad_norm": 1.7203201920795668, + "learning_rate": 8.697325302524264e-06, + "loss": 0.7811, + "step": 8419 + }, + { + "epoch": 0.25806056148093665, + "grad_norm": 0.9051596020771672, + "learning_rate": 8.696991165653102e-06, + "loss": 0.505, + "step": 8420 + }, + { + "epoch": 0.2580912100036778, + "grad_norm": 2.2793610767950043, + "learning_rate": 8.69665699235454e-06, + "loss": 0.5741, + "step": 8421 + }, + { + "epoch": 0.258121858526419, + "grad_norm": 1.6865910765304273, + "learning_rate": 8.69632278263187e-06, + "loss": 0.7146, + "step": 8422 + }, + { + "epoch": 0.2581525070491602, + "grad_norm": 1.9445233380063225, + "learning_rate": 8.695988536488387e-06, + "loss": 0.7548, + "step": 8423 + }, + { + "epoch": 0.2581831555719014, + "grad_norm": 1.6613248804481688, + "learning_rate": 8.695654253927384e-06, + "loss": 0.7603, + "step": 8424 + }, + { + "epoch": 0.2582138040946426, + "grad_norm": 1.5959817071263203, + "learning_rate": 8.695319934952152e-06, + "loss": 0.5625, + "step": 8425 + }, + { + "epoch": 0.2582444526173838, + "grad_norm": 1.7448403506541623, + "learning_rate": 8.694985579565988e-06, + "loss": 0.6718, + "step": 8426 + }, + { + "epoch": 0.25827510114012503, + "grad_norm": 1.7252749782483305, + "learning_rate": 8.694651187772187e-06, + "loss": 0.6633, + "step": 8427 + }, + { + "epoch": 0.25830574966286624, + "grad_norm": 1.8978434705515421, + "learning_rate": 8.694316759574042e-06, + "loss": 0.6093, + "step": 8428 + }, + { + "epoch": 0.25833639818560744, + "grad_norm": 1.6446452372650187, + "learning_rate": 8.693982294974847e-06, + "loss": 0.754, + "step": 8429 + }, + { + "epoch": 0.25836704670834865, + "grad_norm": 1.8230963016586048, + "learning_rate": 8.693647793977901e-06, + "loss": 0.6715, + "step": 8430 + }, + { + "epoch": 0.25839769523108985, + "grad_norm": 1.7912011222275717, + "learning_rate": 8.693313256586498e-06, + "loss": 0.691, + "step": 8431 + }, + { + "epoch": 0.25842834375383106, + "grad_norm": 1.9714513978124761, + 
"learning_rate": 8.692978682803934e-06, + "loss": 0.6875, + "step": 8432 + }, + { + "epoch": 0.25845899227657226, + "grad_norm": 1.8259549950306262, + "learning_rate": 8.692644072633506e-06, + "loss": 0.715, + "step": 8433 + }, + { + "epoch": 0.25848964079931347, + "grad_norm": 1.5357883934941288, + "learning_rate": 8.692309426078514e-06, + "loss": 0.7127, + "step": 8434 + }, + { + "epoch": 0.2585202893220547, + "grad_norm": 1.7450304988694465, + "learning_rate": 8.691974743142249e-06, + "loss": 0.6854, + "step": 8435 + }, + { + "epoch": 0.2585509378447959, + "grad_norm": 0.9135339102238139, + "learning_rate": 8.691640023828014e-06, + "loss": 0.4765, + "step": 8436 + }, + { + "epoch": 0.2585815863675371, + "grad_norm": 1.8377768733257325, + "learning_rate": 8.691305268139104e-06, + "loss": 0.7354, + "step": 8437 + }, + { + "epoch": 0.2586122348902783, + "grad_norm": 1.622286887480452, + "learning_rate": 8.69097047607882e-06, + "loss": 0.6727, + "step": 8438 + }, + { + "epoch": 0.2586428834130195, + "grad_norm": 1.6866220790249387, + "learning_rate": 8.690635647650458e-06, + "loss": 0.7221, + "step": 8439 + }, + { + "epoch": 0.2586735319357607, + "grad_norm": 1.6313440350686965, + "learning_rate": 8.690300782857319e-06, + "loss": 0.6423, + "step": 8440 + }, + { + "epoch": 0.2587041804585019, + "grad_norm": 1.5006000577946714, + "learning_rate": 8.689965881702704e-06, + "loss": 0.7429, + "step": 8441 + }, + { + "epoch": 0.2587348289812431, + "grad_norm": 1.8519979415544612, + "learning_rate": 8.68963094418991e-06, + "loss": 0.7038, + "step": 8442 + }, + { + "epoch": 0.2587654775039843, + "grad_norm": 1.7410413029992657, + "learning_rate": 8.689295970322238e-06, + "loss": 0.6721, + "step": 8443 + }, + { + "epoch": 0.2587961260267255, + "grad_norm": 1.8523111951284714, + "learning_rate": 8.688960960102987e-06, + "loss": 0.6977, + "step": 8444 + }, + { + "epoch": 0.25882677454946673, + "grad_norm": 2.1003546702586715, + "learning_rate": 8.688625913535464e-06, + "loss": 0.6906, + "step": 8445 + }, + { + "epoch": 0.25885742307220794, + "grad_norm": 1.8044856572840349, + "learning_rate": 8.688290830622963e-06, + "loss": 0.7599, + "step": 8446 + }, + { + "epoch": 0.25888807159494914, + "grad_norm": 1.5740630368265167, + "learning_rate": 8.68795571136879e-06, + "loss": 0.7055, + "step": 8447 + }, + { + "epoch": 0.25891872011769035, + "grad_norm": 0.8751725550364577, + "learning_rate": 8.687620555776244e-06, + "loss": 0.4838, + "step": 8448 + }, + { + "epoch": 0.25894936864043155, + "grad_norm": 0.8331170627296626, + "learning_rate": 8.687285363848631e-06, + "loss": 0.4739, + "step": 8449 + }, + { + "epoch": 0.25898001716317276, + "grad_norm": 1.7093781363378315, + "learning_rate": 8.686950135589251e-06, + "loss": 0.7387, + "step": 8450 + }, + { + "epoch": 0.25901066568591397, + "grad_norm": 1.948451158796109, + "learning_rate": 8.686614871001409e-06, + "loss": 0.686, + "step": 8451 + }, + { + "epoch": 0.2590413142086551, + "grad_norm": 1.8222141979195707, + "learning_rate": 8.686279570088408e-06, + "loss": 0.7868, + "step": 8452 + }, + { + "epoch": 0.2590719627313963, + "grad_norm": 0.8271225610604901, + "learning_rate": 8.68594423285355e-06, + "loss": 0.4708, + "step": 8453 + }, + { + "epoch": 0.2591026112541375, + "grad_norm": 1.6239412938851956, + "learning_rate": 8.68560885930014e-06, + "loss": 0.7232, + "step": 8454 + }, + { + "epoch": 0.25913325977687873, + "grad_norm": 1.617598153864809, + "learning_rate": 8.685273449431483e-06, + "loss": 0.6308, + "step": 8455 + }, + { + "epoch": 
0.25916390829961994, + "grad_norm": 1.7308594164856388, + "learning_rate": 8.684938003250885e-06, + "loss": 0.7159, + "step": 8456 + }, + { + "epoch": 0.25919455682236114, + "grad_norm": 1.5319419193171344, + "learning_rate": 8.684602520761648e-06, + "loss": 0.82, + "step": 8457 + }, + { + "epoch": 0.25922520534510235, + "grad_norm": 1.6648188440411562, + "learning_rate": 8.684267001967082e-06, + "loss": 0.6608, + "step": 8458 + }, + { + "epoch": 0.25925585386784356, + "grad_norm": 1.7988088646456675, + "learning_rate": 8.683931446870488e-06, + "loss": 0.6927, + "step": 8459 + }, + { + "epoch": 0.25928650239058476, + "grad_norm": 1.759916004911238, + "learning_rate": 8.683595855475176e-06, + "loss": 0.726, + "step": 8460 + }, + { + "epoch": 0.25931715091332597, + "grad_norm": 1.595522240783577, + "learning_rate": 8.683260227784452e-06, + "loss": 0.7251, + "step": 8461 + }, + { + "epoch": 0.2593477994360672, + "grad_norm": 1.0055642973650452, + "learning_rate": 8.682924563801622e-06, + "loss": 0.4816, + "step": 8462 + }, + { + "epoch": 0.2593784479588084, + "grad_norm": 0.8592047927951098, + "learning_rate": 8.682588863529994e-06, + "loss": 0.4957, + "step": 8463 + }, + { + "epoch": 0.2594090964815496, + "grad_norm": 1.7894870254427424, + "learning_rate": 8.682253126972878e-06, + "loss": 0.712, + "step": 8464 + }, + { + "epoch": 0.2594397450042908, + "grad_norm": 1.79757790372583, + "learning_rate": 8.681917354133576e-06, + "loss": 0.7273, + "step": 8465 + }, + { + "epoch": 0.259470393527032, + "grad_norm": 0.8527681198276372, + "learning_rate": 8.681581545015403e-06, + "loss": 0.4798, + "step": 8466 + }, + { + "epoch": 0.2595010420497732, + "grad_norm": 1.7517069637225229, + "learning_rate": 8.681245699621663e-06, + "loss": 0.7207, + "step": 8467 + }, + { + "epoch": 0.2595316905725144, + "grad_norm": 0.9127124687717627, + "learning_rate": 8.68090981795567e-06, + "loss": 0.4765, + "step": 8468 + }, + { + "epoch": 0.2595623390952556, + "grad_norm": 1.690348492220137, + "learning_rate": 8.680573900020727e-06, + "loss": 0.581, + "step": 8469 + }, + { + "epoch": 0.2595929876179968, + "grad_norm": 1.7062995512510506, + "learning_rate": 8.680237945820152e-06, + "loss": 0.6375, + "step": 8470 + }, + { + "epoch": 0.259623636140738, + "grad_norm": 1.7800282257668192, + "learning_rate": 8.679901955357247e-06, + "loss": 0.6687, + "step": 8471 + }, + { + "epoch": 0.25965428466347923, + "grad_norm": 1.6635165373857868, + "learning_rate": 8.679565928635329e-06, + "loss": 0.6632, + "step": 8472 + }, + { + "epoch": 0.25968493318622043, + "grad_norm": 1.8502104089963, + "learning_rate": 8.679229865657705e-06, + "loss": 0.7001, + "step": 8473 + }, + { + "epoch": 0.25971558170896164, + "grad_norm": 1.6566539775371072, + "learning_rate": 8.678893766427688e-06, + "loss": 0.7919, + "step": 8474 + }, + { + "epoch": 0.25974623023170285, + "grad_norm": 1.7066634303018888, + "learning_rate": 8.67855763094859e-06, + "loss": 0.633, + "step": 8475 + }, + { + "epoch": 0.25977687875444405, + "grad_norm": 1.62568018511291, + "learning_rate": 8.678221459223722e-06, + "loss": 0.6698, + "step": 8476 + }, + { + "epoch": 0.25980752727718526, + "grad_norm": 1.7680565318011723, + "learning_rate": 8.677885251256397e-06, + "loss": 0.6695, + "step": 8477 + }, + { + "epoch": 0.25983817579992646, + "grad_norm": 1.8216460987875525, + "learning_rate": 8.677549007049928e-06, + "loss": 0.8264, + "step": 8478 + }, + { + "epoch": 0.25986882432266767, + "grad_norm": 1.75256850421271, + "learning_rate": 8.677212726607627e-06, + "loss": 
0.6416, + "step": 8479 + }, + { + "epoch": 0.2598994728454089, + "grad_norm": 1.514472459312885, + "learning_rate": 8.676876409932809e-06, + "loss": 0.6064, + "step": 8480 + }, + { + "epoch": 0.2599301213681501, + "grad_norm": 1.5664644566360446, + "learning_rate": 8.676540057028785e-06, + "loss": 0.6814, + "step": 8481 + }, + { + "epoch": 0.2599607698908913, + "grad_norm": 1.0933268253050048, + "learning_rate": 8.676203667898872e-06, + "loss": 0.4924, + "step": 8482 + }, + { + "epoch": 0.25999141841363244, + "grad_norm": 1.7356497855003268, + "learning_rate": 8.675867242546386e-06, + "loss": 0.7571, + "step": 8483 + }, + { + "epoch": 0.26002206693637364, + "grad_norm": 1.8242292356528549, + "learning_rate": 8.675530780974637e-06, + "loss": 0.7873, + "step": 8484 + }, + { + "epoch": 0.26005271545911485, + "grad_norm": 0.8260855327504402, + "learning_rate": 8.675194283186944e-06, + "loss": 0.4662, + "step": 8485 + }, + { + "epoch": 0.26008336398185605, + "grad_norm": 1.8638840801451486, + "learning_rate": 8.67485774918662e-06, + "loss": 0.7986, + "step": 8486 + }, + { + "epoch": 0.26011401250459726, + "grad_norm": 1.726741250961179, + "learning_rate": 8.674521178976985e-06, + "loss": 0.7422, + "step": 8487 + }, + { + "epoch": 0.26014466102733846, + "grad_norm": 1.6346236473027775, + "learning_rate": 8.67418457256135e-06, + "loss": 0.7034, + "step": 8488 + }, + { + "epoch": 0.26017530955007967, + "grad_norm": 1.8363057196645776, + "learning_rate": 8.673847929943036e-06, + "loss": 0.713, + "step": 8489 + }, + { + "epoch": 0.2602059580728209, + "grad_norm": 1.7996197027694458, + "learning_rate": 8.673511251125358e-06, + "loss": 0.5953, + "step": 8490 + }, + { + "epoch": 0.2602366065955621, + "grad_norm": 1.6453483331567822, + "learning_rate": 8.673174536111632e-06, + "loss": 0.7828, + "step": 8491 + }, + { + "epoch": 0.2602672551183033, + "grad_norm": 1.8672178741293128, + "learning_rate": 8.672837784905178e-06, + "loss": 0.7803, + "step": 8492 + }, + { + "epoch": 0.2602979036410445, + "grad_norm": 1.6903591427063458, + "learning_rate": 8.672500997509316e-06, + "loss": 0.8038, + "step": 8493 + }, + { + "epoch": 0.2603285521637857, + "grad_norm": 1.8119397707369207, + "learning_rate": 8.67216417392736e-06, + "loss": 0.7257, + "step": 8494 + }, + { + "epoch": 0.2603592006865269, + "grad_norm": 1.8177992323664967, + "learning_rate": 8.67182731416263e-06, + "loss": 0.7118, + "step": 8495 + }, + { + "epoch": 0.2603898492092681, + "grad_norm": 1.7426083853854482, + "learning_rate": 8.67149041821845e-06, + "loss": 0.6785, + "step": 8496 + }, + { + "epoch": 0.2604204977320093, + "grad_norm": 1.7401945845716202, + "learning_rate": 8.671153486098131e-06, + "loss": 0.7453, + "step": 8497 + }, + { + "epoch": 0.2604511462547505, + "grad_norm": 1.7720113108696707, + "learning_rate": 8.670816517805e-06, + "loss": 0.6886, + "step": 8498 + }, + { + "epoch": 0.2604817947774917, + "grad_norm": 1.7189114295673438, + "learning_rate": 8.670479513342373e-06, + "loss": 0.5577, + "step": 8499 + }, + { + "epoch": 0.26051244330023293, + "grad_norm": 1.1107746470197215, + "learning_rate": 8.670142472713574e-06, + "loss": 0.5102, + "step": 8500 + }, + { + "epoch": 0.26054309182297414, + "grad_norm": 1.5491410151471403, + "learning_rate": 8.66980539592192e-06, + "loss": 0.7974, + "step": 8501 + }, + { + "epoch": 0.26057374034571534, + "grad_norm": 0.8122563623783458, + "learning_rate": 8.669468282970736e-06, + "loss": 0.4664, + "step": 8502 + }, + { + "epoch": 0.26060438886845655, + "grad_norm": 1.6000290432235387, + 
"learning_rate": 8.669131133863342e-06, + "loss": 0.7335, + "step": 8503 + }, + { + "epoch": 0.26063503739119775, + "grad_norm": 1.8387822653884545, + "learning_rate": 8.66879394860306e-06, + "loss": 0.799, + "step": 8504 + }, + { + "epoch": 0.26066568591393896, + "grad_norm": 0.8299206116641515, + "learning_rate": 8.668456727193213e-06, + "loss": 0.4603, + "step": 8505 + }, + { + "epoch": 0.26069633443668017, + "grad_norm": 1.7249628434043698, + "learning_rate": 8.668119469637122e-06, + "loss": 0.6504, + "step": 8506 + }, + { + "epoch": 0.26072698295942137, + "grad_norm": 1.722989239020549, + "learning_rate": 8.667782175938112e-06, + "loss": 0.6796, + "step": 8507 + }, + { + "epoch": 0.2607576314821626, + "grad_norm": 1.8454712008497138, + "learning_rate": 8.667444846099507e-06, + "loss": 0.7077, + "step": 8508 + }, + { + "epoch": 0.2607882800049038, + "grad_norm": 0.9013193555328425, + "learning_rate": 8.667107480124629e-06, + "loss": 0.4833, + "step": 8509 + }, + { + "epoch": 0.260818928527645, + "grad_norm": 0.8324404332624555, + "learning_rate": 8.6667700780168e-06, + "loss": 0.4912, + "step": 8510 + }, + { + "epoch": 0.2608495770503862, + "grad_norm": 1.694189811864415, + "learning_rate": 8.66643263977935e-06, + "loss": 0.6765, + "step": 8511 + }, + { + "epoch": 0.2608802255731274, + "grad_norm": 1.604830093395559, + "learning_rate": 8.666095165415602e-06, + "loss": 0.6262, + "step": 8512 + }, + { + "epoch": 0.2609108740958686, + "grad_norm": 1.839268102898304, + "learning_rate": 8.665757654928878e-06, + "loss": 0.7466, + "step": 8513 + }, + { + "epoch": 0.26094152261860976, + "grad_norm": 1.534490826301804, + "learning_rate": 8.665420108322507e-06, + "loss": 0.6226, + "step": 8514 + }, + { + "epoch": 0.26097217114135096, + "grad_norm": 0.908406257161292, + "learning_rate": 8.665082525599812e-06, + "loss": 0.4735, + "step": 8515 + }, + { + "epoch": 0.26100281966409217, + "grad_norm": 1.8385329962762944, + "learning_rate": 8.664744906764124e-06, + "loss": 0.7819, + "step": 8516 + }, + { + "epoch": 0.2610334681868334, + "grad_norm": 1.6275693648951424, + "learning_rate": 8.664407251818765e-06, + "loss": 0.6575, + "step": 8517 + }, + { + "epoch": 0.2610641167095746, + "grad_norm": 2.099361579100184, + "learning_rate": 8.664069560767064e-06, + "loss": 0.7355, + "step": 8518 + }, + { + "epoch": 0.2610947652323158, + "grad_norm": 1.7870135966817828, + "learning_rate": 8.663731833612348e-06, + "loss": 0.6632, + "step": 8519 + }, + { + "epoch": 0.261125413755057, + "grad_norm": 1.6096203375943425, + "learning_rate": 8.663394070357947e-06, + "loss": 0.6, + "step": 8520 + }, + { + "epoch": 0.2611560622777982, + "grad_norm": 1.8411834766061315, + "learning_rate": 8.663056271007185e-06, + "loss": 0.6181, + "step": 8521 + }, + { + "epoch": 0.2611867108005394, + "grad_norm": 1.6427902720707404, + "learning_rate": 8.662718435563391e-06, + "loss": 0.6642, + "step": 8522 + }, + { + "epoch": 0.2612173593232806, + "grad_norm": 0.9213677097026624, + "learning_rate": 8.662380564029897e-06, + "loss": 0.4862, + "step": 8523 + }, + { + "epoch": 0.2612480078460218, + "grad_norm": 1.711023131456096, + "learning_rate": 8.66204265641003e-06, + "loss": 0.7111, + "step": 8524 + }, + { + "epoch": 0.261278656368763, + "grad_norm": 1.6025920534778608, + "learning_rate": 8.66170471270712e-06, + "loss": 0.6402, + "step": 8525 + }, + { + "epoch": 0.2613093048915042, + "grad_norm": 1.6342431369052703, + "learning_rate": 8.661366732924496e-06, + "loss": 0.7141, + "step": 8526 + }, + { + "epoch": 0.26133995341424543, + 
"grad_norm": 1.927403591764155, + "learning_rate": 8.661028717065488e-06, + "loss": 0.7287, + "step": 8527 + }, + { + "epoch": 0.26137060193698664, + "grad_norm": 2.062127067194271, + "learning_rate": 8.660690665133429e-06, + "loss": 0.7314, + "step": 8528 + }, + { + "epoch": 0.26140125045972784, + "grad_norm": 1.6379001472461423, + "learning_rate": 8.660352577131646e-06, + "loss": 0.6721, + "step": 8529 + }, + { + "epoch": 0.26143189898246905, + "grad_norm": 1.94022176899794, + "learning_rate": 8.660014453063475e-06, + "loss": 0.7038, + "step": 8530 + }, + { + "epoch": 0.26146254750521025, + "grad_norm": 0.8326807005658289, + "learning_rate": 8.659676292932244e-06, + "loss": 0.4768, + "step": 8531 + }, + { + "epoch": 0.26149319602795146, + "grad_norm": 1.6408730259113933, + "learning_rate": 8.659338096741285e-06, + "loss": 0.7016, + "step": 8532 + }, + { + "epoch": 0.26152384455069266, + "grad_norm": 1.6797111048718598, + "learning_rate": 8.658999864493934e-06, + "loss": 0.6781, + "step": 8533 + }, + { + "epoch": 0.26155449307343387, + "grad_norm": 2.034212315972613, + "learning_rate": 8.658661596193519e-06, + "loss": 0.7308, + "step": 8534 + }, + { + "epoch": 0.2615851415961751, + "grad_norm": 1.7621814443559025, + "learning_rate": 8.658323291843375e-06, + "loss": 0.7228, + "step": 8535 + }, + { + "epoch": 0.2616157901189163, + "grad_norm": 1.7572739032157543, + "learning_rate": 8.657984951446838e-06, + "loss": 0.703, + "step": 8536 + }, + { + "epoch": 0.2616464386416575, + "grad_norm": 1.482396555768358, + "learning_rate": 8.657646575007238e-06, + "loss": 0.7663, + "step": 8537 + }, + { + "epoch": 0.2616770871643987, + "grad_norm": 1.7208636984490029, + "learning_rate": 8.65730816252791e-06, + "loss": 0.7347, + "step": 8538 + }, + { + "epoch": 0.2617077356871399, + "grad_norm": 1.678044618502701, + "learning_rate": 8.65696971401219e-06, + "loss": 0.7153, + "step": 8539 + }, + { + "epoch": 0.2617383842098811, + "grad_norm": 1.7667453922876144, + "learning_rate": 8.65663122946341e-06, + "loss": 0.7967, + "step": 8540 + }, + { + "epoch": 0.2617690327326223, + "grad_norm": 1.7282491497048267, + "learning_rate": 8.656292708884908e-06, + "loss": 0.6954, + "step": 8541 + }, + { + "epoch": 0.2617996812553635, + "grad_norm": 1.8413149775304558, + "learning_rate": 8.65595415228002e-06, + "loss": 0.6401, + "step": 8542 + }, + { + "epoch": 0.2618303297781047, + "grad_norm": 0.8403639005222127, + "learning_rate": 8.655615559652078e-06, + "loss": 0.4927, + "step": 8543 + }, + { + "epoch": 0.2618609783008459, + "grad_norm": 1.997669017027658, + "learning_rate": 8.655276931004422e-06, + "loss": 0.6611, + "step": 8544 + }, + { + "epoch": 0.2618916268235871, + "grad_norm": 1.9033567571680712, + "learning_rate": 8.654938266340384e-06, + "loss": 0.6252, + "step": 8545 + }, + { + "epoch": 0.2619222753463283, + "grad_norm": 1.885452020653928, + "learning_rate": 8.654599565663307e-06, + "loss": 0.7545, + "step": 8546 + }, + { + "epoch": 0.2619529238690695, + "grad_norm": 1.8275634564576426, + "learning_rate": 8.654260828976526e-06, + "loss": 0.721, + "step": 8547 + }, + { + "epoch": 0.2619835723918107, + "grad_norm": 2.0957076077263874, + "learning_rate": 8.653922056283376e-06, + "loss": 0.7338, + "step": 8548 + }, + { + "epoch": 0.2620142209145519, + "grad_norm": 1.8628717796433047, + "learning_rate": 8.653583247587198e-06, + "loss": 0.8019, + "step": 8549 + }, + { + "epoch": 0.2620448694372931, + "grad_norm": 1.761431507964122, + "learning_rate": 8.65324440289133e-06, + "loss": 0.6425, + "step": 8550 + }, + 
{ + "epoch": 0.2620755179600343, + "grad_norm": 1.682791521923497, + "learning_rate": 8.65290552219911e-06, + "loss": 0.6761, + "step": 8551 + }, + { + "epoch": 0.2621061664827755, + "grad_norm": 2.0194564047281003, + "learning_rate": 8.652566605513877e-06, + "loss": 0.7582, + "step": 8552 + }, + { + "epoch": 0.2621368150055167, + "grad_norm": 1.54296231118038, + "learning_rate": 8.65222765283897e-06, + "loss": 0.7028, + "step": 8553 + }, + { + "epoch": 0.2621674635282579, + "grad_norm": 1.685510228705197, + "learning_rate": 8.65188866417773e-06, + "loss": 0.6523, + "step": 8554 + }, + { + "epoch": 0.26219811205099913, + "grad_norm": 1.7664886346792887, + "learning_rate": 8.651549639533496e-06, + "loss": 0.6071, + "step": 8555 + }, + { + "epoch": 0.26222876057374034, + "grad_norm": 1.6492578739551054, + "learning_rate": 8.65121057890961e-06, + "loss": 0.6753, + "step": 8556 + }, + { + "epoch": 0.26225940909648154, + "grad_norm": 1.6949036244396574, + "learning_rate": 8.650871482309413e-06, + "loss": 0.7086, + "step": 8557 + }, + { + "epoch": 0.26229005761922275, + "grad_norm": 1.711634096448467, + "learning_rate": 8.650532349736244e-06, + "loss": 0.7245, + "step": 8558 + }, + { + "epoch": 0.26232070614196396, + "grad_norm": 0.8392855579142933, + "learning_rate": 8.650193181193444e-06, + "loss": 0.4923, + "step": 8559 + }, + { + "epoch": 0.26235135466470516, + "grad_norm": 1.9523378758115282, + "learning_rate": 8.649853976684358e-06, + "loss": 0.6575, + "step": 8560 + }, + { + "epoch": 0.26238200318744637, + "grad_norm": 1.628525026802647, + "learning_rate": 8.64951473621233e-06, + "loss": 0.6502, + "step": 8561 + }, + { + "epoch": 0.26241265171018757, + "grad_norm": 1.8497232540525717, + "learning_rate": 8.649175459780695e-06, + "loss": 0.7736, + "step": 8562 + }, + { + "epoch": 0.2624433002329288, + "grad_norm": 1.7655778924804435, + "learning_rate": 8.648836147392802e-06, + "loss": 0.7688, + "step": 8563 + }, + { + "epoch": 0.26247394875567, + "grad_norm": 1.6849766466798262, + "learning_rate": 8.648496799051994e-06, + "loss": 0.6825, + "step": 8564 + }, + { + "epoch": 0.2625045972784112, + "grad_norm": 0.8031825683618768, + "learning_rate": 8.648157414761613e-06, + "loss": 0.4758, + "step": 8565 + }, + { + "epoch": 0.2625352458011524, + "grad_norm": 1.7601157141246022, + "learning_rate": 8.647817994525002e-06, + "loss": 0.7093, + "step": 8566 + }, + { + "epoch": 0.2625658943238936, + "grad_norm": 1.7738343490545299, + "learning_rate": 8.647478538345508e-06, + "loss": 0.7867, + "step": 8567 + }, + { + "epoch": 0.2625965428466348, + "grad_norm": 1.628292213397361, + "learning_rate": 8.647139046226476e-06, + "loss": 0.7503, + "step": 8568 + }, + { + "epoch": 0.262627191369376, + "grad_norm": 1.726636846675555, + "learning_rate": 8.646799518171249e-06, + "loss": 0.8421, + "step": 8569 + }, + { + "epoch": 0.2626578398921172, + "grad_norm": 1.654186495860809, + "learning_rate": 8.646459954183173e-06, + "loss": 0.6643, + "step": 8570 + }, + { + "epoch": 0.2626884884148584, + "grad_norm": 1.8030698981206859, + "learning_rate": 8.646120354265594e-06, + "loss": 0.7981, + "step": 8571 + }, + { + "epoch": 0.26271913693759963, + "grad_norm": 1.7838814167739383, + "learning_rate": 8.645780718421858e-06, + "loss": 0.684, + "step": 8572 + }, + { + "epoch": 0.26274978546034083, + "grad_norm": 0.8868179028867517, + "learning_rate": 8.645441046655312e-06, + "loss": 0.4742, + "step": 8573 + }, + { + "epoch": 0.26278043398308204, + "grad_norm": 1.784794249947012, + "learning_rate": 8.645101338969303e-06, + 
"loss": 0.6072, + "step": 8574 + }, + { + "epoch": 0.26281108250582325, + "grad_norm": 1.685205874929892, + "learning_rate": 8.644761595367177e-06, + "loss": 0.6764, + "step": 8575 + }, + { + "epoch": 0.2628417310285644, + "grad_norm": 1.6227495739393996, + "learning_rate": 8.644421815852284e-06, + "loss": 0.6891, + "step": 8576 + }, + { + "epoch": 0.2628723795513056, + "grad_norm": 1.789999104051144, + "learning_rate": 8.644082000427968e-06, + "loss": 0.7583, + "step": 8577 + }, + { + "epoch": 0.2629030280740468, + "grad_norm": 1.7077468717998165, + "learning_rate": 8.643742149097582e-06, + "loss": 0.7573, + "step": 8578 + }, + { + "epoch": 0.262933676596788, + "grad_norm": 1.714901955398567, + "learning_rate": 8.64340226186447e-06, + "loss": 0.6955, + "step": 8579 + }, + { + "epoch": 0.2629643251195292, + "grad_norm": 1.6791330203891297, + "learning_rate": 8.643062338731987e-06, + "loss": 0.7014, + "step": 8580 + }, + { + "epoch": 0.2629949736422704, + "grad_norm": 0.8364056233333773, + "learning_rate": 8.642722379703477e-06, + "loss": 0.4752, + "step": 8581 + }, + { + "epoch": 0.26302562216501163, + "grad_norm": 1.7811297737707745, + "learning_rate": 8.64238238478229e-06, + "loss": 0.8627, + "step": 8582 + }, + { + "epoch": 0.26305627068775284, + "grad_norm": 1.764310429027212, + "learning_rate": 8.642042353971778e-06, + "loss": 0.738, + "step": 8583 + }, + { + "epoch": 0.26308691921049404, + "grad_norm": 1.495815016051855, + "learning_rate": 8.641702287275291e-06, + "loss": 0.6509, + "step": 8584 + }, + { + "epoch": 0.26311756773323525, + "grad_norm": 1.6309114014026591, + "learning_rate": 8.641362184696179e-06, + "loss": 0.6799, + "step": 8585 + }, + { + "epoch": 0.26314821625597645, + "grad_norm": 2.0501439841746127, + "learning_rate": 8.641022046237795e-06, + "loss": 0.7873, + "step": 8586 + }, + { + "epoch": 0.26317886477871766, + "grad_norm": 1.681902654305359, + "learning_rate": 8.640681871903488e-06, + "loss": 0.7108, + "step": 8587 + }, + { + "epoch": 0.26320951330145886, + "grad_norm": 1.7452338971270789, + "learning_rate": 8.640341661696612e-06, + "loss": 0.7295, + "step": 8588 + }, + { + "epoch": 0.26324016182420007, + "grad_norm": 0.8679376052737935, + "learning_rate": 8.640001415620519e-06, + "loss": 0.4795, + "step": 8589 + }, + { + "epoch": 0.2632708103469413, + "grad_norm": 1.863058552689238, + "learning_rate": 8.639661133678558e-06, + "loss": 0.7001, + "step": 8590 + }, + { + "epoch": 0.2633014588696825, + "grad_norm": 0.7943582895058307, + "learning_rate": 8.639320815874087e-06, + "loss": 0.4737, + "step": 8591 + }, + { + "epoch": 0.2633321073924237, + "grad_norm": 1.7984453385475556, + "learning_rate": 8.638980462210455e-06, + "loss": 0.751, + "step": 8592 + }, + { + "epoch": 0.2633627559151649, + "grad_norm": 1.7645056765777016, + "learning_rate": 8.638640072691017e-06, + "loss": 0.7323, + "step": 8593 + }, + { + "epoch": 0.2633934044379061, + "grad_norm": 1.8248121795032013, + "learning_rate": 8.63829964731913e-06, + "loss": 0.6924, + "step": 8594 + }, + { + "epoch": 0.2634240529606473, + "grad_norm": 1.854497388944195, + "learning_rate": 8.637959186098143e-06, + "loss": 0.7132, + "step": 8595 + }, + { + "epoch": 0.2634547014833885, + "grad_norm": 1.8053454266515299, + "learning_rate": 8.637618689031415e-06, + "loss": 0.6331, + "step": 8596 + }, + { + "epoch": 0.2634853500061297, + "grad_norm": 1.4643091046989984, + "learning_rate": 8.6372781561223e-06, + "loss": 0.6022, + "step": 8597 + }, + { + "epoch": 0.2635159985288709, + "grad_norm": 1.8860193278588577, + 
"learning_rate": 8.636937587374152e-06, + "loss": 0.5768, + "step": 8598 + }, + { + "epoch": 0.2635466470516121, + "grad_norm": 1.610164710622763, + "learning_rate": 8.636596982790327e-06, + "loss": 0.6116, + "step": 8599 + }, + { + "epoch": 0.26357729557435333, + "grad_norm": 1.5883194171920014, + "learning_rate": 8.63625634237418e-06, + "loss": 0.7628, + "step": 8600 + }, + { + "epoch": 0.26360794409709454, + "grad_norm": 1.5550471287965657, + "learning_rate": 8.63591566612907e-06, + "loss": 0.6993, + "step": 8601 + }, + { + "epoch": 0.26363859261983574, + "grad_norm": 0.9002496987725015, + "learning_rate": 8.635574954058355e-06, + "loss": 0.4787, + "step": 8602 + }, + { + "epoch": 0.26366924114257695, + "grad_norm": 1.7666624435044733, + "learning_rate": 8.635234206165386e-06, + "loss": 0.646, + "step": 8603 + }, + { + "epoch": 0.26369988966531815, + "grad_norm": 1.6486263387070694, + "learning_rate": 8.634893422453527e-06, + "loss": 0.6098, + "step": 8604 + }, + { + "epoch": 0.26373053818805936, + "grad_norm": 1.5237634289365314, + "learning_rate": 8.634552602926133e-06, + "loss": 0.6123, + "step": 8605 + }, + { + "epoch": 0.26376118671080057, + "grad_norm": 1.6809699800926852, + "learning_rate": 8.63421174758656e-06, + "loss": 0.6127, + "step": 8606 + }, + { + "epoch": 0.2637918352335417, + "grad_norm": 0.8420852166321106, + "learning_rate": 8.63387085643817e-06, + "loss": 0.4783, + "step": 8607 + }, + { + "epoch": 0.2638224837562829, + "grad_norm": 1.6911037663486888, + "learning_rate": 8.633529929484322e-06, + "loss": 0.72, + "step": 8608 + }, + { + "epoch": 0.2638531322790241, + "grad_norm": 1.6048069906501674, + "learning_rate": 8.633188966728374e-06, + "loss": 0.7162, + "step": 8609 + }, + { + "epoch": 0.26388378080176533, + "grad_norm": 1.824023260155792, + "learning_rate": 8.632847968173683e-06, + "loss": 0.7697, + "step": 8610 + }, + { + "epoch": 0.26391442932450654, + "grad_norm": 0.7836945420190689, + "learning_rate": 8.632506933823613e-06, + "loss": 0.4799, + "step": 8611 + }, + { + "epoch": 0.26394507784724774, + "grad_norm": 1.839743867898232, + "learning_rate": 8.632165863681523e-06, + "loss": 0.6635, + "step": 8612 + }, + { + "epoch": 0.26397572636998895, + "grad_norm": 1.7887146727677963, + "learning_rate": 8.631824757750774e-06, + "loss": 0.7065, + "step": 8613 + }, + { + "epoch": 0.26400637489273016, + "grad_norm": 1.8462201646165741, + "learning_rate": 8.631483616034725e-06, + "loss": 0.6937, + "step": 8614 + }, + { + "epoch": 0.26403702341547136, + "grad_norm": 1.6119864217768574, + "learning_rate": 8.631142438536739e-06, + "loss": 0.6935, + "step": 8615 + }, + { + "epoch": 0.26406767193821257, + "grad_norm": 1.824995459264544, + "learning_rate": 8.630801225260177e-06, + "loss": 0.7682, + "step": 8616 + }, + { + "epoch": 0.2640983204609538, + "grad_norm": 1.786365469953861, + "learning_rate": 8.630459976208403e-06, + "loss": 0.651, + "step": 8617 + }, + { + "epoch": 0.264128968983695, + "grad_norm": 1.735308089589714, + "learning_rate": 8.630118691384776e-06, + "loss": 0.6465, + "step": 8618 + }, + { + "epoch": 0.2641596175064362, + "grad_norm": 1.48656396757209, + "learning_rate": 8.629777370792663e-06, + "loss": 0.6203, + "step": 8619 + }, + { + "epoch": 0.2641902660291774, + "grad_norm": 1.6771877068475634, + "learning_rate": 8.629436014435424e-06, + "loss": 0.6814, + "step": 8620 + }, + { + "epoch": 0.2642209145519186, + "grad_norm": 1.7840792189861252, + "learning_rate": 8.629094622316423e-06, + "loss": 0.6765, + "step": 8621 + }, + { + "epoch": 
0.2642515630746598, + "grad_norm": 1.6050164592935219, + "learning_rate": 8.628753194439024e-06, + "loss": 0.7729, + "step": 8622 + }, + { + "epoch": 0.264282211597401, + "grad_norm": 1.8120205614343547, + "learning_rate": 8.628411730806592e-06, + "loss": 0.6431, + "step": 8623 + }, + { + "epoch": 0.2643128601201422, + "grad_norm": 0.9387749541688563, + "learning_rate": 8.62807023142249e-06, + "loss": 0.4728, + "step": 8624 + }, + { + "epoch": 0.2643435086428834, + "grad_norm": 1.666973974957228, + "learning_rate": 8.627728696290084e-06, + "loss": 0.7703, + "step": 8625 + }, + { + "epoch": 0.2643741571656246, + "grad_norm": 1.7656225959196084, + "learning_rate": 8.62738712541274e-06, + "loss": 0.6743, + "step": 8626 + }, + { + "epoch": 0.26440480568836583, + "grad_norm": 1.9218171913473998, + "learning_rate": 8.627045518793821e-06, + "loss": 0.6326, + "step": 8627 + }, + { + "epoch": 0.26443545421110703, + "grad_norm": 1.9594603430845936, + "learning_rate": 8.626703876436695e-06, + "loss": 0.7734, + "step": 8628 + }, + { + "epoch": 0.26446610273384824, + "grad_norm": 1.7508222900223998, + "learning_rate": 8.626362198344728e-06, + "loss": 0.6377, + "step": 8629 + }, + { + "epoch": 0.26449675125658945, + "grad_norm": 1.8652785855234169, + "learning_rate": 8.626020484521287e-06, + "loss": 0.6522, + "step": 8630 + }, + { + "epoch": 0.26452739977933065, + "grad_norm": 0.8755726370176181, + "learning_rate": 8.625678734969737e-06, + "loss": 0.4713, + "step": 8631 + }, + { + "epoch": 0.26455804830207186, + "grad_norm": 1.7384300646264683, + "learning_rate": 8.625336949693448e-06, + "loss": 0.7074, + "step": 8632 + }, + { + "epoch": 0.26458869682481306, + "grad_norm": 1.7286367658662656, + "learning_rate": 8.624995128695785e-06, + "loss": 0.6834, + "step": 8633 + }, + { + "epoch": 0.26461934534755427, + "grad_norm": 1.6380641649722913, + "learning_rate": 8.62465327198012e-06, + "loss": 0.7307, + "step": 8634 + }, + { + "epoch": 0.2646499938702955, + "grad_norm": 0.8445714116036024, + "learning_rate": 8.624311379549817e-06, + "loss": 0.4842, + "step": 8635 + }, + { + "epoch": 0.2646806423930367, + "grad_norm": 1.9160925207542323, + "learning_rate": 8.623969451408248e-06, + "loss": 0.7201, + "step": 8636 + }, + { + "epoch": 0.2647112909157779, + "grad_norm": 1.589384189534836, + "learning_rate": 8.623627487558779e-06, + "loss": 0.6915, + "step": 8637 + }, + { + "epoch": 0.26474193943851904, + "grad_norm": 1.6238517053010504, + "learning_rate": 8.623285488004781e-06, + "loss": 0.7698, + "step": 8638 + }, + { + "epoch": 0.26477258796126024, + "grad_norm": 2.1348458174239124, + "learning_rate": 8.622943452749626e-06, + "loss": 0.7226, + "step": 8639 + }, + { + "epoch": 0.26480323648400145, + "grad_norm": 1.4963624399170505, + "learning_rate": 8.62260138179668e-06, + "loss": 0.672, + "step": 8640 + }, + { + "epoch": 0.26483388500674265, + "grad_norm": 1.7625629679741361, + "learning_rate": 8.622259275149317e-06, + "loss": 0.7312, + "step": 8641 + }, + { + "epoch": 0.26486453352948386, + "grad_norm": 1.8470722944856384, + "learning_rate": 8.621917132810906e-06, + "loss": 0.8351, + "step": 8642 + }, + { + "epoch": 0.26489518205222506, + "grad_norm": 1.525427813980741, + "learning_rate": 8.621574954784821e-06, + "loss": 0.6672, + "step": 8643 + }, + { + "epoch": 0.26492583057496627, + "grad_norm": 1.9494884455747512, + "learning_rate": 8.621232741074429e-06, + "loss": 0.6446, + "step": 8644 + }, + { + "epoch": 0.2649564790977075, + "grad_norm": 0.9152131283971016, + "learning_rate": 8.620890491683105e-06, + 
"loss": 0.484, + "step": 8645 + }, + { + "epoch": 0.2649871276204487, + "grad_norm": 1.8846997660854605, + "learning_rate": 8.62054820661422e-06, + "loss": 0.5751, + "step": 8646 + }, + { + "epoch": 0.2650177761431899, + "grad_norm": 2.0195597364020754, + "learning_rate": 8.620205885871147e-06, + "loss": 0.6746, + "step": 8647 + }, + { + "epoch": 0.2650484246659311, + "grad_norm": 1.851283331256732, + "learning_rate": 8.61986352945726e-06, + "loss": 0.6935, + "step": 8648 + }, + { + "epoch": 0.2650790731886723, + "grad_norm": 1.5992714088645092, + "learning_rate": 8.619521137375932e-06, + "loss": 0.655, + "step": 8649 + }, + { + "epoch": 0.2651097217114135, + "grad_norm": 0.8293089559324867, + "learning_rate": 8.619178709630536e-06, + "loss": 0.4664, + "step": 8650 + }, + { + "epoch": 0.2651403702341547, + "grad_norm": 1.8060118161618566, + "learning_rate": 8.618836246224444e-06, + "loss": 0.7753, + "step": 8651 + }, + { + "epoch": 0.2651710187568959, + "grad_norm": 1.551496028137282, + "learning_rate": 8.618493747161034e-06, + "loss": 0.6937, + "step": 8652 + }, + { + "epoch": 0.2652016672796371, + "grad_norm": 1.633968610650129, + "learning_rate": 8.618151212443679e-06, + "loss": 0.725, + "step": 8653 + }, + { + "epoch": 0.2652323158023783, + "grad_norm": 1.6434824649280186, + "learning_rate": 8.617808642075756e-06, + "loss": 0.662, + "step": 8654 + }, + { + "epoch": 0.26526296432511953, + "grad_norm": 1.6216568402192524, + "learning_rate": 8.617466036060638e-06, + "loss": 0.7523, + "step": 8655 + }, + { + "epoch": 0.26529361284786074, + "grad_norm": 1.8879795318547283, + "learning_rate": 8.6171233944017e-06, + "loss": 0.7796, + "step": 8656 + }, + { + "epoch": 0.26532426137060194, + "grad_norm": 1.9645896997018875, + "learning_rate": 8.61678071710232e-06, + "loss": 0.7197, + "step": 8657 + }, + { + "epoch": 0.26535490989334315, + "grad_norm": 1.5115200021928943, + "learning_rate": 8.616438004165876e-06, + "loss": 0.6382, + "step": 8658 + }, + { + "epoch": 0.26538555841608436, + "grad_norm": 0.8165407007079416, + "learning_rate": 8.616095255595743e-06, + "loss": 0.4917, + "step": 8659 + }, + { + "epoch": 0.26541620693882556, + "grad_norm": 1.7510209015491358, + "learning_rate": 8.615752471395296e-06, + "loss": 0.7871, + "step": 8660 + }, + { + "epoch": 0.26544685546156677, + "grad_norm": 1.6895158689342604, + "learning_rate": 8.615409651567916e-06, + "loss": 0.6701, + "step": 8661 + }, + { + "epoch": 0.26547750398430797, + "grad_norm": 1.7983567401700589, + "learning_rate": 8.61506679611698e-06, + "loss": 0.7456, + "step": 8662 + }, + { + "epoch": 0.2655081525070492, + "grad_norm": 2.1176556929100383, + "learning_rate": 8.614723905045865e-06, + "loss": 0.7372, + "step": 8663 + }, + { + "epoch": 0.2655388010297904, + "grad_norm": 1.746450376565469, + "learning_rate": 8.61438097835795e-06, + "loss": 0.6258, + "step": 8664 + }, + { + "epoch": 0.2655694495525316, + "grad_norm": 1.644780870971775, + "learning_rate": 8.614038016056617e-06, + "loss": 0.6865, + "step": 8665 + }, + { + "epoch": 0.2656000980752728, + "grad_norm": 1.7643082602720246, + "learning_rate": 8.613695018145241e-06, + "loss": 0.7539, + "step": 8666 + }, + { + "epoch": 0.265630746598014, + "grad_norm": 1.6386354344840777, + "learning_rate": 8.613351984627204e-06, + "loss": 0.7383, + "step": 8667 + }, + { + "epoch": 0.2656613951207552, + "grad_norm": 1.8890830335630293, + "learning_rate": 8.613008915505885e-06, + "loss": 0.6986, + "step": 8668 + }, + { + "epoch": 0.26569204364349636, + "grad_norm": 2.0893913870738667, + 
"learning_rate": 8.612665810784664e-06, + "loss": 0.8127, + "step": 8669 + }, + { + "epoch": 0.26572269216623756, + "grad_norm": 1.7366845220843774, + "learning_rate": 8.612322670466924e-06, + "loss": 0.6914, + "step": 8670 + }, + { + "epoch": 0.26575334068897877, + "grad_norm": 1.9786824715859375, + "learning_rate": 8.611979494556043e-06, + "loss": 0.7321, + "step": 8671 + }, + { + "epoch": 0.26578398921172, + "grad_norm": 1.7312214232406788, + "learning_rate": 8.611636283055405e-06, + "loss": 0.7301, + "step": 8672 + }, + { + "epoch": 0.2658146377344612, + "grad_norm": 1.774504457984491, + "learning_rate": 8.61129303596839e-06, + "loss": 0.6713, + "step": 8673 + }, + { + "epoch": 0.2658452862572024, + "grad_norm": 1.5773100521454266, + "learning_rate": 8.61094975329838e-06, + "loss": 0.6966, + "step": 8674 + }, + { + "epoch": 0.2658759347799436, + "grad_norm": 1.6135717605976552, + "learning_rate": 8.610606435048761e-06, + "loss": 0.6777, + "step": 8675 + }, + { + "epoch": 0.2659065833026848, + "grad_norm": 1.789124008849361, + "learning_rate": 8.61026308122291e-06, + "loss": 0.677, + "step": 8676 + }, + { + "epoch": 0.265937231825426, + "grad_norm": 0.8274027847988817, + "learning_rate": 8.609919691824213e-06, + "loss": 0.4794, + "step": 8677 + }, + { + "epoch": 0.2659678803481672, + "grad_norm": 1.595278970636013, + "learning_rate": 8.609576266856057e-06, + "loss": 0.6604, + "step": 8678 + }, + { + "epoch": 0.2659985288709084, + "grad_norm": 0.8169521104477734, + "learning_rate": 8.60923280632182e-06, + "loss": 0.5046, + "step": 8679 + }, + { + "epoch": 0.2660291773936496, + "grad_norm": 1.7354473141717477, + "learning_rate": 8.608889310224888e-06, + "loss": 0.728, + "step": 8680 + }, + { + "epoch": 0.2660598259163908, + "grad_norm": 1.7522321130809937, + "learning_rate": 8.608545778568648e-06, + "loss": 0.6977, + "step": 8681 + }, + { + "epoch": 0.26609047443913203, + "grad_norm": 1.6454857112745986, + "learning_rate": 8.608202211356483e-06, + "loss": 0.7116, + "step": 8682 + }, + { + "epoch": 0.26612112296187324, + "grad_norm": 1.8657117583084988, + "learning_rate": 8.607858608591778e-06, + "loss": 0.6976, + "step": 8683 + }, + { + "epoch": 0.26615177148461444, + "grad_norm": 0.7938193911656098, + "learning_rate": 8.607514970277917e-06, + "loss": 0.4527, + "step": 8684 + }, + { + "epoch": 0.26618242000735565, + "grad_norm": 1.6742904102412521, + "learning_rate": 8.60717129641829e-06, + "loss": 0.6772, + "step": 8685 + }, + { + "epoch": 0.26621306853009685, + "grad_norm": 1.911009310593633, + "learning_rate": 8.606827587016281e-06, + "loss": 0.6843, + "step": 8686 + }, + { + "epoch": 0.26624371705283806, + "grad_norm": 0.7739527578099358, + "learning_rate": 8.606483842075277e-06, + "loss": 0.4756, + "step": 8687 + }, + { + "epoch": 0.26627436557557926, + "grad_norm": 1.9527738788009537, + "learning_rate": 8.606140061598665e-06, + "loss": 0.6981, + "step": 8688 + }, + { + "epoch": 0.26630501409832047, + "grad_norm": 1.858579591416523, + "learning_rate": 8.605796245589833e-06, + "loss": 0.8354, + "step": 8689 + }, + { + "epoch": 0.2663356626210617, + "grad_norm": 1.6071797202241935, + "learning_rate": 8.605452394052168e-06, + "loss": 0.6035, + "step": 8690 + }, + { + "epoch": 0.2663663111438029, + "grad_norm": 1.6179835993909029, + "learning_rate": 8.605108506989057e-06, + "loss": 0.7575, + "step": 8691 + }, + { + "epoch": 0.2663969596665441, + "grad_norm": 1.6124506026901566, + "learning_rate": 8.604764584403888e-06, + "loss": 0.7217, + "step": 8692 + }, + { + "epoch": 
0.2664276081892853, + "grad_norm": 1.6454464940620979, + "learning_rate": 8.604420626300054e-06, + "loss": 0.6769, + "step": 8693 + }, + { + "epoch": 0.2664582567120265, + "grad_norm": 1.766566533379448, + "learning_rate": 8.60407663268094e-06, + "loss": 0.7142, + "step": 8694 + }, + { + "epoch": 0.2664889052347677, + "grad_norm": 0.8822864591025693, + "learning_rate": 8.603732603549938e-06, + "loss": 0.4881, + "step": 8695 + }, + { + "epoch": 0.2665195537575089, + "grad_norm": 0.8697917007295235, + "learning_rate": 8.603388538910435e-06, + "loss": 0.4939, + "step": 8696 + }, + { + "epoch": 0.2665502022802501, + "grad_norm": 1.9066963908619967, + "learning_rate": 8.603044438765824e-06, + "loss": 0.6668, + "step": 8697 + }, + { + "epoch": 0.2665808508029913, + "grad_norm": 1.8432523776331224, + "learning_rate": 8.602700303119493e-06, + "loss": 0.8027, + "step": 8698 + }, + { + "epoch": 0.2666114993257325, + "grad_norm": 2.15680258943209, + "learning_rate": 8.602356131974837e-06, + "loss": 0.8376, + "step": 8699 + }, + { + "epoch": 0.2666421478484737, + "grad_norm": 1.5455186019684908, + "learning_rate": 8.602011925335241e-06, + "loss": 0.5805, + "step": 8700 + }, + { + "epoch": 0.2666727963712149, + "grad_norm": 1.523026259566737, + "learning_rate": 8.601667683204101e-06, + "loss": 0.6664, + "step": 8701 + }, + { + "epoch": 0.2667034448939561, + "grad_norm": 1.7805022320547734, + "learning_rate": 8.601323405584808e-06, + "loss": 0.7712, + "step": 8702 + }, + { + "epoch": 0.2667340934166973, + "grad_norm": 1.9055084070712798, + "learning_rate": 8.600979092480755e-06, + "loss": 0.7448, + "step": 8703 + }, + { + "epoch": 0.2667647419394385, + "grad_norm": 1.6975289547039651, + "learning_rate": 8.600634743895332e-06, + "loss": 0.6765, + "step": 8704 + }, + { + "epoch": 0.2667953904621797, + "grad_norm": 1.969923984091332, + "learning_rate": 8.600290359831935e-06, + "loss": 0.7922, + "step": 8705 + }, + { + "epoch": 0.2668260389849209, + "grad_norm": 1.5861299063925833, + "learning_rate": 8.599945940293955e-06, + "loss": 0.6673, + "step": 8706 + }, + { + "epoch": 0.2668566875076621, + "grad_norm": 3.713289160877192, + "learning_rate": 8.599601485284787e-06, + "loss": 0.6828, + "step": 8707 + }, + { + "epoch": 0.2668873360304033, + "grad_norm": 1.668275100983845, + "learning_rate": 8.599256994807823e-06, + "loss": 0.6466, + "step": 8708 + }, + { + "epoch": 0.2669179845531445, + "grad_norm": 1.6924908916843473, + "learning_rate": 8.598912468866461e-06, + "loss": 0.7305, + "step": 8709 + }, + { + "epoch": 0.26694863307588573, + "grad_norm": 1.8782245391592602, + "learning_rate": 8.598567907464093e-06, + "loss": 0.6944, + "step": 8710 + }, + { + "epoch": 0.26697928159862694, + "grad_norm": 1.7095011256837775, + "learning_rate": 8.598223310604115e-06, + "loss": 0.6719, + "step": 8711 + }, + { + "epoch": 0.26700993012136814, + "grad_norm": 1.505073260728854, + "learning_rate": 8.597878678289921e-06, + "loss": 0.7596, + "step": 8712 + }, + { + "epoch": 0.26704057864410935, + "grad_norm": 1.8102268402957717, + "learning_rate": 8.597534010524908e-06, + "loss": 0.7688, + "step": 8713 + }, + { + "epoch": 0.26707122716685056, + "grad_norm": 2.06934160318398, + "learning_rate": 8.597189307312472e-06, + "loss": 0.7341, + "step": 8714 + }, + { + "epoch": 0.26710187568959176, + "grad_norm": 1.6940441697868125, + "learning_rate": 8.59684456865601e-06, + "loss": 0.6939, + "step": 8715 + }, + { + "epoch": 0.26713252421233297, + "grad_norm": 1.6175300217058737, + "learning_rate": 8.596499794558918e-06, + "loss": 
0.6305, + "step": 8716 + }, + { + "epoch": 0.2671631727350742, + "grad_norm": 2.5128432435213712, + "learning_rate": 8.596154985024594e-06, + "loss": 0.6908, + "step": 8717 + }, + { + "epoch": 0.2671938212578154, + "grad_norm": 1.858473104339601, + "learning_rate": 8.595810140056433e-06, + "loss": 0.7902, + "step": 8718 + }, + { + "epoch": 0.2672244697805566, + "grad_norm": 1.8217942345950442, + "learning_rate": 8.595465259657837e-06, + "loss": 0.7146, + "step": 8719 + }, + { + "epoch": 0.2672551183032978, + "grad_norm": 1.8170056331621591, + "learning_rate": 8.5951203438322e-06, + "loss": 0.7026, + "step": 8720 + }, + { + "epoch": 0.267285766826039, + "grad_norm": 1.7971161855864795, + "learning_rate": 8.594775392582923e-06, + "loss": 0.7112, + "step": 8721 + }, + { + "epoch": 0.2673164153487802, + "grad_norm": 1.6973349930262709, + "learning_rate": 8.594430405913403e-06, + "loss": 0.7462, + "step": 8722 + }, + { + "epoch": 0.2673470638715214, + "grad_norm": 1.7162409879669307, + "learning_rate": 8.594085383827043e-06, + "loss": 0.6506, + "step": 8723 + }, + { + "epoch": 0.2673777123942626, + "grad_norm": 1.7690449570594942, + "learning_rate": 8.593740326327237e-06, + "loss": 0.6539, + "step": 8724 + }, + { + "epoch": 0.2674083609170038, + "grad_norm": 1.6480247295831991, + "learning_rate": 8.59339523341739e-06, + "loss": 0.7212, + "step": 8725 + }, + { + "epoch": 0.267439009439745, + "grad_norm": 1.4888856026171802, + "learning_rate": 8.593050105100902e-06, + "loss": 0.7486, + "step": 8726 + }, + { + "epoch": 0.26746965796248623, + "grad_norm": 1.6292571996708247, + "learning_rate": 8.59270494138117e-06, + "loss": 0.7156, + "step": 8727 + }, + { + "epoch": 0.26750030648522743, + "grad_norm": 1.7199360692089334, + "learning_rate": 8.592359742261598e-06, + "loss": 0.7048, + "step": 8728 + }, + { + "epoch": 0.26753095500796864, + "grad_norm": 1.6018973288835465, + "learning_rate": 8.592014507745586e-06, + "loss": 0.6541, + "step": 8729 + }, + { + "epoch": 0.26756160353070985, + "grad_norm": 1.4253419780208052, + "learning_rate": 8.591669237836534e-06, + "loss": 0.6688, + "step": 8730 + }, + { + "epoch": 0.267592252053451, + "grad_norm": 1.7014681861946501, + "learning_rate": 8.591323932537847e-06, + "loss": 0.7267, + "step": 8731 + }, + { + "epoch": 0.2676229005761922, + "grad_norm": 1.8358003887215353, + "learning_rate": 8.590978591852928e-06, + "loss": 0.7471, + "step": 8732 + }, + { + "epoch": 0.2676535490989334, + "grad_norm": 1.320721826278528, + "learning_rate": 8.590633215785178e-06, + "loss": 0.4969, + "step": 8733 + }, + { + "epoch": 0.2676841976216746, + "grad_norm": 1.1232089987861138, + "learning_rate": 8.590287804337998e-06, + "loss": 0.4961, + "step": 8734 + }, + { + "epoch": 0.2677148461444158, + "grad_norm": 1.7618460501468032, + "learning_rate": 8.589942357514796e-06, + "loss": 0.6115, + "step": 8735 + }, + { + "epoch": 0.267745494667157, + "grad_norm": 1.4924463804702695, + "learning_rate": 8.589596875318973e-06, + "loss": 0.6566, + "step": 8736 + }, + { + "epoch": 0.26777614318989823, + "grad_norm": 1.7374634892364293, + "learning_rate": 8.589251357753932e-06, + "loss": 0.6601, + "step": 8737 + }, + { + "epoch": 0.26780679171263944, + "grad_norm": 1.0740270858689396, + "learning_rate": 8.58890580482308e-06, + "loss": 0.4631, + "step": 8738 + }, + { + "epoch": 0.26783744023538064, + "grad_norm": 1.6032944269768523, + "learning_rate": 8.58856021652982e-06, + "loss": 0.7588, + "step": 8739 + }, + { + "epoch": 0.26786808875812185, + "grad_norm": 1.7734405442977217, + 
"learning_rate": 8.588214592877559e-06, + "loss": 0.6658, + "step": 8740 + }, + { + "epoch": 0.26789873728086305, + "grad_norm": 1.572963683669443, + "learning_rate": 8.587868933869703e-06, + "loss": 0.7255, + "step": 8741 + }, + { + "epoch": 0.26792938580360426, + "grad_norm": 1.9516243882655355, + "learning_rate": 8.587523239509653e-06, + "loss": 0.7084, + "step": 8742 + }, + { + "epoch": 0.26796003432634546, + "grad_norm": 1.6532335727556586, + "learning_rate": 8.587177509800823e-06, + "loss": 0.6793, + "step": 8743 + }, + { + "epoch": 0.26799068284908667, + "grad_norm": 1.8750026471864316, + "learning_rate": 8.586831744746611e-06, + "loss": 0.7189, + "step": 8744 + }, + { + "epoch": 0.2680213313718279, + "grad_norm": 1.7206122433783182, + "learning_rate": 8.586485944350432e-06, + "loss": 0.6665, + "step": 8745 + }, + { + "epoch": 0.2680519798945691, + "grad_norm": 1.8370731596159444, + "learning_rate": 8.586140108615685e-06, + "loss": 0.7208, + "step": 8746 + }, + { + "epoch": 0.2680826284173103, + "grad_norm": 1.6153792561042657, + "learning_rate": 8.585794237545784e-06, + "loss": 0.6697, + "step": 8747 + }, + { + "epoch": 0.2681132769400515, + "grad_norm": 1.8400279429119635, + "learning_rate": 8.585448331144135e-06, + "loss": 0.682, + "step": 8748 + }, + { + "epoch": 0.2681439254627927, + "grad_norm": 1.8547867946154062, + "learning_rate": 8.585102389414147e-06, + "loss": 0.7899, + "step": 8749 + }, + { + "epoch": 0.2681745739855339, + "grad_norm": 1.4871101763981815, + "learning_rate": 8.584756412359228e-06, + "loss": 0.5469, + "step": 8750 + }, + { + "epoch": 0.2682052225082751, + "grad_norm": 1.6563655681084601, + "learning_rate": 8.584410399982786e-06, + "loss": 0.7681, + "step": 8751 + }, + { + "epoch": 0.2682358710310163, + "grad_norm": 1.86225570089154, + "learning_rate": 8.58406435228823e-06, + "loss": 0.7852, + "step": 8752 + }, + { + "epoch": 0.2682665195537575, + "grad_norm": 1.9806064866236888, + "learning_rate": 8.583718269278972e-06, + "loss": 0.7421, + "step": 8753 + }, + { + "epoch": 0.2682971680764987, + "grad_norm": 1.644826635402087, + "learning_rate": 8.58337215095842e-06, + "loss": 0.6329, + "step": 8754 + }, + { + "epoch": 0.26832781659923993, + "grad_norm": 1.141603180759785, + "learning_rate": 8.583025997329988e-06, + "loss": 0.4794, + "step": 8755 + }, + { + "epoch": 0.26835846512198114, + "grad_norm": 1.8326835326512092, + "learning_rate": 8.58267980839708e-06, + "loss": 0.645, + "step": 8756 + }, + { + "epoch": 0.26838911364472234, + "grad_norm": 1.0332687619404066, + "learning_rate": 8.582333584163116e-06, + "loss": 0.493, + "step": 8757 + }, + { + "epoch": 0.26841976216746355, + "grad_norm": 1.6123099677840278, + "learning_rate": 8.5819873246315e-06, + "loss": 0.6666, + "step": 8758 + }, + { + "epoch": 0.26845041069020475, + "grad_norm": 2.436429695680043, + "learning_rate": 8.581641029805646e-06, + "loss": 0.7028, + "step": 8759 + }, + { + "epoch": 0.26848105921294596, + "grad_norm": 1.8376748287466984, + "learning_rate": 8.581294699688966e-06, + "loss": 0.7168, + "step": 8760 + }, + { + "epoch": 0.26851170773568717, + "grad_norm": 1.8180764342602542, + "learning_rate": 8.580948334284875e-06, + "loss": 0.7731, + "step": 8761 + }, + { + "epoch": 0.2685423562584283, + "grad_norm": 1.7561515532006529, + "learning_rate": 8.580601933596784e-06, + "loss": 0.8169, + "step": 8762 + }, + { + "epoch": 0.2685730047811695, + "grad_norm": 0.9652373599390358, + "learning_rate": 8.580255497628104e-06, + "loss": 0.496, + "step": 8763 + }, + { + "epoch": 
0.2686036533039107, + "grad_norm": 1.7487489295866459, + "learning_rate": 8.579909026382251e-06, + "loss": 0.7424, + "step": 8764 + }, + { + "epoch": 0.26863430182665193, + "grad_norm": 2.0382255839352483, + "learning_rate": 8.57956251986264e-06, + "loss": 0.7454, + "step": 8765 + }, + { + "epoch": 0.26866495034939314, + "grad_norm": 1.6346101204825385, + "learning_rate": 8.579215978072683e-06, + "loss": 0.6789, + "step": 8766 + }, + { + "epoch": 0.26869559887213434, + "grad_norm": 1.6751747204179404, + "learning_rate": 8.578869401015794e-06, + "loss": 0.7012, + "step": 8767 + }, + { + "epoch": 0.26872624739487555, + "grad_norm": 1.5956455891830115, + "learning_rate": 8.57852278869539e-06, + "loss": 0.6622, + "step": 8768 + }, + { + "epoch": 0.26875689591761676, + "grad_norm": 1.8650906155635447, + "learning_rate": 8.578176141114886e-06, + "loss": 0.749, + "step": 8769 + }, + { + "epoch": 0.26878754444035796, + "grad_norm": 1.6435889894837963, + "learning_rate": 8.577829458277695e-06, + "loss": 0.7131, + "step": 8770 + }, + { + "epoch": 0.26881819296309917, + "grad_norm": 1.7883792680597725, + "learning_rate": 8.577482740187237e-06, + "loss": 0.6572, + "step": 8771 + }, + { + "epoch": 0.2688488414858404, + "grad_norm": 2.020531464049706, + "learning_rate": 8.577135986846925e-06, + "loss": 0.8391, + "step": 8772 + }, + { + "epoch": 0.2688794900085816, + "grad_norm": 1.7276512402995603, + "learning_rate": 8.576789198260178e-06, + "loss": 0.6852, + "step": 8773 + }, + { + "epoch": 0.2689101385313228, + "grad_norm": 1.838240869661493, + "learning_rate": 8.57644237443041e-06, + "loss": 0.7762, + "step": 8774 + }, + { + "epoch": 0.268940787054064, + "grad_norm": 1.7559888301722884, + "learning_rate": 8.576095515361043e-06, + "loss": 0.6904, + "step": 8775 + }, + { + "epoch": 0.2689714355768052, + "grad_norm": 0.8852552122393474, + "learning_rate": 8.575748621055488e-06, + "loss": 0.4567, + "step": 8776 + }, + { + "epoch": 0.2690020840995464, + "grad_norm": 1.6791018310998183, + "learning_rate": 8.57540169151717e-06, + "loss": 0.7316, + "step": 8777 + }, + { + "epoch": 0.2690327326222876, + "grad_norm": 1.7977691022361117, + "learning_rate": 8.575054726749503e-06, + "loss": 0.7474, + "step": 8778 + }, + { + "epoch": 0.2690633811450288, + "grad_norm": 1.8227825511027813, + "learning_rate": 8.574707726755909e-06, + "loss": 0.6689, + "step": 8779 + }, + { + "epoch": 0.26909402966777, + "grad_norm": 0.8026576162447174, + "learning_rate": 8.574360691539803e-06, + "loss": 0.4886, + "step": 8780 + }, + { + "epoch": 0.2691246781905112, + "grad_norm": 1.9800526044573052, + "learning_rate": 8.574013621104607e-06, + "loss": 0.8086, + "step": 8781 + }, + { + "epoch": 0.26915532671325243, + "grad_norm": 1.5727481261544738, + "learning_rate": 8.57366651545374e-06, + "loss": 0.6319, + "step": 8782 + }, + { + "epoch": 0.26918597523599364, + "grad_norm": 1.5475157836237405, + "learning_rate": 8.573319374590622e-06, + "loss": 0.6563, + "step": 8783 + }, + { + "epoch": 0.26921662375873484, + "grad_norm": 1.8757964433062266, + "learning_rate": 8.572972198518676e-06, + "loss": 0.7387, + "step": 8784 + }, + { + "epoch": 0.26924727228147605, + "grad_norm": 1.6888847295916019, + "learning_rate": 8.57262498724132e-06, + "loss": 0.6385, + "step": 8785 + }, + { + "epoch": 0.26927792080421725, + "grad_norm": 1.6035222690608861, + "learning_rate": 8.572277740761976e-06, + "loss": 0.7591, + "step": 8786 + }, + { + "epoch": 0.26930856932695846, + "grad_norm": 0.8806949347125431, + "learning_rate": 8.571930459084065e-06, + 
"loss": 0.4625, + "step": 8787 + }, + { + "epoch": 0.26933921784969966, + "grad_norm": 0.8571734589138414, + "learning_rate": 8.571583142211009e-06, + "loss": 0.4713, + "step": 8788 + }, + { + "epoch": 0.26936986637244087, + "grad_norm": 0.7799710080341319, + "learning_rate": 8.57123579014623e-06, + "loss": 0.4911, + "step": 8789 + }, + { + "epoch": 0.2694005148951821, + "grad_norm": 1.6173918448308329, + "learning_rate": 8.570888402893154e-06, + "loss": 0.7059, + "step": 8790 + }, + { + "epoch": 0.2694311634179233, + "grad_norm": 1.681891930972527, + "learning_rate": 8.570540980455197e-06, + "loss": 0.7984, + "step": 8791 + }, + { + "epoch": 0.2694618119406645, + "grad_norm": 1.723225237658459, + "learning_rate": 8.570193522835788e-06, + "loss": 0.7825, + "step": 8792 + }, + { + "epoch": 0.26949246046340564, + "grad_norm": 1.8184191601826825, + "learning_rate": 8.56984603003835e-06, + "loss": 0.7387, + "step": 8793 + }, + { + "epoch": 0.26952310898614684, + "grad_norm": 1.571642952603127, + "learning_rate": 8.569498502066302e-06, + "loss": 0.6356, + "step": 8794 + }, + { + "epoch": 0.26955375750888805, + "grad_norm": 1.9892305421920649, + "learning_rate": 8.569150938923077e-06, + "loss": 0.7463, + "step": 8795 + }, + { + "epoch": 0.26958440603162925, + "grad_norm": 1.0545336677901505, + "learning_rate": 8.56880334061209e-06, + "loss": 0.4869, + "step": 8796 + }, + { + "epoch": 0.26961505455437046, + "grad_norm": 1.8461747928576469, + "learning_rate": 8.568455707136774e-06, + "loss": 0.7302, + "step": 8797 + }, + { + "epoch": 0.26964570307711166, + "grad_norm": 1.5264187085312373, + "learning_rate": 8.568108038500548e-06, + "loss": 0.5361, + "step": 8798 + }, + { + "epoch": 0.26967635159985287, + "grad_norm": 0.9080871180551153, + "learning_rate": 8.567760334706843e-06, + "loss": 0.4659, + "step": 8799 + }, + { + "epoch": 0.2697070001225941, + "grad_norm": 1.742546395798348, + "learning_rate": 8.56741259575908e-06, + "loss": 0.651, + "step": 8800 + }, + { + "epoch": 0.2697376486453353, + "grad_norm": 1.6841933877132682, + "learning_rate": 8.56706482166069e-06, + "loss": 0.75, + "step": 8801 + }, + { + "epoch": 0.2697682971680765, + "grad_norm": 2.1895079318585897, + "learning_rate": 8.566717012415096e-06, + "loss": 0.7222, + "step": 8802 + }, + { + "epoch": 0.2697989456908177, + "grad_norm": 1.8003434421449969, + "learning_rate": 8.56636916802573e-06, + "loss": 0.6835, + "step": 8803 + }, + { + "epoch": 0.2698295942135589, + "grad_norm": 2.2701199603501294, + "learning_rate": 8.566021288496013e-06, + "loss": 0.7551, + "step": 8804 + }, + { + "epoch": 0.2698602427363001, + "grad_norm": 1.509687586507239, + "learning_rate": 8.565673373829375e-06, + "loss": 0.6454, + "step": 8805 + }, + { + "epoch": 0.2698908912590413, + "grad_norm": 1.6228588798691859, + "learning_rate": 8.565325424029248e-06, + "loss": 0.7562, + "step": 8806 + }, + { + "epoch": 0.2699215397817825, + "grad_norm": 1.8771108175093567, + "learning_rate": 8.564977439099056e-06, + "loss": 0.6755, + "step": 8807 + }, + { + "epoch": 0.2699521883045237, + "grad_norm": 1.8049972962525018, + "learning_rate": 8.564629419042227e-06, + "loss": 0.6845, + "step": 8808 + }, + { + "epoch": 0.2699828368272649, + "grad_norm": 1.752035913880027, + "learning_rate": 8.564281363862196e-06, + "loss": 0.6821, + "step": 8809 + }, + { + "epoch": 0.27001348535000613, + "grad_norm": 1.7504595618974585, + "learning_rate": 8.563933273562387e-06, + "loss": 0.7217, + "step": 8810 + }, + { + "epoch": 0.27004413387274734, + "grad_norm": 1.6389920254523476, + 
"learning_rate": 8.563585148146231e-06, + "loss": 0.6802, + "step": 8811 + }, + { + "epoch": 0.27007478239548854, + "grad_norm": 1.5437333963427688, + "learning_rate": 8.56323698761716e-06, + "loss": 0.6462, + "step": 8812 + }, + { + "epoch": 0.27010543091822975, + "grad_norm": 1.8152390740546618, + "learning_rate": 8.562888791978604e-06, + "loss": 0.6856, + "step": 8813 + }, + { + "epoch": 0.27013607944097096, + "grad_norm": 2.417189416125779, + "learning_rate": 8.562540561233991e-06, + "loss": 0.6076, + "step": 8814 + }, + { + "epoch": 0.27016672796371216, + "grad_norm": 1.6823462805788256, + "learning_rate": 8.562192295386756e-06, + "loss": 0.7032, + "step": 8815 + }, + { + "epoch": 0.27019737648645337, + "grad_norm": 1.2739800664773668, + "learning_rate": 8.561843994440327e-06, + "loss": 0.505, + "step": 8816 + }, + { + "epoch": 0.2702280250091946, + "grad_norm": 1.6269708306402013, + "learning_rate": 8.56149565839814e-06, + "loss": 0.6913, + "step": 8817 + }, + { + "epoch": 0.2702586735319358, + "grad_norm": 1.7961490270275995, + "learning_rate": 8.561147287263623e-06, + "loss": 0.7438, + "step": 8818 + }, + { + "epoch": 0.270289322054677, + "grad_norm": 1.9297664765704352, + "learning_rate": 8.560798881040211e-06, + "loss": 0.7373, + "step": 8819 + }, + { + "epoch": 0.2703199705774182, + "grad_norm": 0.797588350021819, + "learning_rate": 8.560450439731337e-06, + "loss": 0.4941, + "step": 8820 + }, + { + "epoch": 0.2703506191001594, + "grad_norm": 1.702505793834867, + "learning_rate": 8.560101963340434e-06, + "loss": 0.6233, + "step": 8821 + }, + { + "epoch": 0.2703812676229006, + "grad_norm": 0.8239591946116277, + "learning_rate": 8.559753451870936e-06, + "loss": 0.4798, + "step": 8822 + }, + { + "epoch": 0.2704119161456418, + "grad_norm": 1.6828080354864063, + "learning_rate": 8.559404905326275e-06, + "loss": 0.8077, + "step": 8823 + }, + { + "epoch": 0.27044256466838296, + "grad_norm": 1.708635310700765, + "learning_rate": 8.559056323709889e-06, + "loss": 0.6692, + "step": 8824 + }, + { + "epoch": 0.27047321319112416, + "grad_norm": 1.8926821279849504, + "learning_rate": 8.558707707025209e-06, + "loss": 0.6339, + "step": 8825 + }, + { + "epoch": 0.27050386171386537, + "grad_norm": 1.537903565782643, + "learning_rate": 8.558359055275671e-06, + "loss": 0.6099, + "step": 8826 + }, + { + "epoch": 0.2705345102366066, + "grad_norm": 1.8023575685731763, + "learning_rate": 8.558010368464711e-06, + "loss": 0.7335, + "step": 8827 + }, + { + "epoch": 0.2705651587593478, + "grad_norm": 1.6308445552780313, + "learning_rate": 8.557661646595766e-06, + "loss": 0.6307, + "step": 8828 + }, + { + "epoch": 0.270595807282089, + "grad_norm": 1.7514276679073437, + "learning_rate": 8.557312889672267e-06, + "loss": 0.766, + "step": 8829 + }, + { + "epoch": 0.2706264558048302, + "grad_norm": 1.836596959831608, + "learning_rate": 8.556964097697657e-06, + "loss": 0.8112, + "step": 8830 + }, + { + "epoch": 0.2706571043275714, + "grad_norm": 1.5905413361891771, + "learning_rate": 8.556615270675368e-06, + "loss": 0.7363, + "step": 8831 + }, + { + "epoch": 0.2706877528503126, + "grad_norm": 0.8215948076323152, + "learning_rate": 8.55626640860884e-06, + "loss": 0.4553, + "step": 8832 + }, + { + "epoch": 0.2707184013730538, + "grad_norm": 0.8256053687122133, + "learning_rate": 8.555917511501508e-06, + "loss": 0.4495, + "step": 8833 + }, + { + "epoch": 0.270749049895795, + "grad_norm": 1.7845342372534814, + "learning_rate": 8.555568579356813e-06, + "loss": 0.7316, + "step": 8834 + }, + { + "epoch": 
0.2707796984185362, + "grad_norm": 1.7255267727495989, + "learning_rate": 8.55521961217819e-06, + "loss": 0.6984, + "step": 8835 + }, + { + "epoch": 0.2708103469412774, + "grad_norm": 1.9782240811135685, + "learning_rate": 8.554870609969077e-06, + "loss": 0.8353, + "step": 8836 + }, + { + "epoch": 0.27084099546401863, + "grad_norm": 0.8377351461492188, + "learning_rate": 8.554521572732916e-06, + "loss": 0.4777, + "step": 8837 + }, + { + "epoch": 0.27087164398675984, + "grad_norm": 1.732214562776955, + "learning_rate": 8.554172500473144e-06, + "loss": 0.7866, + "step": 8838 + }, + { + "epoch": 0.27090229250950104, + "grad_norm": 1.5656821695344603, + "learning_rate": 8.553823393193201e-06, + "loss": 0.7611, + "step": 8839 + }, + { + "epoch": 0.27093294103224225, + "grad_norm": 1.7137042851275721, + "learning_rate": 8.553474250896527e-06, + "loss": 0.8427, + "step": 8840 + }, + { + "epoch": 0.27096358955498345, + "grad_norm": 1.5833062539422573, + "learning_rate": 8.553125073586561e-06, + "loss": 0.7037, + "step": 8841 + }, + { + "epoch": 0.27099423807772466, + "grad_norm": 1.6003667913392228, + "learning_rate": 8.552775861266745e-06, + "loss": 0.6149, + "step": 8842 + }, + { + "epoch": 0.27102488660046586, + "grad_norm": 1.6526231020190096, + "learning_rate": 8.552426613940521e-06, + "loss": 0.5874, + "step": 8843 + }, + { + "epoch": 0.27105553512320707, + "grad_norm": 2.0122599096017995, + "learning_rate": 8.552077331611326e-06, + "loss": 0.7873, + "step": 8844 + }, + { + "epoch": 0.2710861836459483, + "grad_norm": 1.6327683844718004, + "learning_rate": 8.551728014282607e-06, + "loss": 0.7197, + "step": 8845 + }, + { + "epoch": 0.2711168321686895, + "grad_norm": 2.0151087052282954, + "learning_rate": 8.5513786619578e-06, + "loss": 0.5964, + "step": 8846 + }, + { + "epoch": 0.2711474806914307, + "grad_norm": 1.581167504382569, + "learning_rate": 8.551029274640353e-06, + "loss": 0.6691, + "step": 8847 + }, + { + "epoch": 0.2711781292141719, + "grad_norm": 1.9235916280997536, + "learning_rate": 8.550679852333705e-06, + "loss": 0.7557, + "step": 8848 + }, + { + "epoch": 0.2712087777369131, + "grad_norm": 1.7804378036148576, + "learning_rate": 8.5503303950413e-06, + "loss": 0.686, + "step": 8849 + }, + { + "epoch": 0.2712394262596543, + "grad_norm": 1.5750640113672645, + "learning_rate": 8.549980902766582e-06, + "loss": 0.6396, + "step": 8850 + }, + { + "epoch": 0.2712700747823955, + "grad_norm": 1.6340863689551708, + "learning_rate": 8.549631375512994e-06, + "loss": 0.6892, + "step": 8851 + }, + { + "epoch": 0.2713007233051367, + "grad_norm": 1.587924546939549, + "learning_rate": 8.549281813283978e-06, + "loss": 0.7135, + "step": 8852 + }, + { + "epoch": 0.2713313718278779, + "grad_norm": 1.7522095417573245, + "learning_rate": 8.548932216082982e-06, + "loss": 0.6564, + "step": 8853 + }, + { + "epoch": 0.2713620203506191, + "grad_norm": 1.6114102508716326, + "learning_rate": 8.548582583913447e-06, + "loss": 0.6672, + "step": 8854 + }, + { + "epoch": 0.2713926688733603, + "grad_norm": 1.6142742732045783, + "learning_rate": 8.54823291677882e-06, + "loss": 0.7644, + "step": 8855 + }, + { + "epoch": 0.2714233173961015, + "grad_norm": 1.8080107685066178, + "learning_rate": 8.547883214682549e-06, + "loss": 0.7918, + "step": 8856 + }, + { + "epoch": 0.2714539659188427, + "grad_norm": 2.3016588914605642, + "learning_rate": 8.547533477628073e-06, + "loss": 0.7264, + "step": 8857 + }, + { + "epoch": 0.2714846144415839, + "grad_norm": 1.7968703671633262, + "learning_rate": 8.547183705618845e-06, + "loss": 
0.6844, + "step": 8858 + }, + { + "epoch": 0.2715152629643251, + "grad_norm": 0.9502573799364203, + "learning_rate": 8.546833898658309e-06, + "loss": 0.5001, + "step": 8859 + }, + { + "epoch": 0.2715459114870663, + "grad_norm": 1.747553127168434, + "learning_rate": 8.546484056749908e-06, + "loss": 0.669, + "step": 8860 + }, + { + "epoch": 0.2715765600098075, + "grad_norm": 1.878491219669752, + "learning_rate": 8.546134179897095e-06, + "loss": 0.7148, + "step": 8861 + }, + { + "epoch": 0.2716072085325487, + "grad_norm": 1.839996355946716, + "learning_rate": 8.545784268103312e-06, + "loss": 0.8137, + "step": 8862 + }, + { + "epoch": 0.2716378570552899, + "grad_norm": 1.5920525220525372, + "learning_rate": 8.54543432137201e-06, + "loss": 0.6533, + "step": 8863 + }, + { + "epoch": 0.2716685055780311, + "grad_norm": 1.960347053118997, + "learning_rate": 8.545084339706638e-06, + "loss": 0.6712, + "step": 8864 + }, + { + "epoch": 0.27169915410077233, + "grad_norm": 1.8208342203903862, + "learning_rate": 8.544734323110641e-06, + "loss": 0.7195, + "step": 8865 + }, + { + "epoch": 0.27172980262351354, + "grad_norm": 1.8260524804415383, + "learning_rate": 8.54438427158747e-06, + "loss": 0.6593, + "step": 8866 + }, + { + "epoch": 0.27176045114625474, + "grad_norm": 1.865184219657092, + "learning_rate": 8.544034185140577e-06, + "loss": 0.6733, + "step": 8867 + }, + { + "epoch": 0.27179109966899595, + "grad_norm": 1.4938528022711894, + "learning_rate": 8.543684063773406e-06, + "loss": 0.6733, + "step": 8868 + }, + { + "epoch": 0.27182174819173716, + "grad_norm": 1.7253669793915463, + "learning_rate": 8.54333390748941e-06, + "loss": 0.6589, + "step": 8869 + }, + { + "epoch": 0.27185239671447836, + "grad_norm": 1.9071890434874221, + "learning_rate": 8.542983716292037e-06, + "loss": 0.6431, + "step": 8870 + }, + { + "epoch": 0.27188304523721957, + "grad_norm": 1.5529377671858078, + "learning_rate": 8.54263349018474e-06, + "loss": 0.7034, + "step": 8871 + }, + { + "epoch": 0.2719136937599608, + "grad_norm": 1.6976990889160106, + "learning_rate": 8.542283229170967e-06, + "loss": 0.7746, + "step": 8872 + }, + { + "epoch": 0.271944342282702, + "grad_norm": 1.6340679277043382, + "learning_rate": 8.541932933254174e-06, + "loss": 0.7613, + "step": 8873 + }, + { + "epoch": 0.2719749908054432, + "grad_norm": 1.8749306047483634, + "learning_rate": 8.541582602437808e-06, + "loss": 0.6513, + "step": 8874 + }, + { + "epoch": 0.2720056393281844, + "grad_norm": 1.828331294959217, + "learning_rate": 8.54123223672532e-06, + "loss": 0.6765, + "step": 8875 + }, + { + "epoch": 0.2720362878509256, + "grad_norm": 1.5732171066747733, + "learning_rate": 8.540881836120169e-06, + "loss": 0.7324, + "step": 8876 + }, + { + "epoch": 0.2720669363736668, + "grad_norm": 1.8469949575776654, + "learning_rate": 8.540531400625802e-06, + "loss": 0.7053, + "step": 8877 + }, + { + "epoch": 0.272097584896408, + "grad_norm": 1.693699467740712, + "learning_rate": 8.540180930245671e-06, + "loss": 0.7701, + "step": 8878 + }, + { + "epoch": 0.2721282334191492, + "grad_norm": 1.9770049568913408, + "learning_rate": 8.539830424983236e-06, + "loss": 0.7546, + "step": 8879 + }, + { + "epoch": 0.2721588819418904, + "grad_norm": 0.9851344379416642, + "learning_rate": 8.539479884841941e-06, + "loss": 0.4883, + "step": 8880 + }, + { + "epoch": 0.2721895304646316, + "grad_norm": 1.7765378692131473, + "learning_rate": 8.539129309825249e-06, + "loss": 0.6598, + "step": 8881 + }, + { + "epoch": 0.27222017898737283, + "grad_norm": 1.8034482779842749, + 
"learning_rate": 8.538778699936608e-06, + "loss": 0.777, + "step": 8882 + }, + { + "epoch": 0.27225082751011404, + "grad_norm": 1.8205186745584194, + "learning_rate": 8.538428055179476e-06, + "loss": 0.6712, + "step": 8883 + }, + { + "epoch": 0.27228147603285524, + "grad_norm": 0.7655008369368597, + "learning_rate": 8.538077375557308e-06, + "loss": 0.4892, + "step": 8884 + }, + { + "epoch": 0.27231212455559645, + "grad_norm": 1.7341538461113861, + "learning_rate": 8.537726661073556e-06, + "loss": 0.7282, + "step": 8885 + }, + { + "epoch": 0.2723427730783376, + "grad_norm": 1.6531500423403311, + "learning_rate": 8.53737591173168e-06, + "loss": 0.6194, + "step": 8886 + }, + { + "epoch": 0.2723734216010788, + "grad_norm": 1.7530550224811159, + "learning_rate": 8.537025127535132e-06, + "loss": 0.7143, + "step": 8887 + }, + { + "epoch": 0.27240407012382, + "grad_norm": 1.6613968329748647, + "learning_rate": 8.536674308487373e-06, + "loss": 0.6828, + "step": 8888 + }, + { + "epoch": 0.2724347186465612, + "grad_norm": 1.5816561529195963, + "learning_rate": 8.536323454591855e-06, + "loss": 0.6963, + "step": 8889 + }, + { + "epoch": 0.2724653671693024, + "grad_norm": 0.845816475943142, + "learning_rate": 8.535972565852038e-06, + "loss": 0.4704, + "step": 8890 + }, + { + "epoch": 0.2724960156920436, + "grad_norm": 1.655668603845714, + "learning_rate": 8.53562164227138e-06, + "loss": 0.6416, + "step": 8891 + }, + { + "epoch": 0.27252666421478483, + "grad_norm": 1.9536905223372159, + "learning_rate": 8.535270683853336e-06, + "loss": 0.8388, + "step": 8892 + }, + { + "epoch": 0.27255731273752604, + "grad_norm": 0.819287426146296, + "learning_rate": 8.534919690601365e-06, + "loss": 0.4648, + "step": 8893 + }, + { + "epoch": 0.27258796126026724, + "grad_norm": 0.7943848154411493, + "learning_rate": 8.534568662518926e-06, + "loss": 0.4819, + "step": 8894 + }, + { + "epoch": 0.27261860978300845, + "grad_norm": 1.5684964159105819, + "learning_rate": 8.534217599609479e-06, + "loss": 0.7038, + "step": 8895 + }, + { + "epoch": 0.27264925830574965, + "grad_norm": 1.6433777458260526, + "learning_rate": 8.53386650187648e-06, + "loss": 0.8252, + "step": 8896 + }, + { + "epoch": 0.27267990682849086, + "grad_norm": 1.6870324069431173, + "learning_rate": 8.53351536932339e-06, + "loss": 0.7796, + "step": 8897 + }, + { + "epoch": 0.27271055535123206, + "grad_norm": 0.8222315621039507, + "learning_rate": 8.533164201953673e-06, + "loss": 0.4893, + "step": 8898 + }, + { + "epoch": 0.27274120387397327, + "grad_norm": 1.557609537116572, + "learning_rate": 8.53281299977078e-06, + "loss": 0.6769, + "step": 8899 + }, + { + "epoch": 0.2727718523967145, + "grad_norm": 1.7644252961963303, + "learning_rate": 8.532461762778179e-06, + "loss": 0.6731, + "step": 8900 + }, + { + "epoch": 0.2728025009194557, + "grad_norm": 1.8459054845479064, + "learning_rate": 8.532110490979327e-06, + "loss": 0.6781, + "step": 8901 + }, + { + "epoch": 0.2728331494421969, + "grad_norm": 1.6439616329408109, + "learning_rate": 8.531759184377688e-06, + "loss": 0.7281, + "step": 8902 + }, + { + "epoch": 0.2728637979649381, + "grad_norm": 1.6248513536698928, + "learning_rate": 8.531407842976722e-06, + "loss": 0.7429, + "step": 8903 + }, + { + "epoch": 0.2728944464876793, + "grad_norm": 1.650080998850224, + "learning_rate": 8.53105646677989e-06, + "loss": 0.6032, + "step": 8904 + }, + { + "epoch": 0.2729250950104205, + "grad_norm": 1.7884122795745152, + "learning_rate": 8.530705055790655e-06, + "loss": 0.7006, + "step": 8905 + }, + { + "epoch": 
0.2729557435331617, + "grad_norm": 1.7222333266935599, + "learning_rate": 8.530353610012482e-06, + "loss": 0.5788, + "step": 8906 + }, + { + "epoch": 0.2729863920559029, + "grad_norm": 1.7274635626853594, + "learning_rate": 8.530002129448828e-06, + "loss": 0.7433, + "step": 8907 + }, + { + "epoch": 0.2730170405786441, + "grad_norm": 0.9174566285577179, + "learning_rate": 8.529650614103163e-06, + "loss": 0.4814, + "step": 8908 + }, + { + "epoch": 0.2730476891013853, + "grad_norm": 1.7612035838864213, + "learning_rate": 8.529299063978947e-06, + "loss": 0.7262, + "step": 8909 + }, + { + "epoch": 0.27307833762412653, + "grad_norm": 1.6961099201796734, + "learning_rate": 8.528947479079644e-06, + "loss": 0.7318, + "step": 8910 + }, + { + "epoch": 0.27310898614686774, + "grad_norm": 1.7834245854657134, + "learning_rate": 8.528595859408718e-06, + "loss": 0.7455, + "step": 8911 + }, + { + "epoch": 0.27313963466960894, + "grad_norm": 2.1512413150559073, + "learning_rate": 8.528244204969633e-06, + "loss": 0.6922, + "step": 8912 + }, + { + "epoch": 0.27317028319235015, + "grad_norm": 2.127428462245573, + "learning_rate": 8.527892515765858e-06, + "loss": 0.732, + "step": 8913 + }, + { + "epoch": 0.27320093171509136, + "grad_norm": 1.8548303292643433, + "learning_rate": 8.527540791800853e-06, + "loss": 0.6978, + "step": 8914 + }, + { + "epoch": 0.27323158023783256, + "grad_norm": 0.8204328525789767, + "learning_rate": 8.527189033078087e-06, + "loss": 0.4836, + "step": 8915 + }, + { + "epoch": 0.27326222876057377, + "grad_norm": 1.820817757620297, + "learning_rate": 8.526837239601025e-06, + "loss": 0.8371, + "step": 8916 + }, + { + "epoch": 0.2732928772833149, + "grad_norm": 1.8019718578149675, + "learning_rate": 8.526485411373133e-06, + "loss": 0.6926, + "step": 8917 + }, + { + "epoch": 0.2733235258060561, + "grad_norm": 2.3784679658157786, + "learning_rate": 8.52613354839788e-06, + "loss": 0.6576, + "step": 8918 + }, + { + "epoch": 0.27335417432879733, + "grad_norm": 1.9373370350258148, + "learning_rate": 8.525781650678728e-06, + "loss": 0.8002, + "step": 8919 + }, + { + "epoch": 0.27338482285153853, + "grad_norm": 1.921617007625586, + "learning_rate": 8.525429718219149e-06, + "loss": 0.7306, + "step": 8920 + }, + { + "epoch": 0.27341547137427974, + "grad_norm": 0.7994798709610614, + "learning_rate": 8.525077751022608e-06, + "loss": 0.4801, + "step": 8921 + }, + { + "epoch": 0.27344611989702095, + "grad_norm": 1.7509254672734307, + "learning_rate": 8.524725749092576e-06, + "loss": 0.7677, + "step": 8922 + }, + { + "epoch": 0.27347676841976215, + "grad_norm": 1.6811401727977147, + "learning_rate": 8.524373712432516e-06, + "loss": 0.6405, + "step": 8923 + }, + { + "epoch": 0.27350741694250336, + "grad_norm": 1.4584699144324276, + "learning_rate": 8.5240216410459e-06, + "loss": 0.6635, + "step": 8924 + }, + { + "epoch": 0.27353806546524456, + "grad_norm": 1.8019179614522824, + "learning_rate": 8.5236695349362e-06, + "loss": 0.7513, + "step": 8925 + }, + { + "epoch": 0.27356871398798577, + "grad_norm": 1.5695373516084747, + "learning_rate": 8.523317394106883e-06, + "loss": 0.6253, + "step": 8926 + }, + { + "epoch": 0.273599362510727, + "grad_norm": 2.155425098662471, + "learning_rate": 8.522965218561416e-06, + "loss": 0.5809, + "step": 8927 + }, + { + "epoch": 0.2736300110334682, + "grad_norm": 0.8081088717412701, + "learning_rate": 8.522613008303272e-06, + "loss": 0.4873, + "step": 8928 + }, + { + "epoch": 0.2736606595562094, + "grad_norm": 1.6895120114169198, + "learning_rate": 8.522260763335921e-06, + 
"loss": 0.6866, + "step": 8929 + }, + { + "epoch": 0.2736913080789506, + "grad_norm": 1.6264514369482441, + "learning_rate": 8.521908483662832e-06, + "loss": 0.7698, + "step": 8930 + }, + { + "epoch": 0.2737219566016918, + "grad_norm": 0.7902895570415639, + "learning_rate": 8.52155616928748e-06, + "loss": 0.4879, + "step": 8931 + }, + { + "epoch": 0.273752605124433, + "grad_norm": 1.7762001771047475, + "learning_rate": 8.52120382021333e-06, + "loss": 0.6542, + "step": 8932 + }, + { + "epoch": 0.2737832536471742, + "grad_norm": 1.7557327949000252, + "learning_rate": 8.520851436443863e-06, + "loss": 0.7257, + "step": 8933 + }, + { + "epoch": 0.2738139021699154, + "grad_norm": 1.7687220370658254, + "learning_rate": 8.520499017982543e-06, + "loss": 0.6754, + "step": 8934 + }, + { + "epoch": 0.2738445506926566, + "grad_norm": 1.483019749862212, + "learning_rate": 8.520146564832846e-06, + "loss": 0.6156, + "step": 8935 + }, + { + "epoch": 0.2738751992153978, + "grad_norm": 1.7981788144519402, + "learning_rate": 8.519794076998244e-06, + "loss": 0.8156, + "step": 8936 + }, + { + "epoch": 0.27390584773813903, + "grad_norm": 1.6267236341131455, + "learning_rate": 8.519441554482211e-06, + "loss": 0.6319, + "step": 8937 + }, + { + "epoch": 0.27393649626088024, + "grad_norm": 1.6051061053232467, + "learning_rate": 8.51908899728822e-06, + "loss": 0.6618, + "step": 8938 + }, + { + "epoch": 0.27396714478362144, + "grad_norm": 1.7011268778953512, + "learning_rate": 8.518736405419742e-06, + "loss": 0.6642, + "step": 8939 + }, + { + "epoch": 0.27399779330636265, + "grad_norm": 1.4798392818504345, + "learning_rate": 8.518383778880258e-06, + "loss": 0.6417, + "step": 8940 + }, + { + "epoch": 0.27402844182910385, + "grad_norm": 1.768032613524761, + "learning_rate": 8.518031117673236e-06, + "loss": 0.6395, + "step": 8941 + }, + { + "epoch": 0.27405909035184506, + "grad_norm": 0.9237556735023857, + "learning_rate": 8.517678421802153e-06, + "loss": 0.4872, + "step": 8942 + }, + { + "epoch": 0.27408973887458626, + "grad_norm": 1.7171307666490894, + "learning_rate": 8.517325691270485e-06, + "loss": 0.6917, + "step": 8943 + }, + { + "epoch": 0.27412038739732747, + "grad_norm": 1.68318631367534, + "learning_rate": 8.516972926081708e-06, + "loss": 0.7705, + "step": 8944 + }, + { + "epoch": 0.2741510359200687, + "grad_norm": 1.518627134125846, + "learning_rate": 8.516620126239297e-06, + "loss": 0.6904, + "step": 8945 + }, + { + "epoch": 0.2741816844428099, + "grad_norm": 1.7948271330717895, + "learning_rate": 8.516267291746727e-06, + "loss": 0.6212, + "step": 8946 + }, + { + "epoch": 0.2742123329655511, + "grad_norm": 1.5842818686450302, + "learning_rate": 8.515914422607476e-06, + "loss": 0.6174, + "step": 8947 + }, + { + "epoch": 0.27424298148829224, + "grad_norm": 1.5997366628355438, + "learning_rate": 8.51556151882502e-06, + "loss": 0.696, + "step": 8948 + }, + { + "epoch": 0.27427363001103344, + "grad_norm": 0.83702692048584, + "learning_rate": 8.515208580402838e-06, + "loss": 0.4717, + "step": 8949 + }, + { + "epoch": 0.27430427853377465, + "grad_norm": 1.671521389339352, + "learning_rate": 8.514855607344406e-06, + "loss": 0.6954, + "step": 8950 + }, + { + "epoch": 0.27433492705651585, + "grad_norm": 1.642686804106307, + "learning_rate": 8.514502599653202e-06, + "loss": 0.6524, + "step": 8951 + }, + { + "epoch": 0.27436557557925706, + "grad_norm": 1.930443683722303, + "learning_rate": 8.514149557332705e-06, + "loss": 0.5977, + "step": 8952 + }, + { + "epoch": 0.27439622410199827, + "grad_norm": 1.8588711483918727, 
+ "learning_rate": 8.513796480386393e-06, + "loss": 0.691, + "step": 8953 + }, + { + "epoch": 0.27442687262473947, + "grad_norm": 1.52118169150677, + "learning_rate": 8.513443368817745e-06, + "loss": 0.5949, + "step": 8954 + }, + { + "epoch": 0.2744575211474807, + "grad_norm": 2.0404655192331855, + "learning_rate": 8.513090222630241e-06, + "loss": 0.6848, + "step": 8955 + }, + { + "epoch": 0.2744881696702219, + "grad_norm": 1.6432140297581899, + "learning_rate": 8.51273704182736e-06, + "loss": 0.715, + "step": 8956 + }, + { + "epoch": 0.2745188181929631, + "grad_norm": 1.8016682472158931, + "learning_rate": 8.512383826412583e-06, + "loss": 0.7759, + "step": 8957 + }, + { + "epoch": 0.2745494667157043, + "grad_norm": 1.683598982792562, + "learning_rate": 8.512030576389388e-06, + "loss": 0.6959, + "step": 8958 + }, + { + "epoch": 0.2745801152384455, + "grad_norm": 1.7648246329330908, + "learning_rate": 8.511677291761258e-06, + "loss": 0.6839, + "step": 8959 + }, + { + "epoch": 0.2746107637611867, + "grad_norm": 1.884651434108522, + "learning_rate": 8.511323972531674e-06, + "loss": 0.6445, + "step": 8960 + }, + { + "epoch": 0.2746414122839279, + "grad_norm": 1.717263793383077, + "learning_rate": 8.510970618704115e-06, + "loss": 0.6467, + "step": 8961 + }, + { + "epoch": 0.2746720608066691, + "grad_norm": 1.957770385886508, + "learning_rate": 8.510617230282064e-06, + "loss": 0.6876, + "step": 8962 + }, + { + "epoch": 0.2747027093294103, + "grad_norm": 1.6186129764501287, + "learning_rate": 8.510263807269003e-06, + "loss": 0.673, + "step": 8963 + }, + { + "epoch": 0.2747333578521515, + "grad_norm": 1.6583991235931448, + "learning_rate": 8.509910349668418e-06, + "loss": 0.7013, + "step": 8964 + }, + { + "epoch": 0.27476400637489273, + "grad_norm": 1.794606508038073, + "learning_rate": 8.509556857483786e-06, + "loss": 0.7349, + "step": 8965 + }, + { + "epoch": 0.27479465489763394, + "grad_norm": 1.6199631123851008, + "learning_rate": 8.509203330718591e-06, + "loss": 0.6888, + "step": 8966 + }, + { + "epoch": 0.27482530342037514, + "grad_norm": 1.7577080716912492, + "learning_rate": 8.508849769376319e-06, + "loss": 0.6529, + "step": 8967 + }, + { + "epoch": 0.27485595194311635, + "grad_norm": 0.841711810591831, + "learning_rate": 8.508496173460453e-06, + "loss": 0.4772, + "step": 8968 + }, + { + "epoch": 0.27488660046585756, + "grad_norm": 1.8617301905271653, + "learning_rate": 8.508142542974476e-06, + "loss": 0.8209, + "step": 8969 + }, + { + "epoch": 0.27491724898859876, + "grad_norm": 1.584263187698999, + "learning_rate": 8.507788877921873e-06, + "loss": 0.6315, + "step": 8970 + }, + { + "epoch": 0.27494789751133997, + "grad_norm": 1.684792796094721, + "learning_rate": 8.507435178306127e-06, + "loss": 0.6973, + "step": 8971 + }, + { + "epoch": 0.2749785460340812, + "grad_norm": 1.8314017902475492, + "learning_rate": 8.507081444130726e-06, + "loss": 0.6656, + "step": 8972 + }, + { + "epoch": 0.2750091945568224, + "grad_norm": 1.7181922690851663, + "learning_rate": 8.506727675399154e-06, + "loss": 0.7271, + "step": 8973 + }, + { + "epoch": 0.2750398430795636, + "grad_norm": 0.7852113438286004, + "learning_rate": 8.506373872114897e-06, + "loss": 0.4746, + "step": 8974 + }, + { + "epoch": 0.2750704916023048, + "grad_norm": 1.69665659570189, + "learning_rate": 8.506020034281442e-06, + "loss": 0.8079, + "step": 8975 + }, + { + "epoch": 0.275101140125046, + "grad_norm": 1.776923016283744, + "learning_rate": 8.505666161902275e-06, + "loss": 0.6934, + "step": 8976 + }, + { + "epoch": 0.2751317886477872, 
+ "grad_norm": 1.8995394673882617, + "learning_rate": 8.50531225498088e-06, + "loss": 0.8192, + "step": 8977 + }, + { + "epoch": 0.2751624371705284, + "grad_norm": 1.7685772333504686, + "learning_rate": 8.504958313520749e-06, + "loss": 0.5661, + "step": 8978 + }, + { + "epoch": 0.27519308569326956, + "grad_norm": 1.910908891269093, + "learning_rate": 8.504604337525366e-06, + "loss": 0.7812, + "step": 8979 + }, + { + "epoch": 0.27522373421601076, + "grad_norm": 1.7350372505074705, + "learning_rate": 8.504250326998219e-06, + "loss": 0.6859, + "step": 8980 + }, + { + "epoch": 0.27525438273875197, + "grad_norm": 2.2127788108982975, + "learning_rate": 8.503896281942798e-06, + "loss": 0.7284, + "step": 8981 + }, + { + "epoch": 0.2752850312614932, + "grad_norm": 1.7108325166222864, + "learning_rate": 8.503542202362589e-06, + "loss": 0.7069, + "step": 8982 + }, + { + "epoch": 0.2753156797842344, + "grad_norm": 0.8533062819018975, + "learning_rate": 8.503188088261083e-06, + "loss": 0.4687, + "step": 8983 + }, + { + "epoch": 0.2753463283069756, + "grad_norm": 1.5284421157648374, + "learning_rate": 8.502833939641768e-06, + "loss": 0.6006, + "step": 8984 + }, + { + "epoch": 0.2753769768297168, + "grad_norm": 1.685049130655252, + "learning_rate": 8.502479756508135e-06, + "loss": 0.6967, + "step": 8985 + }, + { + "epoch": 0.275407625352458, + "grad_norm": 3.56842439211834, + "learning_rate": 8.502125538863673e-06, + "loss": 0.7228, + "step": 8986 + }, + { + "epoch": 0.2754382738751992, + "grad_norm": 1.802476308013116, + "learning_rate": 8.50177128671187e-06, + "loss": 0.8071, + "step": 8987 + }, + { + "epoch": 0.2754689223979404, + "grad_norm": 1.8464821282430939, + "learning_rate": 8.50141700005622e-06, + "loss": 0.6597, + "step": 8988 + }, + { + "epoch": 0.2754995709206816, + "grad_norm": 1.8414605579780177, + "learning_rate": 8.501062678900212e-06, + "loss": 0.7516, + "step": 8989 + }, + { + "epoch": 0.2755302194434228, + "grad_norm": 1.794673669146081, + "learning_rate": 8.500708323247339e-06, + "loss": 0.7016, + "step": 8990 + }, + { + "epoch": 0.275560867966164, + "grad_norm": 0.9141931288407747, + "learning_rate": 8.50035393310109e-06, + "loss": 0.4665, + "step": 8991 + }, + { + "epoch": 0.27559151648890523, + "grad_norm": 0.980848349551805, + "learning_rate": 8.499999508464958e-06, + "loss": 0.4693, + "step": 8992 + }, + { + "epoch": 0.27562216501164644, + "grad_norm": 1.7802828188720556, + "learning_rate": 8.499645049342436e-06, + "loss": 0.7065, + "step": 8993 + }, + { + "epoch": 0.27565281353438764, + "grad_norm": 5.67243509382753, + "learning_rate": 8.499290555737015e-06, + "loss": 0.6994, + "step": 8994 + }, + { + "epoch": 0.27568346205712885, + "grad_norm": 1.9910605182757009, + "learning_rate": 8.49893602765219e-06, + "loss": 0.7711, + "step": 8995 + }, + { + "epoch": 0.27571411057987005, + "grad_norm": 1.6960190461397977, + "learning_rate": 8.498581465091453e-06, + "loss": 0.6193, + "step": 8996 + }, + { + "epoch": 0.27574475910261126, + "grad_norm": 1.8750294748665164, + "learning_rate": 8.498226868058296e-06, + "loss": 0.7498, + "step": 8997 + }, + { + "epoch": 0.27577540762535246, + "grad_norm": 1.834164860180798, + "learning_rate": 8.497872236556214e-06, + "loss": 0.6531, + "step": 8998 + }, + { + "epoch": 0.27580605614809367, + "grad_norm": 1.5337940865499657, + "learning_rate": 8.497517570588704e-06, + "loss": 0.6306, + "step": 8999 + }, + { + "epoch": 0.2758367046708349, + "grad_norm": 1.763417355481271, + "learning_rate": 8.497162870159259e-06, + "loss": 0.704, + "step": 9000 + 
}, + { + "epoch": 0.2758673531935761, + "grad_norm": 1.8947257749798507, + "learning_rate": 8.496808135271373e-06, + "loss": 0.8322, + "step": 9001 + }, + { + "epoch": 0.2758980017163173, + "grad_norm": 2.0289053606828964, + "learning_rate": 8.49645336592854e-06, + "loss": 0.7709, + "step": 9002 + }, + { + "epoch": 0.2759286502390585, + "grad_norm": 1.6711791389160924, + "learning_rate": 8.49609856213426e-06, + "loss": 0.6883, + "step": 9003 + }, + { + "epoch": 0.2759592987617997, + "grad_norm": 1.9206815944648743, + "learning_rate": 8.495743723892024e-06, + "loss": 0.7094, + "step": 9004 + }, + { + "epoch": 0.2759899472845409, + "grad_norm": 2.003831752920464, + "learning_rate": 8.495388851205332e-06, + "loss": 0.7246, + "step": 9005 + }, + { + "epoch": 0.2760205958072821, + "grad_norm": 1.8690851301465488, + "learning_rate": 8.495033944077679e-06, + "loss": 0.6933, + "step": 9006 + }, + { + "epoch": 0.2760512443300233, + "grad_norm": 1.7331749634060647, + "learning_rate": 8.494679002512562e-06, + "loss": 0.5882, + "step": 9007 + }, + { + "epoch": 0.2760818928527645, + "grad_norm": 1.7986876409866694, + "learning_rate": 8.49432402651348e-06, + "loss": 0.7105, + "step": 9008 + }, + { + "epoch": 0.2761125413755057, + "grad_norm": 1.8479089599857523, + "learning_rate": 8.493969016083928e-06, + "loss": 0.7937, + "step": 9009 + }, + { + "epoch": 0.2761431898982469, + "grad_norm": 2.185286117411614, + "learning_rate": 8.493613971227405e-06, + "loss": 0.6251, + "step": 9010 + }, + { + "epoch": 0.2761738384209881, + "grad_norm": 1.6777117541562163, + "learning_rate": 8.49325889194741e-06, + "loss": 0.6983, + "step": 9011 + }, + { + "epoch": 0.2762044869437293, + "grad_norm": 1.7012244062141935, + "learning_rate": 8.49290377824744e-06, + "loss": 0.7239, + "step": 9012 + }, + { + "epoch": 0.2762351354664705, + "grad_norm": 1.454284130606198, + "learning_rate": 8.492548630131e-06, + "loss": 0.4865, + "step": 9013 + }, + { + "epoch": 0.2762657839892117, + "grad_norm": 1.5819977203683984, + "learning_rate": 8.49219344760158e-06, + "loss": 0.7938, + "step": 9014 + }, + { + "epoch": 0.2762964325119529, + "grad_norm": 1.7094470561246786, + "learning_rate": 8.491838230662685e-06, + "loss": 0.6448, + "step": 9015 + }, + { + "epoch": 0.2763270810346941, + "grad_norm": 0.820485634625417, + "learning_rate": 8.491482979317817e-06, + "loss": 0.4885, + "step": 9016 + }, + { + "epoch": 0.2763577295574353, + "grad_norm": 1.6846669479265801, + "learning_rate": 8.491127693570472e-06, + "loss": 0.6808, + "step": 9017 + }, + { + "epoch": 0.2763883780801765, + "grad_norm": 1.9079865278679335, + "learning_rate": 8.490772373424155e-06, + "loss": 0.7879, + "step": 9018 + }, + { + "epoch": 0.27641902660291773, + "grad_norm": 1.4880458962550738, + "learning_rate": 8.49041701888236e-06, + "loss": 0.6039, + "step": 9019 + }, + { + "epoch": 0.27644967512565893, + "grad_norm": 1.5121278939778633, + "learning_rate": 8.490061629948596e-06, + "loss": 0.6186, + "step": 9020 + }, + { + "epoch": 0.27648032364840014, + "grad_norm": 1.2371307453531069, + "learning_rate": 8.489706206626363e-06, + "loss": 0.4997, + "step": 9021 + }, + { + "epoch": 0.27651097217114134, + "grad_norm": 1.992204941981258, + "learning_rate": 8.489350748919161e-06, + "loss": 0.7718, + "step": 9022 + }, + { + "epoch": 0.27654162069388255, + "grad_norm": 1.6229657915464484, + "learning_rate": 8.488995256830492e-06, + "loss": 0.6702, + "step": 9023 + }, + { + "epoch": 0.27657226921662376, + "grad_norm": 1.6179138130397475, + "learning_rate": 8.48863973036386e-06, 
+ "loss": 0.5626, + "step": 9024 + }, + { + "epoch": 0.27660291773936496, + "grad_norm": 1.8252008425519288, + "learning_rate": 8.48828416952277e-06, + "loss": 0.7251, + "step": 9025 + }, + { + "epoch": 0.27663356626210617, + "grad_norm": 0.8597329940767525, + "learning_rate": 8.487928574310722e-06, + "loss": 0.502, + "step": 9026 + }, + { + "epoch": 0.2766642147848474, + "grad_norm": 1.5209132704134694, + "learning_rate": 8.487572944731221e-06, + "loss": 0.6503, + "step": 9027 + }, + { + "epoch": 0.2766948633075886, + "grad_norm": 0.8026947619609113, + "learning_rate": 8.487217280787772e-06, + "loss": 0.4683, + "step": 9028 + }, + { + "epoch": 0.2767255118303298, + "grad_norm": 0.839903653538065, + "learning_rate": 8.48686158248388e-06, + "loss": 0.4864, + "step": 9029 + }, + { + "epoch": 0.276756160353071, + "grad_norm": 1.952597699774007, + "learning_rate": 8.486505849823048e-06, + "loss": 0.7223, + "step": 9030 + }, + { + "epoch": 0.2767868088758122, + "grad_norm": 1.74804474536977, + "learning_rate": 8.48615008280878e-06, + "loss": 0.6875, + "step": 9031 + }, + { + "epoch": 0.2768174573985534, + "grad_norm": 1.7920652696548212, + "learning_rate": 8.485794281444586e-06, + "loss": 0.6861, + "step": 9032 + }, + { + "epoch": 0.2768481059212946, + "grad_norm": 1.8232663343648345, + "learning_rate": 8.485438445733967e-06, + "loss": 0.6893, + "step": 9033 + }, + { + "epoch": 0.2768787544440358, + "grad_norm": 1.5782898950046054, + "learning_rate": 8.485082575680432e-06, + "loss": 0.7499, + "step": 9034 + }, + { + "epoch": 0.276909402966777, + "grad_norm": 1.8315874920084587, + "learning_rate": 8.484726671287487e-06, + "loss": 0.7021, + "step": 9035 + }, + { + "epoch": 0.2769400514895182, + "grad_norm": 1.8230435482313863, + "learning_rate": 8.484370732558637e-06, + "loss": 0.6525, + "step": 9036 + }, + { + "epoch": 0.27697070001225943, + "grad_norm": 1.5800653087417114, + "learning_rate": 8.484014759497393e-06, + "loss": 0.6476, + "step": 9037 + }, + { + "epoch": 0.27700134853500064, + "grad_norm": 0.9596555023178303, + "learning_rate": 8.48365875210726e-06, + "loss": 0.4791, + "step": 9038 + }, + { + "epoch": 0.27703199705774184, + "grad_norm": 1.7320594913008303, + "learning_rate": 8.483302710391745e-06, + "loss": 0.6637, + "step": 9039 + }, + { + "epoch": 0.27706264558048305, + "grad_norm": 1.9132927697103987, + "learning_rate": 8.482946634354357e-06, + "loss": 0.6716, + "step": 9040 + }, + { + "epoch": 0.2770932941032242, + "grad_norm": 1.7114365186988227, + "learning_rate": 8.482590523998605e-06, + "loss": 0.8054, + "step": 9041 + }, + { + "epoch": 0.2771239426259654, + "grad_norm": 1.9457632727399263, + "learning_rate": 8.482234379327998e-06, + "loss": 0.7892, + "step": 9042 + }, + { + "epoch": 0.2771545911487066, + "grad_norm": 0.8056859466014379, + "learning_rate": 8.481878200346042e-06, + "loss": 0.5033, + "step": 9043 + }, + { + "epoch": 0.2771852396714478, + "grad_norm": 1.797620052324035, + "learning_rate": 8.481521987056251e-06, + "loss": 0.8186, + "step": 9044 + }, + { + "epoch": 0.277215888194189, + "grad_norm": 1.6208492575340088, + "learning_rate": 8.481165739462135e-06, + "loss": 0.6201, + "step": 9045 + }, + { + "epoch": 0.2772465367169302, + "grad_norm": 1.7090892492203087, + "learning_rate": 8.480809457567201e-06, + "loss": 0.6974, + "step": 9046 + }, + { + "epoch": 0.27727718523967143, + "grad_norm": 1.80065226816876, + "learning_rate": 8.48045314137496e-06, + "loss": 0.7709, + "step": 9047 + }, + { + "epoch": 0.27730783376241264, + "grad_norm": 1.7667659611695843, + 
"learning_rate": 8.480096790888923e-06, + "loss": 0.7044, + "step": 9048 + }, + { + "epoch": 0.27733848228515384, + "grad_norm": 1.7801455983966386, + "learning_rate": 8.479740406112605e-06, + "loss": 0.728, + "step": 9049 + }, + { + "epoch": 0.27736913080789505, + "grad_norm": 0.8554134006351591, + "learning_rate": 8.479383987049512e-06, + "loss": 0.4863, + "step": 9050 + }, + { + "epoch": 0.27739977933063625, + "grad_norm": 1.8515627747452588, + "learning_rate": 8.479027533703158e-06, + "loss": 0.6811, + "step": 9051 + }, + { + "epoch": 0.27743042785337746, + "grad_norm": 0.8065091655939846, + "learning_rate": 8.478671046077055e-06, + "loss": 0.4781, + "step": 9052 + }, + { + "epoch": 0.27746107637611866, + "grad_norm": 0.8025348142966499, + "learning_rate": 8.47831452417472e-06, + "loss": 0.4797, + "step": 9053 + }, + { + "epoch": 0.27749172489885987, + "grad_norm": 2.0185059371846057, + "learning_rate": 8.477957967999659e-06, + "loss": 0.7417, + "step": 9054 + }, + { + "epoch": 0.2775223734216011, + "grad_norm": 1.744862420499543, + "learning_rate": 8.47760137755539e-06, + "loss": 0.5329, + "step": 9055 + }, + { + "epoch": 0.2775530219443423, + "grad_norm": 0.8059743905331227, + "learning_rate": 8.477244752845422e-06, + "loss": 0.4739, + "step": 9056 + }, + { + "epoch": 0.2775836704670835, + "grad_norm": 1.6255448461722164, + "learning_rate": 8.476888093873274e-06, + "loss": 0.6997, + "step": 9057 + }, + { + "epoch": 0.2776143189898247, + "grad_norm": 1.6809903580309289, + "learning_rate": 8.476531400642456e-06, + "loss": 0.6816, + "step": 9058 + }, + { + "epoch": 0.2776449675125659, + "grad_norm": 0.8549844589953546, + "learning_rate": 8.476174673156488e-06, + "loss": 0.4841, + "step": 9059 + }, + { + "epoch": 0.2776756160353071, + "grad_norm": 1.529703202211574, + "learning_rate": 8.475817911418879e-06, + "loss": 0.6558, + "step": 9060 + }, + { + "epoch": 0.2777062645580483, + "grad_norm": 1.543625937263075, + "learning_rate": 8.475461115433147e-06, + "loss": 0.6489, + "step": 9061 + }, + { + "epoch": 0.2777369130807895, + "grad_norm": 0.8091784224801523, + "learning_rate": 8.475104285202807e-06, + "loss": 0.4568, + "step": 9062 + }, + { + "epoch": 0.2777675616035307, + "grad_norm": 1.7937931245826422, + "learning_rate": 8.474747420731377e-06, + "loss": 0.7734, + "step": 9063 + }, + { + "epoch": 0.2777982101262719, + "grad_norm": 1.8079692677423855, + "learning_rate": 8.47439052202237e-06, + "loss": 0.6447, + "step": 9064 + }, + { + "epoch": 0.27782885864901313, + "grad_norm": 1.602998735812019, + "learning_rate": 8.474033589079306e-06, + "loss": 0.7661, + "step": 9065 + }, + { + "epoch": 0.27785950717175434, + "grad_norm": 1.5574291002967964, + "learning_rate": 8.473676621905699e-06, + "loss": 0.6964, + "step": 9066 + }, + { + "epoch": 0.27789015569449554, + "grad_norm": 1.7974346835792863, + "learning_rate": 8.473319620505067e-06, + "loss": 0.7943, + "step": 9067 + }, + { + "epoch": 0.27792080421723675, + "grad_norm": 1.509401108079263, + "learning_rate": 8.472962584880928e-06, + "loss": 0.6054, + "step": 9068 + }, + { + "epoch": 0.27795145273997796, + "grad_norm": 1.8676127254621804, + "learning_rate": 8.4726055150368e-06, + "loss": 0.6955, + "step": 9069 + }, + { + "epoch": 0.27798210126271916, + "grad_norm": 1.6620959380880314, + "learning_rate": 8.472248410976203e-06, + "loss": 0.5907, + "step": 9070 + }, + { + "epoch": 0.27801274978546037, + "grad_norm": 1.759579580423266, + "learning_rate": 8.471891272702652e-06, + "loss": 0.7861, + "step": 9071 + }, + { + "epoch": 
0.2780433983082015, + "grad_norm": 0.8765904050937524, + "learning_rate": 8.471534100219671e-06, + "loss": 0.4782, + "step": 9072 + }, + { + "epoch": 0.2780740468309427, + "grad_norm": 1.6595056587335084, + "learning_rate": 8.471176893530774e-06, + "loss": 0.6547, + "step": 9073 + }, + { + "epoch": 0.27810469535368393, + "grad_norm": 1.9927805933563996, + "learning_rate": 8.470819652639483e-06, + "loss": 0.6838, + "step": 9074 + }, + { + "epoch": 0.27813534387642513, + "grad_norm": 1.6969062777499166, + "learning_rate": 8.470462377549318e-06, + "loss": 0.597, + "step": 9075 + }, + { + "epoch": 0.27816599239916634, + "grad_norm": 1.7689089867076027, + "learning_rate": 8.4701050682638e-06, + "loss": 0.7216, + "step": 9076 + }, + { + "epoch": 0.27819664092190755, + "grad_norm": 1.7535763777218731, + "learning_rate": 8.469747724786448e-06, + "loss": 0.6479, + "step": 9077 + }, + { + "epoch": 0.27822728944464875, + "grad_norm": 1.5899275399737332, + "learning_rate": 8.469390347120783e-06, + "loss": 0.6777, + "step": 9078 + }, + { + "epoch": 0.27825793796738996, + "grad_norm": 0.8223827951774887, + "learning_rate": 8.46903293527033e-06, + "loss": 0.4747, + "step": 9079 + }, + { + "epoch": 0.27828858649013116, + "grad_norm": 1.5411226663313817, + "learning_rate": 8.468675489238604e-06, + "loss": 0.6194, + "step": 9080 + }, + { + "epoch": 0.27831923501287237, + "grad_norm": 1.8126218878458713, + "learning_rate": 8.468318009029135e-06, + "loss": 0.7059, + "step": 9081 + }, + { + "epoch": 0.2783498835356136, + "grad_norm": 1.8728665208362822, + "learning_rate": 8.46796049464544e-06, + "loss": 0.752, + "step": 9082 + }, + { + "epoch": 0.2783805320583548, + "grad_norm": 0.7979028104683148, + "learning_rate": 8.467602946091042e-06, + "loss": 0.457, + "step": 9083 + }, + { + "epoch": 0.278411180581096, + "grad_norm": 1.7007016861276996, + "learning_rate": 8.467245363369466e-06, + "loss": 0.7117, + "step": 9084 + }, + { + "epoch": 0.2784418291038372, + "grad_norm": 1.778782427551739, + "learning_rate": 8.466887746484232e-06, + "loss": 0.7367, + "step": 9085 + }, + { + "epoch": 0.2784724776265784, + "grad_norm": 1.947551974349518, + "learning_rate": 8.466530095438868e-06, + "loss": 0.6962, + "step": 9086 + }, + { + "epoch": 0.2785031261493196, + "grad_norm": 0.8213053840103609, + "learning_rate": 8.466172410236896e-06, + "loss": 0.4766, + "step": 9087 + }, + { + "epoch": 0.2785337746720608, + "grad_norm": 1.833732366664141, + "learning_rate": 8.46581469088184e-06, + "loss": 0.6129, + "step": 9088 + }, + { + "epoch": 0.278564423194802, + "grad_norm": 1.7196732448703727, + "learning_rate": 8.465456937377226e-06, + "loss": 0.68, + "step": 9089 + }, + { + "epoch": 0.2785950717175432, + "grad_norm": 1.7361351405065353, + "learning_rate": 8.465099149726577e-06, + "loss": 0.7816, + "step": 9090 + }, + { + "epoch": 0.2786257202402844, + "grad_norm": 1.9209311144194043, + "learning_rate": 8.46474132793342e-06, + "loss": 0.6754, + "step": 9091 + }, + { + "epoch": 0.27865636876302563, + "grad_norm": 1.6205494260837103, + "learning_rate": 8.464383472001278e-06, + "loss": 0.6307, + "step": 9092 + }, + { + "epoch": 0.27868701728576684, + "grad_norm": 1.754292556005674, + "learning_rate": 8.464025581933682e-06, + "loss": 0.7092, + "step": 9093 + }, + { + "epoch": 0.27871766580850804, + "grad_norm": 1.5780941690976715, + "learning_rate": 8.463667657734155e-06, + "loss": 0.7068, + "step": 9094 + }, + { + "epoch": 0.27874831433124925, + "grad_norm": 1.8412435094982464, + "learning_rate": 8.463309699406223e-06, + "loss": 
0.7323, + "step": 9095 + }, + { + "epoch": 0.27877896285399045, + "grad_norm": 1.5745559180579491, + "learning_rate": 8.462951706953418e-06, + "loss": 0.7193, + "step": 9096 + }, + { + "epoch": 0.27880961137673166, + "grad_norm": 1.7373316228609617, + "learning_rate": 8.462593680379259e-06, + "loss": 0.6815, + "step": 9097 + }, + { + "epoch": 0.27884025989947286, + "grad_norm": 1.6511486931946961, + "learning_rate": 8.46223561968728e-06, + "loss": 0.7181, + "step": 9098 + }, + { + "epoch": 0.27887090842221407, + "grad_norm": 1.8677460013290668, + "learning_rate": 8.46187752488101e-06, + "loss": 0.6465, + "step": 9099 + }, + { + "epoch": 0.2789015569449553, + "grad_norm": 1.6100785946490492, + "learning_rate": 8.461519395963973e-06, + "loss": 0.6906, + "step": 9100 + }, + { + "epoch": 0.2789322054676965, + "grad_norm": 1.6301902056656625, + "learning_rate": 8.461161232939701e-06, + "loss": 0.6866, + "step": 9101 + }, + { + "epoch": 0.2789628539904377, + "grad_norm": 1.6043882980396227, + "learning_rate": 8.46080303581172e-06, + "loss": 0.7154, + "step": 9102 + }, + { + "epoch": 0.27899350251317884, + "grad_norm": 1.640582728771274, + "learning_rate": 8.460444804583562e-06, + "loss": 0.6891, + "step": 9103 + }, + { + "epoch": 0.27902415103592004, + "grad_norm": 1.801633340495724, + "learning_rate": 8.460086539258758e-06, + "loss": 0.7012, + "step": 9104 + }, + { + "epoch": 0.27905479955866125, + "grad_norm": 2.129223839391183, + "learning_rate": 8.459728239840833e-06, + "loss": 0.7043, + "step": 9105 + }, + { + "epoch": 0.27908544808140245, + "grad_norm": 1.762794609754471, + "learning_rate": 8.459369906333322e-06, + "loss": 0.7614, + "step": 9106 + }, + { + "epoch": 0.27911609660414366, + "grad_norm": 1.7702795146516195, + "learning_rate": 8.459011538739754e-06, + "loss": 0.7504, + "step": 9107 + }, + { + "epoch": 0.27914674512688487, + "grad_norm": 1.589215009566089, + "learning_rate": 8.458653137063658e-06, + "loss": 0.7275, + "step": 9108 + }, + { + "epoch": 0.27917739364962607, + "grad_norm": 1.674496533031126, + "learning_rate": 8.45829470130857e-06, + "loss": 0.7087, + "step": 9109 + }, + { + "epoch": 0.2792080421723673, + "grad_norm": 1.7445474292508978, + "learning_rate": 8.457936231478022e-06, + "loss": 0.7409, + "step": 9110 + }, + { + "epoch": 0.2792386906951085, + "grad_norm": 1.7040863910899064, + "learning_rate": 8.45757772757554e-06, + "loss": 0.7045, + "step": 9111 + }, + { + "epoch": 0.2792693392178497, + "grad_norm": 1.0200911398448675, + "learning_rate": 8.45721918960466e-06, + "loss": 0.4644, + "step": 9112 + }, + { + "epoch": 0.2792999877405909, + "grad_norm": 1.5078161872895166, + "learning_rate": 8.456860617568916e-06, + "loss": 0.5949, + "step": 9113 + }, + { + "epoch": 0.2793306362633321, + "grad_norm": 1.6642337537444154, + "learning_rate": 8.45650201147184e-06, + "loss": 0.6338, + "step": 9114 + }, + { + "epoch": 0.2793612847860733, + "grad_norm": 1.4770523598341978, + "learning_rate": 8.456143371316965e-06, + "loss": 0.6023, + "step": 9115 + }, + { + "epoch": 0.2793919333088145, + "grad_norm": 1.9488836625327326, + "learning_rate": 8.455784697107823e-06, + "loss": 0.7819, + "step": 9116 + }, + { + "epoch": 0.2794225818315557, + "grad_norm": 1.7234310188890511, + "learning_rate": 8.455425988847952e-06, + "loss": 0.7462, + "step": 9117 + }, + { + "epoch": 0.2794532303542969, + "grad_norm": 1.7171814680172979, + "learning_rate": 8.455067246540887e-06, + "loss": 0.6959, + "step": 9118 + }, + { + "epoch": 0.2794838788770381, + "grad_norm": 1.855862921472609, + 
"learning_rate": 8.454708470190157e-06, + "loss": 0.6518, + "step": 9119 + }, + { + "epoch": 0.27951452739977933, + "grad_norm": 1.6774474081652675, + "learning_rate": 8.454349659799301e-06, + "loss": 0.7949, + "step": 9120 + }, + { + "epoch": 0.27954517592252054, + "grad_norm": 1.1026919726368245, + "learning_rate": 8.453990815371855e-06, + "loss": 0.4922, + "step": 9121 + }, + { + "epoch": 0.27957582444526174, + "grad_norm": 1.8307481917209036, + "learning_rate": 8.453631936911352e-06, + "loss": 0.7093, + "step": 9122 + }, + { + "epoch": 0.27960647296800295, + "grad_norm": 1.5853001074992503, + "learning_rate": 8.45327302442133e-06, + "loss": 0.637, + "step": 9123 + }, + { + "epoch": 0.27963712149074416, + "grad_norm": 1.844145920866591, + "learning_rate": 8.452914077905328e-06, + "loss": 0.6855, + "step": 9124 + }, + { + "epoch": 0.27966777001348536, + "grad_norm": 0.8230357937913534, + "learning_rate": 8.452555097366879e-06, + "loss": 0.4844, + "step": 9125 + }, + { + "epoch": 0.27969841853622657, + "grad_norm": 1.490417126763475, + "learning_rate": 8.452196082809521e-06, + "loss": 0.5778, + "step": 9126 + }, + { + "epoch": 0.2797290670589678, + "grad_norm": 1.7131905648866712, + "learning_rate": 8.451837034236791e-06, + "loss": 0.6289, + "step": 9127 + }, + { + "epoch": 0.279759715581709, + "grad_norm": 1.7338376643462872, + "learning_rate": 8.451477951652229e-06, + "loss": 0.6903, + "step": 9128 + }, + { + "epoch": 0.2797903641044502, + "grad_norm": 1.805253254238031, + "learning_rate": 8.45111883505937e-06, + "loss": 0.8016, + "step": 9129 + }, + { + "epoch": 0.2798210126271914, + "grad_norm": 1.7721400235964266, + "learning_rate": 8.450759684461756e-06, + "loss": 0.6501, + "step": 9130 + }, + { + "epoch": 0.2798516611499326, + "grad_norm": 1.6874493519947555, + "learning_rate": 8.450400499862922e-06, + "loss": 0.7136, + "step": 9131 + }, + { + "epoch": 0.2798823096726738, + "grad_norm": 1.8338909251165465, + "learning_rate": 8.45004128126641e-06, + "loss": 0.7968, + "step": 9132 + }, + { + "epoch": 0.279912958195415, + "grad_norm": 1.0010232785165272, + "learning_rate": 8.449682028675759e-06, + "loss": 0.4807, + "step": 9133 + }, + { + "epoch": 0.27994360671815616, + "grad_norm": 1.8200071506926387, + "learning_rate": 8.449322742094505e-06, + "loss": 0.7629, + "step": 9134 + }, + { + "epoch": 0.27997425524089736, + "grad_norm": 0.8576705266241226, + "learning_rate": 8.448963421526196e-06, + "loss": 0.4774, + "step": 9135 + }, + { + "epoch": 0.28000490376363857, + "grad_norm": 1.7997844989140601, + "learning_rate": 8.448604066974367e-06, + "loss": 0.6701, + "step": 9136 + }, + { + "epoch": 0.2800355522863798, + "grad_norm": 1.6926245977449688, + "learning_rate": 8.448244678442557e-06, + "loss": 0.7006, + "step": 9137 + }, + { + "epoch": 0.280066200809121, + "grad_norm": 1.7161705254646582, + "learning_rate": 8.447885255934312e-06, + "loss": 0.7067, + "step": 9138 + }, + { + "epoch": 0.2800968493318622, + "grad_norm": 1.7915764556142197, + "learning_rate": 8.44752579945317e-06, + "loss": 0.6974, + "step": 9139 + }, + { + "epoch": 0.2801274978546034, + "grad_norm": 1.7074539351669884, + "learning_rate": 8.447166309002677e-06, + "loss": 0.711, + "step": 9140 + }, + { + "epoch": 0.2801581463773446, + "grad_norm": 1.7962471570791716, + "learning_rate": 8.44680678458637e-06, + "loss": 0.7107, + "step": 9141 + }, + { + "epoch": 0.2801887949000858, + "grad_norm": 1.6666449986148049, + "learning_rate": 8.446447226207795e-06, + "loss": 0.6447, + "step": 9142 + }, + { + "epoch": 
0.280219443422827, + "grad_norm": 1.6069710465541762, + "learning_rate": 8.446087633870492e-06, + "loss": 0.7463, + "step": 9143 + }, + { + "epoch": 0.2802500919455682, + "grad_norm": 1.7828601637973427, + "learning_rate": 8.445728007578008e-06, + "loss": 0.6552, + "step": 9144 + }, + { + "epoch": 0.2802807404683094, + "grad_norm": 1.6783041044946556, + "learning_rate": 8.445368347333884e-06, + "loss": 0.6715, + "step": 9145 + }, + { + "epoch": 0.2803113889910506, + "grad_norm": 1.1976429846785563, + "learning_rate": 8.445008653141662e-06, + "loss": 0.465, + "step": 9146 + }, + { + "epoch": 0.28034203751379183, + "grad_norm": 1.7228451749868772, + "learning_rate": 8.44464892500489e-06, + "loss": 0.6491, + "step": 9147 + }, + { + "epoch": 0.28037268603653304, + "grad_norm": 1.7600537766321354, + "learning_rate": 8.44428916292711e-06, + "loss": 0.6398, + "step": 9148 + }, + { + "epoch": 0.28040333455927424, + "grad_norm": 2.0619803306751754, + "learning_rate": 8.443929366911869e-06, + "loss": 0.7726, + "step": 9149 + }, + { + "epoch": 0.28043398308201545, + "grad_norm": 1.7056068880987971, + "learning_rate": 8.44356953696271e-06, + "loss": 0.6686, + "step": 9150 + }, + { + "epoch": 0.28046463160475665, + "grad_norm": 1.698727277666855, + "learning_rate": 8.443209673083178e-06, + "loss": 0.8238, + "step": 9151 + }, + { + "epoch": 0.28049528012749786, + "grad_norm": 1.7435259045986629, + "learning_rate": 8.442849775276823e-06, + "loss": 0.7137, + "step": 9152 + }, + { + "epoch": 0.28052592865023906, + "grad_norm": 1.8365437830173774, + "learning_rate": 8.442489843547187e-06, + "loss": 0.6607, + "step": 9153 + }, + { + "epoch": 0.28055657717298027, + "grad_norm": 1.6615037477993857, + "learning_rate": 8.442129877897818e-06, + "loss": 0.7867, + "step": 9154 + }, + { + "epoch": 0.2805872256957215, + "grad_norm": 1.7866891773800628, + "learning_rate": 8.441769878332261e-06, + "loss": 0.682, + "step": 9155 + }, + { + "epoch": 0.2806178742184627, + "grad_norm": 0.94745487551086, + "learning_rate": 8.441409844854067e-06, + "loss": 0.472, + "step": 9156 + }, + { + "epoch": 0.2806485227412039, + "grad_norm": 1.7515279632579521, + "learning_rate": 8.441049777466778e-06, + "loss": 0.5889, + "step": 9157 + }, + { + "epoch": 0.2806791712639451, + "grad_norm": 1.9974321265790864, + "learning_rate": 8.440689676173947e-06, + "loss": 0.8024, + "step": 9158 + }, + { + "epoch": 0.2807098197866863, + "grad_norm": 0.8094749078065999, + "learning_rate": 8.440329540979122e-06, + "loss": 0.4919, + "step": 9159 + }, + { + "epoch": 0.2807404683094275, + "grad_norm": 1.653125836706125, + "learning_rate": 8.439969371885847e-06, + "loss": 0.6179, + "step": 9160 + }, + { + "epoch": 0.2807711168321687, + "grad_norm": 1.8181871549915047, + "learning_rate": 8.439609168897676e-06, + "loss": 0.6552, + "step": 9161 + }, + { + "epoch": 0.2808017653549099, + "grad_norm": 1.9974098370693358, + "learning_rate": 8.439248932018153e-06, + "loss": 0.7958, + "step": 9162 + }, + { + "epoch": 0.2808324138776511, + "grad_norm": 1.568401629131645, + "learning_rate": 8.438888661250833e-06, + "loss": 0.6176, + "step": 9163 + }, + { + "epoch": 0.2808630624003923, + "grad_norm": 1.7995470968367278, + "learning_rate": 8.438528356599262e-06, + "loss": 0.7226, + "step": 9164 + }, + { + "epoch": 0.2808937109231335, + "grad_norm": 1.6033716433059417, + "learning_rate": 8.438168018066992e-06, + "loss": 0.6796, + "step": 9165 + }, + { + "epoch": 0.2809243594458747, + "grad_norm": 1.876066950938984, + "learning_rate": 8.437807645657572e-06, + "loss": 
0.7236, + "step": 9166 + }, + { + "epoch": 0.2809550079686159, + "grad_norm": 1.9969468962381096, + "learning_rate": 8.437447239374554e-06, + "loss": 0.6852, + "step": 9167 + }, + { + "epoch": 0.2809856564913571, + "grad_norm": 1.6491899733125368, + "learning_rate": 8.437086799221489e-06, + "loss": 0.6655, + "step": 9168 + }, + { + "epoch": 0.2810163050140983, + "grad_norm": 1.6208828073667716, + "learning_rate": 8.436726325201926e-06, + "loss": 0.6144, + "step": 9169 + }, + { + "epoch": 0.2810469535368395, + "grad_norm": 1.5746746418109445, + "learning_rate": 8.436365817319423e-06, + "loss": 0.6293, + "step": 9170 + }, + { + "epoch": 0.2810776020595807, + "grad_norm": 1.9151478037602558, + "learning_rate": 8.436005275577527e-06, + "loss": 0.6555, + "step": 9171 + }, + { + "epoch": 0.2811082505823219, + "grad_norm": 1.0138317354598037, + "learning_rate": 8.435644699979792e-06, + "loss": 0.4843, + "step": 9172 + }, + { + "epoch": 0.2811388991050631, + "grad_norm": 1.7572085992799515, + "learning_rate": 8.43528409052977e-06, + "loss": 0.6568, + "step": 9173 + }, + { + "epoch": 0.28116954762780433, + "grad_norm": 1.6620946081983958, + "learning_rate": 8.434923447231015e-06, + "loss": 0.5975, + "step": 9174 + }, + { + "epoch": 0.28120019615054553, + "grad_norm": 1.7443699824088565, + "learning_rate": 8.43456277008708e-06, + "loss": 0.6955, + "step": 9175 + }, + { + "epoch": 0.28123084467328674, + "grad_norm": 1.851676502393262, + "learning_rate": 8.43420205910152e-06, + "loss": 0.6801, + "step": 9176 + }, + { + "epoch": 0.28126149319602795, + "grad_norm": 1.0383152892226202, + "learning_rate": 8.433841314277888e-06, + "loss": 0.4994, + "step": 9177 + }, + { + "epoch": 0.28129214171876915, + "grad_norm": 0.8151903833045828, + "learning_rate": 8.433480535619741e-06, + "loss": 0.4493, + "step": 9178 + }, + { + "epoch": 0.28132279024151036, + "grad_norm": 2.7250941966908027, + "learning_rate": 8.433119723130629e-06, + "loss": 0.7744, + "step": 9179 + }, + { + "epoch": 0.28135343876425156, + "grad_norm": 1.8060938213401598, + "learning_rate": 8.43275887681411e-06, + "loss": 0.7566, + "step": 9180 + }, + { + "epoch": 0.28138408728699277, + "grad_norm": 1.6647374534065624, + "learning_rate": 8.432397996673741e-06, + "loss": 0.7029, + "step": 9181 + }, + { + "epoch": 0.281414735809734, + "grad_norm": 1.7556716505343597, + "learning_rate": 8.432037082713077e-06, + "loss": 0.739, + "step": 9182 + }, + { + "epoch": 0.2814453843324752, + "grad_norm": 1.699118352129971, + "learning_rate": 8.43167613493567e-06, + "loss": 0.7543, + "step": 9183 + }, + { + "epoch": 0.2814760328552164, + "grad_norm": 1.7184638159653767, + "learning_rate": 8.431315153345084e-06, + "loss": 0.8153, + "step": 9184 + }, + { + "epoch": 0.2815066813779576, + "grad_norm": 1.7188926261615816, + "learning_rate": 8.430954137944869e-06, + "loss": 0.7054, + "step": 9185 + }, + { + "epoch": 0.2815373299006988, + "grad_norm": 1.7747533315723678, + "learning_rate": 8.430593088738586e-06, + "loss": 0.7453, + "step": 9186 + }, + { + "epoch": 0.28156797842344, + "grad_norm": 0.9368591555901344, + "learning_rate": 8.430232005729792e-06, + "loss": 0.505, + "step": 9187 + }, + { + "epoch": 0.2815986269461812, + "grad_norm": 1.634594490148967, + "learning_rate": 8.429870888922045e-06, + "loss": 0.747, + "step": 9188 + }, + { + "epoch": 0.2816292754689224, + "grad_norm": 1.897578779295267, + "learning_rate": 8.429509738318902e-06, + "loss": 0.7624, + "step": 9189 + }, + { + "epoch": 0.2816599239916636, + "grad_norm": 1.8011533509230442, + 
"learning_rate": 8.42914855392392e-06, + "loss": 0.7678, + "step": 9190 + }, + { + "epoch": 0.2816905725144048, + "grad_norm": 1.6719877855580103, + "learning_rate": 8.428787335740663e-06, + "loss": 0.6948, + "step": 9191 + }, + { + "epoch": 0.28172122103714603, + "grad_norm": 1.801918095457166, + "learning_rate": 8.428426083772685e-06, + "loss": 0.6893, + "step": 9192 + }, + { + "epoch": 0.28175186955988724, + "grad_norm": 1.5336639158194723, + "learning_rate": 8.428064798023548e-06, + "loss": 0.5794, + "step": 9193 + }, + { + "epoch": 0.28178251808262844, + "grad_norm": 1.5651407668665802, + "learning_rate": 8.427703478496812e-06, + "loss": 0.6528, + "step": 9194 + }, + { + "epoch": 0.28181316660536965, + "grad_norm": 1.6776240728689815, + "learning_rate": 8.427342125196038e-06, + "loss": 0.6904, + "step": 9195 + }, + { + "epoch": 0.28184381512811085, + "grad_norm": 1.7330382407406835, + "learning_rate": 8.426980738124783e-06, + "loss": 0.743, + "step": 9196 + }, + { + "epoch": 0.281874463650852, + "grad_norm": 1.6994379745618342, + "learning_rate": 8.42661931728661e-06, + "loss": 0.7067, + "step": 9197 + }, + { + "epoch": 0.2819051121735932, + "grad_norm": 1.8803528168550474, + "learning_rate": 8.42625786268508e-06, + "loss": 0.6141, + "step": 9198 + }, + { + "epoch": 0.2819357606963344, + "grad_norm": 1.6230530262424998, + "learning_rate": 8.425896374323757e-06, + "loss": 0.5539, + "step": 9199 + }, + { + "epoch": 0.2819664092190756, + "grad_norm": 1.667361912908936, + "learning_rate": 8.425534852206198e-06, + "loss": 0.7335, + "step": 9200 + }, + { + "epoch": 0.2819970577418168, + "grad_norm": 1.6556888885865793, + "learning_rate": 8.425173296335967e-06, + "loss": 0.719, + "step": 9201 + }, + { + "epoch": 0.28202770626455803, + "grad_norm": 1.5887690452063918, + "learning_rate": 8.42481170671663e-06, + "loss": 0.6888, + "step": 9202 + }, + { + "epoch": 0.28205835478729924, + "grad_norm": 1.8730875693969775, + "learning_rate": 8.424450083351746e-06, + "loss": 0.6591, + "step": 9203 + }, + { + "epoch": 0.28208900331004044, + "grad_norm": 2.057804195979405, + "learning_rate": 8.424088426244877e-06, + "loss": 0.6411, + "step": 9204 + }, + { + "epoch": 0.28211965183278165, + "grad_norm": 0.8961021121576378, + "learning_rate": 8.423726735399592e-06, + "loss": 0.4871, + "step": 9205 + }, + { + "epoch": 0.28215030035552285, + "grad_norm": 1.8793503467314165, + "learning_rate": 8.423365010819449e-06, + "loss": 0.6952, + "step": 9206 + }, + { + "epoch": 0.28218094887826406, + "grad_norm": 1.6788656976905554, + "learning_rate": 8.423003252508015e-06, + "loss": 0.7782, + "step": 9207 + }, + { + "epoch": 0.28221159740100527, + "grad_norm": 1.729095038503731, + "learning_rate": 8.422641460468855e-06, + "loss": 0.694, + "step": 9208 + }, + { + "epoch": 0.28224224592374647, + "grad_norm": 0.7858236603456015, + "learning_rate": 8.422279634705531e-06, + "loss": 0.495, + "step": 9209 + }, + { + "epoch": 0.2822728944464877, + "grad_norm": 1.8907441974873123, + "learning_rate": 8.421917775221612e-06, + "loss": 0.6509, + "step": 9210 + }, + { + "epoch": 0.2823035429692289, + "grad_norm": 1.6439182046721887, + "learning_rate": 8.421555882020662e-06, + "loss": 0.6542, + "step": 9211 + }, + { + "epoch": 0.2823341914919701, + "grad_norm": 1.6504770182527455, + "learning_rate": 8.421193955106244e-06, + "loss": 0.6835, + "step": 9212 + }, + { + "epoch": 0.2823648400147113, + "grad_norm": 1.6722760033069317, + "learning_rate": 8.420831994481928e-06, + "loss": 0.6658, + "step": 9213 + }, + { + "epoch": 
0.2823954885374525, + "grad_norm": 1.6350193041416214, + "learning_rate": 8.420470000151281e-06, + "loss": 0.6911, + "step": 9214 + }, + { + "epoch": 0.2824261370601937, + "grad_norm": 1.6030238778448278, + "learning_rate": 8.420107972117865e-06, + "loss": 0.6677, + "step": 9215 + }, + { + "epoch": 0.2824567855829349, + "grad_norm": 1.7480286020418125, + "learning_rate": 8.419745910385253e-06, + "loss": 0.7283, + "step": 9216 + }, + { + "epoch": 0.2824874341056761, + "grad_norm": 1.9392914733450413, + "learning_rate": 8.419383814957007e-06, + "loss": 0.7419, + "step": 9217 + }, + { + "epoch": 0.2825180826284173, + "grad_norm": 1.8985872580008778, + "learning_rate": 8.419021685836698e-06, + "loss": 0.7054, + "step": 9218 + }, + { + "epoch": 0.2825487311511585, + "grad_norm": 1.8666202835220558, + "learning_rate": 8.418659523027894e-06, + "loss": 0.8002, + "step": 9219 + }, + { + "epoch": 0.28257937967389973, + "grad_norm": 1.6272808783845067, + "learning_rate": 8.418297326534165e-06, + "loss": 0.6682, + "step": 9220 + }, + { + "epoch": 0.28261002819664094, + "grad_norm": 1.650814816021029, + "learning_rate": 8.417935096359073e-06, + "loss": 0.6762, + "step": 9221 + }, + { + "epoch": 0.28264067671938214, + "grad_norm": 2.022821967735445, + "learning_rate": 8.417572832506196e-06, + "loss": 0.6848, + "step": 9222 + }, + { + "epoch": 0.28267132524212335, + "grad_norm": 1.5844903794379073, + "learning_rate": 8.417210534979098e-06, + "loss": 0.7096, + "step": 9223 + }, + { + "epoch": 0.28270197376486456, + "grad_norm": 1.7698034123405788, + "learning_rate": 8.41684820378135e-06, + "loss": 0.7245, + "step": 9224 + }, + { + "epoch": 0.28273262228760576, + "grad_norm": 1.6443607961255018, + "learning_rate": 8.416485838916522e-06, + "loss": 0.8036, + "step": 9225 + }, + { + "epoch": 0.28276327081034697, + "grad_norm": 1.5123029440553037, + "learning_rate": 8.416123440388188e-06, + "loss": 0.7191, + "step": 9226 + }, + { + "epoch": 0.2827939193330882, + "grad_norm": 1.6422081683524492, + "learning_rate": 8.415761008199912e-06, + "loss": 0.7758, + "step": 9227 + }, + { + "epoch": 0.2828245678558293, + "grad_norm": 1.7951443625487482, + "learning_rate": 8.415398542355271e-06, + "loss": 0.6244, + "step": 9228 + }, + { + "epoch": 0.28285521637857053, + "grad_norm": 1.747526181564695, + "learning_rate": 8.415036042857834e-06, + "loss": 0.7428, + "step": 9229 + }, + { + "epoch": 0.28288586490131173, + "grad_norm": 0.8354897272630903, + "learning_rate": 8.414673509711172e-06, + "loss": 0.4766, + "step": 9230 + }, + { + "epoch": 0.28291651342405294, + "grad_norm": 1.7770260112402072, + "learning_rate": 8.41431094291886e-06, + "loss": 0.7565, + "step": 9231 + }, + { + "epoch": 0.28294716194679415, + "grad_norm": 1.5943644779884751, + "learning_rate": 8.413948342484466e-06, + "loss": 0.6568, + "step": 9232 + }, + { + "epoch": 0.28297781046953535, + "grad_norm": 1.4911840488117787, + "learning_rate": 8.413585708411566e-06, + "loss": 0.5862, + "step": 9233 + }, + { + "epoch": 0.28300845899227656, + "grad_norm": 1.8344042128186329, + "learning_rate": 8.413223040703735e-06, + "loss": 0.7171, + "step": 9234 + }, + { + "epoch": 0.28303910751501776, + "grad_norm": 1.5667669783491138, + "learning_rate": 8.412860339364542e-06, + "loss": 0.655, + "step": 9235 + }, + { + "epoch": 0.28306975603775897, + "grad_norm": 1.624000987276262, + "learning_rate": 8.412497604397564e-06, + "loss": 0.7758, + "step": 9236 + }, + { + "epoch": 0.2831004045605002, + "grad_norm": 1.7533176385418194, + "learning_rate": 8.412134835806374e-06, 
+ "loss": 0.6517, + "step": 9237 + }, + { + "epoch": 0.2831310530832414, + "grad_norm": 1.777579593040081, + "learning_rate": 8.411772033594544e-06, + "loss": 0.7361, + "step": 9238 + }, + { + "epoch": 0.2831617016059826, + "grad_norm": 1.7268290237004658, + "learning_rate": 8.411409197765654e-06, + "loss": 0.7499, + "step": 9239 + }, + { + "epoch": 0.2831923501287238, + "grad_norm": 0.8294639396444521, + "learning_rate": 8.411046328323276e-06, + "loss": 0.4698, + "step": 9240 + }, + { + "epoch": 0.283222998651465, + "grad_norm": 1.683928409501036, + "learning_rate": 8.410683425270986e-06, + "loss": 0.6592, + "step": 9241 + }, + { + "epoch": 0.2832536471742062, + "grad_norm": 1.8772156208546467, + "learning_rate": 8.410320488612358e-06, + "loss": 0.7349, + "step": 9242 + }, + { + "epoch": 0.2832842956969474, + "grad_norm": 1.8352556215097147, + "learning_rate": 8.40995751835097e-06, + "loss": 0.7396, + "step": 9243 + }, + { + "epoch": 0.2833149442196886, + "grad_norm": 1.7417997956394524, + "learning_rate": 8.409594514490401e-06, + "loss": 0.6363, + "step": 9244 + }, + { + "epoch": 0.2833455927424298, + "grad_norm": 1.5850243866471676, + "learning_rate": 8.409231477034221e-06, + "loss": 0.6429, + "step": 9245 + }, + { + "epoch": 0.283376241265171, + "grad_norm": 1.601725962479589, + "learning_rate": 8.408868405986013e-06, + "loss": 0.6161, + "step": 9246 + }, + { + "epoch": 0.28340688978791223, + "grad_norm": 0.8229069447880422, + "learning_rate": 8.408505301349352e-06, + "loss": 0.4943, + "step": 9247 + }, + { + "epoch": 0.28343753831065344, + "grad_norm": 1.6717597297317224, + "learning_rate": 8.408142163127815e-06, + "loss": 0.8125, + "step": 9248 + }, + { + "epoch": 0.28346818683339464, + "grad_norm": 1.6963375218903876, + "learning_rate": 8.407778991324984e-06, + "loss": 0.7185, + "step": 9249 + }, + { + "epoch": 0.28349883535613585, + "grad_norm": 1.7509220196092308, + "learning_rate": 8.407415785944431e-06, + "loss": 0.7687, + "step": 9250 + }, + { + "epoch": 0.28352948387887705, + "grad_norm": 1.6331394465283475, + "learning_rate": 8.40705254698974e-06, + "loss": 0.7278, + "step": 9251 + }, + { + "epoch": 0.28356013240161826, + "grad_norm": 1.585419009155998, + "learning_rate": 8.40668927446449e-06, + "loss": 0.7267, + "step": 9252 + }, + { + "epoch": 0.28359078092435946, + "grad_norm": 1.7745052072136223, + "learning_rate": 8.406325968372258e-06, + "loss": 0.7547, + "step": 9253 + }, + { + "epoch": 0.28362142944710067, + "grad_norm": 0.8454180797283275, + "learning_rate": 8.405962628716624e-06, + "loss": 0.4773, + "step": 9254 + }, + { + "epoch": 0.2836520779698419, + "grad_norm": 0.8038796886778863, + "learning_rate": 8.405599255501168e-06, + "loss": 0.4693, + "step": 9255 + }, + { + "epoch": 0.2836827264925831, + "grad_norm": 1.6855263248026535, + "learning_rate": 8.405235848729474e-06, + "loss": 0.7276, + "step": 9256 + }, + { + "epoch": 0.2837133750153243, + "grad_norm": 1.8477623403194938, + "learning_rate": 8.404872408405118e-06, + "loss": 0.7095, + "step": 9257 + }, + { + "epoch": 0.2837440235380655, + "grad_norm": 1.7084261535124736, + "learning_rate": 8.404508934531684e-06, + "loss": 0.6146, + "step": 9258 + }, + { + "epoch": 0.28377467206080664, + "grad_norm": 0.8038491857390029, + "learning_rate": 8.404145427112751e-06, + "loss": 0.4732, + "step": 9259 + }, + { + "epoch": 0.28380532058354785, + "grad_norm": 1.6267166389949836, + "learning_rate": 8.403781886151902e-06, + "loss": 0.6446, + "step": 9260 + }, + { + "epoch": 0.28383596910628905, + "grad_norm": 
1.6314404482328884, + "learning_rate": 8.403418311652721e-06, + "loss": 0.677, + "step": 9261 + }, + { + "epoch": 0.28386661762903026, + "grad_norm": 1.6096167921572477, + "learning_rate": 8.403054703618787e-06, + "loss": 0.748, + "step": 9262 + }, + { + "epoch": 0.28389726615177147, + "grad_norm": 1.8793192332858617, + "learning_rate": 8.402691062053685e-06, + "loss": 0.7305, + "step": 9263 + }, + { + "epoch": 0.28392791467451267, + "grad_norm": 1.8455379142692563, + "learning_rate": 8.402327386960998e-06, + "loss": 0.7503, + "step": 9264 + }, + { + "epoch": 0.2839585631972539, + "grad_norm": 1.954596669517302, + "learning_rate": 8.401963678344309e-06, + "loss": 0.6428, + "step": 9265 + }, + { + "epoch": 0.2839892117199951, + "grad_norm": 1.6092046066932222, + "learning_rate": 8.401599936207199e-06, + "loss": 0.6833, + "step": 9266 + }, + { + "epoch": 0.2840198602427363, + "grad_norm": 1.6869613136296675, + "learning_rate": 8.401236160553257e-06, + "loss": 0.7596, + "step": 9267 + }, + { + "epoch": 0.2840505087654775, + "grad_norm": 1.5824902650183614, + "learning_rate": 8.400872351386063e-06, + "loss": 0.7139, + "step": 9268 + }, + { + "epoch": 0.2840811572882187, + "grad_norm": 1.7532569449791293, + "learning_rate": 8.400508508709205e-06, + "loss": 0.6993, + "step": 9269 + }, + { + "epoch": 0.2841118058109599, + "grad_norm": 0.9537869264821767, + "learning_rate": 8.400144632526266e-06, + "loss": 0.4821, + "step": 9270 + }, + { + "epoch": 0.2841424543337011, + "grad_norm": 0.9496255146886461, + "learning_rate": 8.399780722840832e-06, + "loss": 0.4805, + "step": 9271 + }, + { + "epoch": 0.2841731028564423, + "grad_norm": 1.6451007182453719, + "learning_rate": 8.399416779656489e-06, + "loss": 0.6935, + "step": 9272 + }, + { + "epoch": 0.2842037513791835, + "grad_norm": 1.8571585911258974, + "learning_rate": 8.399052802976822e-06, + "loss": 0.8631, + "step": 9273 + }, + { + "epoch": 0.28423439990192473, + "grad_norm": 1.7468287196972723, + "learning_rate": 8.398688792805417e-06, + "loss": 0.7219, + "step": 9274 + }, + { + "epoch": 0.28426504842466593, + "grad_norm": 1.7857765028382022, + "learning_rate": 8.398324749145864e-06, + "loss": 0.6642, + "step": 9275 + }, + { + "epoch": 0.28429569694740714, + "grad_norm": 0.8707148229039529, + "learning_rate": 8.397960672001748e-06, + "loss": 0.4831, + "step": 9276 + }, + { + "epoch": 0.28432634547014835, + "grad_norm": 1.8188124619903563, + "learning_rate": 8.397596561376652e-06, + "loss": 0.639, + "step": 9277 + }, + { + "epoch": 0.28435699399288955, + "grad_norm": 1.8245974685729935, + "learning_rate": 8.397232417274172e-06, + "loss": 0.7174, + "step": 9278 + }, + { + "epoch": 0.28438764251563076, + "grad_norm": 1.7973190733729985, + "learning_rate": 8.396868239697891e-06, + "loss": 0.7891, + "step": 9279 + }, + { + "epoch": 0.28441829103837196, + "grad_norm": 1.9392530056850827, + "learning_rate": 8.396504028651397e-06, + "loss": 0.8146, + "step": 9280 + }, + { + "epoch": 0.28444893956111317, + "grad_norm": 1.7589765242280015, + "learning_rate": 8.39613978413828e-06, + "loss": 0.735, + "step": 9281 + }, + { + "epoch": 0.2844795880838544, + "grad_norm": 1.6325446019284946, + "learning_rate": 8.395775506162129e-06, + "loss": 0.7594, + "step": 9282 + }, + { + "epoch": 0.2845102366065956, + "grad_norm": 2.0111386101013373, + "learning_rate": 8.395411194726533e-06, + "loss": 0.7431, + "step": 9283 + }, + { + "epoch": 0.2845408851293368, + "grad_norm": 1.6811089978005567, + "learning_rate": 8.395046849835084e-06, + "loss": 0.7099, + "step": 9284 + }, 
+ { + "epoch": 0.284571533652078, + "grad_norm": 0.8414765688717398, + "learning_rate": 8.394682471491366e-06, + "loss": 0.4745, + "step": 9285 + }, + { + "epoch": 0.2846021821748192, + "grad_norm": 1.7762352233639356, + "learning_rate": 8.394318059698976e-06, + "loss": 0.6475, + "step": 9286 + }, + { + "epoch": 0.2846328306975604, + "grad_norm": 1.6880527102483294, + "learning_rate": 8.393953614461501e-06, + "loss": 0.6331, + "step": 9287 + }, + { + "epoch": 0.2846634792203016, + "grad_norm": 1.5325591294298915, + "learning_rate": 8.393589135782531e-06, + "loss": 0.6781, + "step": 9288 + }, + { + "epoch": 0.2846941277430428, + "grad_norm": 1.5420205039483885, + "learning_rate": 8.393224623665658e-06, + "loss": 0.6225, + "step": 9289 + }, + { + "epoch": 0.28472477626578396, + "grad_norm": 1.6734498667196518, + "learning_rate": 8.392860078114477e-06, + "loss": 0.7213, + "step": 9290 + }, + { + "epoch": 0.28475542478852517, + "grad_norm": 2.6675272678185267, + "learning_rate": 8.39249549913258e-06, + "loss": 0.6268, + "step": 9291 + }, + { + "epoch": 0.2847860733112664, + "grad_norm": 1.6968872829732933, + "learning_rate": 8.392130886723553e-06, + "loss": 0.6226, + "step": 9292 + }, + { + "epoch": 0.2848167218340076, + "grad_norm": 1.666065328679615, + "learning_rate": 8.391766240890993e-06, + "loss": 0.7603, + "step": 9293 + }, + { + "epoch": 0.2848473703567488, + "grad_norm": 2.1259996445843847, + "learning_rate": 8.391401561638492e-06, + "loss": 0.6347, + "step": 9294 + }, + { + "epoch": 0.28487801887949, + "grad_norm": 1.947498965788976, + "learning_rate": 8.391036848969646e-06, + "loss": 0.7423, + "step": 9295 + }, + { + "epoch": 0.2849086674022312, + "grad_norm": 1.7368606413561198, + "learning_rate": 8.390672102888044e-06, + "loss": 0.7509, + "step": 9296 + }, + { + "epoch": 0.2849393159249724, + "grad_norm": 1.797156209051628, + "learning_rate": 8.390307323397285e-06, + "loss": 0.6281, + "step": 9297 + }, + { + "epoch": 0.2849699644477136, + "grad_norm": 1.9829734002474682, + "learning_rate": 8.389942510500957e-06, + "loss": 0.738, + "step": 9298 + }, + { + "epoch": 0.2850006129704548, + "grad_norm": 1.562071717563335, + "learning_rate": 8.38957766420266e-06, + "loss": 0.6, + "step": 9299 + }, + { + "epoch": 0.285031261493196, + "grad_norm": 1.8216205573373754, + "learning_rate": 8.389212784505987e-06, + "loss": 0.7178, + "step": 9300 + }, + { + "epoch": 0.2850619100159372, + "grad_norm": 2.0652683521325574, + "learning_rate": 8.388847871414533e-06, + "loss": 0.7628, + "step": 9301 + }, + { + "epoch": 0.28509255853867843, + "grad_norm": 1.816503140880705, + "learning_rate": 8.388482924931893e-06, + "loss": 0.7892, + "step": 9302 + }, + { + "epoch": 0.28512320706141964, + "grad_norm": 1.6365260716691654, + "learning_rate": 8.388117945061664e-06, + "loss": 0.7096, + "step": 9303 + }, + { + "epoch": 0.28515385558416084, + "grad_norm": 1.8276801307184884, + "learning_rate": 8.387752931807442e-06, + "loss": 0.6353, + "step": 9304 + }, + { + "epoch": 0.28518450410690205, + "grad_norm": 1.9046261408584448, + "learning_rate": 8.387387885172825e-06, + "loss": 0.719, + "step": 9305 + }, + { + "epoch": 0.28521515262964325, + "grad_norm": 0.7949902815991364, + "learning_rate": 8.387022805161408e-06, + "loss": 0.4671, + "step": 9306 + }, + { + "epoch": 0.28524580115238446, + "grad_norm": 1.777979919786191, + "learning_rate": 8.386657691776788e-06, + "loss": 0.793, + "step": 9307 + }, + { + "epoch": 0.28527644967512567, + "grad_norm": 1.7100357723361814, + "learning_rate": 8.386292545022563e-06, + 
"loss": 0.7186, + "step": 9308 + }, + { + "epoch": 0.28530709819786687, + "grad_norm": 1.9544379123154472, + "learning_rate": 8.385927364902332e-06, + "loss": 0.7228, + "step": 9309 + }, + { + "epoch": 0.2853377467206081, + "grad_norm": 1.7103041677426232, + "learning_rate": 8.385562151419693e-06, + "loss": 0.6969, + "step": 9310 + }, + { + "epoch": 0.2853683952433493, + "grad_norm": 1.727165318656334, + "learning_rate": 8.385196904578243e-06, + "loss": 0.7912, + "step": 9311 + }, + { + "epoch": 0.2853990437660905, + "grad_norm": 1.6346097171743443, + "learning_rate": 8.384831624381582e-06, + "loss": 0.6806, + "step": 9312 + }, + { + "epoch": 0.2854296922888317, + "grad_norm": 0.8218441419239554, + "learning_rate": 8.384466310833308e-06, + "loss": 0.5057, + "step": 9313 + }, + { + "epoch": 0.2854603408115729, + "grad_norm": 1.7013045284824555, + "learning_rate": 8.384100963937023e-06, + "loss": 0.8138, + "step": 9314 + }, + { + "epoch": 0.2854909893343141, + "grad_norm": 1.8490753041302619, + "learning_rate": 8.383735583696323e-06, + "loss": 0.7228, + "step": 9315 + }, + { + "epoch": 0.2855216378570553, + "grad_norm": 1.7126753419153842, + "learning_rate": 8.383370170114812e-06, + "loss": 0.6658, + "step": 9316 + }, + { + "epoch": 0.2855522863797965, + "grad_norm": 1.5924557805641557, + "learning_rate": 8.383004723196088e-06, + "loss": 0.6855, + "step": 9317 + }, + { + "epoch": 0.2855829349025377, + "grad_norm": 1.8994844256480663, + "learning_rate": 8.382639242943755e-06, + "loss": 0.6804, + "step": 9318 + }, + { + "epoch": 0.2856135834252789, + "grad_norm": 1.7317050908948795, + "learning_rate": 8.382273729361411e-06, + "loss": 0.6984, + "step": 9319 + }, + { + "epoch": 0.28564423194802013, + "grad_norm": 1.6618564290267255, + "learning_rate": 8.381908182452659e-06, + "loss": 0.687, + "step": 9320 + }, + { + "epoch": 0.2856748804707613, + "grad_norm": 1.7127902792455119, + "learning_rate": 8.3815426022211e-06, + "loss": 0.6756, + "step": 9321 + }, + { + "epoch": 0.2857055289935025, + "grad_norm": 1.8213859444919112, + "learning_rate": 8.381176988670337e-06, + "loss": 0.705, + "step": 9322 + }, + { + "epoch": 0.2857361775162437, + "grad_norm": 2.283630341803989, + "learning_rate": 8.38081134180397e-06, + "loss": 0.6653, + "step": 9323 + }, + { + "epoch": 0.2857668260389849, + "grad_norm": 0.9817962809496489, + "learning_rate": 8.380445661625606e-06, + "loss": 0.4932, + "step": 9324 + }, + { + "epoch": 0.2857974745617261, + "grad_norm": 1.8065337866889952, + "learning_rate": 8.380079948138844e-06, + "loss": 0.7042, + "step": 9325 + }, + { + "epoch": 0.2858281230844673, + "grad_norm": 1.498521041044799, + "learning_rate": 8.379714201347291e-06, + "loss": 0.699, + "step": 9326 + }, + { + "epoch": 0.2858587716072085, + "grad_norm": 1.7041861988544664, + "learning_rate": 8.37934842125455e-06, + "loss": 0.7349, + "step": 9327 + }, + { + "epoch": 0.2858894201299497, + "grad_norm": 1.750375305492842, + "learning_rate": 8.378982607864224e-06, + "loss": 0.6692, + "step": 9328 + }, + { + "epoch": 0.28592006865269093, + "grad_norm": 1.75073060671992, + "learning_rate": 8.378616761179916e-06, + "loss": 0.7743, + "step": 9329 + }, + { + "epoch": 0.28595071717543213, + "grad_norm": 1.820787174826518, + "learning_rate": 8.378250881205235e-06, + "loss": 0.7393, + "step": 9330 + }, + { + "epoch": 0.28598136569817334, + "grad_norm": 0.8987334483116998, + "learning_rate": 8.377884967943781e-06, + "loss": 0.4904, + "step": 9331 + }, + { + "epoch": 0.28601201422091455, + "grad_norm": 1.6585325390644134, + 
"learning_rate": 8.377519021399164e-06, + "loss": 0.7083, + "step": 9332 + }, + { + "epoch": 0.28604266274365575, + "grad_norm": 1.9536456667814988, + "learning_rate": 8.377153041574986e-06, + "loss": 0.699, + "step": 9333 + }, + { + "epoch": 0.28607331126639696, + "grad_norm": 1.8344767460913918, + "learning_rate": 8.376787028474858e-06, + "loss": 0.7942, + "step": 9334 + }, + { + "epoch": 0.28610395978913816, + "grad_norm": 1.5674144449399843, + "learning_rate": 8.376420982102381e-06, + "loss": 0.8346, + "step": 9335 + }, + { + "epoch": 0.28613460831187937, + "grad_norm": 1.7453112521177891, + "learning_rate": 8.376054902461166e-06, + "loss": 0.8057, + "step": 9336 + }, + { + "epoch": 0.2861652568346206, + "grad_norm": 1.7737294872478055, + "learning_rate": 8.375688789554817e-06, + "loss": 0.691, + "step": 9337 + }, + { + "epoch": 0.2861959053573618, + "grad_norm": 0.8214735691238204, + "learning_rate": 8.375322643386943e-06, + "loss": 0.4604, + "step": 9338 + }, + { + "epoch": 0.286226553880103, + "grad_norm": 1.7353876739133571, + "learning_rate": 8.37495646396115e-06, + "loss": 0.6747, + "step": 9339 + }, + { + "epoch": 0.2862572024028442, + "grad_norm": 1.7762163930622397, + "learning_rate": 8.37459025128105e-06, + "loss": 0.6765, + "step": 9340 + }, + { + "epoch": 0.2862878509255854, + "grad_norm": 1.6230565795538359, + "learning_rate": 8.374224005350247e-06, + "loss": 0.7696, + "step": 9341 + }, + { + "epoch": 0.2863184994483266, + "grad_norm": 1.6852963091934803, + "learning_rate": 8.373857726172352e-06, + "loss": 0.701, + "step": 9342 + }, + { + "epoch": 0.2863491479710678, + "grad_norm": 1.5886482414677037, + "learning_rate": 8.373491413750974e-06, + "loss": 0.6737, + "step": 9343 + }, + { + "epoch": 0.286379796493809, + "grad_norm": 1.5601822488548913, + "learning_rate": 8.373125068089722e-06, + "loss": 0.7005, + "step": 9344 + }, + { + "epoch": 0.2864104450165502, + "grad_norm": 0.8220607610321989, + "learning_rate": 8.372758689192205e-06, + "loss": 0.4807, + "step": 9345 + }, + { + "epoch": 0.2864410935392914, + "grad_norm": 0.7997837309580321, + "learning_rate": 8.372392277062034e-06, + "loss": 0.4824, + "step": 9346 + }, + { + "epoch": 0.28647174206203263, + "grad_norm": 1.7614765868892515, + "learning_rate": 8.372025831702819e-06, + "loss": 0.7825, + "step": 9347 + }, + { + "epoch": 0.28650239058477384, + "grad_norm": 1.553130786279322, + "learning_rate": 8.37165935311817e-06, + "loss": 0.7614, + "step": 9348 + }, + { + "epoch": 0.28653303910751504, + "grad_norm": 1.6277798574480868, + "learning_rate": 8.371292841311701e-06, + "loss": 0.7334, + "step": 9349 + }, + { + "epoch": 0.28656368763025625, + "grad_norm": 1.7669022025865244, + "learning_rate": 8.370926296287018e-06, + "loss": 0.7098, + "step": 9350 + }, + { + "epoch": 0.28659433615299745, + "grad_norm": 1.9196513986673736, + "learning_rate": 8.370559718047738e-06, + "loss": 0.6962, + "step": 9351 + }, + { + "epoch": 0.2866249846757386, + "grad_norm": 1.8056141227927966, + "learning_rate": 8.37019310659747e-06, + "loss": 0.7022, + "step": 9352 + }, + { + "epoch": 0.2866556331984798, + "grad_norm": 1.533397850816452, + "learning_rate": 8.369826461939828e-06, + "loss": 0.7134, + "step": 9353 + }, + { + "epoch": 0.286686281721221, + "grad_norm": 1.7782771472393382, + "learning_rate": 8.369459784078422e-06, + "loss": 0.6846, + "step": 9354 + }, + { + "epoch": 0.2867169302439622, + "grad_norm": 1.8051944016913208, + "learning_rate": 8.369093073016868e-06, + "loss": 0.8257, + "step": 9355 + }, + { + "epoch": 
0.2867475787667034, + "grad_norm": 1.7249159106818375, + "learning_rate": 8.368726328758775e-06, + "loss": 0.6838, + "step": 9356 + }, + { + "epoch": 0.28677822728944463, + "grad_norm": 1.8123550907517945, + "learning_rate": 8.368359551307762e-06, + "loss": 0.6851, + "step": 9357 + }, + { + "epoch": 0.28680887581218584, + "grad_norm": 1.6279252578544074, + "learning_rate": 8.36799274066744e-06, + "loss": 0.7234, + "step": 9358 + }, + { + "epoch": 0.28683952433492704, + "grad_norm": 1.6749702647860294, + "learning_rate": 8.367625896841425e-06, + "loss": 0.7618, + "step": 9359 + }, + { + "epoch": 0.28687017285766825, + "grad_norm": 1.7181474573733744, + "learning_rate": 8.367259019833329e-06, + "loss": 0.7029, + "step": 9360 + }, + { + "epoch": 0.28690082138040945, + "grad_norm": 0.9256939666267076, + "learning_rate": 8.36689210964677e-06, + "loss": 0.4743, + "step": 9361 + }, + { + "epoch": 0.28693146990315066, + "grad_norm": 1.9439929306161683, + "learning_rate": 8.36652516628536e-06, + "loss": 0.7342, + "step": 9362 + }, + { + "epoch": 0.28696211842589187, + "grad_norm": 1.6274578098142656, + "learning_rate": 8.366158189752715e-06, + "loss": 0.6557, + "step": 9363 + }, + { + "epoch": 0.28699276694863307, + "grad_norm": 1.7424920832876183, + "learning_rate": 8.365791180052454e-06, + "loss": 0.6927, + "step": 9364 + }, + { + "epoch": 0.2870234154713743, + "grad_norm": 1.7736739252386606, + "learning_rate": 8.365424137188192e-06, + "loss": 0.7253, + "step": 9365 + }, + { + "epoch": 0.2870540639941155, + "grad_norm": 1.5652817964776637, + "learning_rate": 8.365057061163544e-06, + "loss": 0.7143, + "step": 9366 + }, + { + "epoch": 0.2870847125168567, + "grad_norm": 1.83033375165565, + "learning_rate": 8.364689951982126e-06, + "loss": 0.6338, + "step": 9367 + }, + { + "epoch": 0.2871153610395979, + "grad_norm": 1.6039423481559627, + "learning_rate": 8.36432280964756e-06, + "loss": 0.7088, + "step": 9368 + }, + { + "epoch": 0.2871460095623391, + "grad_norm": 1.637665008968857, + "learning_rate": 8.36395563416346e-06, + "loss": 0.6686, + "step": 9369 + }, + { + "epoch": 0.2871766580850803, + "grad_norm": 1.9079955072604176, + "learning_rate": 8.363588425533442e-06, + "loss": 0.8243, + "step": 9370 + }, + { + "epoch": 0.2872073066078215, + "grad_norm": 0.8188259955296027, + "learning_rate": 8.363221183761127e-06, + "loss": 0.4279, + "step": 9371 + }, + { + "epoch": 0.2872379551305627, + "grad_norm": 1.7374495503420364, + "learning_rate": 8.362853908850136e-06, + "loss": 0.5955, + "step": 9372 + }, + { + "epoch": 0.2872686036533039, + "grad_norm": 1.9050522469316362, + "learning_rate": 8.362486600804083e-06, + "loss": 0.7276, + "step": 9373 + }, + { + "epoch": 0.28729925217604513, + "grad_norm": 1.6628178565543164, + "learning_rate": 8.36211925962659e-06, + "loss": 0.6464, + "step": 9374 + }, + { + "epoch": 0.28732990069878633, + "grad_norm": 1.5549037912953483, + "learning_rate": 8.361751885321274e-06, + "loss": 0.6098, + "step": 9375 + }, + { + "epoch": 0.28736054922152754, + "grad_norm": 1.878011931467197, + "learning_rate": 8.36138447789176e-06, + "loss": 0.7957, + "step": 9376 + }, + { + "epoch": 0.28739119774426874, + "grad_norm": 2.0327520842108258, + "learning_rate": 8.36101703734166e-06, + "loss": 0.743, + "step": 9377 + }, + { + "epoch": 0.28742184626700995, + "grad_norm": 1.6702777131093967, + "learning_rate": 8.360649563674604e-06, + "loss": 0.6832, + "step": 9378 + }, + { + "epoch": 0.28745249478975116, + "grad_norm": 1.8731003589275743, + "learning_rate": 8.360282056894205e-06, + 
"loss": 0.7306, + "step": 9379 + }, + { + "epoch": 0.28748314331249236, + "grad_norm": 1.6652941394506897, + "learning_rate": 8.359914517004089e-06, + "loss": 0.7034, + "step": 9380 + }, + { + "epoch": 0.28751379183523357, + "grad_norm": 1.873310610753865, + "learning_rate": 8.359546944007873e-06, + "loss": 0.7229, + "step": 9381 + }, + { + "epoch": 0.2875444403579748, + "grad_norm": 0.831474900894518, + "learning_rate": 8.359179337909182e-06, + "loss": 0.4764, + "step": 9382 + }, + { + "epoch": 0.2875750888807159, + "grad_norm": 1.6786105533272206, + "learning_rate": 8.35881169871164e-06, + "loss": 0.6737, + "step": 9383 + }, + { + "epoch": 0.28760573740345713, + "grad_norm": 1.7344347416177717, + "learning_rate": 8.358444026418864e-06, + "loss": 0.7475, + "step": 9384 + }, + { + "epoch": 0.28763638592619833, + "grad_norm": 1.7247867045715655, + "learning_rate": 8.35807632103448e-06, + "loss": 0.8551, + "step": 9385 + }, + { + "epoch": 0.28766703444893954, + "grad_norm": 1.5962330837350034, + "learning_rate": 8.357708582562114e-06, + "loss": 0.7397, + "step": 9386 + }, + { + "epoch": 0.28769768297168075, + "grad_norm": 1.7770846409950831, + "learning_rate": 8.357340811005383e-06, + "loss": 0.7841, + "step": 9387 + }, + { + "epoch": 0.28772833149442195, + "grad_norm": 1.8257012460797744, + "learning_rate": 8.356973006367915e-06, + "loss": 0.7156, + "step": 9388 + }, + { + "epoch": 0.28775898001716316, + "grad_norm": 0.779473185656542, + "learning_rate": 8.356605168653334e-06, + "loss": 0.4636, + "step": 9389 + }, + { + "epoch": 0.28778962853990436, + "grad_norm": 1.8026584519439353, + "learning_rate": 8.356237297865261e-06, + "loss": 0.7323, + "step": 9390 + }, + { + "epoch": 0.28782027706264557, + "grad_norm": 1.4830898017807632, + "learning_rate": 8.355869394007326e-06, + "loss": 0.6929, + "step": 9391 + }, + { + "epoch": 0.2878509255853868, + "grad_norm": 1.6787902427542682, + "learning_rate": 8.35550145708315e-06, + "loss": 0.7059, + "step": 9392 + }, + { + "epoch": 0.287881574108128, + "grad_norm": 0.7918914208162977, + "learning_rate": 8.355133487096358e-06, + "loss": 0.4579, + "step": 9393 + }, + { + "epoch": 0.2879122226308692, + "grad_norm": 1.7668835585010445, + "learning_rate": 8.35476548405058e-06, + "loss": 0.6634, + "step": 9394 + }, + { + "epoch": 0.2879428711536104, + "grad_norm": 0.8373900160097685, + "learning_rate": 8.354397447949438e-06, + "loss": 0.5031, + "step": 9395 + }, + { + "epoch": 0.2879735196763516, + "grad_norm": 0.7645955874321492, + "learning_rate": 8.35402937879656e-06, + "loss": 0.4615, + "step": 9396 + }, + { + "epoch": 0.2880041681990928, + "grad_norm": 1.492643022630482, + "learning_rate": 8.35366127659557e-06, + "loss": 0.6819, + "step": 9397 + }, + { + "epoch": 0.288034816721834, + "grad_norm": 1.7180355280869053, + "learning_rate": 8.353293141350101e-06, + "loss": 0.6688, + "step": 9398 + }, + { + "epoch": 0.2880654652445752, + "grad_norm": 1.552306838784709, + "learning_rate": 8.352924973063776e-06, + "loss": 0.6662, + "step": 9399 + }, + { + "epoch": 0.2880961137673164, + "grad_norm": 1.6179083160579606, + "learning_rate": 8.35255677174022e-06, + "loss": 0.7346, + "step": 9400 + }, + { + "epoch": 0.2881267622900576, + "grad_norm": 0.8183851511320228, + "learning_rate": 8.352188537383069e-06, + "loss": 0.4805, + "step": 9401 + }, + { + "epoch": 0.28815741081279883, + "grad_norm": 1.748191106953706, + "learning_rate": 8.351820269995945e-06, + "loss": 0.7884, + "step": 9402 + }, + { + "epoch": 0.28818805933554004, + "grad_norm": 1.8377309098113206, + 
"learning_rate": 8.351451969582478e-06, + "loss": 0.7067, + "step": 9403 + }, + { + "epoch": 0.28821870785828124, + "grad_norm": 1.7593593554793403, + "learning_rate": 8.351083636146296e-06, + "loss": 0.6869, + "step": 9404 + }, + { + "epoch": 0.28824935638102245, + "grad_norm": 1.573403627449142, + "learning_rate": 8.35071526969103e-06, + "loss": 0.6684, + "step": 9405 + }, + { + "epoch": 0.28828000490376365, + "grad_norm": 1.9413469921088011, + "learning_rate": 8.350346870220311e-06, + "loss": 0.7932, + "step": 9406 + }, + { + "epoch": 0.28831065342650486, + "grad_norm": 0.7872641010621099, + "learning_rate": 8.349978437737765e-06, + "loss": 0.4665, + "step": 9407 + }, + { + "epoch": 0.28834130194924606, + "grad_norm": 1.7747680562540749, + "learning_rate": 8.349609972247026e-06, + "loss": 0.6858, + "step": 9408 + }, + { + "epoch": 0.28837195047198727, + "grad_norm": 1.8635696568725424, + "learning_rate": 8.349241473751721e-06, + "loss": 0.7531, + "step": 9409 + }, + { + "epoch": 0.2884025989947285, + "grad_norm": 1.603952386687509, + "learning_rate": 8.348872942255484e-06, + "loss": 0.6505, + "step": 9410 + }, + { + "epoch": 0.2884332475174697, + "grad_norm": 1.7815121262355096, + "learning_rate": 8.348504377761945e-06, + "loss": 0.6101, + "step": 9411 + }, + { + "epoch": 0.2884638960402109, + "grad_norm": 1.8850120839082185, + "learning_rate": 8.348135780274735e-06, + "loss": 0.7527, + "step": 9412 + }, + { + "epoch": 0.2884945445629521, + "grad_norm": 1.879046193560528, + "learning_rate": 8.347767149797488e-06, + "loss": 0.6853, + "step": 9413 + }, + { + "epoch": 0.28852519308569324, + "grad_norm": 0.7757619127112889, + "learning_rate": 8.347398486333835e-06, + "loss": 0.4616, + "step": 9414 + }, + { + "epoch": 0.28855584160843445, + "grad_norm": 2.1326473103521377, + "learning_rate": 8.347029789887406e-06, + "loss": 0.6176, + "step": 9415 + }, + { + "epoch": 0.28858649013117565, + "grad_norm": 1.7008664855304338, + "learning_rate": 8.346661060461838e-06, + "loss": 0.6653, + "step": 9416 + }, + { + "epoch": 0.28861713865391686, + "grad_norm": 0.7876106402064572, + "learning_rate": 8.34629229806076e-06, + "loss": 0.4632, + "step": 9417 + }, + { + "epoch": 0.28864778717665807, + "grad_norm": 1.7466845764210412, + "learning_rate": 8.34592350268781e-06, + "loss": 0.6182, + "step": 9418 + }, + { + "epoch": 0.28867843569939927, + "grad_norm": 1.5202405066256166, + "learning_rate": 8.345554674346618e-06, + "loss": 0.6537, + "step": 9419 + }, + { + "epoch": 0.2887090842221405, + "grad_norm": 1.8131298756880225, + "learning_rate": 8.345185813040822e-06, + "loss": 0.6404, + "step": 9420 + }, + { + "epoch": 0.2887397327448817, + "grad_norm": 0.8045689482674163, + "learning_rate": 8.344816918774052e-06, + "loss": 0.4827, + "step": 9421 + }, + { + "epoch": 0.2887703812676229, + "grad_norm": 1.5098460613860174, + "learning_rate": 8.344447991549947e-06, + "loss": 0.7419, + "step": 9422 + }, + { + "epoch": 0.2888010297903641, + "grad_norm": 1.8603005467595823, + "learning_rate": 8.344079031372138e-06, + "loss": 0.6823, + "step": 9423 + }, + { + "epoch": 0.2888316783131053, + "grad_norm": 1.852801400075066, + "learning_rate": 8.343710038244264e-06, + "loss": 0.7347, + "step": 9424 + }, + { + "epoch": 0.2888623268358465, + "grad_norm": 1.7954596113926022, + "learning_rate": 8.343341012169958e-06, + "loss": 0.6707, + "step": 9425 + }, + { + "epoch": 0.2888929753585877, + "grad_norm": 1.7960224215982565, + "learning_rate": 8.34297195315286e-06, + "loss": 0.6574, + "step": 9426 + }, + { + "epoch": 
0.2889236238813289, + "grad_norm": 1.732274580824317, + "learning_rate": 8.342602861196603e-06, + "loss": 0.6852, + "step": 9427 + }, + { + "epoch": 0.2889542724040701, + "grad_norm": 1.540423407356877, + "learning_rate": 8.342233736304824e-06, + "loss": 0.6537, + "step": 9428 + }, + { + "epoch": 0.28898492092681133, + "grad_norm": 1.6985578323112127, + "learning_rate": 8.341864578481162e-06, + "loss": 0.6822, + "step": 9429 + }, + { + "epoch": 0.28901556944955253, + "grad_norm": 0.8500205995321624, + "learning_rate": 8.341495387729253e-06, + "loss": 0.4642, + "step": 9430 + }, + { + "epoch": 0.28904621797229374, + "grad_norm": 0.8661546511810417, + "learning_rate": 8.341126164052735e-06, + "loss": 0.4831, + "step": 9431 + }, + { + "epoch": 0.28907686649503495, + "grad_norm": 0.7784973377934908, + "learning_rate": 8.340756907455246e-06, + "loss": 0.472, + "step": 9432 + }, + { + "epoch": 0.28910751501777615, + "grad_norm": 1.9024267751365465, + "learning_rate": 8.340387617940424e-06, + "loss": 0.7415, + "step": 9433 + }, + { + "epoch": 0.28913816354051736, + "grad_norm": 0.8201640726044672, + "learning_rate": 8.340018295511908e-06, + "loss": 0.4535, + "step": 9434 + }, + { + "epoch": 0.28916881206325856, + "grad_norm": 0.8100399532711696, + "learning_rate": 8.339648940173337e-06, + "loss": 0.4736, + "step": 9435 + }, + { + "epoch": 0.28919946058599977, + "grad_norm": 1.7625298156217255, + "learning_rate": 8.339279551928351e-06, + "loss": 0.6125, + "step": 9436 + }, + { + "epoch": 0.289230109108741, + "grad_norm": 1.7056375538815391, + "learning_rate": 8.338910130780591e-06, + "loss": 0.6761, + "step": 9437 + }, + { + "epoch": 0.2892607576314822, + "grad_norm": 1.7571615682168633, + "learning_rate": 8.338540676733693e-06, + "loss": 0.744, + "step": 9438 + }, + { + "epoch": 0.2892914061542234, + "grad_norm": 1.8720244018958638, + "learning_rate": 8.3381711897913e-06, + "loss": 0.7199, + "step": 9439 + }, + { + "epoch": 0.2893220546769646, + "grad_norm": 1.8061964092132987, + "learning_rate": 8.337801669957052e-06, + "loss": 0.6988, + "step": 9440 + }, + { + "epoch": 0.2893527031997058, + "grad_norm": 1.7589053430266535, + "learning_rate": 8.337432117234591e-06, + "loss": 0.7245, + "step": 9441 + }, + { + "epoch": 0.289383351722447, + "grad_norm": 1.9271075402285258, + "learning_rate": 8.337062531627556e-06, + "loss": 0.8501, + "step": 9442 + }, + { + "epoch": 0.2894140002451882, + "grad_norm": 1.930465620973963, + "learning_rate": 8.33669291313959e-06, + "loss": 0.7751, + "step": 9443 + }, + { + "epoch": 0.2894446487679294, + "grad_norm": 1.7658179169237362, + "learning_rate": 8.336323261774336e-06, + "loss": 0.7471, + "step": 9444 + }, + { + "epoch": 0.28947529729067056, + "grad_norm": 1.6334923391093263, + "learning_rate": 8.335953577535437e-06, + "loss": 0.6243, + "step": 9445 + }, + { + "epoch": 0.28950594581341177, + "grad_norm": 1.5620716317628844, + "learning_rate": 8.33558386042653e-06, + "loss": 0.6371, + "step": 9446 + }, + { + "epoch": 0.289536594336153, + "grad_norm": 1.7832014567306542, + "learning_rate": 8.335214110451264e-06, + "loss": 0.7073, + "step": 9447 + }, + { + "epoch": 0.2895672428588942, + "grad_norm": 1.5361840272700023, + "learning_rate": 8.334844327613278e-06, + "loss": 0.6664, + "step": 9448 + }, + { + "epoch": 0.2895978913816354, + "grad_norm": 1.783690308731656, + "learning_rate": 8.33447451191622e-06, + "loss": 0.7206, + "step": 9449 + }, + { + "epoch": 0.2896285399043766, + "grad_norm": 1.0010185917377916, + "learning_rate": 8.334104663363732e-06, + "loss": 
0.4823, + "step": 9450 + }, + { + "epoch": 0.2896591884271178, + "grad_norm": 1.6290337728924806, + "learning_rate": 8.333734781959456e-06, + "loss": 0.7236, + "step": 9451 + }, + { + "epoch": 0.289689836949859, + "grad_norm": 0.8411490796757592, + "learning_rate": 8.333364867707038e-06, + "loss": 0.4678, + "step": 9452 + }, + { + "epoch": 0.2897204854726002, + "grad_norm": 1.7273968764226562, + "learning_rate": 8.332994920610125e-06, + "loss": 0.662, + "step": 9453 + }, + { + "epoch": 0.2897511339953414, + "grad_norm": 1.7469988842468949, + "learning_rate": 8.332624940672358e-06, + "loss": 0.7207, + "step": 9454 + }, + { + "epoch": 0.2897817825180826, + "grad_norm": 1.6166085087810176, + "learning_rate": 8.332254927897386e-06, + "loss": 0.5631, + "step": 9455 + }, + { + "epoch": 0.2898124310408238, + "grad_norm": 1.9456451599187317, + "learning_rate": 8.331884882288852e-06, + "loss": 0.6925, + "step": 9456 + }, + { + "epoch": 0.28984307956356503, + "grad_norm": 1.7707265027287478, + "learning_rate": 8.331514803850406e-06, + "loss": 0.6999, + "step": 9457 + }, + { + "epoch": 0.28987372808630624, + "grad_norm": 1.7103824893411192, + "learning_rate": 8.33114469258569e-06, + "loss": 0.7214, + "step": 9458 + }, + { + "epoch": 0.28990437660904744, + "grad_norm": 1.9549671122298589, + "learning_rate": 8.330774548498356e-06, + "loss": 0.6764, + "step": 9459 + }, + { + "epoch": 0.28993502513178865, + "grad_norm": 0.9989994270180698, + "learning_rate": 8.330404371592046e-06, + "loss": 0.4545, + "step": 9460 + }, + { + "epoch": 0.28996567365452985, + "grad_norm": 0.9561704246313809, + "learning_rate": 8.33003416187041e-06, + "loss": 0.4703, + "step": 9461 + }, + { + "epoch": 0.28999632217727106, + "grad_norm": 1.8141077828722114, + "learning_rate": 8.329663919337096e-06, + "loss": 0.6785, + "step": 9462 + }, + { + "epoch": 0.29002697070001227, + "grad_norm": 1.554029495940326, + "learning_rate": 8.32929364399575e-06, + "loss": 0.7348, + "step": 9463 + }, + { + "epoch": 0.29005761922275347, + "grad_norm": 1.716903808493168, + "learning_rate": 8.328923335850023e-06, + "loss": 0.7092, + "step": 9464 + }, + { + "epoch": 0.2900882677454947, + "grad_norm": 1.0245547169517795, + "learning_rate": 8.328552994903562e-06, + "loss": 0.4873, + "step": 9465 + }, + { + "epoch": 0.2901189162682359, + "grad_norm": 1.7022558118823918, + "learning_rate": 8.328182621160018e-06, + "loss": 0.684, + "step": 9466 + }, + { + "epoch": 0.2901495647909771, + "grad_norm": 1.8315384393496157, + "learning_rate": 8.327812214623037e-06, + "loss": 0.7311, + "step": 9467 + }, + { + "epoch": 0.2901802133137183, + "grad_norm": 1.8322233197730378, + "learning_rate": 8.327441775296273e-06, + "loss": 0.7366, + "step": 9468 + }, + { + "epoch": 0.2902108618364595, + "grad_norm": 1.9153333745605614, + "learning_rate": 8.327071303183374e-06, + "loss": 0.8235, + "step": 9469 + }, + { + "epoch": 0.2902415103592007, + "grad_norm": 1.885992141546536, + "learning_rate": 8.326700798287988e-06, + "loss": 0.723, + "step": 9470 + }, + { + "epoch": 0.2902721588819419, + "grad_norm": 0.8184267760500369, + "learning_rate": 8.326330260613768e-06, + "loss": 0.4471, + "step": 9471 + }, + { + "epoch": 0.2903028074046831, + "grad_norm": 1.644866297816928, + "learning_rate": 8.325959690164367e-06, + "loss": 0.6294, + "step": 9472 + }, + { + "epoch": 0.2903334559274243, + "grad_norm": 0.8462473064209126, + "learning_rate": 8.325589086943433e-06, + "loss": 0.497, + "step": 9473 + }, + { + "epoch": 0.2903641044501655, + "grad_norm": 1.8785102598412868, + 
"learning_rate": 8.325218450954619e-06, + "loss": 0.7101, + "step": 9474 + }, + { + "epoch": 0.29039475297290673, + "grad_norm": 0.8033676196456374, + "learning_rate": 8.324847782201576e-06, + "loss": 0.4527, + "step": 9475 + }, + { + "epoch": 0.2904254014956479, + "grad_norm": 1.849065447181978, + "learning_rate": 8.324477080687959e-06, + "loss": 0.7465, + "step": 9476 + }, + { + "epoch": 0.2904560500183891, + "grad_norm": 1.7933733545549817, + "learning_rate": 8.324106346417416e-06, + "loss": 0.7275, + "step": 9477 + }, + { + "epoch": 0.2904866985411303, + "grad_norm": 1.9072950683956682, + "learning_rate": 8.323735579393604e-06, + "loss": 0.7203, + "step": 9478 + }, + { + "epoch": 0.2905173470638715, + "grad_norm": 1.951661960186648, + "learning_rate": 8.323364779620176e-06, + "loss": 0.7277, + "step": 9479 + }, + { + "epoch": 0.2905479955866127, + "grad_norm": 1.761221347585483, + "learning_rate": 8.322993947100783e-06, + "loss": 0.7248, + "step": 9480 + }, + { + "epoch": 0.2905786441093539, + "grad_norm": 0.8997221027195564, + "learning_rate": 8.32262308183908e-06, + "loss": 0.4826, + "step": 9481 + }, + { + "epoch": 0.2906092926320951, + "grad_norm": 1.8173352936925125, + "learning_rate": 8.322252183838723e-06, + "loss": 0.7493, + "step": 9482 + }, + { + "epoch": 0.2906399411548363, + "grad_norm": 1.9368854655840666, + "learning_rate": 8.321881253103366e-06, + "loss": 0.7468, + "step": 9483 + }, + { + "epoch": 0.29067058967757753, + "grad_norm": 1.9495074377496067, + "learning_rate": 8.32151028963666e-06, + "loss": 0.7816, + "step": 9484 + }, + { + "epoch": 0.29070123820031873, + "grad_norm": 1.701796328171423, + "learning_rate": 8.321139293442266e-06, + "loss": 0.735, + "step": 9485 + }, + { + "epoch": 0.29073188672305994, + "grad_norm": 1.9176431303841588, + "learning_rate": 8.320768264523835e-06, + "loss": 0.8292, + "step": 9486 + }, + { + "epoch": 0.29076253524580115, + "grad_norm": 1.8786057370514098, + "learning_rate": 8.320397202885027e-06, + "loss": 0.6923, + "step": 9487 + }, + { + "epoch": 0.29079318376854235, + "grad_norm": 1.6312181134741708, + "learning_rate": 8.320026108529494e-06, + "loss": 0.6894, + "step": 9488 + }, + { + "epoch": 0.29082383229128356, + "grad_norm": 1.7693463633436812, + "learning_rate": 8.319654981460895e-06, + "loss": 0.6621, + "step": 9489 + }, + { + "epoch": 0.29085448081402476, + "grad_norm": 1.9551641012801197, + "learning_rate": 8.319283821682885e-06, + "loss": 0.737, + "step": 9490 + }, + { + "epoch": 0.29088512933676597, + "grad_norm": 1.707133686967687, + "learning_rate": 8.318912629199123e-06, + "loss": 0.6302, + "step": 9491 + }, + { + "epoch": 0.2909157778595072, + "grad_norm": 1.594028835626414, + "learning_rate": 8.318541404013264e-06, + "loss": 0.6851, + "step": 9492 + }, + { + "epoch": 0.2909464263822484, + "grad_norm": 1.789330830820213, + "learning_rate": 8.31817014612897e-06, + "loss": 0.7551, + "step": 9493 + }, + { + "epoch": 0.2909770749049896, + "grad_norm": 0.8961335532870534, + "learning_rate": 8.317798855549897e-06, + "loss": 0.4682, + "step": 9494 + }, + { + "epoch": 0.2910077234277308, + "grad_norm": 0.7846745199185511, + "learning_rate": 8.317427532279702e-06, + "loss": 0.4732, + "step": 9495 + }, + { + "epoch": 0.291038371950472, + "grad_norm": 1.954536716126634, + "learning_rate": 8.317056176322044e-06, + "loss": 0.7392, + "step": 9496 + }, + { + "epoch": 0.2910690204732132, + "grad_norm": 0.798685125179029, + "learning_rate": 8.316684787680582e-06, + "loss": 0.4765, + "step": 9497 + }, + { + "epoch": 
0.2910996689959544, + "grad_norm": 1.6254408350127807, + "learning_rate": 8.316313366358978e-06, + "loss": 0.7789, + "step": 9498 + }, + { + "epoch": 0.2911303175186956, + "grad_norm": 1.6869622334803056, + "learning_rate": 8.31594191236089e-06, + "loss": 0.6904, + "step": 9499 + }, + { + "epoch": 0.2911609660414368, + "grad_norm": 1.910559549945184, + "learning_rate": 8.315570425689975e-06, + "loss": 0.7752, + "step": 9500 + }, + { + "epoch": 0.291191614564178, + "grad_norm": 1.8008266076307344, + "learning_rate": 8.3151989063499e-06, + "loss": 0.7408, + "step": 9501 + }, + { + "epoch": 0.29122226308691923, + "grad_norm": 1.7832518797831276, + "learning_rate": 8.314827354344318e-06, + "loss": 0.744, + "step": 9502 + }, + { + "epoch": 0.29125291160966044, + "grad_norm": 1.8507740696715096, + "learning_rate": 8.3144557696769e-06, + "loss": 0.8195, + "step": 9503 + }, + { + "epoch": 0.29128356013240164, + "grad_norm": 1.9672959380628172, + "learning_rate": 8.314084152351297e-06, + "loss": 0.8299, + "step": 9504 + }, + { + "epoch": 0.29131420865514285, + "grad_norm": 1.8102805773206159, + "learning_rate": 8.313712502371174e-06, + "loss": 0.7905, + "step": 9505 + }, + { + "epoch": 0.29134485717788405, + "grad_norm": 1.8469636902847617, + "learning_rate": 8.313340819740195e-06, + "loss": 0.7329, + "step": 9506 + }, + { + "epoch": 0.2913755057006252, + "grad_norm": 1.7331564292418975, + "learning_rate": 8.312969104462024e-06, + "loss": 0.7151, + "step": 9507 + }, + { + "epoch": 0.2914061542233664, + "grad_norm": 0.8523882525105428, + "learning_rate": 8.312597356540316e-06, + "loss": 0.4686, + "step": 9508 + }, + { + "epoch": 0.2914368027461076, + "grad_norm": 0.8191604749304303, + "learning_rate": 8.312225575978741e-06, + "loss": 0.5, + "step": 9509 + }, + { + "epoch": 0.2914674512688488, + "grad_norm": 1.7770143800814282, + "learning_rate": 8.311853762780959e-06, + "loss": 0.6303, + "step": 9510 + }, + { + "epoch": 0.29149809979159, + "grad_norm": 1.6796780043204647, + "learning_rate": 8.311481916950636e-06, + "loss": 0.6209, + "step": 9511 + }, + { + "epoch": 0.29152874831433123, + "grad_norm": 0.811790271994923, + "learning_rate": 8.311110038491435e-06, + "loss": 0.4735, + "step": 9512 + }, + { + "epoch": 0.29155939683707244, + "grad_norm": 1.666410834924763, + "learning_rate": 8.310738127407017e-06, + "loss": 0.6131, + "step": 9513 + }, + { + "epoch": 0.29159004535981364, + "grad_norm": 1.6943509169550826, + "learning_rate": 8.310366183701051e-06, + "loss": 0.654, + "step": 9514 + }, + { + "epoch": 0.29162069388255485, + "grad_norm": 1.8089950612005363, + "learning_rate": 8.3099942073772e-06, + "loss": 0.7179, + "step": 9515 + }, + { + "epoch": 0.29165134240529605, + "grad_norm": 1.6822528206109522, + "learning_rate": 8.30962219843913e-06, + "loss": 0.6224, + "step": 9516 + }, + { + "epoch": 0.29168199092803726, + "grad_norm": 0.8267982168329929, + "learning_rate": 8.309250156890502e-06, + "loss": 0.4843, + "step": 9517 + }, + { + "epoch": 0.29171263945077847, + "grad_norm": 1.6477471121465013, + "learning_rate": 8.308878082734988e-06, + "loss": 0.6244, + "step": 9518 + }, + { + "epoch": 0.29174328797351967, + "grad_norm": 1.533334307983797, + "learning_rate": 8.308505975976252e-06, + "loss": 0.7093, + "step": 9519 + }, + { + "epoch": 0.2917739364962609, + "grad_norm": 1.830120773681969, + "learning_rate": 8.30813383661796e-06, + "loss": 0.737, + "step": 9520 + }, + { + "epoch": 0.2918045850190021, + "grad_norm": 1.8655641572046773, + "learning_rate": 8.307761664663778e-06, + "loss": 0.7511, 
+ "step": 9521 + }, + { + "epoch": 0.2918352335417433, + "grad_norm": 1.5622069996704178, + "learning_rate": 8.307389460117375e-06, + "loss": 0.6549, + "step": 9522 + }, + { + "epoch": 0.2918658820644845, + "grad_norm": 1.656292267623811, + "learning_rate": 8.307017222982416e-06, + "loss": 0.6956, + "step": 9523 + }, + { + "epoch": 0.2918965305872257, + "grad_norm": 0.8560759267502712, + "learning_rate": 8.306644953262571e-06, + "loss": 0.4824, + "step": 9524 + }, + { + "epoch": 0.2919271791099669, + "grad_norm": 1.7701996804925717, + "learning_rate": 8.306272650961507e-06, + "loss": 0.7519, + "step": 9525 + }, + { + "epoch": 0.2919578276327081, + "grad_norm": 1.8030047554884332, + "learning_rate": 8.305900316082893e-06, + "loss": 0.6823, + "step": 9526 + }, + { + "epoch": 0.2919884761554493, + "grad_norm": 1.6070083362755552, + "learning_rate": 8.305527948630398e-06, + "loss": 0.6603, + "step": 9527 + }, + { + "epoch": 0.2920191246781905, + "grad_norm": 1.6482210657530807, + "learning_rate": 8.305155548607688e-06, + "loss": 0.6728, + "step": 9528 + }, + { + "epoch": 0.29204977320093173, + "grad_norm": 1.8088759208574212, + "learning_rate": 8.304783116018437e-06, + "loss": 0.7395, + "step": 9529 + }, + { + "epoch": 0.29208042172367293, + "grad_norm": 1.6472932850530546, + "learning_rate": 8.304410650866312e-06, + "loss": 0.7223, + "step": 9530 + }, + { + "epoch": 0.29211107024641414, + "grad_norm": 1.6167705344827272, + "learning_rate": 8.304038153154983e-06, + "loss": 0.6262, + "step": 9531 + }, + { + "epoch": 0.29214171876915535, + "grad_norm": 1.8133451408484975, + "learning_rate": 8.303665622888121e-06, + "loss": 0.7065, + "step": 9532 + }, + { + "epoch": 0.29217236729189655, + "grad_norm": 1.7917427384124829, + "learning_rate": 8.303293060069394e-06, + "loss": 0.6926, + "step": 9533 + }, + { + "epoch": 0.29220301581463776, + "grad_norm": 1.681103009398211, + "learning_rate": 8.30292046470248e-06, + "loss": 0.6814, + "step": 9534 + }, + { + "epoch": 0.29223366433737896, + "grad_norm": 1.8532409215290098, + "learning_rate": 8.302547836791042e-06, + "loss": 0.6192, + "step": 9535 + }, + { + "epoch": 0.29226431286012017, + "grad_norm": 0.8428901135715134, + "learning_rate": 8.302175176338756e-06, + "loss": 0.4745, + "step": 9536 + }, + { + "epoch": 0.2922949613828614, + "grad_norm": 1.5801971249255995, + "learning_rate": 8.301802483349293e-06, + "loss": 0.5826, + "step": 9537 + }, + { + "epoch": 0.2923256099056025, + "grad_norm": 1.818730619392167, + "learning_rate": 8.301429757826326e-06, + "loss": 0.7378, + "step": 9538 + }, + { + "epoch": 0.29235625842834373, + "grad_norm": 0.8156706592920703, + "learning_rate": 8.301056999773527e-06, + "loss": 0.4858, + "step": 9539 + }, + { + "epoch": 0.29238690695108494, + "grad_norm": 1.5590967794061314, + "learning_rate": 8.300684209194567e-06, + "loss": 0.6003, + "step": 9540 + }, + { + "epoch": 0.29241755547382614, + "grad_norm": 1.6383598682133178, + "learning_rate": 8.300311386093122e-06, + "loss": 0.6203, + "step": 9541 + }, + { + "epoch": 0.29244820399656735, + "grad_norm": 1.773376675961955, + "learning_rate": 8.299938530472866e-06, + "loss": 0.6791, + "step": 9542 + }, + { + "epoch": 0.29247885251930855, + "grad_norm": 0.8851577372718185, + "learning_rate": 8.29956564233747e-06, + "loss": 0.4836, + "step": 9543 + }, + { + "epoch": 0.29250950104204976, + "grad_norm": 1.7594099968171764, + "learning_rate": 8.299192721690609e-06, + "loss": 0.6659, + "step": 9544 + }, + { + "epoch": 0.29254014956479096, + "grad_norm": 1.7919578688971605, + 
"learning_rate": 8.298819768535959e-06, + "loss": 0.7496, + "step": 9545 + }, + { + "epoch": 0.29257079808753217, + "grad_norm": 1.8522509453191067, + "learning_rate": 8.298446782877194e-06, + "loss": 0.6657, + "step": 9546 + }, + { + "epoch": 0.2926014466102734, + "grad_norm": 1.9561276505621437, + "learning_rate": 8.298073764717988e-06, + "loss": 0.6791, + "step": 9547 + }, + { + "epoch": 0.2926320951330146, + "grad_norm": 1.6627746683887255, + "learning_rate": 8.297700714062017e-06, + "loss": 0.6873, + "step": 9548 + }, + { + "epoch": 0.2926627436557558, + "grad_norm": 1.740622217369237, + "learning_rate": 8.297327630912958e-06, + "loss": 0.7137, + "step": 9549 + }, + { + "epoch": 0.292693392178497, + "grad_norm": 0.8264121475064846, + "learning_rate": 8.296954515274485e-06, + "loss": 0.4686, + "step": 9550 + }, + { + "epoch": 0.2927240407012382, + "grad_norm": 1.688076153521424, + "learning_rate": 8.296581367150277e-06, + "loss": 0.7162, + "step": 9551 + }, + { + "epoch": 0.2927546892239794, + "grad_norm": 1.9184748425928941, + "learning_rate": 8.296208186544008e-06, + "loss": 0.6707, + "step": 9552 + }, + { + "epoch": 0.2927853377467206, + "grad_norm": 0.8240812251782681, + "learning_rate": 8.295834973459358e-06, + "loss": 0.4618, + "step": 9553 + }, + { + "epoch": 0.2928159862694618, + "grad_norm": 1.6656975196375954, + "learning_rate": 8.295461727900003e-06, + "loss": 0.7012, + "step": 9554 + }, + { + "epoch": 0.292846634792203, + "grad_norm": 1.6933451550650578, + "learning_rate": 8.295088449869619e-06, + "loss": 0.6965, + "step": 9555 + }, + { + "epoch": 0.2928772833149442, + "grad_norm": 1.8212714359875837, + "learning_rate": 8.294715139371885e-06, + "loss": 0.6942, + "step": 9556 + }, + { + "epoch": 0.29290793183768543, + "grad_norm": 1.6709743667686983, + "learning_rate": 8.29434179641048e-06, + "loss": 0.6729, + "step": 9557 + }, + { + "epoch": 0.29293858036042664, + "grad_norm": 1.6473471259932184, + "learning_rate": 8.293968420989083e-06, + "loss": 0.713, + "step": 9558 + }, + { + "epoch": 0.29296922888316784, + "grad_norm": 1.8449861590402583, + "learning_rate": 8.293595013111373e-06, + "loss": 0.6936, + "step": 9559 + }, + { + "epoch": 0.29299987740590905, + "grad_norm": 1.730079668441436, + "learning_rate": 8.293221572781027e-06, + "loss": 0.6529, + "step": 9560 + }, + { + "epoch": 0.29303052592865025, + "grad_norm": 1.8329555624082532, + "learning_rate": 8.292848100001727e-06, + "loss": 0.7235, + "step": 9561 + }, + { + "epoch": 0.29306117445139146, + "grad_norm": 2.0265392293185718, + "learning_rate": 8.292474594777152e-06, + "loss": 0.6809, + "step": 9562 + }, + { + "epoch": 0.29309182297413267, + "grad_norm": 1.60781418710933, + "learning_rate": 8.292101057110982e-06, + "loss": 0.6043, + "step": 9563 + }, + { + "epoch": 0.29312247149687387, + "grad_norm": 1.7598212696297255, + "learning_rate": 8.2917274870069e-06, + "loss": 0.6726, + "step": 9564 + }, + { + "epoch": 0.2931531200196151, + "grad_norm": 1.676908305567569, + "learning_rate": 8.291353884468583e-06, + "loss": 0.6621, + "step": 9565 + }, + { + "epoch": 0.2931837685423563, + "grad_norm": 1.4115253294046564, + "learning_rate": 8.290980249499714e-06, + "loss": 0.5762, + "step": 9566 + }, + { + "epoch": 0.2932144170650975, + "grad_norm": 1.5712568842428292, + "learning_rate": 8.290606582103975e-06, + "loss": 0.7484, + "step": 9567 + }, + { + "epoch": 0.2932450655878387, + "grad_norm": 1.5883211087757287, + "learning_rate": 8.290232882285047e-06, + "loss": 0.6349, + "step": 9568 + }, + { + "epoch": 
0.29327571411057984, + "grad_norm": 1.841003940034072, + "learning_rate": 8.289859150046614e-06, + "loss": 0.723, + "step": 9569 + }, + { + "epoch": 0.29330636263332105, + "grad_norm": 1.910917775334536, + "learning_rate": 8.289485385392356e-06, + "loss": 0.7255, + "step": 9570 + }, + { + "epoch": 0.29333701115606226, + "grad_norm": 1.6834436768901402, + "learning_rate": 8.289111588325956e-06, + "loss": 0.6629, + "step": 9571 + }, + { + "epoch": 0.29336765967880346, + "grad_norm": 1.7278417803506956, + "learning_rate": 8.2887377588511e-06, + "loss": 0.6665, + "step": 9572 + }, + { + "epoch": 0.29339830820154467, + "grad_norm": 1.0206536210509876, + "learning_rate": 8.288363896971468e-06, + "loss": 0.4995, + "step": 9573 + }, + { + "epoch": 0.29342895672428587, + "grad_norm": 1.74830805705043, + "learning_rate": 8.287990002690746e-06, + "loss": 0.6767, + "step": 9574 + }, + { + "epoch": 0.2934596052470271, + "grad_norm": 0.857287786119707, + "learning_rate": 8.287616076012617e-06, + "loss": 0.4998, + "step": 9575 + }, + { + "epoch": 0.2934902537697683, + "grad_norm": 1.7675407142010058, + "learning_rate": 8.287242116940765e-06, + "loss": 0.6836, + "step": 9576 + }, + { + "epoch": 0.2935209022925095, + "grad_norm": 1.91288585086657, + "learning_rate": 8.286868125478876e-06, + "loss": 0.7548, + "step": 9577 + }, + { + "epoch": 0.2935515508152507, + "grad_norm": 1.5985969074508026, + "learning_rate": 8.286494101630633e-06, + "loss": 0.6238, + "step": 9578 + }, + { + "epoch": 0.2935821993379919, + "grad_norm": 0.8720733932580829, + "learning_rate": 8.286120045399724e-06, + "loss": 0.4822, + "step": 9579 + }, + { + "epoch": 0.2936128478607331, + "grad_norm": 1.597337418001928, + "learning_rate": 8.285745956789832e-06, + "loss": 0.669, + "step": 9580 + }, + { + "epoch": 0.2936434963834743, + "grad_norm": 1.7202752174613822, + "learning_rate": 8.285371835804646e-06, + "loss": 0.6711, + "step": 9581 + }, + { + "epoch": 0.2936741449062155, + "grad_norm": 0.913178313289372, + "learning_rate": 8.28499768244785e-06, + "loss": 0.4847, + "step": 9582 + }, + { + "epoch": 0.2937047934289567, + "grad_norm": 1.3675438107101816, + "learning_rate": 8.284623496723132e-06, + "loss": 0.4514, + "step": 9583 + }, + { + "epoch": 0.29373544195169793, + "grad_norm": 1.8459873513177227, + "learning_rate": 8.284249278634178e-06, + "loss": 0.7306, + "step": 9584 + }, + { + "epoch": 0.29376609047443913, + "grad_norm": 1.4401959080267708, + "learning_rate": 8.283875028184676e-06, + "loss": 0.7361, + "step": 9585 + }, + { + "epoch": 0.29379673899718034, + "grad_norm": 1.648156082049406, + "learning_rate": 8.283500745378312e-06, + "loss": 0.5905, + "step": 9586 + }, + { + "epoch": 0.29382738751992155, + "grad_norm": 1.7655506865470223, + "learning_rate": 8.283126430218776e-06, + "loss": 0.6633, + "step": 9587 + }, + { + "epoch": 0.29385803604266275, + "grad_norm": 1.6378100949963876, + "learning_rate": 8.282752082709755e-06, + "loss": 0.63, + "step": 9588 + }, + { + "epoch": 0.29388868456540396, + "grad_norm": 1.5258806815131736, + "learning_rate": 8.282377702854937e-06, + "loss": 0.627, + "step": 9589 + }, + { + "epoch": 0.29391933308814516, + "grad_norm": 1.7704499122474897, + "learning_rate": 8.282003290658012e-06, + "loss": 0.6753, + "step": 9590 + }, + { + "epoch": 0.29394998161088637, + "grad_norm": 1.837963641793121, + "learning_rate": 8.281628846122668e-06, + "loss": 0.7507, + "step": 9591 + }, + { + "epoch": 0.2939806301336276, + "grad_norm": 1.7892303277773156, + "learning_rate": 8.281254369252598e-06, + "loss": 
0.6882, + "step": 9592 + }, + { + "epoch": 0.2940112786563688, + "grad_norm": 1.2203541969707108, + "learning_rate": 8.280879860051488e-06, + "loss": 0.4836, + "step": 9593 + }, + { + "epoch": 0.29404192717911, + "grad_norm": 1.8081877497120336, + "learning_rate": 8.280505318523028e-06, + "loss": 0.7633, + "step": 9594 + }, + { + "epoch": 0.2940725757018512, + "grad_norm": 1.635407355938089, + "learning_rate": 8.28013074467091e-06, + "loss": 0.6057, + "step": 9595 + }, + { + "epoch": 0.2941032242245924, + "grad_norm": 1.7089131636749733, + "learning_rate": 8.279756138498826e-06, + "loss": 0.7144, + "step": 9596 + }, + { + "epoch": 0.2941338727473336, + "grad_norm": 0.7778193624298518, + "learning_rate": 8.279381500010466e-06, + "loss": 0.4676, + "step": 9597 + }, + { + "epoch": 0.2941645212700748, + "grad_norm": 1.7069161339861105, + "learning_rate": 8.279006829209519e-06, + "loss": 0.7831, + "step": 9598 + }, + { + "epoch": 0.294195169792816, + "grad_norm": 0.8554194408639904, + "learning_rate": 8.27863212609968e-06, + "loss": 0.508, + "step": 9599 + }, + { + "epoch": 0.29422581831555716, + "grad_norm": 1.5604844891527805, + "learning_rate": 8.278257390684639e-06, + "loss": 0.7057, + "step": 9600 + }, + { + "epoch": 0.29425646683829837, + "grad_norm": 1.6552978489075243, + "learning_rate": 8.277882622968089e-06, + "loss": 0.6225, + "step": 9601 + }, + { + "epoch": 0.2942871153610396, + "grad_norm": 1.970049522338308, + "learning_rate": 8.277507822953722e-06, + "loss": 0.6722, + "step": 9602 + }, + { + "epoch": 0.2943177638837808, + "grad_norm": 1.6852486273466463, + "learning_rate": 8.277132990645235e-06, + "loss": 0.6821, + "step": 9603 + }, + { + "epoch": 0.294348412406522, + "grad_norm": 1.9399925259076678, + "learning_rate": 8.276758126046316e-06, + "loss": 0.8376, + "step": 9604 + }, + { + "epoch": 0.2943790609292632, + "grad_norm": 1.6794306761610975, + "learning_rate": 8.27638322916066e-06, + "loss": 0.6533, + "step": 9605 + }, + { + "epoch": 0.2944097094520044, + "grad_norm": 1.8970666698777485, + "learning_rate": 8.276008299991965e-06, + "loss": 0.765, + "step": 9606 + }, + { + "epoch": 0.2944403579747456, + "grad_norm": 1.716754262172186, + "learning_rate": 8.275633338543918e-06, + "loss": 0.6791, + "step": 9607 + }, + { + "epoch": 0.2944710064974868, + "grad_norm": 1.8484353467277177, + "learning_rate": 8.27525834482022e-06, + "loss": 0.673, + "step": 9608 + }, + { + "epoch": 0.294501655020228, + "grad_norm": 1.683231124825208, + "learning_rate": 8.274883318824563e-06, + "loss": 0.7541, + "step": 9609 + }, + { + "epoch": 0.2945323035429692, + "grad_norm": 1.6002508484152538, + "learning_rate": 8.274508260560644e-06, + "loss": 0.7084, + "step": 9610 + }, + { + "epoch": 0.2945629520657104, + "grad_norm": 1.886222974347158, + "learning_rate": 8.274133170032155e-06, + "loss": 0.6518, + "step": 9611 + }, + { + "epoch": 0.29459360058845163, + "grad_norm": 1.5852865915055372, + "learning_rate": 8.273758047242795e-06, + "loss": 0.68, + "step": 9612 + }, + { + "epoch": 0.29462424911119284, + "grad_norm": 0.9332532370062857, + "learning_rate": 8.27338289219626e-06, + "loss": 0.4747, + "step": 9613 + }, + { + "epoch": 0.29465489763393404, + "grad_norm": 1.676745123738552, + "learning_rate": 8.273007704896246e-06, + "loss": 0.6725, + "step": 9614 + }, + { + "epoch": 0.29468554615667525, + "grad_norm": 0.8560412203699387, + "learning_rate": 8.272632485346449e-06, + "loss": 0.4632, + "step": 9615 + }, + { + "epoch": 0.29471619467941645, + "grad_norm": 1.7538249126668084, + "learning_rate": 
8.272257233550566e-06, + "loss": 0.7553, + "step": 9616 + }, + { + "epoch": 0.29474684320215766, + "grad_norm": 1.8385204239586357, + "learning_rate": 8.271881949512297e-06, + "loss": 0.6947, + "step": 9617 + }, + { + "epoch": 0.29477749172489887, + "grad_norm": 1.5397640139993614, + "learning_rate": 8.271506633235335e-06, + "loss": 0.6516, + "step": 9618 + }, + { + "epoch": 0.29480814024764007, + "grad_norm": 0.806916651046564, + "learning_rate": 8.271131284723384e-06, + "loss": 0.4702, + "step": 9619 + }, + { + "epoch": 0.2948387887703813, + "grad_norm": 1.8442253575452232, + "learning_rate": 8.270755903980139e-06, + "loss": 0.6482, + "step": 9620 + }, + { + "epoch": 0.2948694372931225, + "grad_norm": 1.9103251853887102, + "learning_rate": 8.270380491009297e-06, + "loss": 0.7383, + "step": 9621 + }, + { + "epoch": 0.2949000858158637, + "grad_norm": 1.7040684983510048, + "learning_rate": 8.270005045814563e-06, + "loss": 0.6533, + "step": 9622 + }, + { + "epoch": 0.2949307343386049, + "grad_norm": 1.9436020419152236, + "learning_rate": 8.26962956839963e-06, + "loss": 0.7464, + "step": 9623 + }, + { + "epoch": 0.2949613828613461, + "grad_norm": 0.8373550407993988, + "learning_rate": 8.269254058768201e-06, + "loss": 0.4892, + "step": 9624 + }, + { + "epoch": 0.2949920313840873, + "grad_norm": 1.8775635529299197, + "learning_rate": 8.268878516923975e-06, + "loss": 0.7687, + "step": 9625 + }, + { + "epoch": 0.2950226799068285, + "grad_norm": 0.789577250090038, + "learning_rate": 8.268502942870654e-06, + "loss": 0.4646, + "step": 9626 + }, + { + "epoch": 0.2950533284295697, + "grad_norm": 1.7505695067051608, + "learning_rate": 8.268127336611935e-06, + "loss": 0.6753, + "step": 9627 + }, + { + "epoch": 0.2950839769523109, + "grad_norm": 0.7956698514248758, + "learning_rate": 8.267751698151523e-06, + "loss": 0.4468, + "step": 9628 + }, + { + "epoch": 0.29511462547505213, + "grad_norm": 1.7134400487271102, + "learning_rate": 8.267376027493117e-06, + "loss": 0.7118, + "step": 9629 + }, + { + "epoch": 0.29514527399779333, + "grad_norm": 1.5048845211097277, + "learning_rate": 8.267000324640418e-06, + "loss": 0.8247, + "step": 9630 + }, + { + "epoch": 0.2951759225205345, + "grad_norm": 1.4917051892617177, + "learning_rate": 8.26662458959713e-06, + "loss": 0.6594, + "step": 9631 + }, + { + "epoch": 0.2952065710432757, + "grad_norm": 0.8210387457319369, + "learning_rate": 8.266248822366953e-06, + "loss": 0.4595, + "step": 9632 + }, + { + "epoch": 0.2952372195660169, + "grad_norm": 0.8004633604513065, + "learning_rate": 8.265873022953591e-06, + "loss": 0.4698, + "step": 9633 + }, + { + "epoch": 0.2952678680887581, + "grad_norm": 1.8104708331855803, + "learning_rate": 8.265497191360747e-06, + "loss": 0.6799, + "step": 9634 + }, + { + "epoch": 0.2952985166114993, + "grad_norm": 1.76758454937122, + "learning_rate": 8.265121327592124e-06, + "loss": 0.6878, + "step": 9635 + }, + { + "epoch": 0.2953291651342405, + "grad_norm": 1.688916287594444, + "learning_rate": 8.264745431651424e-06, + "loss": 0.6748, + "step": 9636 + }, + { + "epoch": 0.2953598136569817, + "grad_norm": 1.5763220569885716, + "learning_rate": 8.264369503542353e-06, + "loss": 0.6616, + "step": 9637 + }, + { + "epoch": 0.2953904621797229, + "grad_norm": 1.7515015333318682, + "learning_rate": 8.263993543268613e-06, + "loss": 0.7089, + "step": 9638 + }, + { + "epoch": 0.29542111070246413, + "grad_norm": 1.6095277848666631, + "learning_rate": 8.263617550833911e-06, + "loss": 0.6938, + "step": 9639 + }, + { + "epoch": 0.29545175922520533, + 
"grad_norm": 1.8465736292671566, + "learning_rate": 8.263241526241949e-06, + "loss": 0.7124, + "step": 9640 + }, + { + "epoch": 0.29548240774794654, + "grad_norm": 1.6754105030571005, + "learning_rate": 8.262865469496433e-06, + "loss": 0.69, + "step": 9641 + }, + { + "epoch": 0.29551305627068775, + "grad_norm": 1.4149141582554803, + "learning_rate": 8.26248938060107e-06, + "loss": 0.4686, + "step": 9642 + }, + { + "epoch": 0.29554370479342895, + "grad_norm": 1.9641667682914887, + "learning_rate": 8.262113259559564e-06, + "loss": 0.7737, + "step": 9643 + }, + { + "epoch": 0.29557435331617016, + "grad_norm": 1.8034596801475258, + "learning_rate": 8.26173710637562e-06, + "loss": 0.7167, + "step": 9644 + }, + { + "epoch": 0.29560500183891136, + "grad_norm": 2.0100069703751595, + "learning_rate": 8.261360921052948e-06, + "loss": 0.786, + "step": 9645 + }, + { + "epoch": 0.29563565036165257, + "grad_norm": 1.7503391316073817, + "learning_rate": 8.260984703595252e-06, + "loss": 0.7433, + "step": 9646 + }, + { + "epoch": 0.2956662988843938, + "grad_norm": 1.5972840176712322, + "learning_rate": 8.260608454006238e-06, + "loss": 0.7393, + "step": 9647 + }, + { + "epoch": 0.295696947407135, + "grad_norm": 1.6393309004866967, + "learning_rate": 8.260232172289615e-06, + "loss": 0.7228, + "step": 9648 + }, + { + "epoch": 0.2957275959298762, + "grad_norm": 1.626003181223305, + "learning_rate": 8.25985585844909e-06, + "loss": 0.6898, + "step": 9649 + }, + { + "epoch": 0.2957582444526174, + "grad_norm": 0.8372170171335941, + "learning_rate": 8.259479512488373e-06, + "loss": 0.4782, + "step": 9650 + }, + { + "epoch": 0.2957888929753586, + "grad_norm": 1.6926477141595875, + "learning_rate": 8.259103134411168e-06, + "loss": 0.6856, + "step": 9651 + }, + { + "epoch": 0.2958195414980998, + "grad_norm": 1.9177569455618944, + "learning_rate": 8.258726724221187e-06, + "loss": 0.7769, + "step": 9652 + }, + { + "epoch": 0.295850190020841, + "grad_norm": 1.635090019729589, + "learning_rate": 8.258350281922138e-06, + "loss": 0.6913, + "step": 9653 + }, + { + "epoch": 0.2958808385435822, + "grad_norm": 1.698307784798279, + "learning_rate": 8.25797380751773e-06, + "loss": 0.8134, + "step": 9654 + }, + { + "epoch": 0.2959114870663234, + "grad_norm": 1.7733778123297992, + "learning_rate": 8.257597301011673e-06, + "loss": 0.6517, + "step": 9655 + }, + { + "epoch": 0.2959421355890646, + "grad_norm": 1.8814485417540572, + "learning_rate": 8.257220762407675e-06, + "loss": 0.715, + "step": 9656 + }, + { + "epoch": 0.29597278411180583, + "grad_norm": 1.8026297907074276, + "learning_rate": 8.256844191709447e-06, + "loss": 0.7082, + "step": 9657 + }, + { + "epoch": 0.29600343263454704, + "grad_norm": 1.6495348460404662, + "learning_rate": 8.256467588920703e-06, + "loss": 0.6928, + "step": 9658 + }, + { + "epoch": 0.29603408115728824, + "grad_norm": 0.8506310824649878, + "learning_rate": 8.256090954045146e-06, + "loss": 0.4739, + "step": 9659 + }, + { + "epoch": 0.29606472968002945, + "grad_norm": 1.6597171481955049, + "learning_rate": 8.255714287086496e-06, + "loss": 0.6406, + "step": 9660 + }, + { + "epoch": 0.29609537820277065, + "grad_norm": 1.8014261894225967, + "learning_rate": 8.255337588048458e-06, + "loss": 0.7753, + "step": 9661 + }, + { + "epoch": 0.2961260267255118, + "grad_norm": 2.0899386384668404, + "learning_rate": 8.254960856934746e-06, + "loss": 0.7006, + "step": 9662 + }, + { + "epoch": 0.296156675248253, + "grad_norm": 1.6035501176979083, + "learning_rate": 8.254584093749071e-06, + "loss": 0.796, + "step": 9663 
+ }, + { + "epoch": 0.2961873237709942, + "grad_norm": 1.7663470197479654, + "learning_rate": 8.254207298495148e-06, + "loss": 0.5956, + "step": 9664 + }, + { + "epoch": 0.2962179722937354, + "grad_norm": 0.8404012246234289, + "learning_rate": 8.253830471176687e-06, + "loss": 0.4499, + "step": 9665 + }, + { + "epoch": 0.2962486208164766, + "grad_norm": 1.9983628138339702, + "learning_rate": 8.253453611797403e-06, + "loss": 0.6801, + "step": 9666 + }, + { + "epoch": 0.29627926933921783, + "grad_norm": 0.8514438470731515, + "learning_rate": 8.253076720361006e-06, + "loss": 0.4921, + "step": 9667 + }, + { + "epoch": 0.29630991786195904, + "grad_norm": 1.7671242800149811, + "learning_rate": 8.252699796871213e-06, + "loss": 0.7719, + "step": 9668 + }, + { + "epoch": 0.29634056638470024, + "grad_norm": 0.8238830700075251, + "learning_rate": 8.252322841331737e-06, + "loss": 0.4756, + "step": 9669 + }, + { + "epoch": 0.29637121490744145, + "grad_norm": 1.7027259796416492, + "learning_rate": 8.251945853746293e-06, + "loss": 0.716, + "step": 9670 + }, + { + "epoch": 0.29640186343018265, + "grad_norm": 1.9172021643268748, + "learning_rate": 8.251568834118592e-06, + "loss": 0.7602, + "step": 9671 + }, + { + "epoch": 0.29643251195292386, + "grad_norm": 1.6640765730276506, + "learning_rate": 8.251191782452352e-06, + "loss": 0.7366, + "step": 9672 + }, + { + "epoch": 0.29646316047566507, + "grad_norm": 1.867771934096366, + "learning_rate": 8.250814698751289e-06, + "loss": 0.7176, + "step": 9673 + }, + { + "epoch": 0.29649380899840627, + "grad_norm": 1.574734799414687, + "learning_rate": 8.250437583019114e-06, + "loss": 0.7227, + "step": 9674 + }, + { + "epoch": 0.2965244575211475, + "grad_norm": 1.5280611779629039, + "learning_rate": 8.250060435259548e-06, + "loss": 0.467, + "step": 9675 + }, + { + "epoch": 0.2965551060438887, + "grad_norm": 1.7518494545202308, + "learning_rate": 8.249683255476304e-06, + "loss": 0.7169, + "step": 9676 + }, + { + "epoch": 0.2965857545666299, + "grad_norm": 0.980481763038534, + "learning_rate": 8.2493060436731e-06, + "loss": 0.4964, + "step": 9677 + }, + { + "epoch": 0.2966164030893711, + "grad_norm": 2.0298504959900385, + "learning_rate": 8.248928799853652e-06, + "loss": 0.6978, + "step": 9678 + }, + { + "epoch": 0.2966470516121123, + "grad_norm": 1.5891416978443396, + "learning_rate": 8.248551524021678e-06, + "loss": 0.669, + "step": 9679 + }, + { + "epoch": 0.2966777001348535, + "grad_norm": 1.9668097965180737, + "learning_rate": 8.248174216180895e-06, + "loss": 0.6536, + "step": 9680 + }, + { + "epoch": 0.2967083486575947, + "grad_norm": 1.4795630431651414, + "learning_rate": 8.247796876335019e-06, + "loss": 0.5948, + "step": 9681 + }, + { + "epoch": 0.2967389971803359, + "grad_norm": 1.4091007373775326, + "learning_rate": 8.247419504487769e-06, + "loss": 0.5562, + "step": 9682 + }, + { + "epoch": 0.2967696457030771, + "grad_norm": 0.783244646601774, + "learning_rate": 8.247042100642863e-06, + "loss": 0.4573, + "step": 9683 + }, + { + "epoch": 0.29680029422581833, + "grad_norm": 1.8064293703842698, + "learning_rate": 8.246664664804024e-06, + "loss": 0.7412, + "step": 9684 + }, + { + "epoch": 0.29683094274855953, + "grad_norm": 1.6330788160471965, + "learning_rate": 8.246287196974964e-06, + "loss": 0.6634, + "step": 9685 + }, + { + "epoch": 0.29686159127130074, + "grad_norm": 1.6651039192165038, + "learning_rate": 8.245909697159408e-06, + "loss": 0.6532, + "step": 9686 + }, + { + "epoch": 0.29689223979404195, + "grad_norm": 1.8054636167342801, + "learning_rate": 
8.245532165361072e-06, + "loss": 0.7021, + "step": 9687 + }, + { + "epoch": 0.29692288831678315, + "grad_norm": 1.5727353190495519, + "learning_rate": 8.245154601583678e-06, + "loss": 0.6984, + "step": 9688 + }, + { + "epoch": 0.29695353683952436, + "grad_norm": 1.9506304898195317, + "learning_rate": 8.244777005830944e-06, + "loss": 0.6601, + "step": 9689 + }, + { + "epoch": 0.29698418536226556, + "grad_norm": 1.6436763510708667, + "learning_rate": 8.244399378106593e-06, + "loss": 0.7562, + "step": 9690 + }, + { + "epoch": 0.29701483388500677, + "grad_norm": 1.970773842326678, + "learning_rate": 8.244021718414344e-06, + "loss": 0.7632, + "step": 9691 + }, + { + "epoch": 0.297045482407748, + "grad_norm": 2.013345105547174, + "learning_rate": 8.24364402675792e-06, + "loss": 0.5595, + "step": 9692 + }, + { + "epoch": 0.2970761309304891, + "grad_norm": 1.4838070362167133, + "learning_rate": 8.243266303141042e-06, + "loss": 0.6314, + "step": 9693 + }, + { + "epoch": 0.29710677945323033, + "grad_norm": 1.837021990029946, + "learning_rate": 8.24288854756743e-06, + "loss": 0.6399, + "step": 9694 + }, + { + "epoch": 0.29713742797597154, + "grad_norm": 1.7097009215652, + "learning_rate": 8.242510760040807e-06, + "loss": 0.682, + "step": 9695 + }, + { + "epoch": 0.29716807649871274, + "grad_norm": 1.6106093485950206, + "learning_rate": 8.242132940564898e-06, + "loss": 0.6663, + "step": 9696 + }, + { + "epoch": 0.29719872502145395, + "grad_norm": 0.8645430065888323, + "learning_rate": 8.241755089143421e-06, + "loss": 0.4554, + "step": 9697 + }, + { + "epoch": 0.29722937354419515, + "grad_norm": 1.7575898368169318, + "learning_rate": 8.241377205780103e-06, + "loss": 0.7626, + "step": 9698 + }, + { + "epoch": 0.29726002206693636, + "grad_norm": 1.5618741598011745, + "learning_rate": 8.240999290478667e-06, + "loss": 0.6122, + "step": 9699 + }, + { + "epoch": 0.29729067058967756, + "grad_norm": 1.6317540646998718, + "learning_rate": 8.240621343242832e-06, + "loss": 0.6997, + "step": 9700 + }, + { + "epoch": 0.29732131911241877, + "grad_norm": 1.5729959940785105, + "learning_rate": 8.240243364076328e-06, + "loss": 0.6683, + "step": 9701 + }, + { + "epoch": 0.29735196763516, + "grad_norm": 1.7380392529970077, + "learning_rate": 8.23986535298288e-06, + "loss": 0.7415, + "step": 9702 + }, + { + "epoch": 0.2973826161579012, + "grad_norm": 1.6940234673186312, + "learning_rate": 8.239487309966205e-06, + "loss": 0.7221, + "step": 9703 + }, + { + "epoch": 0.2974132646806424, + "grad_norm": 1.6374830014228183, + "learning_rate": 8.239109235030037e-06, + "loss": 0.6827, + "step": 9704 + }, + { + "epoch": 0.2974439132033836, + "grad_norm": 1.634512289291701, + "learning_rate": 8.238731128178094e-06, + "loss": 0.5032, + "step": 9705 + }, + { + "epoch": 0.2974745617261248, + "grad_norm": 0.8973778299323845, + "learning_rate": 8.238352989414104e-06, + "loss": 0.4894, + "step": 9706 + }, + { + "epoch": 0.297505210248866, + "grad_norm": 1.9274153460544987, + "learning_rate": 8.237974818741796e-06, + "loss": 0.6531, + "step": 9707 + }, + { + "epoch": 0.2975358587716072, + "grad_norm": 1.7637489215808515, + "learning_rate": 8.237596616164893e-06, + "loss": 0.5534, + "step": 9708 + }, + { + "epoch": 0.2975665072943484, + "grad_norm": 1.9232231802682473, + "learning_rate": 8.23721838168712e-06, + "loss": 0.7982, + "step": 9709 + }, + { + "epoch": 0.2975971558170896, + "grad_norm": 1.8785882985225217, + "learning_rate": 8.236840115312207e-06, + "loss": 0.6619, + "step": 9710 + }, + { + "epoch": 0.2976278043398308, + 
"grad_norm": 1.9451066323134483, + "learning_rate": 8.236461817043881e-06, + "loss": 0.6744, + "step": 9711 + }, + { + "epoch": 0.29765845286257203, + "grad_norm": 1.7099044327930035, + "learning_rate": 8.236083486885869e-06, + "loss": 0.6657, + "step": 9712 + }, + { + "epoch": 0.29768910138531324, + "grad_norm": 1.7889568517923506, + "learning_rate": 8.235705124841898e-06, + "loss": 0.7269, + "step": 9713 + }, + { + "epoch": 0.29771974990805444, + "grad_norm": 0.8251186952812745, + "learning_rate": 8.235326730915696e-06, + "loss": 0.4735, + "step": 9714 + }, + { + "epoch": 0.29775039843079565, + "grad_norm": 1.8442423109298602, + "learning_rate": 8.234948305110993e-06, + "loss": 0.8088, + "step": 9715 + }, + { + "epoch": 0.29778104695353685, + "grad_norm": 0.7572219445544273, + "learning_rate": 8.234569847431514e-06, + "loss": 0.5006, + "step": 9716 + }, + { + "epoch": 0.29781169547627806, + "grad_norm": 1.8190705297307412, + "learning_rate": 8.234191357880994e-06, + "loss": 0.7402, + "step": 9717 + }, + { + "epoch": 0.29784234399901927, + "grad_norm": 2.098820967117495, + "learning_rate": 8.233812836463157e-06, + "loss": 0.6553, + "step": 9718 + }, + { + "epoch": 0.29787299252176047, + "grad_norm": 1.8352588470155993, + "learning_rate": 8.233434283181737e-06, + "loss": 0.6976, + "step": 9719 + }, + { + "epoch": 0.2979036410445017, + "grad_norm": 1.81722245958645, + "learning_rate": 8.23305569804046e-06, + "loss": 0.7431, + "step": 9720 + }, + { + "epoch": 0.2979342895672429, + "grad_norm": 1.8061276762129383, + "learning_rate": 8.232677081043057e-06, + "loss": 0.7542, + "step": 9721 + }, + { + "epoch": 0.2979649380899841, + "grad_norm": 1.6556368141415523, + "learning_rate": 8.23229843219326e-06, + "loss": 0.6744, + "step": 9722 + }, + { + "epoch": 0.2979955866127253, + "grad_norm": 1.893449009359125, + "learning_rate": 8.231919751494802e-06, + "loss": 0.7067, + "step": 9723 + }, + { + "epoch": 0.29802623513546644, + "grad_norm": 1.7216341672724038, + "learning_rate": 8.23154103895141e-06, + "loss": 0.7084, + "step": 9724 + }, + { + "epoch": 0.29805688365820765, + "grad_norm": 1.768601454011489, + "learning_rate": 8.231162294566817e-06, + "loss": 0.7218, + "step": 9725 + }, + { + "epoch": 0.29808753218094886, + "grad_norm": 1.8385887324375196, + "learning_rate": 8.230783518344754e-06, + "loss": 0.7331, + "step": 9726 + }, + { + "epoch": 0.29811818070369006, + "grad_norm": 0.9169992725618527, + "learning_rate": 8.230404710288955e-06, + "loss": 0.4692, + "step": 9727 + }, + { + "epoch": 0.29814882922643127, + "grad_norm": 1.6348641210984314, + "learning_rate": 8.230025870403153e-06, + "loss": 0.6814, + "step": 9728 + }, + { + "epoch": 0.2981794777491725, + "grad_norm": 1.7934693000311444, + "learning_rate": 8.22964699869108e-06, + "loss": 0.6706, + "step": 9729 + }, + { + "epoch": 0.2982101262719137, + "grad_norm": 2.2422683448256944, + "learning_rate": 8.229268095156469e-06, + "loss": 0.7725, + "step": 9730 + }, + { + "epoch": 0.2982407747946549, + "grad_norm": 1.8572757455292102, + "learning_rate": 8.22888915980305e-06, + "loss": 0.7453, + "step": 9731 + }, + { + "epoch": 0.2982714233173961, + "grad_norm": 1.8468473798118938, + "learning_rate": 8.228510192634564e-06, + "loss": 0.732, + "step": 9732 + }, + { + "epoch": 0.2983020718401373, + "grad_norm": 1.712140010990861, + "learning_rate": 8.228131193654739e-06, + "loss": 0.6954, + "step": 9733 + }, + { + "epoch": 0.2983327203628785, + "grad_norm": 1.6534407405886837, + "learning_rate": 8.227752162867312e-06, + "loss": 0.7336, + "step": 
9734 + }, + { + "epoch": 0.2983633688856197, + "grad_norm": 1.5980211499704289, + "learning_rate": 8.227373100276017e-06, + "loss": 0.7083, + "step": 9735 + }, + { + "epoch": 0.2983940174083609, + "grad_norm": 1.7547985264533907, + "learning_rate": 8.226994005884588e-06, + "loss": 0.7299, + "step": 9736 + }, + { + "epoch": 0.2984246659311021, + "grad_norm": 0.8186630762701427, + "learning_rate": 8.226614879696762e-06, + "loss": 0.4732, + "step": 9737 + }, + { + "epoch": 0.2984553144538433, + "grad_norm": 1.841288970252898, + "learning_rate": 8.226235721716274e-06, + "loss": 0.6474, + "step": 9738 + }, + { + "epoch": 0.29848596297658453, + "grad_norm": 0.7937612064957111, + "learning_rate": 8.22585653194686e-06, + "loss": 0.4859, + "step": 9739 + }, + { + "epoch": 0.29851661149932573, + "grad_norm": 1.6185109963705981, + "learning_rate": 8.225477310392259e-06, + "loss": 0.6291, + "step": 9740 + }, + { + "epoch": 0.29854726002206694, + "grad_norm": 1.5052240454954526, + "learning_rate": 8.2250980570562e-06, + "loss": 0.7603, + "step": 9741 + }, + { + "epoch": 0.29857790854480815, + "grad_norm": 1.7554872238340349, + "learning_rate": 8.224718771942428e-06, + "loss": 0.692, + "step": 9742 + }, + { + "epoch": 0.29860855706754935, + "grad_norm": 2.036766495325701, + "learning_rate": 8.224339455054675e-06, + "loss": 0.7018, + "step": 9743 + }, + { + "epoch": 0.29863920559029056, + "grad_norm": 1.7330170266873133, + "learning_rate": 8.223960106396681e-06, + "loss": 0.7299, + "step": 9744 + }, + { + "epoch": 0.29866985411303176, + "grad_norm": 1.5897563446719252, + "learning_rate": 8.223580725972184e-06, + "loss": 0.704, + "step": 9745 + }, + { + "epoch": 0.29870050263577297, + "grad_norm": 1.6702430656479272, + "learning_rate": 8.223201313784921e-06, + "loss": 0.6325, + "step": 9746 + }, + { + "epoch": 0.2987311511585142, + "grad_norm": 1.7143747301084116, + "learning_rate": 8.22282186983863e-06, + "loss": 0.6434, + "step": 9747 + }, + { + "epoch": 0.2987617996812554, + "grad_norm": 1.7306542275951986, + "learning_rate": 8.22244239413705e-06, + "loss": 0.7252, + "step": 9748 + }, + { + "epoch": 0.2987924482039966, + "grad_norm": 1.686158378899414, + "learning_rate": 8.222062886683923e-06, + "loss": 0.6726, + "step": 9749 + }, + { + "epoch": 0.2988230967267378, + "grad_norm": 1.734204517024543, + "learning_rate": 8.221683347482984e-06, + "loss": 0.6974, + "step": 9750 + }, + { + "epoch": 0.298853745249479, + "grad_norm": 1.550547672954785, + "learning_rate": 8.221303776537975e-06, + "loss": 0.5967, + "step": 9751 + }, + { + "epoch": 0.2988843937722202, + "grad_norm": 1.5308184289454878, + "learning_rate": 8.220924173852635e-06, + "loss": 0.5571, + "step": 9752 + }, + { + "epoch": 0.2989150422949614, + "grad_norm": 2.0360339844948525, + "learning_rate": 8.220544539430707e-06, + "loss": 0.6749, + "step": 9753 + }, + { + "epoch": 0.2989456908177026, + "grad_norm": 2.001220888019737, + "learning_rate": 8.220164873275928e-06, + "loss": 0.7377, + "step": 9754 + }, + { + "epoch": 0.29897633934044376, + "grad_norm": 1.6251228307061243, + "learning_rate": 8.21978517539204e-06, + "loss": 0.6679, + "step": 9755 + }, + { + "epoch": 0.29900698786318497, + "grad_norm": 1.7770693767379855, + "learning_rate": 8.219405445782786e-06, + "loss": 0.7524, + "step": 9756 + }, + { + "epoch": 0.2990376363859262, + "grad_norm": 1.8727896018869907, + "learning_rate": 8.219025684451907e-06, + "loss": 0.7535, + "step": 9757 + }, + { + "epoch": 0.2990682849086674, + "grad_norm": 1.846710161709493, + "learning_rate": 
8.218645891403145e-06, + "loss": 0.7785, + "step": 9758 + }, + { + "epoch": 0.2990989334314086, + "grad_norm": 1.7669218006291056, + "learning_rate": 8.218266066640238e-06, + "loss": 0.7566, + "step": 9759 + }, + { + "epoch": 0.2991295819541498, + "grad_norm": 1.560205194865756, + "learning_rate": 8.217886210166936e-06, + "loss": 0.6485, + "step": 9760 + }, + { + "epoch": 0.299160230476891, + "grad_norm": 1.8357621905876655, + "learning_rate": 8.217506321986976e-06, + "loss": 0.7047, + "step": 9761 + }, + { + "epoch": 0.2991908789996322, + "grad_norm": 1.726793522844941, + "learning_rate": 8.217126402104103e-06, + "loss": 0.6345, + "step": 9762 + }, + { + "epoch": 0.2992215275223734, + "grad_norm": 0.9631960129832212, + "learning_rate": 8.216746450522059e-06, + "loss": 0.464, + "step": 9763 + }, + { + "epoch": 0.2992521760451146, + "grad_norm": 1.9904045535472268, + "learning_rate": 8.216366467244592e-06, + "loss": 0.6749, + "step": 9764 + }, + { + "epoch": 0.2992828245678558, + "grad_norm": 1.5934980875935696, + "learning_rate": 8.215986452275442e-06, + "loss": 0.6418, + "step": 9765 + }, + { + "epoch": 0.299313473090597, + "grad_norm": 1.9075736629591182, + "learning_rate": 8.215606405618355e-06, + "loss": 0.7753, + "step": 9766 + }, + { + "epoch": 0.29934412161333823, + "grad_norm": 1.7251350434149355, + "learning_rate": 8.215226327277073e-06, + "loss": 0.705, + "step": 9767 + }, + { + "epoch": 0.29937477013607944, + "grad_norm": 1.6208751341387573, + "learning_rate": 8.214846217255346e-06, + "loss": 0.7032, + "step": 9768 + }, + { + "epoch": 0.29940541865882064, + "grad_norm": 1.8589963306731325, + "learning_rate": 8.214466075556915e-06, + "loss": 0.6633, + "step": 9769 + }, + { + "epoch": 0.29943606718156185, + "grad_norm": 1.831793630295091, + "learning_rate": 8.21408590218553e-06, + "loss": 0.6963, + "step": 9770 + }, + { + "epoch": 0.29946671570430305, + "grad_norm": 1.5177011254095907, + "learning_rate": 8.213705697144932e-06, + "loss": 0.6362, + "step": 9771 + }, + { + "epoch": 0.29949736422704426, + "grad_norm": 0.9132877910916161, + "learning_rate": 8.213325460438868e-06, + "loss": 0.4863, + "step": 9772 + }, + { + "epoch": 0.29952801274978547, + "grad_norm": 1.696593513395055, + "learning_rate": 8.212945192071089e-06, + "loss": 0.6543, + "step": 9773 + }, + { + "epoch": 0.29955866127252667, + "grad_norm": 1.8257304080291206, + "learning_rate": 8.212564892045338e-06, + "loss": 0.7374, + "step": 9774 + }, + { + "epoch": 0.2995893097952679, + "grad_norm": 1.664490118265574, + "learning_rate": 8.212184560365363e-06, + "loss": 0.7478, + "step": 9775 + }, + { + "epoch": 0.2996199583180091, + "grad_norm": 1.7433231671981955, + "learning_rate": 8.211804197034913e-06, + "loss": 0.6634, + "step": 9776 + }, + { + "epoch": 0.2996506068407503, + "grad_norm": 1.8517059531086746, + "learning_rate": 8.211423802057733e-06, + "loss": 0.7271, + "step": 9777 + }, + { + "epoch": 0.2996812553634915, + "grad_norm": 1.65708663772969, + "learning_rate": 8.211043375437573e-06, + "loss": 0.7036, + "step": 9778 + }, + { + "epoch": 0.2997119038862327, + "grad_norm": 1.6050846422180947, + "learning_rate": 8.21066291717818e-06, + "loss": 0.7586, + "step": 9779 + }, + { + "epoch": 0.2997425524089739, + "grad_norm": 1.6516592793254457, + "learning_rate": 8.210282427283304e-06, + "loss": 0.6892, + "step": 9780 + }, + { + "epoch": 0.2997732009317151, + "grad_norm": 0.917591810177919, + "learning_rate": 8.209901905756695e-06, + "loss": 0.4895, + "step": 9781 + }, + { + "epoch": 0.2998038494544563, + "grad_norm": 
1.5596721325768008, + "learning_rate": 8.209521352602102e-06, + "loss": 0.6437, + "step": 9782 + }, + { + "epoch": 0.2998344979771975, + "grad_norm": 1.7294137759060721, + "learning_rate": 8.209140767823271e-06, + "loss": 0.7402, + "step": 9783 + }, + { + "epoch": 0.29986514649993873, + "grad_norm": 1.6690730906580877, + "learning_rate": 8.208760151423959e-06, + "loss": 0.6862, + "step": 9784 + }, + { + "epoch": 0.29989579502267993, + "grad_norm": 0.7862832899545711, + "learning_rate": 8.208379503407908e-06, + "loss": 0.4923, + "step": 9785 + }, + { + "epoch": 0.2999264435454211, + "grad_norm": 1.8196667931249002, + "learning_rate": 8.207998823778874e-06, + "loss": 0.7279, + "step": 9786 + }, + { + "epoch": 0.2999570920681623, + "grad_norm": 1.9713274792716864, + "learning_rate": 8.207618112540607e-06, + "loss": 0.7335, + "step": 9787 + }, + { + "epoch": 0.2999877405909035, + "grad_norm": 1.7465329729482255, + "learning_rate": 8.20723736969686e-06, + "loss": 0.7197, + "step": 9788 + }, + { + "epoch": 0.3000183891136447, + "grad_norm": 1.7924899095313618, + "learning_rate": 8.20685659525138e-06, + "loss": 0.7861, + "step": 9789 + }, + { + "epoch": 0.3000490376363859, + "grad_norm": 1.5621555250922843, + "learning_rate": 8.206475789207924e-06, + "loss": 0.6331, + "step": 9790 + }, + { + "epoch": 0.3000796861591271, + "grad_norm": 1.7165005701194882, + "learning_rate": 8.20609495157024e-06, + "loss": 0.7059, + "step": 9791 + }, + { + "epoch": 0.3001103346818683, + "grad_norm": 1.719183038988036, + "learning_rate": 8.205714082342082e-06, + "loss": 0.705, + "step": 9792 + }, + { + "epoch": 0.3001409832046095, + "grad_norm": 0.8773624055363176, + "learning_rate": 8.205333181527203e-06, + "loss": 0.4766, + "step": 9793 + }, + { + "epoch": 0.30017163172735073, + "grad_norm": 1.871340407241197, + "learning_rate": 8.204952249129356e-06, + "loss": 0.6899, + "step": 9794 + }, + { + "epoch": 0.30020228025009194, + "grad_norm": 1.6972426776662202, + "learning_rate": 8.204571285152293e-06, + "loss": 0.6914, + "step": 9795 + }, + { + "epoch": 0.30023292877283314, + "grad_norm": 1.5848935352040927, + "learning_rate": 8.204190289599773e-06, + "loss": 0.6666, + "step": 9796 + }, + { + "epoch": 0.30026357729557435, + "grad_norm": 1.9148881595242568, + "learning_rate": 8.203809262475545e-06, + "loss": 0.5837, + "step": 9797 + }, + { + "epoch": 0.30029422581831555, + "grad_norm": 1.7102832624708861, + "learning_rate": 8.203428203783362e-06, + "loss": 0.6184, + "step": 9798 + }, + { + "epoch": 0.30032487434105676, + "grad_norm": 1.6261576463379663, + "learning_rate": 8.203047113526983e-06, + "loss": 0.7262, + "step": 9799 + }, + { + "epoch": 0.30035552286379796, + "grad_norm": 1.5644747637059984, + "learning_rate": 8.202665991710162e-06, + "loss": 0.6363, + "step": 9800 + }, + { + "epoch": 0.30038617138653917, + "grad_norm": 1.5406579298101744, + "learning_rate": 8.202284838336654e-06, + "loss": 0.6491, + "step": 9801 + }, + { + "epoch": 0.3004168199092804, + "grad_norm": 1.7432271904903802, + "learning_rate": 8.201903653410213e-06, + "loss": 0.6955, + "step": 9802 + }, + { + "epoch": 0.3004474684320216, + "grad_norm": 1.55764413757883, + "learning_rate": 8.201522436934596e-06, + "loss": 0.6656, + "step": 9803 + }, + { + "epoch": 0.3004781169547628, + "grad_norm": 0.8772009512675442, + "learning_rate": 8.201141188913559e-06, + "loss": 0.4795, + "step": 9804 + }, + { + "epoch": 0.300508765477504, + "grad_norm": 1.826451910091905, + "learning_rate": 8.20075990935086e-06, + "loss": 0.6888, + "step": 9805 + }, + { + 
"epoch": 0.3005394140002452, + "grad_norm": 1.5965945169772848, + "learning_rate": 8.200378598250253e-06, + "loss": 0.6734, + "step": 9806 + }, + { + "epoch": 0.3005700625229864, + "grad_norm": 1.6004286537818952, + "learning_rate": 8.199997255615497e-06, + "loss": 0.6912, + "step": 9807 + }, + { + "epoch": 0.3006007110457276, + "grad_norm": 1.7795739878507093, + "learning_rate": 8.19961588145035e-06, + "loss": 0.7393, + "step": 9808 + }, + { + "epoch": 0.3006313595684688, + "grad_norm": 1.7196242354689772, + "learning_rate": 8.19923447575857e-06, + "loss": 0.6406, + "step": 9809 + }, + { + "epoch": 0.30066200809121, + "grad_norm": 1.6038834095536776, + "learning_rate": 8.198853038543913e-06, + "loss": 0.709, + "step": 9810 + }, + { + "epoch": 0.3006926566139512, + "grad_norm": 1.8452215187318521, + "learning_rate": 8.198471569810138e-06, + "loss": 0.7857, + "step": 9811 + }, + { + "epoch": 0.30072330513669243, + "grad_norm": 1.7872481596715228, + "learning_rate": 8.198090069561005e-06, + "loss": 0.6976, + "step": 9812 + }, + { + "epoch": 0.30075395365943364, + "grad_norm": 1.6392176317946494, + "learning_rate": 8.197708537800271e-06, + "loss": 0.6954, + "step": 9813 + }, + { + "epoch": 0.30078460218217484, + "grad_norm": 2.0585815792120448, + "learning_rate": 8.197326974531699e-06, + "loss": 0.6025, + "step": 9814 + }, + { + "epoch": 0.30081525070491605, + "grad_norm": 1.709851327013035, + "learning_rate": 8.196945379759045e-06, + "loss": 0.7272, + "step": 9815 + }, + { + "epoch": 0.30084589922765725, + "grad_norm": 1.7901871504715712, + "learning_rate": 8.19656375348607e-06, + "loss": 0.6866, + "step": 9816 + }, + { + "epoch": 0.3008765477503984, + "grad_norm": 0.8858953188530209, + "learning_rate": 8.196182095716534e-06, + "loss": 0.4741, + "step": 9817 + }, + { + "epoch": 0.3009071962731396, + "grad_norm": 0.857417015339588, + "learning_rate": 8.1958004064542e-06, + "loss": 0.4562, + "step": 9818 + }, + { + "epoch": 0.3009378447958808, + "grad_norm": 1.7141902049754434, + "learning_rate": 8.195418685702826e-06, + "loss": 0.7268, + "step": 9819 + }, + { + "epoch": 0.300968493318622, + "grad_norm": 1.824758114783751, + "learning_rate": 8.195036933466173e-06, + "loss": 0.6514, + "step": 9820 + }, + { + "epoch": 0.3009991418413632, + "grad_norm": 1.8635834131134803, + "learning_rate": 8.194655149748005e-06, + "loss": 0.6487, + "step": 9821 + }, + { + "epoch": 0.30102979036410443, + "grad_norm": 1.8191645258902547, + "learning_rate": 8.194273334552081e-06, + "loss": 0.6768, + "step": 9822 + }, + { + "epoch": 0.30106043888684564, + "grad_norm": 1.616178047171144, + "learning_rate": 8.193891487882167e-06, + "loss": 0.6685, + "step": 9823 + }, + { + "epoch": 0.30109108740958684, + "grad_norm": 1.667238391436406, + "learning_rate": 8.19350960974202e-06, + "loss": 0.6781, + "step": 9824 + }, + { + "epoch": 0.30112173593232805, + "grad_norm": 1.7992371674373862, + "learning_rate": 8.193127700135408e-06, + "loss": 0.782, + "step": 9825 + }, + { + "epoch": 0.30115238445506926, + "grad_norm": 2.1925446113076785, + "learning_rate": 8.192745759066089e-06, + "loss": 0.5939, + "step": 9826 + }, + { + "epoch": 0.30118303297781046, + "grad_norm": 1.813928212552698, + "learning_rate": 8.192363786537834e-06, + "loss": 0.7691, + "step": 9827 + }, + { + "epoch": 0.30121368150055167, + "grad_norm": 1.173957176687985, + "learning_rate": 8.191981782554397e-06, + "loss": 0.4909, + "step": 9828 + }, + { + "epoch": 0.30124433002329287, + "grad_norm": 1.6951562622815148, + "learning_rate": 8.19159974711955e-06, + 
"loss": 0.7817, + "step": 9829 + }, + { + "epoch": 0.3012749785460341, + "grad_norm": 2.0532113966538277, + "learning_rate": 8.191217680237053e-06, + "loss": 0.7033, + "step": 9830 + }, + { + "epoch": 0.3013056270687753, + "grad_norm": 0.8574431089644494, + "learning_rate": 8.19083558191067e-06, + "loss": 0.4882, + "step": 9831 + }, + { + "epoch": 0.3013362755915165, + "grad_norm": 1.596746157429657, + "learning_rate": 8.19045345214417e-06, + "loss": 0.6639, + "step": 9832 + }, + { + "epoch": 0.3013669241142577, + "grad_norm": 1.8232349204374834, + "learning_rate": 8.190071290941313e-06, + "loss": 0.6756, + "step": 9833 + }, + { + "epoch": 0.3013975726369989, + "grad_norm": 1.9454571507418452, + "learning_rate": 8.18968909830587e-06, + "loss": 0.732, + "step": 9834 + }, + { + "epoch": 0.3014282211597401, + "grad_norm": 1.902656221657718, + "learning_rate": 8.189306874241603e-06, + "loss": 0.7227, + "step": 9835 + }, + { + "epoch": 0.3014588696824813, + "grad_norm": 1.638075849606096, + "learning_rate": 8.18892461875228e-06, + "loss": 0.6452, + "step": 9836 + }, + { + "epoch": 0.3014895182052225, + "grad_norm": 1.7005781454193067, + "learning_rate": 8.188542331841667e-06, + "loss": 0.7634, + "step": 9837 + }, + { + "epoch": 0.3015201667279637, + "grad_norm": 1.7356482034920155, + "learning_rate": 8.188160013513531e-06, + "loss": 0.7606, + "step": 9838 + }, + { + "epoch": 0.30155081525070493, + "grad_norm": 1.08098436481968, + "learning_rate": 8.187777663771637e-06, + "loss": 0.4706, + "step": 9839 + }, + { + "epoch": 0.30158146377344613, + "grad_norm": 1.777986099525208, + "learning_rate": 8.187395282619755e-06, + "loss": 0.6962, + "step": 9840 + }, + { + "epoch": 0.30161211229618734, + "grad_norm": 1.9413077109882433, + "learning_rate": 8.18701287006165e-06, + "loss": 0.7641, + "step": 9841 + }, + { + "epoch": 0.30164276081892855, + "grad_norm": 0.8197590184704292, + "learning_rate": 8.186630426101094e-06, + "loss": 0.4696, + "step": 9842 + }, + { + "epoch": 0.30167340934166975, + "grad_norm": 1.747784371834914, + "learning_rate": 8.186247950741852e-06, + "loss": 0.7623, + "step": 9843 + }, + { + "epoch": 0.30170405786441096, + "grad_norm": 1.632723872282145, + "learning_rate": 8.185865443987695e-06, + "loss": 0.535, + "step": 9844 + }, + { + "epoch": 0.30173470638715216, + "grad_norm": 2.4803443056752412, + "learning_rate": 8.18548290584239e-06, + "loss": 0.634, + "step": 9845 + }, + { + "epoch": 0.30176535490989337, + "grad_norm": 2.1493652303481525, + "learning_rate": 8.185100336309706e-06, + "loss": 0.6615, + "step": 9846 + }, + { + "epoch": 0.3017960034326346, + "grad_norm": 1.8355950041518931, + "learning_rate": 8.184717735393415e-06, + "loss": 0.7261, + "step": 9847 + }, + { + "epoch": 0.3018266519553757, + "grad_norm": 1.9224454230150028, + "learning_rate": 8.184335103097284e-06, + "loss": 0.7126, + "step": 9848 + }, + { + "epoch": 0.30185730047811693, + "grad_norm": 1.7186607359903052, + "learning_rate": 8.183952439425084e-06, + "loss": 0.7257, + "step": 9849 + }, + { + "epoch": 0.30188794900085814, + "grad_norm": 1.9873391326509187, + "learning_rate": 8.183569744380587e-06, + "loss": 0.7123, + "step": 9850 + }, + { + "epoch": 0.30191859752359934, + "grad_norm": 2.523188445839736, + "learning_rate": 8.183187017967562e-06, + "loss": 0.6984, + "step": 9851 + }, + { + "epoch": 0.30194924604634055, + "grad_norm": 1.9353284844885743, + "learning_rate": 8.182804260189783e-06, + "loss": 0.6373, + "step": 9852 + }, + { + "epoch": 0.30197989456908175, + "grad_norm": 1.7181405452707852, + 
"learning_rate": 8.182421471051018e-06, + "loss": 0.6531, + "step": 9853 + }, + { + "epoch": 0.30201054309182296, + "grad_norm": 1.8137468893414304, + "learning_rate": 8.18203865055504e-06, + "loss": 0.6632, + "step": 9854 + }, + { + "epoch": 0.30204119161456416, + "grad_norm": 1.552831613230682, + "learning_rate": 8.181655798705618e-06, + "loss": 0.6195, + "step": 9855 + }, + { + "epoch": 0.30207184013730537, + "grad_norm": 1.7232438837692146, + "learning_rate": 8.18127291550653e-06, + "loss": 0.7051, + "step": 9856 + }, + { + "epoch": 0.3021024886600466, + "grad_norm": 1.452336799470743, + "learning_rate": 8.180890000961548e-06, + "loss": 0.6043, + "step": 9857 + }, + { + "epoch": 0.3021331371827878, + "grad_norm": 1.6812397387009257, + "learning_rate": 8.18050705507444e-06, + "loss": 0.6753, + "step": 9858 + }, + { + "epoch": 0.302163785705529, + "grad_norm": 1.680718057847362, + "learning_rate": 8.180124077848983e-06, + "loss": 0.7264, + "step": 9859 + }, + { + "epoch": 0.3021944342282702, + "grad_norm": 1.8591978543508518, + "learning_rate": 8.179741069288951e-06, + "loss": 0.563, + "step": 9860 + }, + { + "epoch": 0.3022250827510114, + "grad_norm": 1.4644562198834894, + "learning_rate": 8.179358029398117e-06, + "loss": 0.7128, + "step": 9861 + }, + { + "epoch": 0.3022557312737526, + "grad_norm": 1.8392739597953172, + "learning_rate": 8.178974958180253e-06, + "loss": 0.7386, + "step": 9862 + }, + { + "epoch": 0.3022863797964938, + "grad_norm": 1.9462727571008824, + "learning_rate": 8.178591855639136e-06, + "loss": 0.7386, + "step": 9863 + }, + { + "epoch": 0.302317028319235, + "grad_norm": 1.7982685455602054, + "learning_rate": 8.17820872177854e-06, + "loss": 0.7292, + "step": 9864 + }, + { + "epoch": 0.3023476768419762, + "grad_norm": 1.8067299141075952, + "learning_rate": 8.17782555660224e-06, + "loss": 0.7296, + "step": 9865 + }, + { + "epoch": 0.3023783253647174, + "grad_norm": 1.589005519149208, + "learning_rate": 8.177442360114012e-06, + "loss": 0.7135, + "step": 9866 + }, + { + "epoch": 0.30240897388745863, + "grad_norm": 1.2553059757339537, + "learning_rate": 8.17705913231763e-06, + "loss": 0.4566, + "step": 9867 + }, + { + "epoch": 0.30243962241019984, + "grad_norm": 1.7417606486914352, + "learning_rate": 8.176675873216874e-06, + "loss": 0.6328, + "step": 9868 + }, + { + "epoch": 0.30247027093294104, + "grad_norm": 1.641906339176859, + "learning_rate": 8.176292582815517e-06, + "loss": 0.5964, + "step": 9869 + }, + { + "epoch": 0.30250091945568225, + "grad_norm": 1.5943275686821976, + "learning_rate": 8.175909261117336e-06, + "loss": 0.6243, + "step": 9870 + }, + { + "epoch": 0.30253156797842345, + "grad_norm": 0.8773831561561108, + "learning_rate": 8.17552590812611e-06, + "loss": 0.4746, + "step": 9871 + }, + { + "epoch": 0.30256221650116466, + "grad_norm": 2.0912301562357776, + "learning_rate": 8.175142523845613e-06, + "loss": 0.6835, + "step": 9872 + }, + { + "epoch": 0.30259286502390587, + "grad_norm": 1.7563639486459084, + "learning_rate": 8.174759108279625e-06, + "loss": 0.6796, + "step": 9873 + }, + { + "epoch": 0.30262351354664707, + "grad_norm": 2.027434262739058, + "learning_rate": 8.174375661431924e-06, + "loss": 0.8542, + "step": 9874 + }, + { + "epoch": 0.3026541620693883, + "grad_norm": 1.5530561005999353, + "learning_rate": 8.173992183306285e-06, + "loss": 0.6029, + "step": 9875 + }, + { + "epoch": 0.3026848105921295, + "grad_norm": 1.7971591487596734, + "learning_rate": 8.17360867390649e-06, + "loss": 0.7192, + "step": 9876 + }, + { + "epoch": 
0.3027154591148707, + "grad_norm": 1.6961502798040415, + "learning_rate": 8.173225133236317e-06, + "loss": 0.6984, + "step": 9877 + }, + { + "epoch": 0.3027461076376119, + "grad_norm": 1.6991215987081387, + "learning_rate": 8.172841561299547e-06, + "loss": 0.7623, + "step": 9878 + }, + { + "epoch": 0.30277675616035304, + "grad_norm": 1.563586060840047, + "learning_rate": 8.172457958099954e-06, + "loss": 0.7218, + "step": 9879 + }, + { + "epoch": 0.30280740468309425, + "grad_norm": 1.5514054501126073, + "learning_rate": 8.172074323641323e-06, + "loss": 0.6464, + "step": 9880 + }, + { + "epoch": 0.30283805320583546, + "grad_norm": 1.6969770865416205, + "learning_rate": 8.171690657927432e-06, + "loss": 0.7268, + "step": 9881 + }, + { + "epoch": 0.30286870172857666, + "grad_norm": 1.7786924764445555, + "learning_rate": 8.17130696096206e-06, + "loss": 0.7703, + "step": 9882 + }, + { + "epoch": 0.30289935025131787, + "grad_norm": 1.6598374619959793, + "learning_rate": 8.17092323274899e-06, + "loss": 0.6361, + "step": 9883 + }, + { + "epoch": 0.3029299987740591, + "grad_norm": 1.9140113994251817, + "learning_rate": 8.170539473292001e-06, + "loss": 0.8107, + "step": 9884 + }, + { + "epoch": 0.3029606472968003, + "grad_norm": 2.039317488664188, + "learning_rate": 8.170155682594877e-06, + "loss": 0.7514, + "step": 9885 + }, + { + "epoch": 0.3029912958195415, + "grad_norm": 1.191875830153326, + "learning_rate": 8.169771860661397e-06, + "loss": 0.496, + "step": 9886 + }, + { + "epoch": 0.3030219443422827, + "grad_norm": 0.9666976778556446, + "learning_rate": 8.169388007495344e-06, + "loss": 0.4661, + "step": 9887 + }, + { + "epoch": 0.3030525928650239, + "grad_norm": 1.8877737134150703, + "learning_rate": 8.169004123100501e-06, + "loss": 0.6288, + "step": 9888 + }, + { + "epoch": 0.3030832413877651, + "grad_norm": 1.7125491829748942, + "learning_rate": 8.168620207480649e-06, + "loss": 0.7024, + "step": 9889 + }, + { + "epoch": 0.3031138899105063, + "grad_norm": 1.974114420524914, + "learning_rate": 8.16823626063957e-06, + "loss": 0.7157, + "step": 9890 + }, + { + "epoch": 0.3031445384332475, + "grad_norm": 1.618431614452004, + "learning_rate": 8.16785228258105e-06, + "loss": 0.5814, + "step": 9891 + }, + { + "epoch": 0.3031751869559887, + "grad_norm": 1.8868548836133756, + "learning_rate": 8.16746827330887e-06, + "loss": 0.6813, + "step": 9892 + }, + { + "epoch": 0.3032058354787299, + "grad_norm": 1.9710655903766556, + "learning_rate": 8.167084232826816e-06, + "loss": 0.6122, + "step": 9893 + }, + { + "epoch": 0.30323648400147113, + "grad_norm": 1.846272028871666, + "learning_rate": 8.16670016113867e-06, + "loss": 0.7546, + "step": 9894 + }, + { + "epoch": 0.30326713252421234, + "grad_norm": 1.298714473201504, + "learning_rate": 8.166316058248217e-06, + "loss": 0.4904, + "step": 9895 + }, + { + "epoch": 0.30329778104695354, + "grad_norm": 1.8008494300305415, + "learning_rate": 8.165931924159242e-06, + "loss": 0.6912, + "step": 9896 + }, + { + "epoch": 0.30332842956969475, + "grad_norm": 1.709031580553762, + "learning_rate": 8.165547758875529e-06, + "loss": 0.6654, + "step": 9897 + }, + { + "epoch": 0.30335907809243595, + "grad_norm": 1.9275641763001985, + "learning_rate": 8.165163562400864e-06, + "loss": 0.6533, + "step": 9898 + }, + { + "epoch": 0.30338972661517716, + "grad_norm": 1.5714561836978216, + "learning_rate": 8.164779334739033e-06, + "loss": 0.6357, + "step": 9899 + }, + { + "epoch": 0.30342037513791836, + "grad_norm": 0.8726593351617854, + "learning_rate": 8.164395075893822e-06, + "loss": 
0.4789, + "step": 9900 + }, + { + "epoch": 0.30345102366065957, + "grad_norm": 1.7356896479533797, + "learning_rate": 8.164010785869016e-06, + "loss": 0.6989, + "step": 9901 + }, + { + "epoch": 0.3034816721834008, + "grad_norm": 1.7328092658827094, + "learning_rate": 8.1636264646684e-06, + "loss": 0.7114, + "step": 9902 + }, + { + "epoch": 0.303512320706142, + "grad_norm": 1.6947284924009258, + "learning_rate": 8.163242112295767e-06, + "loss": 0.6843, + "step": 9903 + }, + { + "epoch": 0.3035429692288832, + "grad_norm": 0.8508813234910475, + "learning_rate": 8.162857728754898e-06, + "loss": 0.4831, + "step": 9904 + }, + { + "epoch": 0.3035736177516244, + "grad_norm": 1.603555600262242, + "learning_rate": 8.162473314049584e-06, + "loss": 0.662, + "step": 9905 + }, + { + "epoch": 0.3036042662743656, + "grad_norm": 1.6830599525013532, + "learning_rate": 8.16208886818361e-06, + "loss": 0.679, + "step": 9906 + }, + { + "epoch": 0.3036349147971068, + "grad_norm": 0.8277910134603678, + "learning_rate": 8.161704391160765e-06, + "loss": 0.4768, + "step": 9907 + }, + { + "epoch": 0.303665563319848, + "grad_norm": 2.052250102683192, + "learning_rate": 8.16131988298484e-06, + "loss": 0.6879, + "step": 9908 + }, + { + "epoch": 0.3036962118425892, + "grad_norm": 1.6900147197763065, + "learning_rate": 8.160935343659618e-06, + "loss": 0.7075, + "step": 9909 + }, + { + "epoch": 0.30372686036533036, + "grad_norm": 1.6401468178264076, + "learning_rate": 8.160550773188894e-06, + "loss": 0.6699, + "step": 9910 + }, + { + "epoch": 0.30375750888807157, + "grad_norm": 1.8051692934197334, + "learning_rate": 8.160166171576453e-06, + "loss": 0.6709, + "step": 9911 + }, + { + "epoch": 0.3037881574108128, + "grad_norm": 1.5992883175754284, + "learning_rate": 8.159781538826087e-06, + "loss": 0.6734, + "step": 9912 + }, + { + "epoch": 0.303818805933554, + "grad_norm": 1.811116605503594, + "learning_rate": 8.159396874941584e-06, + "loss": 0.6756, + "step": 9913 + }, + { + "epoch": 0.3038494544562952, + "grad_norm": 1.78046485409467, + "learning_rate": 8.159012179926736e-06, + "loss": 0.6511, + "step": 9914 + }, + { + "epoch": 0.3038801029790364, + "grad_norm": 1.6073455730462005, + "learning_rate": 8.158627453785332e-06, + "loss": 0.6664, + "step": 9915 + }, + { + "epoch": 0.3039107515017776, + "grad_norm": 1.7158992697793665, + "learning_rate": 8.158242696521165e-06, + "loss": 0.7492, + "step": 9916 + }, + { + "epoch": 0.3039414000245188, + "grad_norm": 1.7208427555972137, + "learning_rate": 8.157857908138022e-06, + "loss": 0.7126, + "step": 9917 + }, + { + "epoch": 0.30397204854726, + "grad_norm": 1.887691707306023, + "learning_rate": 8.1574730886397e-06, + "loss": 0.7393, + "step": 9918 + }, + { + "epoch": 0.3040026970700012, + "grad_norm": 1.7464737830352413, + "learning_rate": 8.157088238029986e-06, + "loss": 0.754, + "step": 9919 + }, + { + "epoch": 0.3040333455927424, + "grad_norm": 1.3734995907547753, + "learning_rate": 8.156703356312676e-06, + "loss": 0.6167, + "step": 9920 + }, + { + "epoch": 0.3040639941154836, + "grad_norm": 1.7033627958735853, + "learning_rate": 8.156318443491558e-06, + "loss": 0.7505, + "step": 9921 + }, + { + "epoch": 0.30409464263822483, + "grad_norm": 1.5476217717541083, + "learning_rate": 8.155933499570428e-06, + "loss": 0.6652, + "step": 9922 + }, + { + "epoch": 0.30412529116096604, + "grad_norm": 1.6884092725233089, + "learning_rate": 8.155548524553076e-06, + "loss": 0.7309, + "step": 9923 + }, + { + "epoch": 0.30415593968370724, + "grad_norm": 1.9932172976828817, + "learning_rate": 
8.155163518443298e-06, + "loss": 0.6226, + "step": 9924 + }, + { + "epoch": 0.30418658820644845, + "grad_norm": 1.8963932365973104, + "learning_rate": 8.154778481244888e-06, + "loss": 0.6757, + "step": 9925 + }, + { + "epoch": 0.30421723672918966, + "grad_norm": 1.7993101706063521, + "learning_rate": 8.154393412961637e-06, + "loss": 0.6703, + "step": 9926 + }, + { + "epoch": 0.30424788525193086, + "grad_norm": 1.9598947373942466, + "learning_rate": 8.15400831359734e-06, + "loss": 0.7596, + "step": 9927 + }, + { + "epoch": 0.30427853377467207, + "grad_norm": 1.9322864827782875, + "learning_rate": 8.153623183155793e-06, + "loss": 0.7635, + "step": 9928 + }, + { + "epoch": 0.30430918229741327, + "grad_norm": 1.9099048569933417, + "learning_rate": 8.15323802164079e-06, + "loss": 0.6212, + "step": 9929 + }, + { + "epoch": 0.3043398308201545, + "grad_norm": 1.8531741930847716, + "learning_rate": 8.152852829056126e-06, + "loss": 0.713, + "step": 9930 + }, + { + "epoch": 0.3043704793428957, + "grad_norm": 1.7158150151798461, + "learning_rate": 8.152467605405596e-06, + "loss": 0.7085, + "step": 9931 + }, + { + "epoch": 0.3044011278656369, + "grad_norm": 2.0320555240638045, + "learning_rate": 8.152082350692996e-06, + "loss": 0.7569, + "step": 9932 + }, + { + "epoch": 0.3044317763883781, + "grad_norm": 1.5933143619059875, + "learning_rate": 8.151697064922121e-06, + "loss": 0.6488, + "step": 9933 + }, + { + "epoch": 0.3044624249111193, + "grad_norm": 1.9563515665374656, + "learning_rate": 8.151311748096771e-06, + "loss": 0.7836, + "step": 9934 + }, + { + "epoch": 0.3044930734338605, + "grad_norm": 1.9912710496027035, + "learning_rate": 8.150926400220738e-06, + "loss": 0.8075, + "step": 9935 + }, + { + "epoch": 0.3045237219566017, + "grad_norm": 1.807034381511625, + "learning_rate": 8.150541021297822e-06, + "loss": 0.7382, + "step": 9936 + }, + { + "epoch": 0.3045543704793429, + "grad_norm": 0.9169265681154324, + "learning_rate": 8.150155611331819e-06, + "loss": 0.4896, + "step": 9937 + }, + { + "epoch": 0.3045850190020841, + "grad_norm": 1.6020011501405012, + "learning_rate": 8.149770170326527e-06, + "loss": 0.627, + "step": 9938 + }, + { + "epoch": 0.30461566752482533, + "grad_norm": 1.9603291877008628, + "learning_rate": 8.149384698285742e-06, + "loss": 0.6811, + "step": 9939 + }, + { + "epoch": 0.30464631604756653, + "grad_norm": 1.7348808983680664, + "learning_rate": 8.148999195213266e-06, + "loss": 0.6903, + "step": 9940 + }, + { + "epoch": 0.3046769645703077, + "grad_norm": 1.6489289387271862, + "learning_rate": 8.148613661112894e-06, + "loss": 0.6119, + "step": 9941 + }, + { + "epoch": 0.3047076130930489, + "grad_norm": 1.7212498981228779, + "learning_rate": 8.148228095988427e-06, + "loss": 0.6993, + "step": 9942 + }, + { + "epoch": 0.3047382616157901, + "grad_norm": 1.903749932943825, + "learning_rate": 8.147842499843663e-06, + "loss": 0.7016, + "step": 9943 + }, + { + "epoch": 0.3047689101385313, + "grad_norm": 1.8382436840587968, + "learning_rate": 8.1474568726824e-06, + "loss": 0.6664, + "step": 9944 + }, + { + "epoch": 0.3047995586612725, + "grad_norm": 1.804892437799155, + "learning_rate": 8.14707121450844e-06, + "loss": 0.8187, + "step": 9945 + }, + { + "epoch": 0.3048302071840137, + "grad_norm": 0.9444770940705277, + "learning_rate": 8.146685525325582e-06, + "loss": 0.4684, + "step": 9946 + }, + { + "epoch": 0.3048608557067549, + "grad_norm": 1.810993879330766, + "learning_rate": 8.146299805137626e-06, + "loss": 0.7139, + "step": 9947 + }, + { + "epoch": 0.3048915042294961, + 
"grad_norm": 1.7924840275299379, + "learning_rate": 8.145914053948373e-06, + "loss": 0.6601, + "step": 9948 + }, + { + "epoch": 0.30492215275223733, + "grad_norm": 1.8746595470240743, + "learning_rate": 8.145528271761624e-06, + "loss": 0.7268, + "step": 9949 + }, + { + "epoch": 0.30495280127497854, + "grad_norm": 1.8542020850166625, + "learning_rate": 8.145142458581182e-06, + "loss": 0.7072, + "step": 9950 + }, + { + "epoch": 0.30498344979771974, + "grad_norm": 1.9537156968452036, + "learning_rate": 8.144756614410846e-06, + "loss": 0.6757, + "step": 9951 + }, + { + "epoch": 0.30501409832046095, + "grad_norm": 1.6439733688048368, + "learning_rate": 8.144370739254418e-06, + "loss": 0.6443, + "step": 9952 + }, + { + "epoch": 0.30504474684320215, + "grad_norm": 1.7840661215186073, + "learning_rate": 8.1439848331157e-06, + "loss": 0.7143, + "step": 9953 + }, + { + "epoch": 0.30507539536594336, + "grad_norm": 1.7295314557884864, + "learning_rate": 8.143598895998494e-06, + "loss": 0.7506, + "step": 9954 + }, + { + "epoch": 0.30510604388868456, + "grad_norm": 1.9360172695782982, + "learning_rate": 8.143212927906606e-06, + "loss": 0.7129, + "step": 9955 + }, + { + "epoch": 0.30513669241142577, + "grad_norm": 1.627703122729354, + "learning_rate": 8.142826928843835e-06, + "loss": 0.577, + "step": 9956 + }, + { + "epoch": 0.305167340934167, + "grad_norm": 0.8555671972885345, + "learning_rate": 8.142440898813985e-06, + "loss": 0.4747, + "step": 9957 + }, + { + "epoch": 0.3051979894569082, + "grad_norm": 1.8404403478745408, + "learning_rate": 8.142054837820865e-06, + "loss": 0.6845, + "step": 9958 + }, + { + "epoch": 0.3052286379796494, + "grad_norm": 1.5707709073512115, + "learning_rate": 8.141668745868271e-06, + "loss": 0.5722, + "step": 9959 + }, + { + "epoch": 0.3052592865023906, + "grad_norm": 2.0678515547919813, + "learning_rate": 8.141282622960012e-06, + "loss": 0.7579, + "step": 9960 + }, + { + "epoch": 0.3052899350251318, + "grad_norm": 1.6986045326867014, + "learning_rate": 8.140896469099893e-06, + "loss": 0.7451, + "step": 9961 + }, + { + "epoch": 0.305320583547873, + "grad_norm": 0.7895058032989531, + "learning_rate": 8.140510284291716e-06, + "loss": 0.4642, + "step": 9962 + }, + { + "epoch": 0.3053512320706142, + "grad_norm": 1.898382498758553, + "learning_rate": 8.140124068539288e-06, + "loss": 0.719, + "step": 9963 + }, + { + "epoch": 0.3053818805933554, + "grad_norm": 1.9431115272419486, + "learning_rate": 8.139737821846415e-06, + "loss": 0.5947, + "step": 9964 + }, + { + "epoch": 0.3054125291160966, + "grad_norm": 1.8533192737968534, + "learning_rate": 8.139351544216901e-06, + "loss": 0.7443, + "step": 9965 + }, + { + "epoch": 0.3054431776388378, + "grad_norm": 1.6332134447864906, + "learning_rate": 8.138965235654553e-06, + "loss": 0.7247, + "step": 9966 + }, + { + "epoch": 0.30547382616157903, + "grad_norm": 1.768405542590437, + "learning_rate": 8.138578896163177e-06, + "loss": 0.7457, + "step": 9967 + }, + { + "epoch": 0.30550447468432024, + "grad_norm": 1.7249737915808079, + "learning_rate": 8.138192525746582e-06, + "loss": 0.6077, + "step": 9968 + }, + { + "epoch": 0.30553512320706144, + "grad_norm": 1.9072440169560319, + "learning_rate": 8.137806124408572e-06, + "loss": 0.6494, + "step": 9969 + }, + { + "epoch": 0.30556577172980265, + "grad_norm": 1.5127658687799577, + "learning_rate": 8.137419692152954e-06, + "loss": 0.6838, + "step": 9970 + }, + { + "epoch": 0.30559642025254385, + "grad_norm": 0.8731842800367429, + "learning_rate": 8.137033228983538e-06, + "loss": 0.4923, + 
"step": 9971 + }, + { + "epoch": 0.305627068775285, + "grad_norm": 1.8398095587863275, + "learning_rate": 8.136646734904132e-06, + "loss": 0.725, + "step": 9972 + }, + { + "epoch": 0.3056577172980262, + "grad_norm": 1.6628448636444055, + "learning_rate": 8.136260209918541e-06, + "loss": 0.7188, + "step": 9973 + }, + { + "epoch": 0.3056883658207674, + "grad_norm": 1.7181901448574632, + "learning_rate": 8.135873654030577e-06, + "loss": 0.6889, + "step": 9974 + }, + { + "epoch": 0.3057190143435086, + "grad_norm": 1.7535843871399124, + "learning_rate": 8.135487067244048e-06, + "loss": 0.6908, + "step": 9975 + }, + { + "epoch": 0.3057496628662498, + "grad_norm": 2.115953783533447, + "learning_rate": 8.135100449562763e-06, + "loss": 0.6348, + "step": 9976 + }, + { + "epoch": 0.30578031138899103, + "grad_norm": 1.7871888001219765, + "learning_rate": 8.13471380099053e-06, + "loss": 0.8013, + "step": 9977 + }, + { + "epoch": 0.30581095991173224, + "grad_norm": 2.053954820573454, + "learning_rate": 8.13432712153116e-06, + "loss": 0.7435, + "step": 9978 + }, + { + "epoch": 0.30584160843447344, + "grad_norm": 1.5704363870178852, + "learning_rate": 8.133940411188463e-06, + "loss": 0.6564, + "step": 9979 + }, + { + "epoch": 0.30587225695721465, + "grad_norm": 1.7668189188987162, + "learning_rate": 8.13355366996625e-06, + "loss": 0.6911, + "step": 9980 + }, + { + "epoch": 0.30590290547995586, + "grad_norm": 1.7207956065284136, + "learning_rate": 8.13316689786833e-06, + "loss": 0.7244, + "step": 9981 + }, + { + "epoch": 0.30593355400269706, + "grad_norm": 1.7644331084424802, + "learning_rate": 8.132780094898515e-06, + "loss": 0.6507, + "step": 9982 + }, + { + "epoch": 0.30596420252543827, + "grad_norm": 1.658144010000826, + "learning_rate": 8.132393261060616e-06, + "loss": 0.6153, + "step": 9983 + }, + { + "epoch": 0.3059948510481795, + "grad_norm": 0.7919473144194484, + "learning_rate": 8.132006396358447e-06, + "loss": 0.4615, + "step": 9984 + }, + { + "epoch": 0.3060254995709207, + "grad_norm": 1.5826437031037688, + "learning_rate": 8.131619500795815e-06, + "loss": 0.6344, + "step": 9985 + }, + { + "epoch": 0.3060561480936619, + "grad_norm": 1.681913920518749, + "learning_rate": 8.131232574376535e-06, + "loss": 0.6829, + "step": 9986 + }, + { + "epoch": 0.3060867966164031, + "grad_norm": 1.9751660140820408, + "learning_rate": 8.130845617104419e-06, + "loss": 0.6862, + "step": 9987 + }, + { + "epoch": 0.3061174451391443, + "grad_norm": 1.8097105563665412, + "learning_rate": 8.130458628983281e-06, + "loss": 0.7382, + "step": 9988 + }, + { + "epoch": 0.3061480936618855, + "grad_norm": 1.80340401967374, + "learning_rate": 8.130071610016934e-06, + "loss": 0.6304, + "step": 9989 + }, + { + "epoch": 0.3061787421846267, + "grad_norm": 1.6925083048958267, + "learning_rate": 8.12968456020919e-06, + "loss": 0.6444, + "step": 9990 + }, + { + "epoch": 0.3062093907073679, + "grad_norm": 1.7568352011151858, + "learning_rate": 8.129297479563863e-06, + "loss": 0.6723, + "step": 9991 + }, + { + "epoch": 0.3062400392301091, + "grad_norm": 1.9567694809837621, + "learning_rate": 8.128910368084767e-06, + "loss": 0.8131, + "step": 9992 + }, + { + "epoch": 0.3062706877528503, + "grad_norm": 1.8578487305077476, + "learning_rate": 8.128523225775717e-06, + "loss": 0.6981, + "step": 9993 + }, + { + "epoch": 0.30630133627559153, + "grad_norm": 1.8947991684041496, + "learning_rate": 8.128136052640526e-06, + "loss": 0.7995, + "step": 9994 + }, + { + "epoch": 0.30633198479833273, + "grad_norm": 0.8391567221416658, + "learning_rate": 
8.12774884868301e-06, + "loss": 0.4682, + "step": 9995 + }, + { + "epoch": 0.30636263332107394, + "grad_norm": 2.467869319763568, + "learning_rate": 8.127361613906988e-06, + "loss": 0.7604, + "step": 9996 + }, + { + "epoch": 0.30639328184381515, + "grad_norm": 1.976626494914839, + "learning_rate": 8.126974348316268e-06, + "loss": 0.7883, + "step": 9997 + }, + { + "epoch": 0.30642393036655635, + "grad_norm": 1.6167741134990343, + "learning_rate": 8.12658705191467e-06, + "loss": 0.6713, + "step": 9998 + }, + { + "epoch": 0.30645457888929756, + "grad_norm": 1.719209838254567, + "learning_rate": 8.126199724706012e-06, + "loss": 0.7432, + "step": 9999 + }, + { + "epoch": 0.30648522741203876, + "grad_norm": 1.8594416401300824, + "learning_rate": 8.125812366694106e-06, + "loss": 0.6388, + "step": 10000 + }, + { + "epoch": 0.30651587593477997, + "grad_norm": 0.8057456987891236, + "learning_rate": 8.125424977882772e-06, + "loss": 0.4781, + "step": 10001 + }, + { + "epoch": 0.3065465244575212, + "grad_norm": 0.8007921001244939, + "learning_rate": 8.125037558275826e-06, + "loss": 0.4594, + "step": 10002 + }, + { + "epoch": 0.3065771729802623, + "grad_norm": 1.6149823911433334, + "learning_rate": 8.124650107877086e-06, + "loss": 0.637, + "step": 10003 + }, + { + "epoch": 0.30660782150300353, + "grad_norm": 1.6716818753052534, + "learning_rate": 8.124262626690367e-06, + "loss": 0.6767, + "step": 10004 + }, + { + "epoch": 0.30663847002574474, + "grad_norm": 1.7496885202000658, + "learning_rate": 8.123875114719491e-06, + "loss": 0.6995, + "step": 10005 + }, + { + "epoch": 0.30666911854848594, + "grad_norm": 1.5217555948699353, + "learning_rate": 8.123487571968273e-06, + "loss": 0.6383, + "step": 10006 + }, + { + "epoch": 0.30669976707122715, + "grad_norm": 1.8785305461567685, + "learning_rate": 8.123099998440535e-06, + "loss": 0.7628, + "step": 10007 + }, + { + "epoch": 0.30673041559396835, + "grad_norm": 1.57503084647934, + "learning_rate": 8.12271239414009e-06, + "loss": 0.6473, + "step": 10008 + }, + { + "epoch": 0.30676106411670956, + "grad_norm": 1.6227432614540533, + "learning_rate": 8.122324759070764e-06, + "loss": 0.6935, + "step": 10009 + }, + { + "epoch": 0.30679171263945076, + "grad_norm": 2.0172003163010794, + "learning_rate": 8.121937093236371e-06, + "loss": 0.668, + "step": 10010 + }, + { + "epoch": 0.30682236116219197, + "grad_norm": 1.8384747580710254, + "learning_rate": 8.121549396640736e-06, + "loss": 0.7072, + "step": 10011 + }, + { + "epoch": 0.3068530096849332, + "grad_norm": 1.7875535901697375, + "learning_rate": 8.121161669287674e-06, + "loss": 0.6941, + "step": 10012 + }, + { + "epoch": 0.3068836582076744, + "grad_norm": 1.5571552976568779, + "learning_rate": 8.120773911181005e-06, + "loss": 0.6688, + "step": 10013 + }, + { + "epoch": 0.3069143067304156, + "grad_norm": 1.6202993892688111, + "learning_rate": 8.120386122324556e-06, + "loss": 0.725, + "step": 10014 + }, + { + "epoch": 0.3069449552531568, + "grad_norm": 1.709934132815986, + "learning_rate": 8.119998302722143e-06, + "loss": 0.6062, + "step": 10015 + }, + { + "epoch": 0.306975603775898, + "grad_norm": 2.8728179331409036, + "learning_rate": 8.119610452377588e-06, + "loss": 0.6649, + "step": 10016 + }, + { + "epoch": 0.3070062522986392, + "grad_norm": 1.7407477214928213, + "learning_rate": 8.119222571294714e-06, + "loss": 0.629, + "step": 10017 + }, + { + "epoch": 0.3070369008213804, + "grad_norm": 1.683660714957663, + "learning_rate": 8.118834659477341e-06, + "loss": 0.5762, + "step": 10018 + }, + { + "epoch": 
0.3070675493441216, + "grad_norm": 1.9433140897096906, + "learning_rate": 8.118446716929294e-06, + "loss": 0.7013, + "step": 10019 + }, + { + "epoch": 0.3070981978668628, + "grad_norm": 1.772304676162161, + "learning_rate": 8.118058743654392e-06, + "loss": 0.7152, + "step": 10020 + }, + { + "epoch": 0.307128846389604, + "grad_norm": 0.9085512718050663, + "learning_rate": 8.117670739656457e-06, + "loss": 0.464, + "step": 10021 + }, + { + "epoch": 0.30715949491234523, + "grad_norm": 1.8305972995985287, + "learning_rate": 8.117282704939318e-06, + "loss": 0.7246, + "step": 10022 + }, + { + "epoch": 0.30719014343508644, + "grad_norm": 2.5109441044529492, + "learning_rate": 8.116894639506794e-06, + "loss": 0.6885, + "step": 10023 + }, + { + "epoch": 0.30722079195782764, + "grad_norm": 1.7759064050396463, + "learning_rate": 8.11650654336271e-06, + "loss": 0.731, + "step": 10024 + }, + { + "epoch": 0.30725144048056885, + "grad_norm": 1.8151224660835072, + "learning_rate": 8.116118416510889e-06, + "loss": 0.7214, + "step": 10025 + }, + { + "epoch": 0.30728208900331005, + "grad_norm": 1.7565206800826514, + "learning_rate": 8.115730258955156e-06, + "loss": 0.7146, + "step": 10026 + }, + { + "epoch": 0.30731273752605126, + "grad_norm": 1.7886762704881047, + "learning_rate": 8.115342070699335e-06, + "loss": 0.7514, + "step": 10027 + }, + { + "epoch": 0.30734338604879247, + "grad_norm": 1.7149959207794132, + "learning_rate": 8.114953851747252e-06, + "loss": 0.7181, + "step": 10028 + }, + { + "epoch": 0.30737403457153367, + "grad_norm": 1.712162174011383, + "learning_rate": 8.114565602102733e-06, + "loss": 0.7196, + "step": 10029 + }, + { + "epoch": 0.3074046830942749, + "grad_norm": 1.681804370524691, + "learning_rate": 8.1141773217696e-06, + "loss": 0.7095, + "step": 10030 + }, + { + "epoch": 0.3074353316170161, + "grad_norm": 0.8544495692585663, + "learning_rate": 8.113789010751682e-06, + "loss": 0.4701, + "step": 10031 + }, + { + "epoch": 0.3074659801397573, + "grad_norm": 1.9018956476225455, + "learning_rate": 8.113400669052805e-06, + "loss": 0.6865, + "step": 10032 + }, + { + "epoch": 0.3074966286624985, + "grad_norm": 0.7652581867266213, + "learning_rate": 8.113012296676793e-06, + "loss": 0.4899, + "step": 10033 + }, + { + "epoch": 0.30752727718523964, + "grad_norm": 2.0391166605144875, + "learning_rate": 8.112623893627476e-06, + "loss": 0.84, + "step": 10034 + }, + { + "epoch": 0.30755792570798085, + "grad_norm": 1.641176843797518, + "learning_rate": 8.11223545990868e-06, + "loss": 0.7066, + "step": 10035 + }, + { + "epoch": 0.30758857423072206, + "grad_norm": 1.619517278336058, + "learning_rate": 8.111846995524228e-06, + "loss": 0.7085, + "step": 10036 + }, + { + "epoch": 0.30761922275346326, + "grad_norm": 1.661654853814147, + "learning_rate": 8.111458500477955e-06, + "loss": 0.6678, + "step": 10037 + }, + { + "epoch": 0.30764987127620447, + "grad_norm": 1.653663606956275, + "learning_rate": 8.111069974773684e-06, + "loss": 0.6835, + "step": 10038 + }, + { + "epoch": 0.3076805197989457, + "grad_norm": 1.8773358052217388, + "learning_rate": 8.110681418415245e-06, + "loss": 0.7306, + "step": 10039 + }, + { + "epoch": 0.3077111683216869, + "grad_norm": 1.8375903213656628, + "learning_rate": 8.110292831406466e-06, + "loss": 0.6962, + "step": 10040 + }, + { + "epoch": 0.3077418168444281, + "grad_norm": 0.850981080095948, + "learning_rate": 8.109904213751174e-06, + "loss": 0.497, + "step": 10041 + }, + { + "epoch": 0.3077724653671693, + "grad_norm": 1.6358576374183647, + "learning_rate": 
8.109515565453202e-06, + "loss": 0.6445, + "step": 10042 + }, + { + "epoch": 0.3078031138899105, + "grad_norm": 1.4080705451668314, + "learning_rate": 8.109126886516376e-06, + "loss": 0.6838, + "step": 10043 + }, + { + "epoch": 0.3078337624126517, + "grad_norm": 1.7277811527335412, + "learning_rate": 8.108738176944529e-06, + "loss": 0.8038, + "step": 10044 + }, + { + "epoch": 0.3078644109353929, + "grad_norm": 1.5645724452884309, + "learning_rate": 8.10834943674149e-06, + "loss": 0.6028, + "step": 10045 + }, + { + "epoch": 0.3078950594581341, + "grad_norm": 0.8262300089899393, + "learning_rate": 8.107960665911087e-06, + "loss": 0.4886, + "step": 10046 + }, + { + "epoch": 0.3079257079808753, + "grad_norm": 1.6493482845272724, + "learning_rate": 8.107571864457153e-06, + "loss": 0.7037, + "step": 10047 + }, + { + "epoch": 0.3079563565036165, + "grad_norm": 1.534295277741896, + "learning_rate": 8.107183032383517e-06, + "loss": 0.698, + "step": 10048 + }, + { + "epoch": 0.30798700502635773, + "grad_norm": 1.7182703998482822, + "learning_rate": 8.106794169694012e-06, + "loss": 0.73, + "step": 10049 + }, + { + "epoch": 0.30801765354909894, + "grad_norm": 1.8142591810251345, + "learning_rate": 8.106405276392471e-06, + "loss": 0.7316, + "step": 10050 + }, + { + "epoch": 0.30804830207184014, + "grad_norm": 1.8024801904775647, + "learning_rate": 8.106016352482722e-06, + "loss": 0.7296, + "step": 10051 + }, + { + "epoch": 0.30807895059458135, + "grad_norm": 0.8120950284964582, + "learning_rate": 8.105627397968601e-06, + "loss": 0.4557, + "step": 10052 + }, + { + "epoch": 0.30810959911732255, + "grad_norm": 0.7866239272681655, + "learning_rate": 8.105238412853937e-06, + "loss": 0.4638, + "step": 10053 + }, + { + "epoch": 0.30814024764006376, + "grad_norm": 0.7687675885502991, + "learning_rate": 8.104849397142566e-06, + "loss": 0.4651, + "step": 10054 + }, + { + "epoch": 0.30817089616280496, + "grad_norm": 0.7790846006398108, + "learning_rate": 8.104460350838318e-06, + "loss": 0.4774, + "step": 10055 + }, + { + "epoch": 0.30820154468554617, + "grad_norm": 0.7785107641014709, + "learning_rate": 8.104071273945029e-06, + "loss": 0.4603, + "step": 10056 + }, + { + "epoch": 0.3082321932082874, + "grad_norm": 1.7558124877301418, + "learning_rate": 8.10368216646653e-06, + "loss": 0.7628, + "step": 10057 + }, + { + "epoch": 0.3082628417310286, + "grad_norm": 1.6611429778591755, + "learning_rate": 8.103293028406658e-06, + "loss": 0.5294, + "step": 10058 + }, + { + "epoch": 0.3082934902537698, + "grad_norm": 2.995172270162869, + "learning_rate": 8.102903859769244e-06, + "loss": 0.701, + "step": 10059 + }, + { + "epoch": 0.308324138776511, + "grad_norm": 1.7672260595609497, + "learning_rate": 8.102514660558126e-06, + "loss": 0.7225, + "step": 10060 + }, + { + "epoch": 0.3083547872992522, + "grad_norm": 1.6317167671018173, + "learning_rate": 8.102125430777138e-06, + "loss": 0.7012, + "step": 10061 + }, + { + "epoch": 0.3083854358219934, + "grad_norm": 1.5631672144926574, + "learning_rate": 8.101736170430113e-06, + "loss": 0.7275, + "step": 10062 + }, + { + "epoch": 0.3084160843447346, + "grad_norm": 1.6835390000094919, + "learning_rate": 8.101346879520888e-06, + "loss": 0.6611, + "step": 10063 + }, + { + "epoch": 0.3084467328674758, + "grad_norm": 1.7539966116507246, + "learning_rate": 8.100957558053298e-06, + "loss": 0.7299, + "step": 10064 + }, + { + "epoch": 0.30847738139021696, + "grad_norm": 1.6819022592822206, + "learning_rate": 8.10056820603118e-06, + "loss": 0.6439, + "step": 10065 + }, + { + "epoch": 
0.30850802991295817, + "grad_norm": 1.6171432535146752, + "learning_rate": 8.100178823458373e-06, + "loss": 0.7455, + "step": 10066 + }, + { + "epoch": 0.3085386784356994, + "grad_norm": 1.7424709695582234, + "learning_rate": 8.099789410338708e-06, + "loss": 0.8021, + "step": 10067 + }, + { + "epoch": 0.3085693269584406, + "grad_norm": 1.827678982887259, + "learning_rate": 8.099399966676025e-06, + "loss": 0.7501, + "step": 10068 + }, + { + "epoch": 0.3085999754811818, + "grad_norm": 1.8361868308740676, + "learning_rate": 8.099010492474162e-06, + "loss": 0.6763, + "step": 10069 + }, + { + "epoch": 0.308630624003923, + "grad_norm": 1.6799639957198556, + "learning_rate": 8.098620987736953e-06, + "loss": 0.6837, + "step": 10070 + }, + { + "epoch": 0.3086612725266642, + "grad_norm": 1.1337855844843137, + "learning_rate": 8.098231452468242e-06, + "loss": 0.4863, + "step": 10071 + }, + { + "epoch": 0.3086919210494054, + "grad_norm": 1.7514464152043057, + "learning_rate": 8.097841886671863e-06, + "loss": 0.7111, + "step": 10072 + }, + { + "epoch": 0.3087225695721466, + "grad_norm": 1.6419533226370162, + "learning_rate": 8.097452290351655e-06, + "loss": 0.7212, + "step": 10073 + }, + { + "epoch": 0.3087532180948878, + "grad_norm": 1.785181100353865, + "learning_rate": 8.097062663511457e-06, + "loss": 0.7169, + "step": 10074 + }, + { + "epoch": 0.308783866617629, + "grad_norm": 1.7447569295565761, + "learning_rate": 8.096673006155107e-06, + "loss": 0.6131, + "step": 10075 + }, + { + "epoch": 0.3088145151403702, + "grad_norm": 1.772845647156414, + "learning_rate": 8.096283318286446e-06, + "loss": 0.6872, + "step": 10076 + }, + { + "epoch": 0.30884516366311143, + "grad_norm": 1.8741020591822655, + "learning_rate": 8.095893599909315e-06, + "loss": 0.6593, + "step": 10077 + }, + { + "epoch": 0.30887581218585264, + "grad_norm": 1.6849996263283185, + "learning_rate": 8.09550385102755e-06, + "loss": 0.7636, + "step": 10078 + }, + { + "epoch": 0.30890646070859384, + "grad_norm": 0.8603937318391685, + "learning_rate": 8.095114071644996e-06, + "loss": 0.4571, + "step": 10079 + }, + { + "epoch": 0.30893710923133505, + "grad_norm": 1.780188205668451, + "learning_rate": 8.09472426176549e-06, + "loss": 0.7142, + "step": 10080 + }, + { + "epoch": 0.30896775775407626, + "grad_norm": 1.8137556212392536, + "learning_rate": 8.094334421392873e-06, + "loss": 0.6631, + "step": 10081 + }, + { + "epoch": 0.30899840627681746, + "grad_norm": 1.6726502656202054, + "learning_rate": 8.09394455053099e-06, + "loss": 0.5968, + "step": 10082 + }, + { + "epoch": 0.30902905479955867, + "grad_norm": 1.6998195155977374, + "learning_rate": 8.093554649183677e-06, + "loss": 0.7204, + "step": 10083 + }, + { + "epoch": 0.3090597033222999, + "grad_norm": 1.6653314194244349, + "learning_rate": 8.093164717354779e-06, + "loss": 0.6325, + "step": 10084 + }, + { + "epoch": 0.3090903518450411, + "grad_norm": 0.788931064419268, + "learning_rate": 8.092774755048138e-06, + "loss": 0.4633, + "step": 10085 + }, + { + "epoch": 0.3091210003677823, + "grad_norm": 1.715583706698734, + "learning_rate": 8.092384762267596e-06, + "loss": 0.654, + "step": 10086 + }, + { + "epoch": 0.3091516488905235, + "grad_norm": 1.7700461176077817, + "learning_rate": 8.091994739016995e-06, + "loss": 0.6766, + "step": 10087 + }, + { + "epoch": 0.3091822974132647, + "grad_norm": 1.6181894056807764, + "learning_rate": 8.09160468530018e-06, + "loss": 0.6588, + "step": 10088 + }, + { + "epoch": 0.3092129459360059, + "grad_norm": 1.8831072668402187, + "learning_rate": 
8.091214601120992e-06, + "loss": 0.7604, + "step": 10089 + }, + { + "epoch": 0.3092435944587471, + "grad_norm": 1.969028599202364, + "learning_rate": 8.090824486483274e-06, + "loss": 0.7453, + "step": 10090 + }, + { + "epoch": 0.3092742429814883, + "grad_norm": 0.8889620597845428, + "learning_rate": 8.090434341390874e-06, + "loss": 0.485, + "step": 10091 + }, + { + "epoch": 0.3093048915042295, + "grad_norm": 1.8749088670669127, + "learning_rate": 8.090044165847634e-06, + "loss": 0.72, + "step": 10092 + }, + { + "epoch": 0.3093355400269707, + "grad_norm": 1.4787354360767133, + "learning_rate": 8.089653959857398e-06, + "loss": 0.6249, + "step": 10093 + }, + { + "epoch": 0.30936618854971193, + "grad_norm": 1.6449970558596003, + "learning_rate": 8.08926372342401e-06, + "loss": 0.6135, + "step": 10094 + }, + { + "epoch": 0.30939683707245313, + "grad_norm": 1.617305833831159, + "learning_rate": 8.088873456551317e-06, + "loss": 0.6912, + "step": 10095 + }, + { + "epoch": 0.3094274855951943, + "grad_norm": 1.7727314507673428, + "learning_rate": 8.088483159243164e-06, + "loss": 0.6917, + "step": 10096 + }, + { + "epoch": 0.3094581341179355, + "grad_norm": 1.7116448356930645, + "learning_rate": 8.088092831503394e-06, + "loss": 0.6296, + "step": 10097 + }, + { + "epoch": 0.3094887826406767, + "grad_norm": 1.669287814110503, + "learning_rate": 8.087702473335858e-06, + "loss": 0.6611, + "step": 10098 + }, + { + "epoch": 0.3095194311634179, + "grad_norm": 1.9615667675469812, + "learning_rate": 8.087312084744397e-06, + "loss": 0.5688, + "step": 10099 + }, + { + "epoch": 0.3095500796861591, + "grad_norm": 1.692688172126528, + "learning_rate": 8.086921665732861e-06, + "loss": 0.5815, + "step": 10100 + }, + { + "epoch": 0.3095807282089003, + "grad_norm": 1.584373986460534, + "learning_rate": 8.086531216305095e-06, + "loss": 0.649, + "step": 10101 + }, + { + "epoch": 0.3096113767316415, + "grad_norm": 1.7291723285890366, + "learning_rate": 8.086140736464949e-06, + "loss": 0.6909, + "step": 10102 + }, + { + "epoch": 0.3096420252543827, + "grad_norm": 1.7590291134477556, + "learning_rate": 8.085750226216267e-06, + "loss": 0.6288, + "step": 10103 + }, + { + "epoch": 0.30967267377712393, + "grad_norm": 1.5677416329862728, + "learning_rate": 8.0853596855629e-06, + "loss": 0.6278, + "step": 10104 + }, + { + "epoch": 0.30970332229986514, + "grad_norm": 1.887187618742984, + "learning_rate": 8.084969114508693e-06, + "loss": 0.6899, + "step": 10105 + }, + { + "epoch": 0.30973397082260634, + "grad_norm": 1.81927406485111, + "learning_rate": 8.084578513057499e-06, + "loss": 0.6911, + "step": 10106 + }, + { + "epoch": 0.30976461934534755, + "grad_norm": 0.8815177275923194, + "learning_rate": 8.084187881213162e-06, + "loss": 0.4464, + "step": 10107 + }, + { + "epoch": 0.30979526786808875, + "grad_norm": 0.831122761464326, + "learning_rate": 8.083797218979532e-06, + "loss": 0.4652, + "step": 10108 + }, + { + "epoch": 0.30982591639082996, + "grad_norm": 1.7413585515761807, + "learning_rate": 8.083406526360459e-06, + "loss": 0.6245, + "step": 10109 + }, + { + "epoch": 0.30985656491357116, + "grad_norm": 1.9388748730735341, + "learning_rate": 8.083015803359793e-06, + "loss": 0.6903, + "step": 10110 + }, + { + "epoch": 0.30988721343631237, + "grad_norm": 0.8417568301404633, + "learning_rate": 8.082625049981383e-06, + "loss": 0.4577, + "step": 10111 + }, + { + "epoch": 0.3099178619590536, + "grad_norm": 1.812310893317172, + "learning_rate": 8.08223426622908e-06, + "loss": 0.6706, + "step": 10112 + }, + { + "epoch": 
0.3099485104817948, + "grad_norm": 1.6747522359081, + "learning_rate": 8.081843452106735e-06, + "loss": 0.6253, + "step": 10113 + }, + { + "epoch": 0.309979159004536, + "grad_norm": 1.6017241041383226, + "learning_rate": 8.081452607618196e-06, + "loss": 0.602, + "step": 10114 + }, + { + "epoch": 0.3100098075272772, + "grad_norm": 0.8720517148842616, + "learning_rate": 8.08106173276732e-06, + "loss": 0.4743, + "step": 10115 + }, + { + "epoch": 0.3100404560500184, + "grad_norm": 1.8072466870877735, + "learning_rate": 8.08067082755795e-06, + "loss": 0.6359, + "step": 10116 + }, + { + "epoch": 0.3100711045727596, + "grad_norm": 1.7256905232264013, + "learning_rate": 8.080279891993943e-06, + "loss": 0.7568, + "step": 10117 + }, + { + "epoch": 0.3101017530955008, + "grad_norm": 1.8122934792871799, + "learning_rate": 8.079888926079152e-06, + "loss": 0.7563, + "step": 10118 + }, + { + "epoch": 0.310132401618242, + "grad_norm": 1.6636519335442888, + "learning_rate": 8.079497929817426e-06, + "loss": 0.7255, + "step": 10119 + }, + { + "epoch": 0.3101630501409832, + "grad_norm": 1.6115537185219433, + "learning_rate": 8.07910690321262e-06, + "loss": 0.5598, + "step": 10120 + }, + { + "epoch": 0.3101936986637244, + "grad_norm": 0.8504589220174502, + "learning_rate": 8.078715846268583e-06, + "loss": 0.4705, + "step": 10121 + }, + { + "epoch": 0.31022434718646563, + "grad_norm": 1.6139950869049242, + "learning_rate": 8.078324758989174e-06, + "loss": 0.68, + "step": 10122 + }, + { + "epoch": 0.31025499570920684, + "grad_norm": 0.8156365721494608, + "learning_rate": 8.077933641378243e-06, + "loss": 0.4642, + "step": 10123 + }, + { + "epoch": 0.31028564423194804, + "grad_norm": 2.064826217284638, + "learning_rate": 8.077542493439643e-06, + "loss": 0.6591, + "step": 10124 + }, + { + "epoch": 0.31031629275468925, + "grad_norm": 1.6153779176496492, + "learning_rate": 8.077151315177232e-06, + "loss": 0.7171, + "step": 10125 + }, + { + "epoch": 0.31034694127743045, + "grad_norm": 1.852348751504051, + "learning_rate": 8.076760106594859e-06, + "loss": 0.6561, + "step": 10126 + }, + { + "epoch": 0.3103775898001716, + "grad_norm": 1.7898484423871, + "learning_rate": 8.076368867696382e-06, + "loss": 0.6871, + "step": 10127 + }, + { + "epoch": 0.3104082383229128, + "grad_norm": 1.7417992910247408, + "learning_rate": 8.075977598485656e-06, + "loss": 0.6959, + "step": 10128 + }, + { + "epoch": 0.310438886845654, + "grad_norm": 1.6208193484848572, + "learning_rate": 8.075586298966536e-06, + "loss": 0.6401, + "step": 10129 + }, + { + "epoch": 0.3104695353683952, + "grad_norm": 0.8507793142067752, + "learning_rate": 8.075194969142876e-06, + "loss": 0.4657, + "step": 10130 + }, + { + "epoch": 0.3105001838911364, + "grad_norm": 1.822669491533285, + "learning_rate": 8.074803609018535e-06, + "loss": 0.7604, + "step": 10131 + }, + { + "epoch": 0.31053083241387763, + "grad_norm": 1.6115280936607073, + "learning_rate": 8.074412218597367e-06, + "loss": 0.7082, + "step": 10132 + }, + { + "epoch": 0.31056148093661884, + "grad_norm": 1.6361780316393648, + "learning_rate": 8.07402079788323e-06, + "loss": 0.7618, + "step": 10133 + }, + { + "epoch": 0.31059212945936004, + "grad_norm": 1.625532682314719, + "learning_rate": 8.073629346879976e-06, + "loss": 0.7274, + "step": 10134 + }, + { + "epoch": 0.31062277798210125, + "grad_norm": 1.871841851448694, + "learning_rate": 8.073237865591468e-06, + "loss": 0.7756, + "step": 10135 + }, + { + "epoch": 0.31065342650484246, + "grad_norm": 2.8135160580216696, + "learning_rate": 
8.07284635402156e-06, + "loss": 0.6618, + "step": 10136 + }, + { + "epoch": 0.31068407502758366, + "grad_norm": 1.6916045749916504, + "learning_rate": 8.072454812174111e-06, + "loss": 0.6981, + "step": 10137 + }, + { + "epoch": 0.31071472355032487, + "grad_norm": 1.753590758889076, + "learning_rate": 8.072063240052978e-06, + "loss": 0.6051, + "step": 10138 + }, + { + "epoch": 0.3107453720730661, + "grad_norm": 1.7280514434205065, + "learning_rate": 8.071671637662022e-06, + "loss": 0.6133, + "step": 10139 + }, + { + "epoch": 0.3107760205958073, + "grad_norm": 1.6724925834515456, + "learning_rate": 8.071280005005098e-06, + "loss": 0.708, + "step": 10140 + }, + { + "epoch": 0.3108066691185485, + "grad_norm": 1.7655272115963094, + "learning_rate": 8.070888342086065e-06, + "loss": 0.6642, + "step": 10141 + }, + { + "epoch": 0.3108373176412897, + "grad_norm": 1.1103673360618431, + "learning_rate": 8.070496648908786e-06, + "loss": 0.4618, + "step": 10142 + }, + { + "epoch": 0.3108679661640309, + "grad_norm": 1.821528980446909, + "learning_rate": 8.070104925477116e-06, + "loss": 0.6014, + "step": 10143 + }, + { + "epoch": 0.3108986146867721, + "grad_norm": 0.8730600648821262, + "learning_rate": 8.069713171794918e-06, + "loss": 0.4731, + "step": 10144 + }, + { + "epoch": 0.3109292632095133, + "grad_norm": 1.983476052522464, + "learning_rate": 8.06932138786605e-06, + "loss": 0.7167, + "step": 10145 + }, + { + "epoch": 0.3109599117322545, + "grad_norm": 1.531679379690055, + "learning_rate": 8.068929573694373e-06, + "loss": 0.5411, + "step": 10146 + }, + { + "epoch": 0.3109905602549957, + "grad_norm": 1.8443129990879759, + "learning_rate": 8.068537729283748e-06, + "loss": 0.6819, + "step": 10147 + }, + { + "epoch": 0.3110212087777369, + "grad_norm": 1.7016957642285062, + "learning_rate": 8.068145854638034e-06, + "loss": 0.7479, + "step": 10148 + }, + { + "epoch": 0.31105185730047813, + "grad_norm": 1.6566023174548126, + "learning_rate": 8.067753949761095e-06, + "loss": 0.6672, + "step": 10149 + }, + { + "epoch": 0.31108250582321934, + "grad_norm": 1.8770510215302554, + "learning_rate": 8.067362014656792e-06, + "loss": 0.689, + "step": 10150 + }, + { + "epoch": 0.31111315434596054, + "grad_norm": 1.7678809407121239, + "learning_rate": 8.066970049328985e-06, + "loss": 0.555, + "step": 10151 + }, + { + "epoch": 0.31114380286870175, + "grad_norm": 1.8724335270034573, + "learning_rate": 8.06657805378154e-06, + "loss": 0.738, + "step": 10152 + }, + { + "epoch": 0.31117445139144295, + "grad_norm": 1.927024317498484, + "learning_rate": 8.066186028018314e-06, + "loss": 0.6257, + "step": 10153 + }, + { + "epoch": 0.31120509991418416, + "grad_norm": 1.8284009511927812, + "learning_rate": 8.065793972043175e-06, + "loss": 0.7071, + "step": 10154 + }, + { + "epoch": 0.31123574843692536, + "grad_norm": 1.8104392781704077, + "learning_rate": 8.06540188585998e-06, + "loss": 0.6594, + "step": 10155 + }, + { + "epoch": 0.31126639695966657, + "grad_norm": 1.6021870692118136, + "learning_rate": 8.065009769472598e-06, + "loss": 0.672, + "step": 10156 + }, + { + "epoch": 0.3112970454824078, + "grad_norm": 1.7037731186854124, + "learning_rate": 8.064617622884892e-06, + "loss": 0.7125, + "step": 10157 + }, + { + "epoch": 0.3113276940051489, + "grad_norm": 1.7425480337988668, + "learning_rate": 8.064225446100723e-06, + "loss": 0.6228, + "step": 10158 + }, + { + "epoch": 0.31135834252789013, + "grad_norm": 1.6930155125776924, + "learning_rate": 8.063833239123958e-06, + "loss": 0.7095, + "step": 10159 + }, + { + "epoch": 
0.31138899105063134, + "grad_norm": 1.8041130159801273, + "learning_rate": 8.063441001958456e-06, + "loss": 0.6813, + "step": 10160 + }, + { + "epoch": 0.31141963957337254, + "grad_norm": 1.9462973529687955, + "learning_rate": 8.06304873460809e-06, + "loss": 0.7749, + "step": 10161 + }, + { + "epoch": 0.31145028809611375, + "grad_norm": 2.249675284095581, + "learning_rate": 8.06265643707672e-06, + "loss": 0.5814, + "step": 10162 + }, + { + "epoch": 0.31148093661885495, + "grad_norm": 1.710251388197853, + "learning_rate": 8.062264109368214e-06, + "loss": 0.6763, + "step": 10163 + }, + { + "epoch": 0.31151158514159616, + "grad_norm": 1.7145320551080157, + "learning_rate": 8.061871751486434e-06, + "loss": 0.7106, + "step": 10164 + }, + { + "epoch": 0.31154223366433736, + "grad_norm": 1.924403899391398, + "learning_rate": 8.06147936343525e-06, + "loss": 0.6448, + "step": 10165 + }, + { + "epoch": 0.31157288218707857, + "grad_norm": 1.841172014350238, + "learning_rate": 8.061086945218523e-06, + "loss": 0.7142, + "step": 10166 + }, + { + "epoch": 0.3116035307098198, + "grad_norm": 2.0163861831308503, + "learning_rate": 8.060694496840127e-06, + "loss": 0.7394, + "step": 10167 + }, + { + "epoch": 0.311634179232561, + "grad_norm": 1.942901485936241, + "learning_rate": 8.060302018303923e-06, + "loss": 0.6663, + "step": 10168 + }, + { + "epoch": 0.3116648277553022, + "grad_norm": 1.7345590705464125, + "learning_rate": 8.059909509613781e-06, + "loss": 0.727, + "step": 10169 + }, + { + "epoch": 0.3116954762780434, + "grad_norm": 1.5418040243326685, + "learning_rate": 8.059516970773566e-06, + "loss": 0.7315, + "step": 10170 + }, + { + "epoch": 0.3117261248007846, + "grad_norm": 1.752457025470043, + "learning_rate": 8.05912440178715e-06, + "loss": 0.6677, + "step": 10171 + }, + { + "epoch": 0.3117567733235258, + "grad_norm": 1.5810352316104213, + "learning_rate": 8.058731802658397e-06, + "loss": 0.5952, + "step": 10172 + }, + { + "epoch": 0.311787421846267, + "grad_norm": 1.7145015268000687, + "learning_rate": 8.058339173391179e-06, + "loss": 0.5935, + "step": 10173 + }, + { + "epoch": 0.3118180703690082, + "grad_norm": 1.9102317297802698, + "learning_rate": 8.05794651398936e-06, + "loss": 0.7111, + "step": 10174 + }, + { + "epoch": 0.3118487188917494, + "grad_norm": 1.5423601766217383, + "learning_rate": 8.057553824456812e-06, + "loss": 0.6758, + "step": 10175 + }, + { + "epoch": 0.3118793674144906, + "grad_norm": 1.9196314939173196, + "learning_rate": 8.057161104797404e-06, + "loss": 0.696, + "step": 10176 + }, + { + "epoch": 0.31191001593723183, + "grad_norm": 1.7002529909467825, + "learning_rate": 8.056768355015008e-06, + "loss": 0.7178, + "step": 10177 + }, + { + "epoch": 0.31194066445997304, + "grad_norm": 1.8182810330532906, + "learning_rate": 8.056375575113489e-06, + "loss": 0.7321, + "step": 10178 + }, + { + "epoch": 0.31197131298271424, + "grad_norm": 1.8943885599754187, + "learning_rate": 8.055982765096719e-06, + "loss": 0.6391, + "step": 10179 + }, + { + "epoch": 0.31200196150545545, + "grad_norm": 1.7204723047765342, + "learning_rate": 8.05558992496857e-06, + "loss": 0.7171, + "step": 10180 + }, + { + "epoch": 0.31203261002819666, + "grad_norm": 1.592987311248771, + "learning_rate": 8.05519705473291e-06, + "loss": 0.665, + "step": 10181 + }, + { + "epoch": 0.31206325855093786, + "grad_norm": 1.663425809598342, + "learning_rate": 8.054804154393614e-06, + "loss": 0.6328, + "step": 10182 + }, + { + "epoch": 0.31209390707367907, + "grad_norm": 1.6846222139570486, + "learning_rate": 
8.05441122395455e-06, + "loss": 0.755, + "step": 10183 + }, + { + "epoch": 0.3121245555964203, + "grad_norm": 2.0454098602131694, + "learning_rate": 8.054018263419591e-06, + "loss": 0.6946, + "step": 10184 + }, + { + "epoch": 0.3121552041191615, + "grad_norm": 1.8169944904018398, + "learning_rate": 8.053625272792609e-06, + "loss": 0.642, + "step": 10185 + }, + { + "epoch": 0.3121858526419027, + "grad_norm": 1.6582364815939545, + "learning_rate": 8.053232252077475e-06, + "loss": 0.649, + "step": 10186 + }, + { + "epoch": 0.3122165011646439, + "grad_norm": 1.4828999093079442, + "learning_rate": 8.052839201278063e-06, + "loss": 0.7458, + "step": 10187 + }, + { + "epoch": 0.3122471496873851, + "grad_norm": 1.4803472649372866, + "learning_rate": 8.052446120398246e-06, + "loss": 0.4887, + "step": 10188 + }, + { + "epoch": 0.31227779821012625, + "grad_norm": 1.815739206556174, + "learning_rate": 8.052053009441893e-06, + "loss": 0.8074, + "step": 10189 + }, + { + "epoch": 0.31230844673286745, + "grad_norm": 2.3731385626081063, + "learning_rate": 8.051659868412885e-06, + "loss": 0.779, + "step": 10190 + }, + { + "epoch": 0.31233909525560866, + "grad_norm": 1.9137756073461307, + "learning_rate": 8.051266697315087e-06, + "loss": 0.6179, + "step": 10191 + }, + { + "epoch": 0.31236974377834986, + "grad_norm": 1.6889608036053037, + "learning_rate": 8.050873496152382e-06, + "loss": 0.7221, + "step": 10192 + }, + { + "epoch": 0.31240039230109107, + "grad_norm": 1.70177629354553, + "learning_rate": 8.050480264928637e-06, + "loss": 0.6969, + "step": 10193 + }, + { + "epoch": 0.3124310408238323, + "grad_norm": 1.772157532373174, + "learning_rate": 8.050087003647731e-06, + "loss": 0.6797, + "step": 10194 + }, + { + "epoch": 0.3124616893465735, + "grad_norm": 1.753955730478708, + "learning_rate": 8.049693712313537e-06, + "loss": 0.6418, + "step": 10195 + }, + { + "epoch": 0.3124923378693147, + "grad_norm": 1.8012183097492118, + "learning_rate": 8.049300390929931e-06, + "loss": 0.757, + "step": 10196 + }, + { + "epoch": 0.3125229863920559, + "grad_norm": 1.7181063732503596, + "learning_rate": 8.048907039500786e-06, + "loss": 0.7204, + "step": 10197 + }, + { + "epoch": 0.3125536349147971, + "grad_norm": 1.6438378781397014, + "learning_rate": 8.048513658029981e-06, + "loss": 0.7182, + "step": 10198 + }, + { + "epoch": 0.3125842834375383, + "grad_norm": 1.7565382173805995, + "learning_rate": 8.048120246521392e-06, + "loss": 0.6486, + "step": 10199 + }, + { + "epoch": 0.3126149319602795, + "grad_norm": 1.6615955051983922, + "learning_rate": 8.047726804978893e-06, + "loss": 0.6313, + "step": 10200 + }, + { + "epoch": 0.3126455804830207, + "grad_norm": 1.7309263548472036, + "learning_rate": 8.047333333406363e-06, + "loss": 0.6723, + "step": 10201 + }, + { + "epoch": 0.3126762290057619, + "grad_norm": 1.7095933681358442, + "learning_rate": 8.046939831807678e-06, + "loss": 0.7777, + "step": 10202 + }, + { + "epoch": 0.3127068775285031, + "grad_norm": 1.6947453711771643, + "learning_rate": 8.046546300186714e-06, + "loss": 0.6687, + "step": 10203 + }, + { + "epoch": 0.31273752605124433, + "grad_norm": 1.804425029221585, + "learning_rate": 8.04615273854735e-06, + "loss": 0.6597, + "step": 10204 + }, + { + "epoch": 0.31276817457398554, + "grad_norm": 1.9665916720510055, + "learning_rate": 8.045759146893465e-06, + "loss": 0.686, + "step": 10205 + }, + { + "epoch": 0.31279882309672674, + "grad_norm": 1.8430605953904968, + "learning_rate": 8.045365525228934e-06, + "loss": 0.713, + "step": 10206 + }, + { + "epoch": 
0.31282947161946795, + "grad_norm": 1.854644405813173, + "learning_rate": 8.044971873557639e-06, + "loss": 0.7248, + "step": 10207 + }, + { + "epoch": 0.31286012014220915, + "grad_norm": 1.6139153414936482, + "learning_rate": 8.044578191883456e-06, + "loss": 0.6669, + "step": 10208 + }, + { + "epoch": 0.31289076866495036, + "grad_norm": 1.647428758242388, + "learning_rate": 8.044184480210267e-06, + "loss": 0.6124, + "step": 10209 + }, + { + "epoch": 0.31292141718769156, + "grad_norm": 1.609264663840641, + "learning_rate": 8.043790738541947e-06, + "loss": 0.7095, + "step": 10210 + }, + { + "epoch": 0.31295206571043277, + "grad_norm": 1.7051927107196754, + "learning_rate": 8.043396966882377e-06, + "loss": 0.6614, + "step": 10211 + }, + { + "epoch": 0.312982714233174, + "grad_norm": 1.2740378447380516, + "learning_rate": 8.04300316523544e-06, + "loss": 0.5108, + "step": 10212 + }, + { + "epoch": 0.3130133627559152, + "grad_norm": 1.135124129218952, + "learning_rate": 8.042609333605017e-06, + "loss": 0.4919, + "step": 10213 + }, + { + "epoch": 0.3130440112786564, + "grad_norm": 1.758425511979037, + "learning_rate": 8.042215471994981e-06, + "loss": 0.7187, + "step": 10214 + }, + { + "epoch": 0.3130746598013976, + "grad_norm": 1.731461880898835, + "learning_rate": 8.04182158040922e-06, + "loss": 0.7101, + "step": 10215 + }, + { + "epoch": 0.3131053083241388, + "grad_norm": 1.8599544476126204, + "learning_rate": 8.041427658851613e-06, + "loss": 0.5975, + "step": 10216 + }, + { + "epoch": 0.31313595684688, + "grad_norm": 1.1957033883085273, + "learning_rate": 8.04103370732604e-06, + "loss": 0.4892, + "step": 10217 + }, + { + "epoch": 0.3131666053696212, + "grad_norm": 1.0850601348975035, + "learning_rate": 8.040639725836384e-06, + "loss": 0.4817, + "step": 10218 + }, + { + "epoch": 0.3131972538923624, + "grad_norm": 1.6753144386032548, + "learning_rate": 8.040245714386528e-06, + "loss": 0.7072, + "step": 10219 + }, + { + "epoch": 0.31322790241510357, + "grad_norm": 1.6922184451439382, + "learning_rate": 8.03985167298035e-06, + "loss": 0.7178, + "step": 10220 + }, + { + "epoch": 0.31325855093784477, + "grad_norm": 1.7022490198572477, + "learning_rate": 8.039457601621738e-06, + "loss": 0.7575, + "step": 10221 + }, + { + "epoch": 0.313289199460586, + "grad_norm": 1.825165850100471, + "learning_rate": 8.039063500314572e-06, + "loss": 0.6727, + "step": 10222 + }, + { + "epoch": 0.3133198479833272, + "grad_norm": 1.6411897710156371, + "learning_rate": 8.038669369062736e-06, + "loss": 0.759, + "step": 10223 + }, + { + "epoch": 0.3133504965060684, + "grad_norm": 0.8601215195223313, + "learning_rate": 8.038275207870114e-06, + "loss": 0.4676, + "step": 10224 + }, + { + "epoch": 0.3133811450288096, + "grad_norm": 1.9893837147896665, + "learning_rate": 8.037881016740587e-06, + "loss": 0.7555, + "step": 10225 + }, + { + "epoch": 0.3134117935515508, + "grad_norm": 1.7840220417464825, + "learning_rate": 8.037486795678042e-06, + "loss": 0.7659, + "step": 10226 + }, + { + "epoch": 0.313442442074292, + "grad_norm": 1.8518270333165352, + "learning_rate": 8.037092544686364e-06, + "loss": 0.7505, + "step": 10227 + }, + { + "epoch": 0.3134730905970332, + "grad_norm": 1.716344784003412, + "learning_rate": 8.036698263769434e-06, + "loss": 0.6782, + "step": 10228 + }, + { + "epoch": 0.3135037391197744, + "grad_norm": 1.6918442237026052, + "learning_rate": 8.03630395293114e-06, + "loss": 0.706, + "step": 10229 + }, + { + "epoch": 0.3135343876425156, + "grad_norm": 1.648072183405197, + "learning_rate": 
8.035909612175366e-06, + "loss": 0.6091, + "step": 10230 + }, + { + "epoch": 0.3135650361652568, + "grad_norm": 1.7763922714045588, + "learning_rate": 8.035515241505999e-06, + "loss": 0.7361, + "step": 10231 + }, + { + "epoch": 0.31359568468799803, + "grad_norm": 1.7951363810097214, + "learning_rate": 8.035120840926922e-06, + "loss": 0.7397, + "step": 10232 + }, + { + "epoch": 0.31362633321073924, + "grad_norm": 1.583688258663894, + "learning_rate": 8.034726410442024e-06, + "loss": 0.6153, + "step": 10233 + }, + { + "epoch": 0.31365698173348044, + "grad_norm": 1.7271782001319476, + "learning_rate": 8.034331950055188e-06, + "loss": 0.6491, + "step": 10234 + }, + { + "epoch": 0.31368763025622165, + "grad_norm": 1.8049319782403765, + "learning_rate": 8.033937459770306e-06, + "loss": 0.6985, + "step": 10235 + }, + { + "epoch": 0.31371827877896286, + "grad_norm": 1.7393159685426198, + "learning_rate": 8.03354293959126e-06, + "loss": 0.6293, + "step": 10236 + }, + { + "epoch": 0.31374892730170406, + "grad_norm": 1.680276420393459, + "learning_rate": 8.033148389521939e-06, + "loss": 0.7893, + "step": 10237 + }, + { + "epoch": 0.31377957582444527, + "grad_norm": 1.7490075828242515, + "learning_rate": 8.032753809566232e-06, + "loss": 0.6722, + "step": 10238 + }, + { + "epoch": 0.3138102243471865, + "grad_norm": 1.8932299769952265, + "learning_rate": 8.032359199728025e-06, + "loss": 0.8089, + "step": 10239 + }, + { + "epoch": 0.3138408728699277, + "grad_norm": 2.0978957714640196, + "learning_rate": 8.031964560011207e-06, + "loss": 0.6524, + "step": 10240 + }, + { + "epoch": 0.3138715213926689, + "grad_norm": 1.6665676810907033, + "learning_rate": 8.031569890419667e-06, + "loss": 0.6609, + "step": 10241 + }, + { + "epoch": 0.3139021699154101, + "grad_norm": 2.107507380615819, + "learning_rate": 8.031175190957295e-06, + "loss": 0.8009, + "step": 10242 + }, + { + "epoch": 0.3139328184381513, + "grad_norm": 1.6106450759129987, + "learning_rate": 8.030780461627975e-06, + "loss": 0.6325, + "step": 10243 + }, + { + "epoch": 0.3139634669608925, + "grad_norm": 1.501369590526032, + "learning_rate": 8.0303857024356e-06, + "loss": 0.6692, + "step": 10244 + }, + { + "epoch": 0.3139941154836337, + "grad_norm": 1.6579552141662497, + "learning_rate": 8.02999091338406e-06, + "loss": 0.7428, + "step": 10245 + }, + { + "epoch": 0.3140247640063749, + "grad_norm": 1.8542923925832295, + "learning_rate": 8.029596094477246e-06, + "loss": 0.721, + "step": 10246 + }, + { + "epoch": 0.3140554125291161, + "grad_norm": 1.9637589932681412, + "learning_rate": 8.029201245719046e-06, + "loss": 0.7278, + "step": 10247 + }, + { + "epoch": 0.3140860610518573, + "grad_norm": 1.8566052402267323, + "learning_rate": 8.028806367113349e-06, + "loss": 0.7003, + "step": 10248 + }, + { + "epoch": 0.31411670957459853, + "grad_norm": 1.9072467322360744, + "learning_rate": 8.028411458664047e-06, + "loss": 0.7189, + "step": 10249 + }, + { + "epoch": 0.31414735809733974, + "grad_norm": 2.0038107527631004, + "learning_rate": 8.028016520375036e-06, + "loss": 0.707, + "step": 10250 + }, + { + "epoch": 0.3141780066200809, + "grad_norm": 1.042093513231399, + "learning_rate": 8.0276215522502e-06, + "loss": 0.482, + "step": 10251 + }, + { + "epoch": 0.3142086551428221, + "grad_norm": 0.8673545953907261, + "learning_rate": 8.027226554293435e-06, + "loss": 0.4859, + "step": 10252 + }, + { + "epoch": 0.3142393036655633, + "grad_norm": 1.6132067008062996, + "learning_rate": 8.026831526508633e-06, + "loss": 0.6826, + "step": 10253 + }, + { + "epoch": 
0.3142699521883045, + "grad_norm": 1.6567529732917545, + "learning_rate": 8.026436468899686e-06, + "loss": 0.6776, + "step": 10254 + }, + { + "epoch": 0.3143006007110457, + "grad_norm": 1.5881416411951328, + "learning_rate": 8.026041381470486e-06, + "loss": 0.6852, + "step": 10255 + }, + { + "epoch": 0.3143312492337869, + "grad_norm": 1.5820035370831755, + "learning_rate": 8.025646264224924e-06, + "loss": 0.5877, + "step": 10256 + }, + { + "epoch": 0.3143618977565281, + "grad_norm": 0.9392477608929719, + "learning_rate": 8.025251117166896e-06, + "loss": 0.4672, + "step": 10257 + }, + { + "epoch": 0.3143925462792693, + "grad_norm": 1.6908226821751586, + "learning_rate": 8.024855940300298e-06, + "loss": 0.7372, + "step": 10258 + }, + { + "epoch": 0.31442319480201053, + "grad_norm": 1.7509340998450176, + "learning_rate": 8.024460733629017e-06, + "loss": 0.7125, + "step": 10259 + }, + { + "epoch": 0.31445384332475174, + "grad_norm": 0.8424303633499397, + "learning_rate": 8.024065497156951e-06, + "loss": 0.4659, + "step": 10260 + }, + { + "epoch": 0.31448449184749294, + "grad_norm": 1.735460357417008, + "learning_rate": 8.023670230887995e-06, + "loss": 0.6743, + "step": 10261 + }, + { + "epoch": 0.31451514037023415, + "grad_norm": 1.9164321330789187, + "learning_rate": 8.023274934826042e-06, + "loss": 0.7622, + "step": 10262 + }, + { + "epoch": 0.31454578889297535, + "grad_norm": 1.5879805020727678, + "learning_rate": 8.022879608974988e-06, + "loss": 0.6804, + "step": 10263 + }, + { + "epoch": 0.31457643741571656, + "grad_norm": 0.8445760021786048, + "learning_rate": 8.022484253338726e-06, + "loss": 0.4784, + "step": 10264 + }, + { + "epoch": 0.31460708593845776, + "grad_norm": 2.0188878944229827, + "learning_rate": 8.022088867921157e-06, + "loss": 0.6504, + "step": 10265 + }, + { + "epoch": 0.31463773446119897, + "grad_norm": 1.6077070683938326, + "learning_rate": 8.02169345272617e-06, + "loss": 0.7552, + "step": 10266 + }, + { + "epoch": 0.3146683829839402, + "grad_norm": 1.5747879025383342, + "learning_rate": 8.021298007757663e-06, + "loss": 0.6245, + "step": 10267 + }, + { + "epoch": 0.3146990315066814, + "grad_norm": 1.5655018942918035, + "learning_rate": 8.020902533019536e-06, + "loss": 0.6679, + "step": 10268 + }, + { + "epoch": 0.3147296800294226, + "grad_norm": 1.8110885762150248, + "learning_rate": 8.020507028515684e-06, + "loss": 0.8777, + "step": 10269 + }, + { + "epoch": 0.3147603285521638, + "grad_norm": 1.8313327253917602, + "learning_rate": 8.020111494250003e-06, + "loss": 0.6926, + "step": 10270 + }, + { + "epoch": 0.314790977074905, + "grad_norm": 1.735603932701433, + "learning_rate": 8.019715930226389e-06, + "loss": 0.747, + "step": 10271 + }, + { + "epoch": 0.3148216255976462, + "grad_norm": 0.8812473739575681, + "learning_rate": 8.019320336448743e-06, + "loss": 0.4669, + "step": 10272 + }, + { + "epoch": 0.3148522741203874, + "grad_norm": 1.6966035135027122, + "learning_rate": 8.018924712920961e-06, + "loss": 0.7476, + "step": 10273 + }, + { + "epoch": 0.3148829226431286, + "grad_norm": 1.64806554518263, + "learning_rate": 8.018529059646941e-06, + "loss": 0.6197, + "step": 10274 + }, + { + "epoch": 0.3149135711658698, + "grad_norm": 0.8736661296314143, + "learning_rate": 8.018133376630582e-06, + "loss": 0.4847, + "step": 10275 + }, + { + "epoch": 0.314944219688611, + "grad_norm": 1.7269778890049055, + "learning_rate": 8.017737663875782e-06, + "loss": 0.7964, + "step": 10276 + }, + { + "epoch": 0.31497486821135223, + "grad_norm": 1.7455598391709817, + "learning_rate": 
8.01734192138644e-06, + "loss": 0.754, + "step": 10277 + }, + { + "epoch": 0.31500551673409344, + "grad_norm": 1.4602705113955312, + "learning_rate": 8.016946149166458e-06, + "loss": 0.5298, + "step": 10278 + }, + { + "epoch": 0.31503616525683464, + "grad_norm": 1.6005831076021697, + "learning_rate": 8.016550347219734e-06, + "loss": 0.6703, + "step": 10279 + }, + { + "epoch": 0.31506681377957585, + "grad_norm": 1.79807250798061, + "learning_rate": 8.016154515550165e-06, + "loss": 0.6304, + "step": 10280 + }, + { + "epoch": 0.31509746230231706, + "grad_norm": 1.510247594521405, + "learning_rate": 8.015758654161657e-06, + "loss": 0.6119, + "step": 10281 + }, + { + "epoch": 0.3151281108250582, + "grad_norm": 1.6592276927898075, + "learning_rate": 8.015362763058105e-06, + "loss": 0.7123, + "step": 10282 + }, + { + "epoch": 0.3151587593477994, + "grad_norm": 1.6983426852616887, + "learning_rate": 8.014966842243414e-06, + "loss": 0.6863, + "step": 10283 + }, + { + "epoch": 0.3151894078705406, + "grad_norm": 1.7134905942092016, + "learning_rate": 8.014570891721481e-06, + "loss": 0.6706, + "step": 10284 + }, + { + "epoch": 0.3152200563932818, + "grad_norm": 1.7181113823786547, + "learning_rate": 8.014174911496213e-06, + "loss": 0.732, + "step": 10285 + }, + { + "epoch": 0.31525070491602303, + "grad_norm": 1.47853210144812, + "learning_rate": 8.013778901571506e-06, + "loss": 0.6747, + "step": 10286 + }, + { + "epoch": 0.31528135343876423, + "grad_norm": 1.6848789542051483, + "learning_rate": 8.013382861951264e-06, + "loss": 0.6685, + "step": 10287 + }, + { + "epoch": 0.31531200196150544, + "grad_norm": 1.7597538960720949, + "learning_rate": 8.01298679263939e-06, + "loss": 0.7102, + "step": 10288 + }, + { + "epoch": 0.31534265048424664, + "grad_norm": 1.6389878587084445, + "learning_rate": 8.012590693639786e-06, + "loss": 0.654, + "step": 10289 + }, + { + "epoch": 0.31537329900698785, + "grad_norm": 1.8146355310582671, + "learning_rate": 8.012194564956357e-06, + "loss": 0.7711, + "step": 10290 + }, + { + "epoch": 0.31540394752972906, + "grad_norm": 1.7446195287490116, + "learning_rate": 8.011798406593004e-06, + "loss": 0.7819, + "step": 10291 + }, + { + "epoch": 0.31543459605247026, + "grad_norm": 1.6249350817266666, + "learning_rate": 8.011402218553628e-06, + "loss": 0.7249, + "step": 10292 + }, + { + "epoch": 0.31546524457521147, + "grad_norm": 1.5320020769424063, + "learning_rate": 8.011006000842137e-06, + "loss": 0.7798, + "step": 10293 + }, + { + "epoch": 0.3154958930979527, + "grad_norm": 1.6308069610417335, + "learning_rate": 8.010609753462433e-06, + "loss": 0.6381, + "step": 10294 + }, + { + "epoch": 0.3155265416206939, + "grad_norm": 1.0508510812441658, + "learning_rate": 8.010213476418422e-06, + "loss": 0.4771, + "step": 10295 + }, + { + "epoch": 0.3155571901434351, + "grad_norm": 1.730042798963716, + "learning_rate": 8.009817169714007e-06, + "loss": 0.6367, + "step": 10296 + }, + { + "epoch": 0.3155878386661763, + "grad_norm": 1.7125244233428862, + "learning_rate": 8.009420833353094e-06, + "loss": 0.7311, + "step": 10297 + }, + { + "epoch": 0.3156184871889175, + "grad_norm": 1.6360262571299982, + "learning_rate": 8.009024467339586e-06, + "loss": 0.6911, + "step": 10298 + }, + { + "epoch": 0.3156491357116587, + "grad_norm": 1.6948290811697913, + "learning_rate": 8.00862807167739e-06, + "loss": 0.6438, + "step": 10299 + }, + { + "epoch": 0.3156797842343999, + "grad_norm": 0.8014656659391767, + "learning_rate": 8.008231646370412e-06, + "loss": 0.4861, + "step": 10300 + }, + { + "epoch": 
0.3157104327571411, + "grad_norm": 1.5639154784312095, + "learning_rate": 8.007835191422559e-06, + "loss": 0.6675, + "step": 10301 + }, + { + "epoch": 0.3157410812798823, + "grad_norm": 1.561464738497131, + "learning_rate": 8.007438706837735e-06, + "loss": 0.7194, + "step": 10302 + }, + { + "epoch": 0.3157717298026235, + "grad_norm": 1.6722182972821509, + "learning_rate": 8.007042192619849e-06, + "loss": 0.6881, + "step": 10303 + }, + { + "epoch": 0.31580237832536473, + "grad_norm": 1.7394958405961016, + "learning_rate": 8.006645648772806e-06, + "loss": 0.7469, + "step": 10304 + }, + { + "epoch": 0.31583302684810594, + "grad_norm": 1.6549701626955817, + "learning_rate": 8.006249075300515e-06, + "loss": 0.631, + "step": 10305 + }, + { + "epoch": 0.31586367537084714, + "grad_norm": 1.8633868254520898, + "learning_rate": 8.005852472206883e-06, + "loss": 0.6612, + "step": 10306 + }, + { + "epoch": 0.31589432389358835, + "grad_norm": 1.756358273196184, + "learning_rate": 8.005455839495816e-06, + "loss": 0.6046, + "step": 10307 + }, + { + "epoch": 0.31592497241632955, + "grad_norm": 1.6089866986164802, + "learning_rate": 8.005059177171225e-06, + "loss": 0.6343, + "step": 10308 + }, + { + "epoch": 0.31595562093907076, + "grad_norm": 1.7625762971469547, + "learning_rate": 8.004662485237016e-06, + "loss": 0.6196, + "step": 10309 + }, + { + "epoch": 0.31598626946181196, + "grad_norm": 1.6442661043442701, + "learning_rate": 8.004265763697099e-06, + "loss": 0.7069, + "step": 10310 + }, + { + "epoch": 0.31601691798455317, + "grad_norm": 1.9072602202187123, + "learning_rate": 8.003869012555383e-06, + "loss": 0.6883, + "step": 10311 + }, + { + "epoch": 0.3160475665072944, + "grad_norm": 1.7303012472711905, + "learning_rate": 8.00347223181578e-06, + "loss": 0.6936, + "step": 10312 + }, + { + "epoch": 0.3160782150300355, + "grad_norm": 0.8910514223137476, + "learning_rate": 8.003075421482191e-06, + "loss": 0.4617, + "step": 10313 + }, + { + "epoch": 0.31610886355277673, + "grad_norm": 1.8865855124473627, + "learning_rate": 8.002678581558534e-06, + "loss": 0.7311, + "step": 10314 + }, + { + "epoch": 0.31613951207551794, + "grad_norm": 1.539098422577212, + "learning_rate": 8.002281712048717e-06, + "loss": 0.705, + "step": 10315 + }, + { + "epoch": 0.31617016059825914, + "grad_norm": 1.637688070155546, + "learning_rate": 8.00188481295665e-06, + "loss": 0.6644, + "step": 10316 + }, + { + "epoch": 0.31620080912100035, + "grad_norm": 1.9839459488532587, + "learning_rate": 8.001487884286245e-06, + "loss": 0.6829, + "step": 10317 + }, + { + "epoch": 0.31623145764374155, + "grad_norm": 1.5454168070043421, + "learning_rate": 8.00109092604141e-06, + "loss": 0.6127, + "step": 10318 + }, + { + "epoch": 0.31626210616648276, + "grad_norm": 1.667398142926777, + "learning_rate": 8.00069393822606e-06, + "loss": 0.6702, + "step": 10319 + }, + { + "epoch": 0.31629275468922396, + "grad_norm": 0.7665621436141825, + "learning_rate": 8.000296920844102e-06, + "loss": 0.4674, + "step": 10320 + }, + { + "epoch": 0.31632340321196517, + "grad_norm": 1.9264790254857893, + "learning_rate": 7.999899873899453e-06, + "loss": 0.7151, + "step": 10321 + }, + { + "epoch": 0.3163540517347064, + "grad_norm": 1.7644245232310627, + "learning_rate": 7.999502797396024e-06, + "loss": 0.7842, + "step": 10322 + }, + { + "epoch": 0.3163847002574476, + "grad_norm": 1.4552787418939546, + "learning_rate": 7.999105691337725e-06, + "loss": 0.6255, + "step": 10323 + }, + { + "epoch": 0.3164153487801888, + "grad_norm": 1.766365582477874, + "learning_rate": 
7.99870855572847e-06, + "loss": 0.7161, + "step": 10324 + }, + { + "epoch": 0.31644599730293, + "grad_norm": 1.9857084020219777, + "learning_rate": 7.998311390572173e-06, + "loss": 0.8855, + "step": 10325 + }, + { + "epoch": 0.3164766458256712, + "grad_norm": 1.9678292031595717, + "learning_rate": 7.997914195872746e-06, + "loss": 0.7714, + "step": 10326 + }, + { + "epoch": 0.3165072943484124, + "grad_norm": 1.4408761625639688, + "learning_rate": 7.997516971634106e-06, + "loss": 0.7052, + "step": 10327 + }, + { + "epoch": 0.3165379428711536, + "grad_norm": 1.9065130462253064, + "learning_rate": 7.99711971786016e-06, + "loss": 0.8689, + "step": 10328 + }, + { + "epoch": 0.3165685913938948, + "grad_norm": 1.6770637179203127, + "learning_rate": 7.996722434554828e-06, + "loss": 0.6687, + "step": 10329 + }, + { + "epoch": 0.316599239916636, + "grad_norm": 1.572683229975785, + "learning_rate": 7.996325121722024e-06, + "loss": 0.611, + "step": 10330 + }, + { + "epoch": 0.3166298884393772, + "grad_norm": 0.81494970079232, + "learning_rate": 7.995927779365662e-06, + "loss": 0.4738, + "step": 10331 + }, + { + "epoch": 0.31666053696211843, + "grad_norm": 1.607302767349199, + "learning_rate": 7.995530407489659e-06, + "loss": 0.6142, + "step": 10332 + }, + { + "epoch": 0.31669118548485964, + "grad_norm": 1.8122017921855118, + "learning_rate": 7.995133006097923e-06, + "loss": 0.7134, + "step": 10333 + }, + { + "epoch": 0.31672183400760084, + "grad_norm": 1.97824285069799, + "learning_rate": 7.99473557519438e-06, + "loss": 0.6665, + "step": 10334 + }, + { + "epoch": 0.31675248253034205, + "grad_norm": 0.8001512384294002, + "learning_rate": 7.99433811478294e-06, + "loss": 0.4705, + "step": 10335 + }, + { + "epoch": 0.31678313105308326, + "grad_norm": 1.7195376330147776, + "learning_rate": 7.99394062486752e-06, + "loss": 0.7274, + "step": 10336 + }, + { + "epoch": 0.31681377957582446, + "grad_norm": 1.7176875508879557, + "learning_rate": 7.993543105452036e-06, + "loss": 0.6528, + "step": 10337 + }, + { + "epoch": 0.31684442809856567, + "grad_norm": 1.8758644278518726, + "learning_rate": 7.993145556540407e-06, + "loss": 0.639, + "step": 10338 + }, + { + "epoch": 0.3168750766213069, + "grad_norm": 0.7816363012357664, + "learning_rate": 7.99274797813655e-06, + "loss": 0.4678, + "step": 10339 + }, + { + "epoch": 0.3169057251440481, + "grad_norm": 1.7688204912567471, + "learning_rate": 7.99235037024438e-06, + "loss": 0.7073, + "step": 10340 + }, + { + "epoch": 0.3169363736667893, + "grad_norm": 2.025790571356503, + "learning_rate": 7.991952732867817e-06, + "loss": 0.7395, + "step": 10341 + }, + { + "epoch": 0.3169670221895305, + "grad_norm": 1.740908302002349, + "learning_rate": 7.991555066010777e-06, + "loss": 0.6905, + "step": 10342 + }, + { + "epoch": 0.3169976707122717, + "grad_norm": 1.5802691899219619, + "learning_rate": 7.99115736967718e-06, + "loss": 0.5938, + "step": 10343 + }, + { + "epoch": 0.31702831923501285, + "grad_norm": 1.553541142680813, + "learning_rate": 7.990759643870944e-06, + "loss": 0.6133, + "step": 10344 + }, + { + "epoch": 0.31705896775775405, + "grad_norm": 0.7929985080783944, + "learning_rate": 7.990361888595987e-06, + "loss": 0.4685, + "step": 10345 + }, + { + "epoch": 0.31708961628049526, + "grad_norm": 1.6249206911947536, + "learning_rate": 7.989964103856232e-06, + "loss": 0.6286, + "step": 10346 + }, + { + "epoch": 0.31712026480323646, + "grad_norm": 2.1182595416196253, + "learning_rate": 7.989566289655596e-06, + "loss": 0.6905, + "step": 10347 + }, + { + "epoch": 
0.31715091332597767, + "grad_norm": 1.7209542167066818, + "learning_rate": 7.989168445997994e-06, + "loss": 0.7435, + "step": 10348 + }, + { + "epoch": 0.3171815618487189, + "grad_norm": 1.7449322349102387, + "learning_rate": 7.988770572887353e-06, + "loss": 0.6238, + "step": 10349 + }, + { + "epoch": 0.3172122103714601, + "grad_norm": 1.6817694327653443, + "learning_rate": 7.988372670327591e-06, + "loss": 0.6454, + "step": 10350 + }, + { + "epoch": 0.3172428588942013, + "grad_norm": 1.75060870987441, + "learning_rate": 7.987974738322629e-06, + "loss": 0.6471, + "step": 10351 + }, + { + "epoch": 0.3172735074169425, + "grad_norm": 1.8727430674448413, + "learning_rate": 7.987576776876387e-06, + "loss": 0.6687, + "step": 10352 + }, + { + "epoch": 0.3173041559396837, + "grad_norm": 1.692444332973689, + "learning_rate": 7.987178785992787e-06, + "loss": 0.7389, + "step": 10353 + }, + { + "epoch": 0.3173348044624249, + "grad_norm": 1.4498438199781447, + "learning_rate": 7.98678076567575e-06, + "loss": 0.6406, + "step": 10354 + }, + { + "epoch": 0.3173654529851661, + "grad_norm": 2.096257773688264, + "learning_rate": 7.986382715929196e-06, + "loss": 0.853, + "step": 10355 + }, + { + "epoch": 0.3173961015079073, + "grad_norm": 1.9023827965113733, + "learning_rate": 7.985984636757051e-06, + "loss": 0.7445, + "step": 10356 + }, + { + "epoch": 0.3174267500306485, + "grad_norm": 1.657472324493234, + "learning_rate": 7.985586528163234e-06, + "loss": 0.7044, + "step": 10357 + }, + { + "epoch": 0.3174573985533897, + "grad_norm": 1.909352852322529, + "learning_rate": 7.98518839015167e-06, + "loss": 0.8324, + "step": 10358 + }, + { + "epoch": 0.31748804707613093, + "grad_norm": 1.750786227838019, + "learning_rate": 7.984790222726281e-06, + "loss": 0.6952, + "step": 10359 + }, + { + "epoch": 0.31751869559887214, + "grad_norm": 1.9165948937360995, + "learning_rate": 7.984392025890991e-06, + "loss": 0.7708, + "step": 10360 + }, + { + "epoch": 0.31754934412161334, + "grad_norm": 1.7652190966018344, + "learning_rate": 7.98399379964972e-06, + "loss": 0.6464, + "step": 10361 + }, + { + "epoch": 0.31757999264435455, + "grad_norm": 2.0289464173391956, + "learning_rate": 7.983595544006398e-06, + "loss": 0.7719, + "step": 10362 + }, + { + "epoch": 0.31761064116709575, + "grad_norm": 1.5649319923731555, + "learning_rate": 7.983197258964943e-06, + "loss": 0.6431, + "step": 10363 + }, + { + "epoch": 0.31764128968983696, + "grad_norm": 2.0059934260132377, + "learning_rate": 7.982798944529284e-06, + "loss": 0.7133, + "step": 10364 + }, + { + "epoch": 0.31767193821257816, + "grad_norm": 1.7558964756350466, + "learning_rate": 7.982400600703344e-06, + "loss": 0.6681, + "step": 10365 + }, + { + "epoch": 0.31770258673531937, + "grad_norm": 1.5741926718857056, + "learning_rate": 7.982002227491045e-06, + "loss": 0.6734, + "step": 10366 + }, + { + "epoch": 0.3177332352580606, + "grad_norm": 1.675904145486813, + "learning_rate": 7.981603824896319e-06, + "loss": 0.6122, + "step": 10367 + }, + { + "epoch": 0.3177638837808018, + "grad_norm": 1.4466394031173586, + "learning_rate": 7.981205392923085e-06, + "loss": 0.6418, + "step": 10368 + }, + { + "epoch": 0.317794532303543, + "grad_norm": 1.5976783615349264, + "learning_rate": 7.980806931575273e-06, + "loss": 0.6105, + "step": 10369 + }, + { + "epoch": 0.3178251808262842, + "grad_norm": 1.6816665402857662, + "learning_rate": 7.980408440856806e-06, + "loss": 0.6661, + "step": 10370 + }, + { + "epoch": 0.3178558293490254, + "grad_norm": 1.930723146114888, + "learning_rate": 
7.980009920771613e-06, + "loss": 0.7152, + "step": 10371 + }, + { + "epoch": 0.3178864778717666, + "grad_norm": 1.7329764634926952, + "learning_rate": 7.979611371323619e-06, + "loss": 0.6796, + "step": 10372 + }, + { + "epoch": 0.3179171263945078, + "grad_norm": 1.5055272027045123, + "learning_rate": 7.979212792516752e-06, + "loss": 0.6399, + "step": 10373 + }, + { + "epoch": 0.317947774917249, + "grad_norm": 1.8854174790485891, + "learning_rate": 7.978814184354941e-06, + "loss": 0.7121, + "step": 10374 + }, + { + "epoch": 0.31797842343999017, + "grad_norm": 1.6088869098782284, + "learning_rate": 7.978415546842108e-06, + "loss": 0.5532, + "step": 10375 + }, + { + "epoch": 0.31800907196273137, + "grad_norm": 1.7110140775350307, + "learning_rate": 7.978016879982188e-06, + "loss": 0.7279, + "step": 10376 + }, + { + "epoch": 0.3180397204854726, + "grad_norm": 0.8391998503563887, + "learning_rate": 7.977618183779106e-06, + "loss": 0.4565, + "step": 10377 + }, + { + "epoch": 0.3180703690082138, + "grad_norm": 1.7287076774564438, + "learning_rate": 7.977219458236787e-06, + "loss": 0.7426, + "step": 10378 + }, + { + "epoch": 0.318101017530955, + "grad_norm": 1.9641506192746319, + "learning_rate": 7.976820703359166e-06, + "loss": 0.6705, + "step": 10379 + }, + { + "epoch": 0.3181316660536962, + "grad_norm": 1.765195560006129, + "learning_rate": 7.976421919150165e-06, + "loss": 0.6855, + "step": 10380 + }, + { + "epoch": 0.3181623145764374, + "grad_norm": 1.6888013702103983, + "learning_rate": 7.976023105613722e-06, + "loss": 0.6426, + "step": 10381 + }, + { + "epoch": 0.3181929630991786, + "grad_norm": 1.8830420941802792, + "learning_rate": 7.975624262753758e-06, + "loss": 0.74, + "step": 10382 + }, + { + "epoch": 0.3182236116219198, + "grad_norm": 1.6579071500371927, + "learning_rate": 7.97522539057421e-06, + "loss": 0.6411, + "step": 10383 + }, + { + "epoch": 0.318254260144661, + "grad_norm": 1.7105203458933271, + "learning_rate": 7.974826489079002e-06, + "loss": 0.6449, + "step": 10384 + }, + { + "epoch": 0.3182849086674022, + "grad_norm": 1.9548918632183006, + "learning_rate": 7.97442755827207e-06, + "loss": 0.819, + "step": 10385 + }, + { + "epoch": 0.31831555719014343, + "grad_norm": 1.722573251270712, + "learning_rate": 7.97402859815734e-06, + "loss": 0.7158, + "step": 10386 + }, + { + "epoch": 0.31834620571288463, + "grad_norm": 1.6584745224505726, + "learning_rate": 7.973629608738746e-06, + "loss": 0.679, + "step": 10387 + }, + { + "epoch": 0.31837685423562584, + "grad_norm": 1.6925868637668038, + "learning_rate": 7.97323059002022e-06, + "loss": 0.6234, + "step": 10388 + }, + { + "epoch": 0.31840750275836704, + "grad_norm": 1.8014290448725159, + "learning_rate": 7.972831542005692e-06, + "loss": 0.7222, + "step": 10389 + }, + { + "epoch": 0.31843815128110825, + "grad_norm": 1.7600257607911083, + "learning_rate": 7.972432464699093e-06, + "loss": 0.734, + "step": 10390 + }, + { + "epoch": 0.31846879980384946, + "grad_norm": 1.878985378914163, + "learning_rate": 7.972033358104355e-06, + "loss": 0.7146, + "step": 10391 + }, + { + "epoch": 0.31849944832659066, + "grad_norm": 1.9053769143398445, + "learning_rate": 7.971634222225416e-06, + "loss": 0.6592, + "step": 10392 + }, + { + "epoch": 0.31853009684933187, + "grad_norm": 1.7099177761531075, + "learning_rate": 7.971235057066202e-06, + "loss": 0.7441, + "step": 10393 + }, + { + "epoch": 0.3185607453720731, + "grad_norm": 1.781893970713319, + "learning_rate": 7.97083586263065e-06, + "loss": 0.6674, + "step": 10394 + }, + { + "epoch": 
0.3185913938948143, + "grad_norm": 0.9161831925100504, + "learning_rate": 7.970436638922691e-06, + "loss": 0.4768, + "step": 10395 + }, + { + "epoch": 0.3186220424175555, + "grad_norm": 0.7942368030167392, + "learning_rate": 7.97003738594626e-06, + "loss": 0.4704, + "step": 10396 + }, + { + "epoch": 0.3186526909402967, + "grad_norm": 1.827646912577226, + "learning_rate": 7.969638103705291e-06, + "loss": 0.7191, + "step": 10397 + }, + { + "epoch": 0.3186833394630379, + "grad_norm": 1.8174170275143124, + "learning_rate": 7.969238792203719e-06, + "loss": 0.7361, + "step": 10398 + }, + { + "epoch": 0.3187139879857791, + "grad_norm": 1.8793262639014814, + "learning_rate": 7.968839451445477e-06, + "loss": 0.627, + "step": 10399 + }, + { + "epoch": 0.3187446365085203, + "grad_norm": 1.474761778649149, + "learning_rate": 7.968440081434499e-06, + "loss": 0.6959, + "step": 10400 + }, + { + "epoch": 0.3187752850312615, + "grad_norm": 1.8172577275389667, + "learning_rate": 7.96804068217472e-06, + "loss": 0.747, + "step": 10401 + }, + { + "epoch": 0.3188059335540027, + "grad_norm": 1.803565131551222, + "learning_rate": 7.96764125367008e-06, + "loss": 0.6914, + "step": 10402 + }, + { + "epoch": 0.3188365820767439, + "grad_norm": 1.6932729329997813, + "learning_rate": 7.96724179592451e-06, + "loss": 0.664, + "step": 10403 + }, + { + "epoch": 0.31886723059948513, + "grad_norm": 1.8786827736491372, + "learning_rate": 7.966842308941948e-06, + "loss": 0.8192, + "step": 10404 + }, + { + "epoch": 0.31889787912222634, + "grad_norm": 1.7781214779348806, + "learning_rate": 7.966442792726328e-06, + "loss": 0.6448, + "step": 10405 + }, + { + "epoch": 0.3189285276449675, + "grad_norm": 1.2127451663107114, + "learning_rate": 7.96604324728159e-06, + "loss": 0.5116, + "step": 10406 + }, + { + "epoch": 0.3189591761677087, + "grad_norm": 1.68303276702153, + "learning_rate": 7.965643672611667e-06, + "loss": 0.7017, + "step": 10407 + }, + { + "epoch": 0.3189898246904499, + "grad_norm": 1.9749546631764883, + "learning_rate": 7.965244068720501e-06, + "loss": 0.6021, + "step": 10408 + }, + { + "epoch": 0.3190204732131911, + "grad_norm": 1.6507062546492564, + "learning_rate": 7.964844435612025e-06, + "loss": 0.7056, + "step": 10409 + }, + { + "epoch": 0.3190511217359323, + "grad_norm": 0.7824406090297301, + "learning_rate": 7.964444773290177e-06, + "loss": 0.4779, + "step": 10410 + }, + { + "epoch": 0.3190817702586735, + "grad_norm": 1.561146153268163, + "learning_rate": 7.964045081758898e-06, + "loss": 0.7149, + "step": 10411 + }, + { + "epoch": 0.3191124187814147, + "grad_norm": 1.9967931500797373, + "learning_rate": 7.963645361022123e-06, + "loss": 0.6884, + "step": 10412 + }, + { + "epoch": 0.3191430673041559, + "grad_norm": 1.135794423036662, + "learning_rate": 7.963245611083792e-06, + "loss": 0.4695, + "step": 10413 + }, + { + "epoch": 0.31917371582689713, + "grad_norm": 1.751291872624916, + "learning_rate": 7.962845831947845e-06, + "loss": 0.6163, + "step": 10414 + }, + { + "epoch": 0.31920436434963834, + "grad_norm": 1.711653298716096, + "learning_rate": 7.96244602361822e-06, + "loss": 0.6899, + "step": 10415 + }, + { + "epoch": 0.31923501287237954, + "grad_norm": 1.4445819207656085, + "learning_rate": 7.962046186098854e-06, + "loss": 0.6751, + "step": 10416 + }, + { + "epoch": 0.31926566139512075, + "grad_norm": 0.9122120724231345, + "learning_rate": 7.961646319393693e-06, + "loss": 0.5009, + "step": 10417 + }, + { + "epoch": 0.31929630991786195, + "grad_norm": 1.3731632254610044, + "learning_rate": 
7.96124642350667e-06, + "loss": 0.5287, + "step": 10418 + }, + { + "epoch": 0.31932695844060316, + "grad_norm": 1.754979070671725, + "learning_rate": 7.96084649844173e-06, + "loss": 0.6988, + "step": 10419 + }, + { + "epoch": 0.31935760696334436, + "grad_norm": 1.8205965572783558, + "learning_rate": 7.96044654420281e-06, + "loss": 0.6618, + "step": 10420 + }, + { + "epoch": 0.31938825548608557, + "grad_norm": 1.5271784237967054, + "learning_rate": 7.960046560793854e-06, + "loss": 0.6383, + "step": 10421 + }, + { + "epoch": 0.3194189040088268, + "grad_norm": 1.8245199608531386, + "learning_rate": 7.959646548218802e-06, + "loss": 0.7767, + "step": 10422 + }, + { + "epoch": 0.319449552531568, + "grad_norm": 0.8066625697798168, + "learning_rate": 7.959246506481595e-06, + "loss": 0.4471, + "step": 10423 + }, + { + "epoch": 0.3194802010543092, + "grad_norm": 1.6064201366596758, + "learning_rate": 7.958846435586175e-06, + "loss": 0.6442, + "step": 10424 + }, + { + "epoch": 0.3195108495770504, + "grad_norm": 1.7424639265225494, + "learning_rate": 7.958446335536484e-06, + "loss": 0.6484, + "step": 10425 + }, + { + "epoch": 0.3195414980997916, + "grad_norm": 1.6954125362694379, + "learning_rate": 7.958046206336463e-06, + "loss": 0.6696, + "step": 10426 + }, + { + "epoch": 0.3195721466225328, + "grad_norm": 1.6588842799903891, + "learning_rate": 7.957646047990058e-06, + "loss": 0.5764, + "step": 10427 + }, + { + "epoch": 0.319602795145274, + "grad_norm": 1.6910499390934304, + "learning_rate": 7.957245860501209e-06, + "loss": 0.6172, + "step": 10428 + }, + { + "epoch": 0.3196334436680152, + "grad_norm": 1.4456175166223815, + "learning_rate": 7.956845643873861e-06, + "loss": 0.5768, + "step": 10429 + }, + { + "epoch": 0.3196640921907564, + "grad_norm": 1.7324244921428842, + "learning_rate": 7.956445398111954e-06, + "loss": 0.7121, + "step": 10430 + }, + { + "epoch": 0.3196947407134976, + "grad_norm": 1.7847271718626945, + "learning_rate": 7.956045123219436e-06, + "loss": 0.7932, + "step": 10431 + }, + { + "epoch": 0.31972538923623883, + "grad_norm": 1.8740091320782772, + "learning_rate": 7.955644819200248e-06, + "loss": 0.6637, + "step": 10432 + }, + { + "epoch": 0.31975603775898004, + "grad_norm": 1.795119777094036, + "learning_rate": 7.955244486058335e-06, + "loss": 0.6932, + "step": 10433 + }, + { + "epoch": 0.31978668628172124, + "grad_norm": 1.719661462067658, + "learning_rate": 7.954844123797642e-06, + "loss": 0.6948, + "step": 10434 + }, + { + "epoch": 0.31981733480446245, + "grad_norm": 0.8175548277538084, + "learning_rate": 7.954443732422116e-06, + "loss": 0.4706, + "step": 10435 + }, + { + "epoch": 0.31984798332720366, + "grad_norm": 0.8349422622404666, + "learning_rate": 7.954043311935697e-06, + "loss": 0.4571, + "step": 10436 + }, + { + "epoch": 0.3198786318499448, + "grad_norm": 1.4393255721846918, + "learning_rate": 7.953642862342335e-06, + "loss": 0.6212, + "step": 10437 + }, + { + "epoch": 0.319909280372686, + "grad_norm": 1.7694804254106176, + "learning_rate": 7.953242383645974e-06, + "loss": 0.7501, + "step": 10438 + }, + { + "epoch": 0.3199399288954272, + "grad_norm": 1.6158835581344888, + "learning_rate": 7.952841875850562e-06, + "loss": 0.6154, + "step": 10439 + }, + { + "epoch": 0.3199705774181684, + "grad_norm": 1.7643840187882889, + "learning_rate": 7.95244133896004e-06, + "loss": 0.5728, + "step": 10440 + }, + { + "epoch": 0.32000122594090963, + "grad_norm": 1.593839548164367, + "learning_rate": 7.95204077297836e-06, + "loss": 0.6644, + "step": 10441 + }, + { + "epoch": 
0.32003187446365083, + "grad_norm": 1.54400683494199, + "learning_rate": 7.951640177909467e-06, + "loss": 0.679, + "step": 10442 + }, + { + "epoch": 0.32006252298639204, + "grad_norm": 1.941046471023423, + "learning_rate": 7.951239553757308e-06, + "loss": 0.8019, + "step": 10443 + }, + { + "epoch": 0.32009317150913325, + "grad_norm": 1.7680740137896622, + "learning_rate": 7.95083890052583e-06, + "loss": 0.6884, + "step": 10444 + }, + { + "epoch": 0.32012382003187445, + "grad_norm": 1.666649551709474, + "learning_rate": 7.95043821821898e-06, + "loss": 0.6344, + "step": 10445 + }, + { + "epoch": 0.32015446855461566, + "grad_norm": 1.6409508944024496, + "learning_rate": 7.95003750684071e-06, + "loss": 0.6983, + "step": 10446 + }, + { + "epoch": 0.32018511707735686, + "grad_norm": 1.7117980785479925, + "learning_rate": 7.949636766394966e-06, + "loss": 0.6487, + "step": 10447 + }, + { + "epoch": 0.32021576560009807, + "grad_norm": 1.5620909755144619, + "learning_rate": 7.949235996885694e-06, + "loss": 0.6003, + "step": 10448 + }, + { + "epoch": 0.3202464141228393, + "grad_norm": 1.787211488857671, + "learning_rate": 7.948835198316845e-06, + "loss": 0.797, + "step": 10449 + }, + { + "epoch": 0.3202770626455805, + "grad_norm": 1.7952876166537886, + "learning_rate": 7.94843437069237e-06, + "loss": 0.6032, + "step": 10450 + }, + { + "epoch": 0.3203077111683217, + "grad_norm": 1.6632493005720685, + "learning_rate": 7.948033514016216e-06, + "loss": 0.5798, + "step": 10451 + }, + { + "epoch": 0.3203383596910629, + "grad_norm": 1.026938145081259, + "learning_rate": 7.947632628292334e-06, + "loss": 0.4639, + "step": 10452 + }, + { + "epoch": 0.3203690082138041, + "grad_norm": 1.7821616464574574, + "learning_rate": 7.947231713524672e-06, + "loss": 0.6943, + "step": 10453 + }, + { + "epoch": 0.3203996567365453, + "grad_norm": 0.9005889266741222, + "learning_rate": 7.946830769717184e-06, + "loss": 0.4592, + "step": 10454 + }, + { + "epoch": 0.3204303052592865, + "grad_norm": 1.7250096921127456, + "learning_rate": 7.946429796873816e-06, + "loss": 0.675, + "step": 10455 + }, + { + "epoch": 0.3204609537820277, + "grad_norm": 1.8028588501687197, + "learning_rate": 7.946028794998524e-06, + "loss": 0.7631, + "step": 10456 + }, + { + "epoch": 0.3204916023047689, + "grad_norm": 0.7866349106403938, + "learning_rate": 7.945627764095253e-06, + "loss": 0.4687, + "step": 10457 + }, + { + "epoch": 0.3205222508275101, + "grad_norm": 1.7601502557776143, + "learning_rate": 7.945226704167963e-06, + "loss": 0.6039, + "step": 10458 + }, + { + "epoch": 0.32055289935025133, + "grad_norm": 1.6334435213453402, + "learning_rate": 7.944825615220598e-06, + "loss": 0.7146, + "step": 10459 + }, + { + "epoch": 0.32058354787299254, + "grad_norm": 1.7611949269219629, + "learning_rate": 7.944424497257111e-06, + "loss": 0.7898, + "step": 10460 + }, + { + "epoch": 0.32061419639573374, + "grad_norm": 1.5553684513858248, + "learning_rate": 7.944023350281458e-06, + "loss": 0.6996, + "step": 10461 + }, + { + "epoch": 0.32064484491847495, + "grad_norm": 1.7987206077536222, + "learning_rate": 7.94362217429759e-06, + "loss": 0.6734, + "step": 10462 + }, + { + "epoch": 0.32067549344121615, + "grad_norm": 0.8650215572018628, + "learning_rate": 7.943220969309458e-06, + "loss": 0.4514, + "step": 10463 + }, + { + "epoch": 0.32070614196395736, + "grad_norm": 0.848173961624055, + "learning_rate": 7.94281973532102e-06, + "loss": 0.4947, + "step": 10464 + }, + { + "epoch": 0.32073679048669856, + "grad_norm": 1.7450820924788808, + "learning_rate": 
7.942418472336222e-06, + "loss": 0.7364, + "step": 10465 + }, + { + "epoch": 0.32076743900943977, + "grad_norm": 0.9105188676685093, + "learning_rate": 7.942017180359025e-06, + "loss": 0.4791, + "step": 10466 + }, + { + "epoch": 0.320798087532181, + "grad_norm": 1.7773478012395583, + "learning_rate": 7.941615859393379e-06, + "loss": 0.6821, + "step": 10467 + }, + { + "epoch": 0.3208287360549221, + "grad_norm": 1.7502555381139877, + "learning_rate": 7.941214509443237e-06, + "loss": 0.6981, + "step": 10468 + }, + { + "epoch": 0.32085938457766333, + "grad_norm": 1.6827912964176033, + "learning_rate": 7.940813130512559e-06, + "loss": 0.6108, + "step": 10469 + }, + { + "epoch": 0.32089003310040454, + "grad_norm": 1.5981152773160454, + "learning_rate": 7.940411722605296e-06, + "loss": 0.6786, + "step": 10470 + }, + { + "epoch": 0.32092068162314574, + "grad_norm": 0.8797948268688601, + "learning_rate": 7.940010285725403e-06, + "loss": 0.4667, + "step": 10471 + }, + { + "epoch": 0.32095133014588695, + "grad_norm": 1.8470291516579829, + "learning_rate": 7.939608819876837e-06, + "loss": 0.7585, + "step": 10472 + }, + { + "epoch": 0.32098197866862815, + "grad_norm": 1.4662966285475036, + "learning_rate": 7.939207325063553e-06, + "loss": 0.6563, + "step": 10473 + }, + { + "epoch": 0.32101262719136936, + "grad_norm": 1.6115970168866203, + "learning_rate": 7.938805801289509e-06, + "loss": 0.7633, + "step": 10474 + }, + { + "epoch": 0.32104327571411057, + "grad_norm": 0.7989244000618565, + "learning_rate": 7.938404248558658e-06, + "loss": 0.4661, + "step": 10475 + }, + { + "epoch": 0.32107392423685177, + "grad_norm": 0.8260250651829746, + "learning_rate": 7.938002666874958e-06, + "loss": 0.4709, + "step": 10476 + }, + { + "epoch": 0.321104572759593, + "grad_norm": 1.5952579951055204, + "learning_rate": 7.937601056242365e-06, + "loss": 0.7048, + "step": 10477 + }, + { + "epoch": 0.3211352212823342, + "grad_norm": 1.6258268183171436, + "learning_rate": 7.937199416664839e-06, + "loss": 0.6506, + "step": 10478 + }, + { + "epoch": 0.3211658698050754, + "grad_norm": 1.727451716890594, + "learning_rate": 7.936797748146335e-06, + "loss": 0.6718, + "step": 10479 + }, + { + "epoch": 0.3211965183278166, + "grad_norm": 1.6766948341030603, + "learning_rate": 7.936396050690812e-06, + "loss": 0.6849, + "step": 10480 + }, + { + "epoch": 0.3212271668505578, + "grad_norm": 0.8887958110342795, + "learning_rate": 7.935994324302226e-06, + "loss": 0.4789, + "step": 10481 + }, + { + "epoch": 0.321257815373299, + "grad_norm": 1.7376707976310524, + "learning_rate": 7.935592568984537e-06, + "loss": 0.7159, + "step": 10482 + }, + { + "epoch": 0.3212884638960402, + "grad_norm": 1.6946121549144273, + "learning_rate": 7.935190784741705e-06, + "loss": 0.7076, + "step": 10483 + }, + { + "epoch": 0.3213191124187814, + "grad_norm": 1.7742394562445083, + "learning_rate": 7.934788971577685e-06, + "loss": 0.7015, + "step": 10484 + }, + { + "epoch": 0.3213497609415226, + "grad_norm": 0.8337629783947982, + "learning_rate": 7.93438712949644e-06, + "loss": 0.4767, + "step": 10485 + }, + { + "epoch": 0.3213804094642638, + "grad_norm": 0.8247869377231828, + "learning_rate": 7.933985258501926e-06, + "loss": 0.4616, + "step": 10486 + }, + { + "epoch": 0.32141105798700503, + "grad_norm": 1.7963405319436685, + "learning_rate": 7.933583358598107e-06, + "loss": 0.6975, + "step": 10487 + }, + { + "epoch": 0.32144170650974624, + "grad_norm": 1.5904218113917061, + "learning_rate": 7.933181429788937e-06, + "loss": 0.6907, + "step": 10488 + }, + { + 
"epoch": 0.32147235503248744, + "grad_norm": 1.7406780893236748, + "learning_rate": 7.932779472078384e-06, + "loss": 0.6727, + "step": 10489 + }, + { + "epoch": 0.32150300355522865, + "grad_norm": 1.6567138657907632, + "learning_rate": 7.932377485470402e-06, + "loss": 0.6878, + "step": 10490 + }, + { + "epoch": 0.32153365207796986, + "grad_norm": 0.8683767019652492, + "learning_rate": 7.931975469968956e-06, + "loss": 0.4506, + "step": 10491 + }, + { + "epoch": 0.32156430060071106, + "grad_norm": 0.8860508621074091, + "learning_rate": 7.931573425578003e-06, + "loss": 0.4554, + "step": 10492 + }, + { + "epoch": 0.32159494912345227, + "grad_norm": 1.8133571211166355, + "learning_rate": 7.93117135230151e-06, + "loss": 0.766, + "step": 10493 + }, + { + "epoch": 0.3216255976461935, + "grad_norm": 1.7324134457977995, + "learning_rate": 7.930769250143433e-06, + "loss": 0.6622, + "step": 10494 + }, + { + "epoch": 0.3216562461689347, + "grad_norm": 1.5062612784553129, + "learning_rate": 7.930367119107738e-06, + "loss": 0.6444, + "step": 10495 + }, + { + "epoch": 0.3216868946916759, + "grad_norm": 1.7132534832828912, + "learning_rate": 7.929964959198387e-06, + "loss": 0.7808, + "step": 10496 + }, + { + "epoch": 0.3217175432144171, + "grad_norm": 1.805755645974166, + "learning_rate": 7.92956277041934e-06, + "loss": 0.6743, + "step": 10497 + }, + { + "epoch": 0.3217481917371583, + "grad_norm": 1.5740298405162674, + "learning_rate": 7.929160552774561e-06, + "loss": 0.6462, + "step": 10498 + }, + { + "epoch": 0.32177884025989945, + "grad_norm": 1.8249993500649047, + "learning_rate": 7.928758306268014e-06, + "loss": 0.7186, + "step": 10499 + }, + { + "epoch": 0.32180948878264065, + "grad_norm": 1.7200453937163573, + "learning_rate": 7.928356030903663e-06, + "loss": 0.698, + "step": 10500 + }, + { + "epoch": 0.32184013730538186, + "grad_norm": 1.3760417442421453, + "learning_rate": 7.927953726685472e-06, + "loss": 0.6902, + "step": 10501 + }, + { + "epoch": 0.32187078582812306, + "grad_norm": 0.9783151436255428, + "learning_rate": 7.927551393617401e-06, + "loss": 0.4775, + "step": 10502 + }, + { + "epoch": 0.32190143435086427, + "grad_norm": 1.7538152330051662, + "learning_rate": 7.927149031703418e-06, + "loss": 0.7227, + "step": 10503 + }, + { + "epoch": 0.3219320828736055, + "grad_norm": 1.694229637707536, + "learning_rate": 7.926746640947487e-06, + "loss": 0.6565, + "step": 10504 + }, + { + "epoch": 0.3219627313963467, + "grad_norm": 1.8614976261582172, + "learning_rate": 7.926344221353573e-06, + "loss": 0.7014, + "step": 10505 + }, + { + "epoch": 0.3219933799190879, + "grad_norm": 1.6362416116943972, + "learning_rate": 7.925941772925639e-06, + "loss": 0.7248, + "step": 10506 + }, + { + "epoch": 0.3220240284418291, + "grad_norm": 1.8227443239608982, + "learning_rate": 7.925539295667654e-06, + "loss": 0.7745, + "step": 10507 + }, + { + "epoch": 0.3220546769645703, + "grad_norm": 1.6321817253275885, + "learning_rate": 7.925136789583581e-06, + "loss": 0.6742, + "step": 10508 + }, + { + "epoch": 0.3220853254873115, + "grad_norm": 1.7745182678480078, + "learning_rate": 7.924734254677386e-06, + "loss": 0.7346, + "step": 10509 + }, + { + "epoch": 0.3221159740100527, + "grad_norm": 1.545140957515503, + "learning_rate": 7.924331690953038e-06, + "loss": 0.6796, + "step": 10510 + }, + { + "epoch": 0.3221466225327939, + "grad_norm": 1.6154815211073663, + "learning_rate": 7.9239290984145e-06, + "loss": 0.6127, + "step": 10511 + }, + { + "epoch": 0.3221772710555351, + "grad_norm": 1.9328287621204403, + 
"learning_rate": 7.92352647706574e-06, + "loss": 0.6867, + "step": 10512 + }, + { + "epoch": 0.3222079195782763, + "grad_norm": 1.0095214957852596, + "learning_rate": 7.923123826910726e-06, + "loss": 0.4168, + "step": 10513 + }, + { + "epoch": 0.32223856810101753, + "grad_norm": 1.9327600236766491, + "learning_rate": 7.922721147953425e-06, + "loss": 0.7194, + "step": 10514 + }, + { + "epoch": 0.32226921662375874, + "grad_norm": 1.5933848423835155, + "learning_rate": 7.922318440197805e-06, + "loss": 0.6722, + "step": 10515 + }, + { + "epoch": 0.32229986514649994, + "grad_norm": 0.814337774195997, + "learning_rate": 7.921915703647836e-06, + "loss": 0.4629, + "step": 10516 + }, + { + "epoch": 0.32233051366924115, + "grad_norm": 1.723607823582721, + "learning_rate": 7.921512938307481e-06, + "loss": 0.7087, + "step": 10517 + }, + { + "epoch": 0.32236116219198235, + "grad_norm": 1.4471069345087595, + "learning_rate": 7.921110144180712e-06, + "loss": 0.6921, + "step": 10518 + }, + { + "epoch": 0.32239181071472356, + "grad_norm": 1.9365751278921173, + "learning_rate": 7.920707321271497e-06, + "loss": 0.6949, + "step": 10519 + }, + { + "epoch": 0.32242245923746476, + "grad_norm": 0.7882557427338817, + "learning_rate": 7.920304469583808e-06, + "loss": 0.4622, + "step": 10520 + }, + { + "epoch": 0.32245310776020597, + "grad_norm": 1.5937379960597773, + "learning_rate": 7.91990158912161e-06, + "loss": 0.6424, + "step": 10521 + }, + { + "epoch": 0.3224837562829472, + "grad_norm": 0.8297894377988558, + "learning_rate": 7.919498679888873e-06, + "loss": 0.4851, + "step": 10522 + }, + { + "epoch": 0.3225144048056884, + "grad_norm": 1.769011558883391, + "learning_rate": 7.919095741889572e-06, + "loss": 0.7136, + "step": 10523 + }, + { + "epoch": 0.3225450533284296, + "grad_norm": 1.4661221935581308, + "learning_rate": 7.91869277512767e-06, + "loss": 0.6734, + "step": 10524 + }, + { + "epoch": 0.3225757018511708, + "grad_norm": 1.7964002506855257, + "learning_rate": 7.918289779607144e-06, + "loss": 0.7065, + "step": 10525 + }, + { + "epoch": 0.322606350373912, + "grad_norm": 2.0009987125895177, + "learning_rate": 7.91788675533196e-06, + "loss": 0.7164, + "step": 10526 + }, + { + "epoch": 0.3226369988966532, + "grad_norm": 1.7451958852794098, + "learning_rate": 7.917483702306094e-06, + "loss": 0.6691, + "step": 10527 + }, + { + "epoch": 0.3226676474193944, + "grad_norm": 1.7027447555070407, + "learning_rate": 7.917080620533513e-06, + "loss": 0.7166, + "step": 10528 + }, + { + "epoch": 0.3226982959421356, + "grad_norm": 1.7284462987509592, + "learning_rate": 7.91667751001819e-06, + "loss": 0.5541, + "step": 10529 + }, + { + "epoch": 0.32272894446487677, + "grad_norm": 1.9550730999137034, + "learning_rate": 7.916274370764098e-06, + "loss": 0.7375, + "step": 10530 + }, + { + "epoch": 0.32275959298761797, + "grad_norm": 1.6106592039562062, + "learning_rate": 7.915871202775209e-06, + "loss": 0.7105, + "step": 10531 + }, + { + "epoch": 0.3227902415103592, + "grad_norm": 0.8933906432831927, + "learning_rate": 7.915468006055493e-06, + "loss": 0.4556, + "step": 10532 + }, + { + "epoch": 0.3228208900331004, + "grad_norm": 1.6211491925463324, + "learning_rate": 7.915064780608926e-06, + "loss": 0.7171, + "step": 10533 + }, + { + "epoch": 0.3228515385558416, + "grad_norm": 1.877990046293407, + "learning_rate": 7.91466152643948e-06, + "loss": 0.6923, + "step": 10534 + }, + { + "epoch": 0.3228821870785828, + "grad_norm": 1.8792296445950234, + "learning_rate": 7.914258243551129e-06, + "loss": 0.7315, + "step": 10535 + }, + { 
+ "epoch": 0.322912835601324, + "grad_norm": 1.796370692924275, + "learning_rate": 7.913854931947844e-06, + "loss": 0.6952, + "step": 10536 + }, + { + "epoch": 0.3229434841240652, + "grad_norm": 1.7838603045222066, + "learning_rate": 7.913451591633602e-06, + "loss": 0.6935, + "step": 10537 + }, + { + "epoch": 0.3229741326468064, + "grad_norm": 1.910907819910301, + "learning_rate": 7.913048222612376e-06, + "loss": 0.7035, + "step": 10538 + }, + { + "epoch": 0.3230047811695476, + "grad_norm": 1.5668440386998972, + "learning_rate": 7.91264482488814e-06, + "loss": 0.6721, + "step": 10539 + }, + { + "epoch": 0.3230354296922888, + "grad_norm": 1.5755297180262147, + "learning_rate": 7.91224139846487e-06, + "loss": 0.7252, + "step": 10540 + }, + { + "epoch": 0.32306607821503003, + "grad_norm": 1.6736357143717928, + "learning_rate": 7.911837943346538e-06, + "loss": 0.6598, + "step": 10541 + }, + { + "epoch": 0.32309672673777123, + "grad_norm": 1.733964170229089, + "learning_rate": 7.911434459537124e-06, + "loss": 0.6178, + "step": 10542 + }, + { + "epoch": 0.32312737526051244, + "grad_norm": 1.6350882027362474, + "learning_rate": 7.911030947040602e-06, + "loss": 0.6719, + "step": 10543 + }, + { + "epoch": 0.32315802378325365, + "grad_norm": 1.9553030769712498, + "learning_rate": 7.910627405860947e-06, + "loss": 0.6021, + "step": 10544 + }, + { + "epoch": 0.32318867230599485, + "grad_norm": 1.5922237714236676, + "learning_rate": 7.910223836002133e-06, + "loss": 0.6421, + "step": 10545 + }, + { + "epoch": 0.32321932082873606, + "grad_norm": 1.7596932591503576, + "learning_rate": 7.909820237468141e-06, + "loss": 0.7196, + "step": 10546 + }, + { + "epoch": 0.32324996935147726, + "grad_norm": 1.668412651929019, + "learning_rate": 7.909416610262945e-06, + "loss": 0.648, + "step": 10547 + }, + { + "epoch": 0.32328061787421847, + "grad_norm": 1.728292710439839, + "learning_rate": 7.909012954390526e-06, + "loss": 0.6812, + "step": 10548 + }, + { + "epoch": 0.3233112663969597, + "grad_norm": 1.932621346952897, + "learning_rate": 7.908609269854852e-06, + "loss": 0.6065, + "step": 10549 + }, + { + "epoch": 0.3233419149197009, + "grad_norm": 1.6437240830987558, + "learning_rate": 7.908205556659911e-06, + "loss": 0.6795, + "step": 10550 + }, + { + "epoch": 0.3233725634424421, + "grad_norm": 1.8104409269877326, + "learning_rate": 7.907801814809674e-06, + "loss": 0.6955, + "step": 10551 + }, + { + "epoch": 0.3234032119651833, + "grad_norm": 1.5814677490794722, + "learning_rate": 7.907398044308123e-06, + "loss": 0.7537, + "step": 10552 + }, + { + "epoch": 0.3234338604879245, + "grad_norm": 1.7249789875428079, + "learning_rate": 7.906994245159235e-06, + "loss": 0.6477, + "step": 10553 + }, + { + "epoch": 0.3234645090106657, + "grad_norm": 1.9807177637834343, + "learning_rate": 7.90659041736699e-06, + "loss": 0.7784, + "step": 10554 + }, + { + "epoch": 0.3234951575334069, + "grad_norm": 1.7477290329878994, + "learning_rate": 7.906186560935366e-06, + "loss": 0.6705, + "step": 10555 + }, + { + "epoch": 0.3235258060561481, + "grad_norm": 1.7098601060373904, + "learning_rate": 7.905782675868341e-06, + "loss": 0.6369, + "step": 10556 + }, + { + "epoch": 0.3235564545788893, + "grad_norm": 1.677909555270474, + "learning_rate": 7.905378762169896e-06, + "loss": 0.7511, + "step": 10557 + }, + { + "epoch": 0.3235871031016305, + "grad_norm": 1.7099519783488768, + "learning_rate": 7.904974819844012e-06, + "loss": 0.6667, + "step": 10558 + }, + { + "epoch": 0.32361775162437173, + "grad_norm": 1.4663802678528661, + "learning_rate": 
7.904570848894666e-06, + "loss": 0.6368, + "step": 10559 + }, + { + "epoch": 0.32364840014711294, + "grad_norm": 1.5153255875662581, + "learning_rate": 7.90416684932584e-06, + "loss": 0.7246, + "step": 10560 + }, + { + "epoch": 0.3236790486698541, + "grad_norm": 0.8426753137649525, + "learning_rate": 7.903762821141516e-06, + "loss": 0.463, + "step": 10561 + }, + { + "epoch": 0.3237096971925953, + "grad_norm": 1.6597043605836643, + "learning_rate": 7.903358764345674e-06, + "loss": 0.6335, + "step": 10562 + }, + { + "epoch": 0.3237403457153365, + "grad_norm": 1.6370817767993056, + "learning_rate": 7.902954678942296e-06, + "loss": 0.6249, + "step": 10563 + }, + { + "epoch": 0.3237709942380777, + "grad_norm": 1.8887208917497007, + "learning_rate": 7.902550564935363e-06, + "loss": 0.6595, + "step": 10564 + }, + { + "epoch": 0.3238016427608189, + "grad_norm": 1.7741180214805892, + "learning_rate": 7.902146422328853e-06, + "loss": 0.6173, + "step": 10565 + }, + { + "epoch": 0.3238322912835601, + "grad_norm": 1.873277637929503, + "learning_rate": 7.901742251126755e-06, + "loss": 0.7186, + "step": 10566 + }, + { + "epoch": 0.3238629398063013, + "grad_norm": 1.6954933932501974, + "learning_rate": 7.901338051333047e-06, + "loss": 0.659, + "step": 10567 + }, + { + "epoch": 0.3238935883290425, + "grad_norm": 1.7619300069363986, + "learning_rate": 7.900933822951714e-06, + "loss": 0.7335, + "step": 10568 + }, + { + "epoch": 0.32392423685178373, + "grad_norm": 1.7670495184955703, + "learning_rate": 7.900529565986737e-06, + "loss": 0.7269, + "step": 10569 + }, + { + "epoch": 0.32395488537452494, + "grad_norm": 1.7120842737614124, + "learning_rate": 7.9001252804421e-06, + "loss": 0.7422, + "step": 10570 + }, + { + "epoch": 0.32398553389726614, + "grad_norm": 1.7796646336227524, + "learning_rate": 7.899720966321786e-06, + "loss": 0.7923, + "step": 10571 + }, + { + "epoch": 0.32401618242000735, + "grad_norm": 1.73697572187235, + "learning_rate": 7.89931662362978e-06, + "loss": 0.6601, + "step": 10572 + }, + { + "epoch": 0.32404683094274855, + "grad_norm": 1.7616612262205873, + "learning_rate": 7.898912252370066e-06, + "loss": 0.7579, + "step": 10573 + }, + { + "epoch": 0.32407747946548976, + "grad_norm": 1.8018971764130922, + "learning_rate": 7.898507852546628e-06, + "loss": 0.7399, + "step": 10574 + }, + { + "epoch": 0.32410812798823097, + "grad_norm": 1.8314596473220253, + "learning_rate": 7.89810342416345e-06, + "loss": 0.659, + "step": 10575 + }, + { + "epoch": 0.32413877651097217, + "grad_norm": 2.017183487736497, + "learning_rate": 7.897698967224517e-06, + "loss": 0.6765, + "step": 10576 + }, + { + "epoch": 0.3241694250337134, + "grad_norm": 1.6596252764864718, + "learning_rate": 7.897294481733816e-06, + "loss": 0.7007, + "step": 10577 + }, + { + "epoch": 0.3242000735564546, + "grad_norm": 1.717291202784849, + "learning_rate": 7.896889967695329e-06, + "loss": 0.734, + "step": 10578 + }, + { + "epoch": 0.3242307220791958, + "grad_norm": 1.5497009700936937, + "learning_rate": 7.896485425113045e-06, + "loss": 0.6424, + "step": 10579 + }, + { + "epoch": 0.324261370601937, + "grad_norm": 1.9515864851860685, + "learning_rate": 7.896080853990951e-06, + "loss": 0.7179, + "step": 10580 + }, + { + "epoch": 0.3242920191246782, + "grad_norm": 1.9772354844722821, + "learning_rate": 7.895676254333029e-06, + "loss": 0.7107, + "step": 10581 + }, + { + "epoch": 0.3243226676474194, + "grad_norm": 1.666594941796978, + "learning_rate": 7.895271626143268e-06, + "loss": 0.6545, + "step": 10582 + }, + { + "epoch": 
0.3243533161701606, + "grad_norm": 1.6928041125218052, + "learning_rate": 7.894866969425656e-06, + "loss": 0.7364, + "step": 10583 + }, + { + "epoch": 0.3243839646929018, + "grad_norm": 1.5989618129511307, + "learning_rate": 7.894462284184178e-06, + "loss": 0.6985, + "step": 10584 + }, + { + "epoch": 0.324414613215643, + "grad_norm": 1.6859763766205675, + "learning_rate": 7.894057570422824e-06, + "loss": 0.6153, + "step": 10585 + }, + { + "epoch": 0.3244452617383842, + "grad_norm": 1.7906339576841916, + "learning_rate": 7.893652828145579e-06, + "loss": 0.745, + "step": 10586 + }, + { + "epoch": 0.32447591026112543, + "grad_norm": 1.7749504456404228, + "learning_rate": 7.893248057356433e-06, + "loss": 0.7308, + "step": 10587 + }, + { + "epoch": 0.32450655878386664, + "grad_norm": 1.6872087636369844, + "learning_rate": 7.892843258059373e-06, + "loss": 0.7264, + "step": 10588 + }, + { + "epoch": 0.32453720730660784, + "grad_norm": 1.7030761276835953, + "learning_rate": 7.892438430258388e-06, + "loss": 0.7733, + "step": 10589 + }, + { + "epoch": 0.32456785582934905, + "grad_norm": 1.7280952775533098, + "learning_rate": 7.892033573957467e-06, + "loss": 0.6677, + "step": 10590 + }, + { + "epoch": 0.32459850435209026, + "grad_norm": 1.6867579006269733, + "learning_rate": 7.8916286891606e-06, + "loss": 0.7478, + "step": 10591 + }, + { + "epoch": 0.3246291528748314, + "grad_norm": 1.7574233264832644, + "learning_rate": 7.891223775871776e-06, + "loss": 0.7143, + "step": 10592 + }, + { + "epoch": 0.3246598013975726, + "grad_norm": 1.4232572653904467, + "learning_rate": 7.890818834094985e-06, + "loss": 0.6238, + "step": 10593 + }, + { + "epoch": 0.3246904499203138, + "grad_norm": 1.664396667607645, + "learning_rate": 7.890413863834214e-06, + "loss": 0.7247, + "step": 10594 + }, + { + "epoch": 0.324721098443055, + "grad_norm": 0.8256889010143097, + "learning_rate": 7.890008865093458e-06, + "loss": 0.4842, + "step": 10595 + }, + { + "epoch": 0.32475174696579623, + "grad_norm": 1.8353445578237568, + "learning_rate": 7.889603837876702e-06, + "loss": 0.6602, + "step": 10596 + }, + { + "epoch": 0.32478239548853743, + "grad_norm": 1.6888909890970747, + "learning_rate": 7.889198782187944e-06, + "loss": 0.6252, + "step": 10597 + }, + { + "epoch": 0.32481304401127864, + "grad_norm": 1.94037969141328, + "learning_rate": 7.888793698031167e-06, + "loss": 0.8394, + "step": 10598 + }, + { + "epoch": 0.32484369253401985, + "grad_norm": 2.066282407031063, + "learning_rate": 7.88838858541037e-06, + "loss": 0.7427, + "step": 10599 + }, + { + "epoch": 0.32487434105676105, + "grad_norm": 1.7236183609551192, + "learning_rate": 7.88798344432954e-06, + "loss": 0.6263, + "step": 10600 + }, + { + "epoch": 0.32490498957950226, + "grad_norm": 0.7932164238760486, + "learning_rate": 7.88757827479267e-06, + "loss": 0.4764, + "step": 10601 + }, + { + "epoch": 0.32493563810224346, + "grad_norm": 1.691158749180594, + "learning_rate": 7.887173076803753e-06, + "loss": 0.6416, + "step": 10602 + }, + { + "epoch": 0.32496628662498467, + "grad_norm": 1.7540152958513657, + "learning_rate": 7.886767850366781e-06, + "loss": 0.6727, + "step": 10603 + }, + { + "epoch": 0.3249969351477259, + "grad_norm": 0.7931565346171888, + "learning_rate": 7.886362595485747e-06, + "loss": 0.4792, + "step": 10604 + }, + { + "epoch": 0.3250275836704671, + "grad_norm": 1.7791629559981539, + "learning_rate": 7.885957312164643e-06, + "loss": 0.7916, + "step": 10605 + }, + { + "epoch": 0.3250582321932083, + "grad_norm": 1.485371792377513, + "learning_rate": 
7.885552000407463e-06, + "loss": 0.5995, + "step": 10606 + }, + { + "epoch": 0.3250888807159495, + "grad_norm": 1.7246419340617458, + "learning_rate": 7.885146660218202e-06, + "loss": 0.6846, + "step": 10607 + }, + { + "epoch": 0.3251195292386907, + "grad_norm": 1.7142510226280632, + "learning_rate": 7.884741291600853e-06, + "loss": 0.7134, + "step": 10608 + }, + { + "epoch": 0.3251501777614319, + "grad_norm": 1.8916293970780733, + "learning_rate": 7.884335894559408e-06, + "loss": 0.7742, + "step": 10609 + }, + { + "epoch": 0.3251808262841731, + "grad_norm": 1.742783518277049, + "learning_rate": 7.883930469097864e-06, + "loss": 0.7437, + "step": 10610 + }, + { + "epoch": 0.3252114748069143, + "grad_norm": 1.58688803137667, + "learning_rate": 7.883525015220215e-06, + "loss": 0.6567, + "step": 10611 + }, + { + "epoch": 0.3252421233296555, + "grad_norm": 1.4428079628258854, + "learning_rate": 7.883119532930458e-06, + "loss": 0.6096, + "step": 10612 + }, + { + "epoch": 0.3252727718523967, + "grad_norm": 1.7375925810461341, + "learning_rate": 7.882714022232585e-06, + "loss": 0.7059, + "step": 10613 + }, + { + "epoch": 0.32530342037513793, + "grad_norm": 1.7662767623837654, + "learning_rate": 7.882308483130594e-06, + "loss": 0.7259, + "step": 10614 + }, + { + "epoch": 0.32533406889787914, + "grad_norm": 1.7056229212935579, + "learning_rate": 7.88190291562848e-06, + "loss": 0.6329, + "step": 10615 + }, + { + "epoch": 0.32536471742062034, + "grad_norm": 0.9266809379982968, + "learning_rate": 7.881497319730239e-06, + "loss": 0.4862, + "step": 10616 + }, + { + "epoch": 0.32539536594336155, + "grad_norm": 1.8521498245326322, + "learning_rate": 7.881091695439867e-06, + "loss": 0.7195, + "step": 10617 + }, + { + "epoch": 0.32542601446610275, + "grad_norm": 1.5146724622428291, + "learning_rate": 7.880686042761363e-06, + "loss": 0.6036, + "step": 10618 + }, + { + "epoch": 0.32545666298884396, + "grad_norm": 1.4164633558622801, + "learning_rate": 7.88028036169872e-06, + "loss": 0.5957, + "step": 10619 + }, + { + "epoch": 0.32548731151158516, + "grad_norm": 1.6654636927235429, + "learning_rate": 7.879874652255938e-06, + "loss": 0.7018, + "step": 10620 + }, + { + "epoch": 0.32551796003432637, + "grad_norm": 1.5760853248360724, + "learning_rate": 7.879468914437016e-06, + "loss": 0.6626, + "step": 10621 + }, + { + "epoch": 0.3255486085570676, + "grad_norm": 1.735923893481031, + "learning_rate": 7.879063148245949e-06, + "loss": 0.7101, + "step": 10622 + }, + { + "epoch": 0.3255792570798087, + "grad_norm": 1.8993146735133397, + "learning_rate": 7.878657353686736e-06, + "loss": 0.6879, + "step": 10623 + }, + { + "epoch": 0.32560990560254993, + "grad_norm": 1.5900633457238518, + "learning_rate": 7.878251530763377e-06, + "loss": 0.6272, + "step": 10624 + }, + { + "epoch": 0.32564055412529114, + "grad_norm": 1.4873016821019376, + "learning_rate": 7.877845679479868e-06, + "loss": 0.6825, + "step": 10625 + }, + { + "epoch": 0.32567120264803234, + "grad_norm": 1.9088498938946017, + "learning_rate": 7.87743979984021e-06, + "loss": 0.7061, + "step": 10626 + }, + { + "epoch": 0.32570185117077355, + "grad_norm": 1.7568217609394625, + "learning_rate": 7.8770338918484e-06, + "loss": 0.6761, + "step": 10627 + }, + { + "epoch": 0.32573249969351475, + "grad_norm": 1.8262232610621933, + "learning_rate": 7.87662795550844e-06, + "loss": 0.6544, + "step": 10628 + }, + { + "epoch": 0.32576314821625596, + "grad_norm": 1.6079489833722618, + "learning_rate": 7.876221990824329e-06, + "loss": 0.6784, + "step": 10629 + }, + { + "epoch": 
0.32579379673899717, + "grad_norm": 0.8685057256846949, + "learning_rate": 7.875815997800064e-06, + "loss": 0.4858, + "step": 10630 + }, + { + "epoch": 0.32582444526173837, + "grad_norm": 2.0266060455441086, + "learning_rate": 7.875409976439651e-06, + "loss": 0.7084, + "step": 10631 + }, + { + "epoch": 0.3258550937844796, + "grad_norm": 1.6483427745131556, + "learning_rate": 7.875003926747087e-06, + "loss": 0.6916, + "step": 10632 + }, + { + "epoch": 0.3258857423072208, + "grad_norm": 1.557022015750213, + "learning_rate": 7.874597848726375e-06, + "loss": 0.6692, + "step": 10633 + }, + { + "epoch": 0.325916390829962, + "grad_norm": 1.5424826680378645, + "learning_rate": 7.874191742381514e-06, + "loss": 0.7206, + "step": 10634 + }, + { + "epoch": 0.3259470393527032, + "grad_norm": 0.810132921887563, + "learning_rate": 7.873785607716507e-06, + "loss": 0.4771, + "step": 10635 + }, + { + "epoch": 0.3259776878754444, + "grad_norm": 1.98120664422848, + "learning_rate": 7.873379444735354e-06, + "loss": 0.7545, + "step": 10636 + }, + { + "epoch": 0.3260083363981856, + "grad_norm": 1.7535083488011403, + "learning_rate": 7.872973253442058e-06, + "loss": 0.7835, + "step": 10637 + }, + { + "epoch": 0.3260389849209268, + "grad_norm": 0.8053802373521246, + "learning_rate": 7.872567033840621e-06, + "loss": 0.4805, + "step": 10638 + }, + { + "epoch": 0.326069633443668, + "grad_norm": 1.8035340595470912, + "learning_rate": 7.872160785935047e-06, + "loss": 0.6937, + "step": 10639 + }, + { + "epoch": 0.3261002819664092, + "grad_norm": 1.8305077088321158, + "learning_rate": 7.87175450972934e-06, + "loss": 0.6993, + "step": 10640 + }, + { + "epoch": 0.32613093048915043, + "grad_norm": 1.7983770480317374, + "learning_rate": 7.871348205227498e-06, + "loss": 0.7253, + "step": 10641 + }, + { + "epoch": 0.32616157901189163, + "grad_norm": 2.0230560313262407, + "learning_rate": 7.870941872433527e-06, + "loss": 0.6366, + "step": 10642 + }, + { + "epoch": 0.32619222753463284, + "grad_norm": 1.6398161980793817, + "learning_rate": 7.870535511351433e-06, + "loss": 0.6767, + "step": 10643 + }, + { + "epoch": 0.32622287605737404, + "grad_norm": 1.8956972621946158, + "learning_rate": 7.870129121985218e-06, + "loss": 0.6048, + "step": 10644 + }, + { + "epoch": 0.32625352458011525, + "grad_norm": 1.7532984808876915, + "learning_rate": 7.869722704338887e-06, + "loss": 0.6886, + "step": 10645 + }, + { + "epoch": 0.32628417310285646, + "grad_norm": 1.6066154704983275, + "learning_rate": 7.869316258416442e-06, + "loss": 0.6318, + "step": 10646 + }, + { + "epoch": 0.32631482162559766, + "grad_norm": 2.3000152162811496, + "learning_rate": 7.868909784221891e-06, + "loss": 0.7732, + "step": 10647 + }, + { + "epoch": 0.32634547014833887, + "grad_norm": 1.844626343729666, + "learning_rate": 7.868503281759238e-06, + "loss": 0.6493, + "step": 10648 + }, + { + "epoch": 0.3263761186710801, + "grad_norm": 1.612764441124113, + "learning_rate": 7.868096751032489e-06, + "loss": 0.7539, + "step": 10649 + }, + { + "epoch": 0.3264067671938213, + "grad_norm": 1.8652644603182473, + "learning_rate": 7.867690192045646e-06, + "loss": 0.7691, + "step": 10650 + }, + { + "epoch": 0.3264374157165625, + "grad_norm": 1.9896323447481237, + "learning_rate": 7.86728360480272e-06, + "loss": 0.7587, + "step": 10651 + }, + { + "epoch": 0.3264680642393037, + "grad_norm": 1.8236311337922018, + "learning_rate": 7.866876989307715e-06, + "loss": 0.6661, + "step": 10652 + }, + { + "epoch": 0.3264987127620449, + "grad_norm": 1.6869420931497063, + "learning_rate": 
7.866470345564636e-06, + "loss": 0.6768, + "step": 10653 + }, + { + "epoch": 0.32652936128478605, + "grad_norm": 0.8632947261776728, + "learning_rate": 7.866063673577492e-06, + "loss": 0.4749, + "step": 10654 + }, + { + "epoch": 0.32656000980752725, + "grad_norm": 1.786390935458658, + "learning_rate": 7.865656973350291e-06, + "loss": 0.6834, + "step": 10655 + }, + { + "epoch": 0.32659065833026846, + "grad_norm": 1.635712502638858, + "learning_rate": 7.865250244887038e-06, + "loss": 0.7166, + "step": 10656 + }, + { + "epoch": 0.32662130685300966, + "grad_norm": 1.8604360476646107, + "learning_rate": 7.86484348819174e-06, + "loss": 0.7501, + "step": 10657 + }, + { + "epoch": 0.32665195537575087, + "grad_norm": 1.6540858587903897, + "learning_rate": 7.864436703268407e-06, + "loss": 0.5677, + "step": 10658 + }, + { + "epoch": 0.3266826038984921, + "grad_norm": 2.054158106714936, + "learning_rate": 7.864029890121045e-06, + "loss": 0.7032, + "step": 10659 + }, + { + "epoch": 0.3267132524212333, + "grad_norm": 1.6294122830515068, + "learning_rate": 7.863623048753665e-06, + "loss": 0.6592, + "step": 10660 + }, + { + "epoch": 0.3267439009439745, + "grad_norm": 1.5978341402536103, + "learning_rate": 7.863216179170274e-06, + "loss": 0.7649, + "step": 10661 + }, + { + "epoch": 0.3267745494667157, + "grad_norm": 1.8170813141261521, + "learning_rate": 7.862809281374882e-06, + "loss": 0.7027, + "step": 10662 + }, + { + "epoch": 0.3268051979894569, + "grad_norm": 1.8081617263538798, + "learning_rate": 7.862402355371496e-06, + "loss": 0.7424, + "step": 10663 + }, + { + "epoch": 0.3268358465121981, + "grad_norm": 1.848570305148486, + "learning_rate": 7.861995401164128e-06, + "loss": 0.7378, + "step": 10664 + }, + { + "epoch": 0.3268664950349393, + "grad_norm": 1.4840015621532276, + "learning_rate": 7.861588418756787e-06, + "loss": 0.6225, + "step": 10665 + }, + { + "epoch": 0.3268971435576805, + "grad_norm": 1.5101986416962851, + "learning_rate": 7.861181408153485e-06, + "loss": 0.6156, + "step": 10666 + }, + { + "epoch": 0.3269277920804217, + "grad_norm": 0.8230120929676032, + "learning_rate": 7.860774369358229e-06, + "loss": 0.4553, + "step": 10667 + }, + { + "epoch": 0.3269584406031629, + "grad_norm": 1.6149980075228056, + "learning_rate": 7.86036730237503e-06, + "loss": 0.5685, + "step": 10668 + }, + { + "epoch": 0.32698908912590413, + "grad_norm": 1.721500011868338, + "learning_rate": 7.859960207207901e-06, + "loss": 0.7483, + "step": 10669 + }, + { + "epoch": 0.32701973764864534, + "grad_norm": 1.6755873369103829, + "learning_rate": 7.859553083860854e-06, + "loss": 0.673, + "step": 10670 + }, + { + "epoch": 0.32705038617138654, + "grad_norm": 0.8009555923188935, + "learning_rate": 7.859145932337897e-06, + "loss": 0.4742, + "step": 10671 + }, + { + "epoch": 0.32708103469412775, + "grad_norm": 0.7833950792284816, + "learning_rate": 7.858738752643043e-06, + "loss": 0.4532, + "step": 10672 + }, + { + "epoch": 0.32711168321686895, + "grad_norm": 1.6521534716960131, + "learning_rate": 7.858331544780306e-06, + "loss": 0.6838, + "step": 10673 + }, + { + "epoch": 0.32714233173961016, + "grad_norm": 1.8667006686951746, + "learning_rate": 7.857924308753698e-06, + "loss": 0.6395, + "step": 10674 + }, + { + "epoch": 0.32717298026235136, + "grad_norm": 1.8062571366617624, + "learning_rate": 7.857517044567228e-06, + "loss": 0.7965, + "step": 10675 + }, + { + "epoch": 0.32720362878509257, + "grad_norm": 1.5742559420248954, + "learning_rate": 7.857109752224911e-06, + "loss": 0.5745, + "step": 10676 + }, + { + "epoch": 
0.3272342773078338, + "grad_norm": 0.7983435193762469, + "learning_rate": 7.856702431730763e-06, + "loss": 0.4676, + "step": 10677 + }, + { + "epoch": 0.327264925830575, + "grad_norm": 0.8799977126060976, + "learning_rate": 7.856295083088793e-06, + "loss": 0.4918, + "step": 10678 + }, + { + "epoch": 0.3272955743533162, + "grad_norm": 1.5775936076670207, + "learning_rate": 7.85588770630302e-06, + "loss": 0.6698, + "step": 10679 + }, + { + "epoch": 0.3273262228760574, + "grad_norm": 0.7994887231014898, + "learning_rate": 7.855480301377451e-06, + "loss": 0.4754, + "step": 10680 + }, + { + "epoch": 0.3273568713987986, + "grad_norm": 1.7974881914689074, + "learning_rate": 7.855072868316107e-06, + "loss": 0.6882, + "step": 10681 + }, + { + "epoch": 0.3273875199215398, + "grad_norm": 1.7385449849561565, + "learning_rate": 7.854665407122998e-06, + "loss": 0.7296, + "step": 10682 + }, + { + "epoch": 0.327418168444281, + "grad_norm": 2.101935158857409, + "learning_rate": 7.854257917802141e-06, + "loss": 0.804, + "step": 10683 + }, + { + "epoch": 0.3274488169670222, + "grad_norm": 0.8617901661524179, + "learning_rate": 7.85385040035755e-06, + "loss": 0.4918, + "step": 10684 + }, + { + "epoch": 0.32747946548976337, + "grad_norm": 1.6676944216672118, + "learning_rate": 7.853442854793241e-06, + "loss": 0.7594, + "step": 10685 + }, + { + "epoch": 0.32751011401250457, + "grad_norm": 1.7934347156690373, + "learning_rate": 7.853035281113228e-06, + "loss": 0.6497, + "step": 10686 + }, + { + "epoch": 0.3275407625352458, + "grad_norm": 1.5143751271533938, + "learning_rate": 7.852627679321529e-06, + "loss": 0.6104, + "step": 10687 + }, + { + "epoch": 0.327571411057987, + "grad_norm": 1.7224569697274215, + "learning_rate": 7.85222004942216e-06, + "loss": 0.7255, + "step": 10688 + }, + { + "epoch": 0.3276020595807282, + "grad_norm": 1.5629033643074604, + "learning_rate": 7.851812391419139e-06, + "loss": 0.6276, + "step": 10689 + }, + { + "epoch": 0.3276327081034694, + "grad_norm": 1.595422992718776, + "learning_rate": 7.851404705316478e-06, + "loss": 0.6855, + "step": 10690 + }, + { + "epoch": 0.3276633566262106, + "grad_norm": 0.8450526305557425, + "learning_rate": 7.850996991118199e-06, + "loss": 0.4906, + "step": 10691 + }, + { + "epoch": 0.3276940051489518, + "grad_norm": 1.4606920667828276, + "learning_rate": 7.850589248828316e-06, + "loss": 0.5871, + "step": 10692 + }, + { + "epoch": 0.327724653671693, + "grad_norm": 1.5784191657709978, + "learning_rate": 7.850181478450847e-06, + "loss": 0.7252, + "step": 10693 + }, + { + "epoch": 0.3277553021944342, + "grad_norm": 1.7567070973758558, + "learning_rate": 7.849773679989814e-06, + "loss": 0.6945, + "step": 10694 + }, + { + "epoch": 0.3277859507171754, + "grad_norm": 1.6361604675807215, + "learning_rate": 7.849365853449228e-06, + "loss": 0.6565, + "step": 10695 + }, + { + "epoch": 0.32781659923991663, + "grad_norm": 1.7170637135469233, + "learning_rate": 7.848957998833113e-06, + "loss": 0.7327, + "step": 10696 + }, + { + "epoch": 0.32784724776265783, + "grad_norm": 1.5831810970721722, + "learning_rate": 7.848550116145486e-06, + "loss": 0.6797, + "step": 10697 + }, + { + "epoch": 0.32787789628539904, + "grad_norm": 0.7546303060918953, + "learning_rate": 7.848142205390364e-06, + "loss": 0.4612, + "step": 10698 + }, + { + "epoch": 0.32790854480814025, + "grad_norm": 1.7732842204630546, + "learning_rate": 7.847734266571769e-06, + "loss": 0.6467, + "step": 10699 + }, + { + "epoch": 0.32793919333088145, + "grad_norm": 1.518696036345308, + "learning_rate": 
7.847326299693721e-06, + "loss": 0.7286, + "step": 10700 + }, + { + "epoch": 0.32796984185362266, + "grad_norm": 1.522754861570597, + "learning_rate": 7.846918304760239e-06, + "loss": 0.645, + "step": 10701 + }, + { + "epoch": 0.32800049037636386, + "grad_norm": 0.7891683682039259, + "learning_rate": 7.84651028177534e-06, + "loss": 0.4467, + "step": 10702 + }, + { + "epoch": 0.32803113889910507, + "grad_norm": 1.5868290650504742, + "learning_rate": 7.846102230743049e-06, + "loss": 0.7282, + "step": 10703 + }, + { + "epoch": 0.3280617874218463, + "grad_norm": 1.7461637954259355, + "learning_rate": 7.845694151667382e-06, + "loss": 0.6672, + "step": 10704 + }, + { + "epoch": 0.3280924359445875, + "grad_norm": 1.7106787522450695, + "learning_rate": 7.845286044552365e-06, + "loss": 0.6858, + "step": 10705 + }, + { + "epoch": 0.3281230844673287, + "grad_norm": 1.8494192023503124, + "learning_rate": 7.844877909402015e-06, + "loss": 0.6311, + "step": 10706 + }, + { + "epoch": 0.3281537329900699, + "grad_norm": 0.8673732763027145, + "learning_rate": 7.844469746220356e-06, + "loss": 0.4868, + "step": 10707 + }, + { + "epoch": 0.3281843815128111, + "grad_norm": 2.0170743480666884, + "learning_rate": 7.844061555011408e-06, + "loss": 0.7443, + "step": 10708 + }, + { + "epoch": 0.3282150300355523, + "grad_norm": 1.6957566394500334, + "learning_rate": 7.843653335779194e-06, + "loss": 0.6291, + "step": 10709 + }, + { + "epoch": 0.3282456785582935, + "grad_norm": 0.7769784448169675, + "learning_rate": 7.843245088527736e-06, + "loss": 0.4548, + "step": 10710 + }, + { + "epoch": 0.3282763270810347, + "grad_norm": 0.7850199323811362, + "learning_rate": 7.842836813261057e-06, + "loss": 0.4795, + "step": 10711 + }, + { + "epoch": 0.3283069756037759, + "grad_norm": 1.7302496310272841, + "learning_rate": 7.84242850998318e-06, + "loss": 0.6699, + "step": 10712 + }, + { + "epoch": 0.3283376241265171, + "grad_norm": 1.5597510574235858, + "learning_rate": 7.842020178698126e-06, + "loss": 0.6958, + "step": 10713 + }, + { + "epoch": 0.32836827264925833, + "grad_norm": 1.7034779915053493, + "learning_rate": 7.841611819409922e-06, + "loss": 0.6979, + "step": 10714 + }, + { + "epoch": 0.32839892117199954, + "grad_norm": 1.9225022898856285, + "learning_rate": 7.841203432122588e-06, + "loss": 0.6553, + "step": 10715 + }, + { + "epoch": 0.3284295696947407, + "grad_norm": 1.3949760391769574, + "learning_rate": 7.840795016840151e-06, + "loss": 0.6718, + "step": 10716 + }, + { + "epoch": 0.3284602182174819, + "grad_norm": 1.7321250739418852, + "learning_rate": 7.840386573566634e-06, + "loss": 0.7074, + "step": 10717 + }, + { + "epoch": 0.3284908667402231, + "grad_norm": 1.5342671173083704, + "learning_rate": 7.83997810230606e-06, + "loss": 0.5798, + "step": 10718 + }, + { + "epoch": 0.3285215152629643, + "grad_norm": 2.030960178853603, + "learning_rate": 7.839569603062456e-06, + "loss": 0.6625, + "step": 10719 + }, + { + "epoch": 0.3285521637857055, + "grad_norm": 0.9230229678166129, + "learning_rate": 7.839161075839846e-06, + "loss": 0.4943, + "step": 10720 + }, + { + "epoch": 0.3285828123084467, + "grad_norm": 2.626603648807846, + "learning_rate": 7.838752520642256e-06, + "loss": 0.754, + "step": 10721 + }, + { + "epoch": 0.3286134608311879, + "grad_norm": 1.615037983578785, + "learning_rate": 7.83834393747371e-06, + "loss": 0.6145, + "step": 10722 + }, + { + "epoch": 0.3286441093539291, + "grad_norm": 2.292572106050651, + "learning_rate": 7.837935326338236e-06, + "loss": 0.7194, + "step": 10723 + }, + { + "epoch": 
0.32867475787667033, + "grad_norm": 1.8462802120318498, + "learning_rate": 7.837526687239858e-06, + "loss": 0.6567, + "step": 10724 + }, + { + "epoch": 0.32870540639941154, + "grad_norm": 1.9565857849376798, + "learning_rate": 7.837118020182606e-06, + "loss": 0.7986, + "step": 10725 + }, + { + "epoch": 0.32873605492215274, + "grad_norm": 1.4795938622786047, + "learning_rate": 7.8367093251705e-06, + "loss": 0.625, + "step": 10726 + }, + { + "epoch": 0.32876670344489395, + "grad_norm": 0.7935924038322327, + "learning_rate": 7.836300602207574e-06, + "loss": 0.4673, + "step": 10727 + }, + { + "epoch": 0.32879735196763515, + "grad_norm": 1.6360171161747659, + "learning_rate": 7.835891851297852e-06, + "loss": 0.6915, + "step": 10728 + }, + { + "epoch": 0.32882800049037636, + "grad_norm": 1.7742509363199446, + "learning_rate": 7.835483072445363e-06, + "loss": 0.6077, + "step": 10729 + }, + { + "epoch": 0.32885864901311757, + "grad_norm": 0.7997627543606389, + "learning_rate": 7.835074265654133e-06, + "loss": 0.4861, + "step": 10730 + }, + { + "epoch": 0.32888929753585877, + "grad_norm": 1.8178498389681375, + "learning_rate": 7.83466543092819e-06, + "loss": 0.6517, + "step": 10731 + }, + { + "epoch": 0.3289199460586, + "grad_norm": 1.70936084582804, + "learning_rate": 7.834256568271564e-06, + "loss": 0.6064, + "step": 10732 + }, + { + "epoch": 0.3289505945813412, + "grad_norm": 1.904579568828216, + "learning_rate": 7.833847677688282e-06, + "loss": 0.7793, + "step": 10733 + }, + { + "epoch": 0.3289812431040824, + "grad_norm": 0.8178715495429909, + "learning_rate": 7.833438759182375e-06, + "loss": 0.4627, + "step": 10734 + }, + { + "epoch": 0.3290118916268236, + "grad_norm": 1.5324898237960054, + "learning_rate": 7.833029812757871e-06, + "loss": 0.5396, + "step": 10735 + }, + { + "epoch": 0.3290425401495648, + "grad_norm": 1.7086670222826923, + "learning_rate": 7.832620838418798e-06, + "loss": 0.7407, + "step": 10736 + }, + { + "epoch": 0.329073188672306, + "grad_norm": 0.8174346883743621, + "learning_rate": 7.832211836169188e-06, + "loss": 0.4495, + "step": 10737 + }, + { + "epoch": 0.3291038371950472, + "grad_norm": 1.6695562439465452, + "learning_rate": 7.831802806013072e-06, + "loss": 0.6488, + "step": 10738 + }, + { + "epoch": 0.3291344857177884, + "grad_norm": 1.840918331844408, + "learning_rate": 7.831393747954477e-06, + "loss": 0.7614, + "step": 10739 + }, + { + "epoch": 0.3291651342405296, + "grad_norm": 1.815630454710627, + "learning_rate": 7.830984661997434e-06, + "loss": 0.689, + "step": 10740 + }, + { + "epoch": 0.32919578276327083, + "grad_norm": 1.6762992464722133, + "learning_rate": 7.830575548145975e-06, + "loss": 0.7255, + "step": 10741 + }, + { + "epoch": 0.32922643128601203, + "grad_norm": 1.6978442392491055, + "learning_rate": 7.83016640640413e-06, + "loss": 0.6779, + "step": 10742 + }, + { + "epoch": 0.32925707980875324, + "grad_norm": 1.7702697354905366, + "learning_rate": 7.829757236775934e-06, + "loss": 0.7307, + "step": 10743 + }, + { + "epoch": 0.32928772833149444, + "grad_norm": 0.898816670605621, + "learning_rate": 7.829348039265413e-06, + "loss": 0.4627, + "step": 10744 + }, + { + "epoch": 0.32931837685423565, + "grad_norm": 1.5991016622675986, + "learning_rate": 7.828938813876603e-06, + "loss": 0.6612, + "step": 10745 + }, + { + "epoch": 0.32934902537697686, + "grad_norm": 1.5416940660724474, + "learning_rate": 7.828529560613536e-06, + "loss": 0.6291, + "step": 10746 + }, + { + "epoch": 0.329379673899718, + "grad_norm": 1.8437268769903228, + "learning_rate": 
7.828120279480242e-06, + "loss": 0.6922, + "step": 10747 + }, + { + "epoch": 0.3294103224224592, + "grad_norm": 1.768952146578513, + "learning_rate": 7.827710970480757e-06, + "loss": 0.7255, + "step": 10748 + }, + { + "epoch": 0.3294409709452004, + "grad_norm": 1.8088393500675246, + "learning_rate": 7.827301633619112e-06, + "loss": 0.6688, + "step": 10749 + }, + { + "epoch": 0.3294716194679416, + "grad_norm": 1.900625383252148, + "learning_rate": 7.826892268899338e-06, + "loss": 0.6161, + "step": 10750 + }, + { + "epoch": 0.32950226799068283, + "grad_norm": 1.7890622311317, + "learning_rate": 7.826482876325474e-06, + "loss": 0.7104, + "step": 10751 + }, + { + "epoch": 0.32953291651342403, + "grad_norm": 1.7552890558721348, + "learning_rate": 7.82607345590155e-06, + "loss": 0.6905, + "step": 10752 + }, + { + "epoch": 0.32956356503616524, + "grad_norm": 1.3835347957887496, + "learning_rate": 7.825664007631601e-06, + "loss": 0.6693, + "step": 10753 + }, + { + "epoch": 0.32959421355890645, + "grad_norm": 1.7456432654592835, + "learning_rate": 7.825254531519663e-06, + "loss": 0.7313, + "step": 10754 + }, + { + "epoch": 0.32962486208164765, + "grad_norm": 1.7456557606456036, + "learning_rate": 7.824845027569769e-06, + "loss": 0.7157, + "step": 10755 + }, + { + "epoch": 0.32965551060438886, + "grad_norm": 1.6665806866680233, + "learning_rate": 7.824435495785953e-06, + "loss": 0.671, + "step": 10756 + }, + { + "epoch": 0.32968615912713006, + "grad_norm": 1.7618361541834688, + "learning_rate": 7.82402593617225e-06, + "loss": 0.6722, + "step": 10757 + }, + { + "epoch": 0.32971680764987127, + "grad_norm": 1.610417701650447, + "learning_rate": 7.8236163487327e-06, + "loss": 0.6846, + "step": 10758 + }, + { + "epoch": 0.3297474561726125, + "grad_norm": 1.602768546083451, + "learning_rate": 7.823206733471333e-06, + "loss": 0.5345, + "step": 10759 + }, + { + "epoch": 0.3297781046953537, + "grad_norm": 1.609484235216472, + "learning_rate": 7.82279709039219e-06, + "loss": 0.6184, + "step": 10760 + }, + { + "epoch": 0.3298087532180949, + "grad_norm": 0.8963825564617305, + "learning_rate": 7.822387419499304e-06, + "loss": 0.4671, + "step": 10761 + }, + { + "epoch": 0.3298394017408361, + "grad_norm": 1.5171512539184457, + "learning_rate": 7.821977720796713e-06, + "loss": 0.6025, + "step": 10762 + }, + { + "epoch": 0.3298700502635773, + "grad_norm": 1.5520833186805805, + "learning_rate": 7.821567994288452e-06, + "loss": 0.6722, + "step": 10763 + }, + { + "epoch": 0.3299006987863185, + "grad_norm": 1.6665443947790093, + "learning_rate": 7.821158239978561e-06, + "loss": 0.8399, + "step": 10764 + }, + { + "epoch": 0.3299313473090597, + "grad_norm": 1.6792669282710504, + "learning_rate": 7.820748457871077e-06, + "loss": 0.6175, + "step": 10765 + }, + { + "epoch": 0.3299619958318009, + "grad_norm": 1.7655121431363394, + "learning_rate": 7.820338647970036e-06, + "loss": 0.7216, + "step": 10766 + }, + { + "epoch": 0.3299926443545421, + "grad_norm": 1.7333571915719261, + "learning_rate": 7.819928810279476e-06, + "loss": 0.6792, + "step": 10767 + }, + { + "epoch": 0.3300232928772833, + "grad_norm": 0.8643607166681198, + "learning_rate": 7.819518944803434e-06, + "loss": 0.4871, + "step": 10768 + }, + { + "epoch": 0.33005394140002453, + "grad_norm": 1.6360211878873925, + "learning_rate": 7.819109051545955e-06, + "loss": 0.6846, + "step": 10769 + }, + { + "epoch": 0.33008458992276574, + "grad_norm": 1.7852295633144057, + "learning_rate": 7.81869913051107e-06, + "loss": 0.709, + "step": 10770 + }, + { + "epoch": 
0.33011523844550694, + "grad_norm": 1.6520308184507677, + "learning_rate": 7.818289181702822e-06, + "loss": 0.7007, + "step": 10771 + }, + { + "epoch": 0.33014588696824815, + "grad_norm": 1.5797642367331632, + "learning_rate": 7.81787920512525e-06, + "loss": 0.6487, + "step": 10772 + }, + { + "epoch": 0.33017653549098935, + "grad_norm": 0.8113216843763471, + "learning_rate": 7.817469200782394e-06, + "loss": 0.4728, + "step": 10773 + }, + { + "epoch": 0.33020718401373056, + "grad_norm": 1.6865345580801423, + "learning_rate": 7.81705916867829e-06, + "loss": 0.6413, + "step": 10774 + }, + { + "epoch": 0.33023783253647176, + "grad_norm": 0.808754516348698, + "learning_rate": 7.816649108816982e-06, + "loss": 0.4706, + "step": 10775 + }, + { + "epoch": 0.33026848105921297, + "grad_norm": 1.6055463629954465, + "learning_rate": 7.816239021202512e-06, + "loss": 0.6347, + "step": 10776 + }, + { + "epoch": 0.3302991295819542, + "grad_norm": 1.715966109104157, + "learning_rate": 7.815828905838917e-06, + "loss": 0.6291, + "step": 10777 + }, + { + "epoch": 0.3303297781046953, + "grad_norm": 1.716286517223071, + "learning_rate": 7.81541876273024e-06, + "loss": 0.7031, + "step": 10778 + }, + { + "epoch": 0.33036042662743653, + "grad_norm": 1.5693031886135316, + "learning_rate": 7.81500859188052e-06, + "loss": 0.6389, + "step": 10779 + }, + { + "epoch": 0.33039107515017774, + "grad_norm": 1.5874791386660778, + "learning_rate": 7.814598393293802e-06, + "loss": 0.6209, + "step": 10780 + }, + { + "epoch": 0.33042172367291894, + "grad_norm": 1.6284243850911921, + "learning_rate": 7.814188166974125e-06, + "loss": 0.6826, + "step": 10781 + }, + { + "epoch": 0.33045237219566015, + "grad_norm": 1.885963433688321, + "learning_rate": 7.813777912925533e-06, + "loss": 0.7462, + "step": 10782 + }, + { + "epoch": 0.33048302071840135, + "grad_norm": 1.7024980451190463, + "learning_rate": 7.813367631152066e-06, + "loss": 0.7397, + "step": 10783 + }, + { + "epoch": 0.33051366924114256, + "grad_norm": 1.5756339228981449, + "learning_rate": 7.812957321657769e-06, + "loss": 0.671, + "step": 10784 + }, + { + "epoch": 0.33054431776388377, + "grad_norm": 1.8851558441244636, + "learning_rate": 7.812546984446681e-06, + "loss": 0.6759, + "step": 10785 + }, + { + "epoch": 0.33057496628662497, + "grad_norm": 1.712961525410508, + "learning_rate": 7.81213661952285e-06, + "loss": 0.6804, + "step": 10786 + }, + { + "epoch": 0.3306056148093662, + "grad_norm": 0.9153326041117261, + "learning_rate": 7.811726226890317e-06, + "loss": 0.4933, + "step": 10787 + }, + { + "epoch": 0.3306362633321074, + "grad_norm": 1.7183073640772786, + "learning_rate": 7.811315806553126e-06, + "loss": 0.6454, + "step": 10788 + }, + { + "epoch": 0.3306669118548486, + "grad_norm": 1.59693523245959, + "learning_rate": 7.810905358515323e-06, + "loss": 0.6522, + "step": 10789 + }, + { + "epoch": 0.3306975603775898, + "grad_norm": 1.699518645285084, + "learning_rate": 7.810494882780947e-06, + "loss": 0.7173, + "step": 10790 + }, + { + "epoch": 0.330728208900331, + "grad_norm": 0.8125736820008332, + "learning_rate": 7.810084379354049e-06, + "loss": 0.4702, + "step": 10791 + }, + { + "epoch": 0.3307588574230722, + "grad_norm": 0.7831151977399571, + "learning_rate": 7.809673848238668e-06, + "loss": 0.4633, + "step": 10792 + }, + { + "epoch": 0.3307895059458134, + "grad_norm": 1.682962049140233, + "learning_rate": 7.809263289438855e-06, + "loss": 0.6792, + "step": 10793 + }, + { + "epoch": 0.3308201544685546, + "grad_norm": 1.6922495547614493, + "learning_rate": 
7.80885270295865e-06, + "loss": 0.6344, + "step": 10794 + }, + { + "epoch": 0.3308508029912958, + "grad_norm": 1.6312624685197288, + "learning_rate": 7.8084420888021e-06, + "loss": 0.7249, + "step": 10795 + }, + { + "epoch": 0.33088145151403703, + "grad_norm": 1.686518865054059, + "learning_rate": 7.80803144697325e-06, + "loss": 0.7441, + "step": 10796 + }, + { + "epoch": 0.33091210003677823, + "grad_norm": 1.658104747241581, + "learning_rate": 7.807620777476151e-06, + "loss": 0.7642, + "step": 10797 + }, + { + "epoch": 0.33094274855951944, + "grad_norm": 1.6352774830788923, + "learning_rate": 7.807210080314844e-06, + "loss": 0.562, + "step": 10798 + }, + { + "epoch": 0.33097339708226065, + "grad_norm": 1.7293217393541227, + "learning_rate": 7.80679935549338e-06, + "loss": 0.7062, + "step": 10799 + }, + { + "epoch": 0.33100404560500185, + "grad_norm": 1.6385445267794327, + "learning_rate": 7.806388603015802e-06, + "loss": 0.7277, + "step": 10800 + }, + { + "epoch": 0.33103469412774306, + "grad_norm": 1.6757883936039923, + "learning_rate": 7.805977822886159e-06, + "loss": 0.7112, + "step": 10801 + }, + { + "epoch": 0.33106534265048426, + "grad_norm": 1.7543210254706905, + "learning_rate": 7.8055670151085e-06, + "loss": 0.6492, + "step": 10802 + }, + { + "epoch": 0.33109599117322547, + "grad_norm": 1.5830046814399525, + "learning_rate": 7.80515617968687e-06, + "loss": 0.645, + "step": 10803 + }, + { + "epoch": 0.3311266396959667, + "grad_norm": 1.9988879052433655, + "learning_rate": 7.80474531662532e-06, + "loss": 0.7889, + "step": 10804 + }, + { + "epoch": 0.3311572882187079, + "grad_norm": 1.9128683500181312, + "learning_rate": 7.804334425927896e-06, + "loss": 0.6593, + "step": 10805 + }, + { + "epoch": 0.3311879367414491, + "grad_norm": 0.956446397976111, + "learning_rate": 7.803923507598645e-06, + "loss": 0.4428, + "step": 10806 + }, + { + "epoch": 0.3312185852641903, + "grad_norm": 1.8118649135603646, + "learning_rate": 7.803512561641622e-06, + "loss": 0.6863, + "step": 10807 + }, + { + "epoch": 0.3312492337869315, + "grad_norm": 1.6355627831539894, + "learning_rate": 7.803101588060871e-06, + "loss": 0.6448, + "step": 10808 + }, + { + "epoch": 0.33127988230967265, + "grad_norm": 1.8023790686285537, + "learning_rate": 7.802690586860442e-06, + "loss": 0.7922, + "step": 10809 + }, + { + "epoch": 0.33131053083241385, + "grad_norm": 1.784482480543444, + "learning_rate": 7.802279558044385e-06, + "loss": 0.6793, + "step": 10810 + }, + { + "epoch": 0.33134117935515506, + "grad_norm": 1.6393006363149374, + "learning_rate": 7.801868501616752e-06, + "loss": 0.684, + "step": 10811 + }, + { + "epoch": 0.33137182787789626, + "grad_norm": 1.6315036683175432, + "learning_rate": 7.801457417581592e-06, + "loss": 0.7096, + "step": 10812 + }, + { + "epoch": 0.33140247640063747, + "grad_norm": 0.8795008757986977, + "learning_rate": 7.801046305942954e-06, + "loss": 0.4693, + "step": 10813 + }, + { + "epoch": 0.3314331249233787, + "grad_norm": 1.7337071924008811, + "learning_rate": 7.80063516670489e-06, + "loss": 0.6743, + "step": 10814 + }, + { + "epoch": 0.3314637734461199, + "grad_norm": 0.9233340778419447, + "learning_rate": 7.800223999871452e-06, + "loss": 0.4711, + "step": 10815 + }, + { + "epoch": 0.3314944219688611, + "grad_norm": 2.0603592006001548, + "learning_rate": 7.799812805446691e-06, + "loss": 0.7226, + "step": 10816 + }, + { + "epoch": 0.3315250704916023, + "grad_norm": 1.7519422069130788, + "learning_rate": 7.799401583434659e-06, + "loss": 0.6115, + "step": 10817 + }, + { + "epoch": 
0.3315557190143435, + "grad_norm": 1.92849057974925, + "learning_rate": 7.798990333839405e-06, + "loss": 0.7957, + "step": 10818 + }, + { + "epoch": 0.3315863675370847, + "grad_norm": 1.5777103048868117, + "learning_rate": 7.798579056664984e-06, + "loss": 0.7252, + "step": 10819 + }, + { + "epoch": 0.3316170160598259, + "grad_norm": 1.6988768143496504, + "learning_rate": 7.798167751915446e-06, + "loss": 0.7166, + "step": 10820 + }, + { + "epoch": 0.3316476645825671, + "grad_norm": 1.59121088740816, + "learning_rate": 7.797756419594846e-06, + "loss": 0.7605, + "step": 10821 + }, + { + "epoch": 0.3316783131053083, + "grad_norm": 1.7844987119800422, + "learning_rate": 7.797345059707236e-06, + "loss": 0.6496, + "step": 10822 + }, + { + "epoch": 0.3317089616280495, + "grad_norm": 1.6322233491500384, + "learning_rate": 7.79693367225667e-06, + "loss": 0.699, + "step": 10823 + }, + { + "epoch": 0.33173961015079073, + "grad_norm": 1.8417436223300827, + "learning_rate": 7.7965222572472e-06, + "loss": 0.6848, + "step": 10824 + }, + { + "epoch": 0.33177025867353194, + "grad_norm": 0.9644361149887974, + "learning_rate": 7.796110814682882e-06, + "loss": 0.4466, + "step": 10825 + }, + { + "epoch": 0.33180090719627314, + "grad_norm": 1.8316885030502028, + "learning_rate": 7.79569934456777e-06, + "loss": 0.6804, + "step": 10826 + }, + { + "epoch": 0.33183155571901435, + "grad_norm": 1.7288339323383461, + "learning_rate": 7.795287846905912e-06, + "loss": 0.6982, + "step": 10827 + }, + { + "epoch": 0.33186220424175555, + "grad_norm": 1.5970136689294068, + "learning_rate": 7.794876321701372e-06, + "loss": 0.6901, + "step": 10828 + }, + { + "epoch": 0.33189285276449676, + "grad_norm": 1.788471475920761, + "learning_rate": 7.794464768958198e-06, + "loss": 0.6594, + "step": 10829 + }, + { + "epoch": 0.33192350128723797, + "grad_norm": 1.750918463066456, + "learning_rate": 7.79405318868045e-06, + "loss": 0.6557, + "step": 10830 + }, + { + "epoch": 0.33195414980997917, + "grad_norm": 1.7465440395489507, + "learning_rate": 7.79364158087218e-06, + "loss": 0.7124, + "step": 10831 + }, + { + "epoch": 0.3319847983327204, + "grad_norm": 0.8570623663366038, + "learning_rate": 7.793229945537444e-06, + "loss": 0.472, + "step": 10832 + }, + { + "epoch": 0.3320154468554616, + "grad_norm": 2.1080828836292933, + "learning_rate": 7.792818282680299e-06, + "loss": 0.574, + "step": 10833 + }, + { + "epoch": 0.3320460953782028, + "grad_norm": 1.8254800002871936, + "learning_rate": 7.792406592304802e-06, + "loss": 0.6627, + "step": 10834 + }, + { + "epoch": 0.332076743900944, + "grad_norm": 1.8233336106058264, + "learning_rate": 7.791994874415008e-06, + "loss": 0.7338, + "step": 10835 + }, + { + "epoch": 0.3321073924236852, + "grad_norm": 1.841148925222183, + "learning_rate": 7.791583129014973e-06, + "loss": 0.6568, + "step": 10836 + }, + { + "epoch": 0.3321380409464264, + "grad_norm": 1.6854452201636336, + "learning_rate": 7.791171356108755e-06, + "loss": 0.6718, + "step": 10837 + }, + { + "epoch": 0.3321686894691676, + "grad_norm": 1.4614002519379636, + "learning_rate": 7.790759555700413e-06, + "loss": 0.6545, + "step": 10838 + }, + { + "epoch": 0.3321993379919088, + "grad_norm": 1.942330187865892, + "learning_rate": 7.790347727794003e-06, + "loss": 0.7535, + "step": 10839 + }, + { + "epoch": 0.33222998651464997, + "grad_norm": 1.5337739191187747, + "learning_rate": 7.789935872393582e-06, + "loss": 0.5665, + "step": 10840 + }, + { + "epoch": 0.33226063503739117, + "grad_norm": 1.6999077177758506, + "learning_rate": 
7.78952398950321e-06, + "loss": 0.6837, + "step": 10841 + }, + { + "epoch": 0.3322912835601324, + "grad_norm": 0.8535434968021792, + "learning_rate": 7.789112079126942e-06, + "loss": 0.4612, + "step": 10842 + }, + { + "epoch": 0.3323219320828736, + "grad_norm": 0.8131861980717159, + "learning_rate": 7.788700141268842e-06, + "loss": 0.4835, + "step": 10843 + }, + { + "epoch": 0.3323525806056148, + "grad_norm": 1.9684529723501423, + "learning_rate": 7.788288175932965e-06, + "loss": 0.7195, + "step": 10844 + }, + { + "epoch": 0.332383229128356, + "grad_norm": 0.741449891897247, + "learning_rate": 7.787876183123371e-06, + "loss": 0.4636, + "step": 10845 + }, + { + "epoch": 0.3324138776510972, + "grad_norm": 1.5805558017172556, + "learning_rate": 7.787464162844118e-06, + "loss": 0.5704, + "step": 10846 + }, + { + "epoch": 0.3324445261738384, + "grad_norm": 1.68232440697366, + "learning_rate": 7.78705211509927e-06, + "loss": 0.7415, + "step": 10847 + }, + { + "epoch": 0.3324751746965796, + "grad_norm": 1.8638737856616725, + "learning_rate": 7.786640039892884e-06, + "loss": 0.6572, + "step": 10848 + }, + { + "epoch": 0.3325058232193208, + "grad_norm": 0.9828325243330144, + "learning_rate": 7.78622793722902e-06, + "loss": 0.4859, + "step": 10849 + }, + { + "epoch": 0.332536471742062, + "grad_norm": 1.7111769884135397, + "learning_rate": 7.78581580711174e-06, + "loss": 0.729, + "step": 10850 + }, + { + "epoch": 0.33256712026480323, + "grad_norm": 1.6448911771996848, + "learning_rate": 7.785403649545103e-06, + "loss": 0.6856, + "step": 10851 + }, + { + "epoch": 0.33259776878754443, + "grad_norm": 0.8577214121564866, + "learning_rate": 7.784991464533171e-06, + "loss": 0.4708, + "step": 10852 + }, + { + "epoch": 0.33262841731028564, + "grad_norm": 1.4558842225175084, + "learning_rate": 7.784579252080006e-06, + "loss": 0.6782, + "step": 10853 + }, + { + "epoch": 0.33265906583302685, + "grad_norm": 1.6305610248224705, + "learning_rate": 7.78416701218967e-06, + "loss": 0.6385, + "step": 10854 + }, + { + "epoch": 0.33268971435576805, + "grad_norm": 1.7913519527263135, + "learning_rate": 7.783754744866223e-06, + "loss": 0.6488, + "step": 10855 + }, + { + "epoch": 0.33272036287850926, + "grad_norm": 0.8691700991713442, + "learning_rate": 7.783342450113727e-06, + "loss": 0.4646, + "step": 10856 + }, + { + "epoch": 0.33275101140125046, + "grad_norm": 1.7977700092878854, + "learning_rate": 7.782930127936248e-06, + "loss": 0.7207, + "step": 10857 + }, + { + "epoch": 0.33278165992399167, + "grad_norm": 1.7656617175734328, + "learning_rate": 7.782517778337845e-06, + "loss": 0.6233, + "step": 10858 + }, + { + "epoch": 0.3328123084467329, + "grad_norm": 1.8388440369796986, + "learning_rate": 7.782105401322584e-06, + "loss": 0.6789, + "step": 10859 + }, + { + "epoch": 0.3328429569694741, + "grad_norm": 1.753502241023095, + "learning_rate": 7.781692996894526e-06, + "loss": 0.6644, + "step": 10860 + }, + { + "epoch": 0.3328736054922153, + "grad_norm": 1.9209499567283088, + "learning_rate": 7.781280565057734e-06, + "loss": 0.7775, + "step": 10861 + }, + { + "epoch": 0.3329042540149565, + "grad_norm": 0.8176288738181923, + "learning_rate": 7.780868105816275e-06, + "loss": 0.4879, + "step": 10862 + }, + { + "epoch": 0.3329349025376977, + "grad_norm": 1.6165594526194709, + "learning_rate": 7.78045561917421e-06, + "loss": 0.6726, + "step": 10863 + }, + { + "epoch": 0.3329655510604389, + "grad_norm": 0.8281412352788333, + "learning_rate": 7.780043105135604e-06, + "loss": 0.4938, + "step": 10864 + }, + { + "epoch": 
0.3329961995831801, + "grad_norm": 0.8048500310349952, + "learning_rate": 7.779630563704522e-06, + "loss": 0.4636, + "step": 10865 + }, + { + "epoch": 0.3330268481059213, + "grad_norm": 1.7313143524778636, + "learning_rate": 7.779217994885028e-06, + "loss": 0.783, + "step": 10866 + }, + { + "epoch": 0.3330574966286625, + "grad_norm": 1.679506867404854, + "learning_rate": 7.778805398681191e-06, + "loss": 0.5732, + "step": 10867 + }, + { + "epoch": 0.3330881451514037, + "grad_norm": 1.6964946316022418, + "learning_rate": 7.77839277509707e-06, + "loss": 0.7161, + "step": 10868 + }, + { + "epoch": 0.33311879367414493, + "grad_norm": 1.884332891749786, + "learning_rate": 7.777980124136735e-06, + "loss": 0.7037, + "step": 10869 + }, + { + "epoch": 0.33314944219688614, + "grad_norm": 0.8108372390640648, + "learning_rate": 7.777567445804253e-06, + "loss": 0.4647, + "step": 10870 + }, + { + "epoch": 0.3331800907196273, + "grad_norm": 0.8158215868245611, + "learning_rate": 7.777154740103687e-06, + "loss": 0.4643, + "step": 10871 + }, + { + "epoch": 0.3332107392423685, + "grad_norm": 1.758041546237296, + "learning_rate": 7.776742007039104e-06, + "loss": 0.6633, + "step": 10872 + }, + { + "epoch": 0.3332413877651097, + "grad_norm": 1.9986467329319553, + "learning_rate": 7.776329246614574e-06, + "loss": 0.6959, + "step": 10873 + }, + { + "epoch": 0.3332720362878509, + "grad_norm": 1.955197644556663, + "learning_rate": 7.77591645883416e-06, + "loss": 0.6209, + "step": 10874 + }, + { + "epoch": 0.3333026848105921, + "grad_norm": 1.7975277666932727, + "learning_rate": 7.77550364370193e-06, + "loss": 0.7114, + "step": 10875 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 1.7496133182326778, + "learning_rate": 7.775090801221953e-06, + "loss": 0.6735, + "step": 10876 + }, + { + "epoch": 0.3333639818560745, + "grad_norm": 1.6937430724210358, + "learning_rate": 7.774677931398295e-06, + "loss": 0.6483, + "step": 10877 + }, + { + "epoch": 0.3333946303788157, + "grad_norm": 1.7995485737282695, + "learning_rate": 7.774265034235029e-06, + "loss": 0.7144, + "step": 10878 + }, + { + "epoch": 0.33342527890155693, + "grad_norm": 1.743875589825426, + "learning_rate": 7.773852109736217e-06, + "loss": 0.6702, + "step": 10879 + }, + { + "epoch": 0.33345592742429814, + "grad_norm": 0.9862730298883094, + "learning_rate": 7.773439157905931e-06, + "loss": 0.4842, + "step": 10880 + }, + { + "epoch": 0.33348657594703934, + "grad_norm": 1.8472121524513088, + "learning_rate": 7.773026178748239e-06, + "loss": 0.6491, + "step": 10881 + }, + { + "epoch": 0.33351722446978055, + "grad_norm": 1.9177441814368281, + "learning_rate": 7.77261317226721e-06, + "loss": 0.6962, + "step": 10882 + }, + { + "epoch": 0.33354787299252175, + "grad_norm": 0.8256344558178554, + "learning_rate": 7.772200138466917e-06, + "loss": 0.454, + "step": 10883 + }, + { + "epoch": 0.33357852151526296, + "grad_norm": 1.8984619810217114, + "learning_rate": 7.771787077351425e-06, + "loss": 0.6896, + "step": 10884 + }, + { + "epoch": 0.33360917003800417, + "grad_norm": 0.7635994430691071, + "learning_rate": 7.771373988924806e-06, + "loss": 0.4511, + "step": 10885 + }, + { + "epoch": 0.33363981856074537, + "grad_norm": 1.7427057590119213, + "learning_rate": 7.770960873191128e-06, + "loss": 0.7347, + "step": 10886 + }, + { + "epoch": 0.3336704670834866, + "grad_norm": 1.641029976037963, + "learning_rate": 7.770547730154465e-06, + "loss": 0.6176, + "step": 10887 + }, + { + "epoch": 0.3337011156062278, + "grad_norm": 1.8932246219464808, + "learning_rate": 
7.770134559818888e-06, + "loss": 0.7712, + "step": 10888 + }, + { + "epoch": 0.333731764128969, + "grad_norm": 1.6038001587609358, + "learning_rate": 7.769721362188465e-06, + "loss": 0.6326, + "step": 10889 + }, + { + "epoch": 0.3337624126517102, + "grad_norm": 1.6129382095340061, + "learning_rate": 7.769308137267268e-06, + "loss": 0.6593, + "step": 10890 + }, + { + "epoch": 0.3337930611744514, + "grad_norm": 1.8901645133839133, + "learning_rate": 7.76889488505937e-06, + "loss": 0.6747, + "step": 10891 + }, + { + "epoch": 0.3338237096971926, + "grad_norm": 1.6381058177535728, + "learning_rate": 7.768481605568843e-06, + "loss": 0.6331, + "step": 10892 + }, + { + "epoch": 0.3338543582199338, + "grad_norm": 1.617825748526461, + "learning_rate": 7.768068298799758e-06, + "loss": 0.6516, + "step": 10893 + }, + { + "epoch": 0.333885006742675, + "grad_norm": 0.8909340316837673, + "learning_rate": 7.767654964756186e-06, + "loss": 0.4473, + "step": 10894 + }, + { + "epoch": 0.3339156552654162, + "grad_norm": 1.6163821197571941, + "learning_rate": 7.767241603442204e-06, + "loss": 0.6341, + "step": 10895 + }, + { + "epoch": 0.33394630378815743, + "grad_norm": 1.8211165242108138, + "learning_rate": 7.76682821486188e-06, + "loss": 0.6398, + "step": 10896 + }, + { + "epoch": 0.33397695231089863, + "grad_norm": 1.9646577075097886, + "learning_rate": 7.766414799019294e-06, + "loss": 0.7093, + "step": 10897 + }, + { + "epoch": 0.33400760083363984, + "grad_norm": 1.532538246446913, + "learning_rate": 7.76600135591851e-06, + "loss": 0.698, + "step": 10898 + }, + { + "epoch": 0.33403824935638105, + "grad_norm": 0.8387091273100931, + "learning_rate": 7.765587885563609e-06, + "loss": 0.4827, + "step": 10899 + }, + { + "epoch": 0.33406889787912225, + "grad_norm": 1.6480811658474148, + "learning_rate": 7.765174387958663e-06, + "loss": 0.662, + "step": 10900 + }, + { + "epoch": 0.33409954640186346, + "grad_norm": 1.6863044321302127, + "learning_rate": 7.764760863107748e-06, + "loss": 0.6489, + "step": 10901 + }, + { + "epoch": 0.3341301949246046, + "grad_norm": 1.5865839516727875, + "learning_rate": 7.764347311014935e-06, + "loss": 0.6727, + "step": 10902 + }, + { + "epoch": 0.3341608434473458, + "grad_norm": 1.613243353535291, + "learning_rate": 7.7639337316843e-06, + "loss": 0.5715, + "step": 10903 + }, + { + "epoch": 0.334191491970087, + "grad_norm": 1.9902832533596833, + "learning_rate": 7.763520125119918e-06, + "loss": 0.6792, + "step": 10904 + }, + { + "epoch": 0.3342221404928282, + "grad_norm": 1.7884607936709833, + "learning_rate": 7.763106491325869e-06, + "loss": 0.773, + "step": 10905 + }, + { + "epoch": 0.33425278901556943, + "grad_norm": 1.5753543269847812, + "learning_rate": 7.762692830306223e-06, + "loss": 0.7473, + "step": 10906 + }, + { + "epoch": 0.33428343753831063, + "grad_norm": 1.8267753418119774, + "learning_rate": 7.762279142065055e-06, + "loss": 0.7408, + "step": 10907 + }, + { + "epoch": 0.33431408606105184, + "grad_norm": 1.6546753768534148, + "learning_rate": 7.761865426606447e-06, + "loss": 0.6431, + "step": 10908 + }, + { + "epoch": 0.33434473458379305, + "grad_norm": 1.5440676523601446, + "learning_rate": 7.76145168393447e-06, + "loss": 0.6329, + "step": 10909 + }, + { + "epoch": 0.33437538310653425, + "grad_norm": 0.8975185983374674, + "learning_rate": 7.761037914053205e-06, + "loss": 0.4568, + "step": 10910 + }, + { + "epoch": 0.33440603162927546, + "grad_norm": 1.8688083913080407, + "learning_rate": 7.760624116966726e-06, + "loss": 0.6881, + "step": 10911 + }, + { + "epoch": 
0.33443668015201666, + "grad_norm": 1.9881751201118874, + "learning_rate": 7.760210292679114e-06, + "loss": 0.6645, + "step": 10912 + }, + { + "epoch": 0.33446732867475787, + "grad_norm": 1.86707109063973, + "learning_rate": 7.75979644119444e-06, + "loss": 0.6989, + "step": 10913 + }, + { + "epoch": 0.3344979771974991, + "grad_norm": 1.883440896141748, + "learning_rate": 7.759382562516786e-06, + "loss": 0.7667, + "step": 10914 + }, + { + "epoch": 0.3345286257202403, + "grad_norm": 1.8713099063478615, + "learning_rate": 7.758968656650231e-06, + "loss": 0.6718, + "step": 10915 + }, + { + "epoch": 0.3345592742429815, + "grad_norm": 1.6278434288028791, + "learning_rate": 7.758554723598852e-06, + "loss": 0.5853, + "step": 10916 + }, + { + "epoch": 0.3345899227657227, + "grad_norm": 2.031608194662077, + "learning_rate": 7.758140763366726e-06, + "loss": 0.7006, + "step": 10917 + }, + { + "epoch": 0.3346205712884639, + "grad_norm": 1.9105289306741662, + "learning_rate": 7.757726775957935e-06, + "loss": 0.7698, + "step": 10918 + }, + { + "epoch": 0.3346512198112051, + "grad_norm": 1.7347654207658414, + "learning_rate": 7.757312761376555e-06, + "loss": 0.6247, + "step": 10919 + }, + { + "epoch": 0.3346818683339463, + "grad_norm": 1.439609946130256, + "learning_rate": 7.75689871962667e-06, + "loss": 0.575, + "step": 10920 + }, + { + "epoch": 0.3347125168566875, + "grad_norm": 1.7153123151556833, + "learning_rate": 7.756484650712352e-06, + "loss": 0.7062, + "step": 10921 + }, + { + "epoch": 0.3347431653794287, + "grad_norm": 1.6409904349741433, + "learning_rate": 7.756070554637689e-06, + "loss": 0.7328, + "step": 10922 + }, + { + "epoch": 0.3347738139021699, + "grad_norm": 1.6960533717797601, + "learning_rate": 7.755656431406756e-06, + "loss": 0.6824, + "step": 10923 + }, + { + "epoch": 0.33480446242491113, + "grad_norm": 1.7686178013397997, + "learning_rate": 7.755242281023634e-06, + "loss": 0.6542, + "step": 10924 + }, + { + "epoch": 0.33483511094765234, + "grad_norm": 1.0275306786889833, + "learning_rate": 7.754828103492407e-06, + "loss": 0.4736, + "step": 10925 + }, + { + "epoch": 0.33486575947039354, + "grad_norm": 1.8209572075398885, + "learning_rate": 7.754413898817152e-06, + "loss": 0.5273, + "step": 10926 + }, + { + "epoch": 0.33489640799313475, + "grad_norm": 1.7258951413880346, + "learning_rate": 7.753999667001952e-06, + "loss": 0.7046, + "step": 10927 + }, + { + "epoch": 0.33492705651587595, + "grad_norm": 1.6446343947868578, + "learning_rate": 7.753585408050892e-06, + "loss": 0.7477, + "step": 10928 + }, + { + "epoch": 0.33495770503861716, + "grad_norm": 1.6535760841368063, + "learning_rate": 7.753171121968048e-06, + "loss": 0.7178, + "step": 10929 + }, + { + "epoch": 0.33498835356135837, + "grad_norm": 2.038544310504032, + "learning_rate": 7.752756808757505e-06, + "loss": 0.7822, + "step": 10930 + }, + { + "epoch": 0.33501900208409957, + "grad_norm": 1.5092899192400135, + "learning_rate": 7.752342468423343e-06, + "loss": 0.649, + "step": 10931 + }, + { + "epoch": 0.3350496506068408, + "grad_norm": 1.8183383688151733, + "learning_rate": 7.75192810096965e-06, + "loss": 0.6608, + "step": 10932 + }, + { + "epoch": 0.3350802991295819, + "grad_norm": 1.648421354758214, + "learning_rate": 7.751513706400502e-06, + "loss": 0.6328, + "step": 10933 + }, + { + "epoch": 0.33511094765232313, + "grad_norm": 2.1344300965463643, + "learning_rate": 7.751099284719988e-06, + "loss": 0.6868, + "step": 10934 + }, + { + "epoch": 0.33514159617506434, + "grad_norm": 1.6058824363352486, + "learning_rate": 
7.750684835932185e-06, + "loss": 0.6341, + "step": 10935 + }, + { + "epoch": 0.33517224469780554, + "grad_norm": 1.7727002492922619, + "learning_rate": 7.750270360041185e-06, + "loss": 0.6122, + "step": 10936 + }, + { + "epoch": 0.33520289322054675, + "grad_norm": 1.5565980449874548, + "learning_rate": 7.749855857051065e-06, + "loss": 0.6822, + "step": 10937 + }, + { + "epoch": 0.33523354174328795, + "grad_norm": 1.7435065688251399, + "learning_rate": 7.749441326965912e-06, + "loss": 0.6678, + "step": 10938 + }, + { + "epoch": 0.33526419026602916, + "grad_norm": 0.9746180590864405, + "learning_rate": 7.749026769789811e-06, + "loss": 0.4686, + "step": 10939 + }, + { + "epoch": 0.33529483878877037, + "grad_norm": 1.738111524688391, + "learning_rate": 7.748612185526845e-06, + "loss": 0.6342, + "step": 10940 + }, + { + "epoch": 0.33532548731151157, + "grad_norm": 1.7061369092020893, + "learning_rate": 7.7481975741811e-06, + "loss": 0.6703, + "step": 10941 + }, + { + "epoch": 0.3353561358342528, + "grad_norm": 1.7767395608885637, + "learning_rate": 7.747782935756662e-06, + "loss": 0.6463, + "step": 10942 + }, + { + "epoch": 0.335386784356994, + "grad_norm": 1.834001896040395, + "learning_rate": 7.747368270257616e-06, + "loss": 0.6662, + "step": 10943 + }, + { + "epoch": 0.3354174328797352, + "grad_norm": 1.6157993080567832, + "learning_rate": 7.746953577688046e-06, + "loss": 0.6762, + "step": 10944 + }, + { + "epoch": 0.3354480814024764, + "grad_norm": 1.8065752316810495, + "learning_rate": 7.74653885805204e-06, + "loss": 0.7468, + "step": 10945 + }, + { + "epoch": 0.3354787299252176, + "grad_norm": 1.8486408983216676, + "learning_rate": 7.746124111353683e-06, + "loss": 0.6726, + "step": 10946 + }, + { + "epoch": 0.3355093784479588, + "grad_norm": 0.8792692287390222, + "learning_rate": 7.745709337597062e-06, + "loss": 0.4491, + "step": 10947 + }, + { + "epoch": 0.3355400269707, + "grad_norm": 1.4785387420788447, + "learning_rate": 7.745294536786268e-06, + "loss": 0.6781, + "step": 10948 + }, + { + "epoch": 0.3355706754934412, + "grad_norm": 1.7066405729497383, + "learning_rate": 7.74487970892538e-06, + "loss": 0.7141, + "step": 10949 + }, + { + "epoch": 0.3356013240161824, + "grad_norm": 1.5735801991715028, + "learning_rate": 7.744464854018494e-06, + "loss": 0.6414, + "step": 10950 + }, + { + "epoch": 0.33563197253892363, + "grad_norm": 1.6826023115001074, + "learning_rate": 7.74404997206969e-06, + "loss": 0.6736, + "step": 10951 + }, + { + "epoch": 0.33566262106166483, + "grad_norm": 1.5121879546526935, + "learning_rate": 7.743635063083062e-06, + "loss": 0.584, + "step": 10952 + }, + { + "epoch": 0.33569326958440604, + "grad_norm": 1.9379336915370693, + "learning_rate": 7.743220127062696e-06, + "loss": 0.683, + "step": 10953 + }, + { + "epoch": 0.33572391810714725, + "grad_norm": 1.7752305375086408, + "learning_rate": 7.742805164012679e-06, + "loss": 0.7634, + "step": 10954 + }, + { + "epoch": 0.33575456662988845, + "grad_norm": 1.6279976280114699, + "learning_rate": 7.742390173937103e-06, + "loss": 0.6578, + "step": 10955 + }, + { + "epoch": 0.33578521515262966, + "grad_norm": 1.763769711973038, + "learning_rate": 7.74197515684005e-06, + "loss": 0.6102, + "step": 10956 + }, + { + "epoch": 0.33581586367537086, + "grad_norm": 1.6800109219999089, + "learning_rate": 7.741560112725619e-06, + "loss": 0.6716, + "step": 10957 + }, + { + "epoch": 0.33584651219811207, + "grad_norm": 1.6487920433720602, + "learning_rate": 7.741145041597892e-06, + "loss": 0.7033, + "step": 10958 + }, + { + "epoch": 
0.3358771607208533, + "grad_norm": 1.7390370368239552, + "learning_rate": 7.740729943460965e-06, + "loss": 0.706, + "step": 10959 + }, + { + "epoch": 0.3359078092435945, + "grad_norm": 2.0117618085062023, + "learning_rate": 7.740314818318921e-06, + "loss": 0.7748, + "step": 10960 + }, + { + "epoch": 0.3359384577663357, + "grad_norm": 1.7657654198384296, + "learning_rate": 7.739899666175856e-06, + "loss": 0.6817, + "step": 10961 + }, + { + "epoch": 0.3359691062890769, + "grad_norm": 1.6461232251672306, + "learning_rate": 7.739484487035858e-06, + "loss": 0.6296, + "step": 10962 + }, + { + "epoch": 0.3359997548118181, + "grad_norm": 0.8670168386986385, + "learning_rate": 7.739069280903017e-06, + "loss": 0.4692, + "step": 10963 + }, + { + "epoch": 0.33603040333455925, + "grad_norm": 1.5652553567808123, + "learning_rate": 7.738654047781427e-06, + "loss": 0.7223, + "step": 10964 + }, + { + "epoch": 0.33606105185730045, + "grad_norm": 1.6685533745784744, + "learning_rate": 7.738238787675178e-06, + "loss": 0.5735, + "step": 10965 + }, + { + "epoch": 0.33609170038004166, + "grad_norm": 1.8919097876098556, + "learning_rate": 7.737823500588361e-06, + "loss": 0.7244, + "step": 10966 + }, + { + "epoch": 0.33612234890278286, + "grad_norm": 1.9867346149636458, + "learning_rate": 7.73740818652507e-06, + "loss": 0.7326, + "step": 10967 + }, + { + "epoch": 0.33615299742552407, + "grad_norm": 1.848982969049535, + "learning_rate": 7.736992845489394e-06, + "loss": 0.7339, + "step": 10968 + }, + { + "epoch": 0.3361836459482653, + "grad_norm": 0.7601828968784949, + "learning_rate": 7.736577477485427e-06, + "loss": 0.4571, + "step": 10969 + }, + { + "epoch": 0.3362142944710065, + "grad_norm": 1.8555430144888612, + "learning_rate": 7.736162082517265e-06, + "loss": 0.7981, + "step": 10970 + }, + { + "epoch": 0.3362449429937477, + "grad_norm": 1.734137444995043, + "learning_rate": 7.735746660588993e-06, + "loss": 0.6915, + "step": 10971 + }, + { + "epoch": 0.3362755915164889, + "grad_norm": 1.671056154142007, + "learning_rate": 7.735331211704713e-06, + "loss": 0.6309, + "step": 10972 + }, + { + "epoch": 0.3363062400392301, + "grad_norm": 1.7918010590463551, + "learning_rate": 7.734915735868513e-06, + "loss": 0.8007, + "step": 10973 + }, + { + "epoch": 0.3363368885619713, + "grad_norm": 1.5798167877631646, + "learning_rate": 7.73450023308449e-06, + "loss": 0.6422, + "step": 10974 + }, + { + "epoch": 0.3363675370847125, + "grad_norm": 1.766204381619496, + "learning_rate": 7.734084703356736e-06, + "loss": 0.6644, + "step": 10975 + }, + { + "epoch": 0.3363981856074537, + "grad_norm": 1.776023593383118, + "learning_rate": 7.733669146689344e-06, + "loss": 0.7516, + "step": 10976 + }, + { + "epoch": 0.3364288341301949, + "grad_norm": 1.617170423163549, + "learning_rate": 7.733253563086413e-06, + "loss": 0.6748, + "step": 10977 + }, + { + "epoch": 0.3364594826529361, + "grad_norm": 1.715685553849979, + "learning_rate": 7.732837952552035e-06, + "loss": 0.679, + "step": 10978 + }, + { + "epoch": 0.33649013117567733, + "grad_norm": 1.7793926658049914, + "learning_rate": 7.732422315090304e-06, + "loss": 0.7143, + "step": 10979 + }, + { + "epoch": 0.33652077969841854, + "grad_norm": 1.714160793327565, + "learning_rate": 7.732006650705318e-06, + "loss": 0.7242, + "step": 10980 + }, + { + "epoch": 0.33655142822115974, + "grad_norm": 0.8899186812368616, + "learning_rate": 7.73159095940117e-06, + "loss": 0.4964, + "step": 10981 + }, + { + "epoch": 0.33658207674390095, + "grad_norm": 1.5567876412543642, + "learning_rate": 
7.731175241181959e-06, + "loss": 0.6966, + "step": 10982 + }, + { + "epoch": 0.33661272526664215, + "grad_norm": 1.5840807080131103, + "learning_rate": 7.730759496051778e-06, + "loss": 0.7478, + "step": 10983 + }, + { + "epoch": 0.33664337378938336, + "grad_norm": 1.9355213742033692, + "learning_rate": 7.730343724014726e-06, + "loss": 0.6362, + "step": 10984 + }, + { + "epoch": 0.33667402231212457, + "grad_norm": 1.9182465684387333, + "learning_rate": 7.729927925074898e-06, + "loss": 0.7318, + "step": 10985 + }, + { + "epoch": 0.33670467083486577, + "grad_norm": 1.7416826988311738, + "learning_rate": 7.729512099236394e-06, + "loss": 0.689, + "step": 10986 + }, + { + "epoch": 0.336735319357607, + "grad_norm": 1.707331863493097, + "learning_rate": 7.729096246503307e-06, + "loss": 0.7489, + "step": 10987 + }, + { + "epoch": 0.3367659678803482, + "grad_norm": 1.6930498281366002, + "learning_rate": 7.728680366879736e-06, + "loss": 0.719, + "step": 10988 + }, + { + "epoch": 0.3367966164030894, + "grad_norm": 1.7209197523118147, + "learning_rate": 7.728264460369781e-06, + "loss": 0.6594, + "step": 10989 + }, + { + "epoch": 0.3368272649258306, + "grad_norm": 2.0440295573543406, + "learning_rate": 7.727848526977535e-06, + "loss": 0.6255, + "step": 10990 + }, + { + "epoch": 0.3368579134485718, + "grad_norm": 1.752732314902263, + "learning_rate": 7.727432566707103e-06, + "loss": 0.7174, + "step": 10991 + }, + { + "epoch": 0.336888561971313, + "grad_norm": 1.3650484019262699, + "learning_rate": 7.727016579562578e-06, + "loss": 0.539, + "step": 10992 + }, + { + "epoch": 0.3369192104940542, + "grad_norm": 1.8058078397056763, + "learning_rate": 7.726600565548061e-06, + "loss": 0.6943, + "step": 10993 + }, + { + "epoch": 0.3369498590167954, + "grad_norm": 1.893373512565859, + "learning_rate": 7.726184524667653e-06, + "loss": 0.7434, + "step": 10994 + }, + { + "epoch": 0.33698050753953657, + "grad_norm": 1.8502221360264792, + "learning_rate": 7.72576845692545e-06, + "loss": 0.6991, + "step": 10995 + }, + { + "epoch": 0.3370111560622778, + "grad_norm": 1.7951504494381816, + "learning_rate": 7.72535236232555e-06, + "loss": 0.6792, + "step": 10996 + }, + { + "epoch": 0.337041804585019, + "grad_norm": 1.7625909636560468, + "learning_rate": 7.72493624087206e-06, + "loss": 0.7337, + "step": 10997 + }, + { + "epoch": 0.3370724531077602, + "grad_norm": 1.9076571190756888, + "learning_rate": 7.724520092569075e-06, + "loss": 0.7096, + "step": 10998 + }, + { + "epoch": 0.3371031016305014, + "grad_norm": 1.8354928484395332, + "learning_rate": 7.724103917420695e-06, + "loss": 0.7619, + "step": 10999 + }, + { + "epoch": 0.3371337501532426, + "grad_norm": 0.9016623310230905, + "learning_rate": 7.723687715431024e-06, + "loss": 0.452, + "step": 11000 + }, + { + "epoch": 0.3371643986759838, + "grad_norm": 2.094862923254399, + "learning_rate": 7.723271486604162e-06, + "loss": 0.7422, + "step": 11001 + }, + { + "epoch": 0.337195047198725, + "grad_norm": 1.8805310876820327, + "learning_rate": 7.722855230944206e-06, + "loss": 0.755, + "step": 11002 + }, + { + "epoch": 0.3372256957214662, + "grad_norm": 1.8464353112020808, + "learning_rate": 7.722438948455263e-06, + "loss": 0.7344, + "step": 11003 + }, + { + "epoch": 0.3372563442442074, + "grad_norm": 1.5727105098338905, + "learning_rate": 7.722022639141431e-06, + "loss": 0.6856, + "step": 11004 + }, + { + "epoch": 0.3372869927669486, + "grad_norm": 1.7740467376000142, + "learning_rate": 7.721606303006815e-06, + "loss": 0.6482, + "step": 11005 + }, + { + "epoch": 
0.33731764128968983, + "grad_norm": 1.6555577179192742, + "learning_rate": 7.721189940055513e-06, + "loss": 0.6846, + "step": 11006 + }, + { + "epoch": 0.33734828981243103, + "grad_norm": 1.7321202396945274, + "learning_rate": 7.720773550291634e-06, + "loss": 0.6565, + "step": 11007 + }, + { + "epoch": 0.33737893833517224, + "grad_norm": 1.6858563011936964, + "learning_rate": 7.720357133719274e-06, + "loss": 0.6837, + "step": 11008 + }, + { + "epoch": 0.33740958685791345, + "grad_norm": 1.6961520672515618, + "learning_rate": 7.719940690342543e-06, + "loss": 0.6667, + "step": 11009 + }, + { + "epoch": 0.33744023538065465, + "grad_norm": 1.8767931797993807, + "learning_rate": 7.719524220165537e-06, + "loss": 0.7602, + "step": 11010 + }, + { + "epoch": 0.33747088390339586, + "grad_norm": 1.7421473813224697, + "learning_rate": 7.719107723192363e-06, + "loss": 0.7749, + "step": 11011 + }, + { + "epoch": 0.33750153242613706, + "grad_norm": 2.277282347050048, + "learning_rate": 7.718691199427126e-06, + "loss": 0.6758, + "step": 11012 + }, + { + "epoch": 0.33753218094887827, + "grad_norm": 1.804225301160795, + "learning_rate": 7.718274648873929e-06, + "loss": 0.6163, + "step": 11013 + }, + { + "epoch": 0.3375628294716195, + "grad_norm": 1.7696558247882639, + "learning_rate": 7.717858071536877e-06, + "loss": 0.7507, + "step": 11014 + }, + { + "epoch": 0.3375934779943607, + "grad_norm": 1.5456393913638553, + "learning_rate": 7.717441467420072e-06, + "loss": 0.682, + "step": 11015 + }, + { + "epoch": 0.3376241265171019, + "grad_norm": 1.7806658696012567, + "learning_rate": 7.717024836527623e-06, + "loss": 0.7236, + "step": 11016 + }, + { + "epoch": 0.3376547750398431, + "grad_norm": 1.5196701690756782, + "learning_rate": 7.716608178863631e-06, + "loss": 0.6333, + "step": 11017 + }, + { + "epoch": 0.3376854235625843, + "grad_norm": 1.607312554583663, + "learning_rate": 7.716191494432206e-06, + "loss": 0.5797, + "step": 11018 + }, + { + "epoch": 0.3377160720853255, + "grad_norm": 1.977496474792238, + "learning_rate": 7.71577478323745e-06, + "loss": 0.7383, + "step": 11019 + }, + { + "epoch": 0.3377467206080667, + "grad_norm": 1.5567240691371758, + "learning_rate": 7.71535804528347e-06, + "loss": 0.6786, + "step": 11020 + }, + { + "epoch": 0.3377773691308079, + "grad_norm": 1.7721908112358167, + "learning_rate": 7.714941280574373e-06, + "loss": 0.7433, + "step": 11021 + }, + { + "epoch": 0.3378080176535491, + "grad_norm": 0.9559427934977114, + "learning_rate": 7.714524489114264e-06, + "loss": 0.4894, + "step": 11022 + }, + { + "epoch": 0.3378386661762903, + "grad_norm": 1.4805626210932694, + "learning_rate": 7.714107670907252e-06, + "loss": 0.6864, + "step": 11023 + }, + { + "epoch": 0.33786931469903153, + "grad_norm": 1.5858642824032005, + "learning_rate": 7.713690825957442e-06, + "loss": 0.6393, + "step": 11024 + }, + { + "epoch": 0.33789996322177274, + "grad_norm": 1.718311598447449, + "learning_rate": 7.713273954268942e-06, + "loss": 0.7706, + "step": 11025 + }, + { + "epoch": 0.3379306117445139, + "grad_norm": 1.6441479001930965, + "learning_rate": 7.712857055845859e-06, + "loss": 0.6018, + "step": 11026 + }, + { + "epoch": 0.3379612602672551, + "grad_norm": 1.7361805158640664, + "learning_rate": 7.712440130692302e-06, + "loss": 0.6404, + "step": 11027 + }, + { + "epoch": 0.3379919087899963, + "grad_norm": 1.7877804322531965, + "learning_rate": 7.712023178812378e-06, + "loss": 0.7774, + "step": 11028 + }, + { + "epoch": 0.3380225573127375, + "grad_norm": 1.6376563594853826, + "learning_rate": 
7.711606200210195e-06, + "loss": 0.7215, + "step": 11029 + }, + { + "epoch": 0.3380532058354787, + "grad_norm": 1.6988899259490702, + "learning_rate": 7.711189194889864e-06, + "loss": 0.7661, + "step": 11030 + }, + { + "epoch": 0.3380838543582199, + "grad_norm": 1.7644828683689306, + "learning_rate": 7.710772162855492e-06, + "loss": 0.7376, + "step": 11031 + }, + { + "epoch": 0.3381145028809611, + "grad_norm": 0.8419935647386401, + "learning_rate": 7.710355104111186e-06, + "loss": 0.4692, + "step": 11032 + }, + { + "epoch": 0.3381451514037023, + "grad_norm": 1.6132978869617975, + "learning_rate": 7.70993801866106e-06, + "loss": 0.6292, + "step": 11033 + }, + { + "epoch": 0.33817579992644353, + "grad_norm": 1.6390930171757456, + "learning_rate": 7.70952090650922e-06, + "loss": 0.6712, + "step": 11034 + }, + { + "epoch": 0.33820644844918474, + "grad_norm": 0.8154244943966991, + "learning_rate": 7.709103767659779e-06, + "loss": 0.4753, + "step": 11035 + }, + { + "epoch": 0.33823709697192594, + "grad_norm": 1.6196467357886648, + "learning_rate": 7.708686602116843e-06, + "loss": 0.5876, + "step": 11036 + }, + { + "epoch": 0.33826774549466715, + "grad_norm": 1.5381530156330816, + "learning_rate": 7.708269409884528e-06, + "loss": 0.6913, + "step": 11037 + }, + { + "epoch": 0.33829839401740835, + "grad_norm": 1.510492703029193, + "learning_rate": 7.707852190966937e-06, + "loss": 0.6072, + "step": 11038 + }, + { + "epoch": 0.33832904254014956, + "grad_norm": 1.5977341859570446, + "learning_rate": 7.70743494536819e-06, + "loss": 0.7592, + "step": 11039 + }, + { + "epoch": 0.33835969106289077, + "grad_norm": 1.5546612995407705, + "learning_rate": 7.707017673092391e-06, + "loss": 0.6574, + "step": 11040 + }, + { + "epoch": 0.33839033958563197, + "grad_norm": 1.566241967302383, + "learning_rate": 7.706600374143655e-06, + "loss": 0.7081, + "step": 11041 + }, + { + "epoch": 0.3384209881083732, + "grad_norm": 1.654449709060952, + "learning_rate": 7.706183048526095e-06, + "loss": 0.6643, + "step": 11042 + }, + { + "epoch": 0.3384516366311144, + "grad_norm": 1.49736175090248, + "learning_rate": 7.70576569624382e-06, + "loss": 0.6272, + "step": 11043 + }, + { + "epoch": 0.3384822851538556, + "grad_norm": 1.5931519609235905, + "learning_rate": 7.705348317300943e-06, + "loss": 0.7134, + "step": 11044 + }, + { + "epoch": 0.3385129336765968, + "grad_norm": 1.7503511559647202, + "learning_rate": 7.704930911701575e-06, + "loss": 0.7236, + "step": 11045 + }, + { + "epoch": 0.338543582199338, + "grad_norm": 1.5410717910259126, + "learning_rate": 7.704513479449831e-06, + "loss": 0.6582, + "step": 11046 + }, + { + "epoch": 0.3385742307220792, + "grad_norm": 1.7046430508239525, + "learning_rate": 7.704096020549824e-06, + "loss": 0.6094, + "step": 11047 + }, + { + "epoch": 0.3386048792448204, + "grad_norm": 1.597214158918231, + "learning_rate": 7.70367853500567e-06, + "loss": 0.7152, + "step": 11048 + }, + { + "epoch": 0.3386355277675616, + "grad_norm": 1.6317158580226483, + "learning_rate": 7.703261022821476e-06, + "loss": 0.6754, + "step": 11049 + }, + { + "epoch": 0.3386661762903028, + "grad_norm": 1.9832781307753906, + "learning_rate": 7.702843484001361e-06, + "loss": 0.7315, + "step": 11050 + }, + { + "epoch": 0.33869682481304403, + "grad_norm": 1.7960847315636919, + "learning_rate": 7.70242591854944e-06, + "loss": 0.6877, + "step": 11051 + }, + { + "epoch": 0.33872747333578523, + "grad_norm": 1.798248574361676, + "learning_rate": 7.70200832646982e-06, + "loss": 0.7239, + "step": 11052 + }, + { + "epoch": 
0.33875812185852644, + "grad_norm": 1.652087430527781, + "learning_rate": 7.701590707766624e-06, + "loss": 0.6748, + "step": 11053 + }, + { + "epoch": 0.33878877038126765, + "grad_norm": 1.7718670998059474, + "learning_rate": 7.701173062443963e-06, + "loss": 0.7451, + "step": 11054 + }, + { + "epoch": 0.33881941890400885, + "grad_norm": 1.7769994121341401, + "learning_rate": 7.700755390505952e-06, + "loss": 0.7562, + "step": 11055 + }, + { + "epoch": 0.33885006742675006, + "grad_norm": 1.0413975618918845, + "learning_rate": 7.700337691956708e-06, + "loss": 0.4872, + "step": 11056 + }, + { + "epoch": 0.3388807159494912, + "grad_norm": 1.7275965007142258, + "learning_rate": 7.699919966800344e-06, + "loss": 0.6473, + "step": 11057 + }, + { + "epoch": 0.3389113644722324, + "grad_norm": 1.9173837288941435, + "learning_rate": 7.69950221504098e-06, + "loss": 0.7289, + "step": 11058 + }, + { + "epoch": 0.3389420129949736, + "grad_norm": 1.5509788113437981, + "learning_rate": 7.699084436682728e-06, + "loss": 0.5286, + "step": 11059 + }, + { + "epoch": 0.3389726615177148, + "grad_norm": 1.8717564832484836, + "learning_rate": 7.698666631729708e-06, + "loss": 0.645, + "step": 11060 + }, + { + "epoch": 0.33900331004045603, + "grad_norm": 1.5589179375258824, + "learning_rate": 7.698248800186035e-06, + "loss": 0.6628, + "step": 11061 + }, + { + "epoch": 0.33903395856319724, + "grad_norm": 1.6379186547883615, + "learning_rate": 7.697830942055825e-06, + "loss": 0.7039, + "step": 11062 + }, + { + "epoch": 0.33906460708593844, + "grad_norm": 0.8980505944664496, + "learning_rate": 7.697413057343198e-06, + "loss": 0.4572, + "step": 11063 + }, + { + "epoch": 0.33909525560867965, + "grad_norm": 1.7288046415661964, + "learning_rate": 7.696995146052269e-06, + "loss": 0.7546, + "step": 11064 + }, + { + "epoch": 0.33912590413142085, + "grad_norm": 0.8287500649516086, + "learning_rate": 7.696577208187157e-06, + "loss": 0.4734, + "step": 11065 + }, + { + "epoch": 0.33915655265416206, + "grad_norm": 1.8500103701340007, + "learning_rate": 7.69615924375198e-06, + "loss": 0.6916, + "step": 11066 + }, + { + "epoch": 0.33918720117690326, + "grad_norm": 1.5453043697976385, + "learning_rate": 7.695741252750857e-06, + "loss": 0.5988, + "step": 11067 + }, + { + "epoch": 0.33921784969964447, + "grad_norm": 1.6674979284513078, + "learning_rate": 7.695323235187904e-06, + "loss": 0.6456, + "step": 11068 + }, + { + "epoch": 0.3392484982223857, + "grad_norm": 1.6911613605931692, + "learning_rate": 7.694905191067241e-06, + "loss": 0.7142, + "step": 11069 + }, + { + "epoch": 0.3392791467451269, + "grad_norm": 1.8373536323731763, + "learning_rate": 7.69448712039299e-06, + "loss": 0.6763, + "step": 11070 + }, + { + "epoch": 0.3393097952678681, + "grad_norm": 0.8682374886031297, + "learning_rate": 7.694069023169267e-06, + "loss": 0.4753, + "step": 11071 + }, + { + "epoch": 0.3393404437906093, + "grad_norm": 1.723995562866244, + "learning_rate": 7.69365089940019e-06, + "loss": 0.7736, + "step": 11072 + }, + { + "epoch": 0.3393710923133505, + "grad_norm": 1.7321326793716603, + "learning_rate": 7.693232749089886e-06, + "loss": 0.6855, + "step": 11073 + }, + { + "epoch": 0.3394017408360917, + "grad_norm": 1.8587273848367198, + "learning_rate": 7.69281457224247e-06, + "loss": 0.7583, + "step": 11074 + }, + { + "epoch": 0.3394323893588329, + "grad_norm": 1.6061001111812272, + "learning_rate": 7.69239636886206e-06, + "loss": 0.6873, + "step": 11075 + }, + { + "epoch": 0.3394630378815741, + "grad_norm": 1.6870930768687569, + "learning_rate": 
7.691978138952782e-06, + "loss": 0.7387, + "step": 11076 + }, + { + "epoch": 0.3394936864043153, + "grad_norm": 1.5764344446205443, + "learning_rate": 7.691559882518753e-06, + "loss": 0.7067, + "step": 11077 + }, + { + "epoch": 0.3395243349270565, + "grad_norm": 1.530060905716234, + "learning_rate": 7.691141599564098e-06, + "loss": 0.6764, + "step": 11078 + }, + { + "epoch": 0.33955498344979773, + "grad_norm": 1.5447600505559165, + "learning_rate": 7.690723290092933e-06, + "loss": 0.695, + "step": 11079 + }, + { + "epoch": 0.33958563197253894, + "grad_norm": 1.554631193495637, + "learning_rate": 7.690304954109387e-06, + "loss": 0.6585, + "step": 11080 + }, + { + "epoch": 0.33961628049528014, + "grad_norm": 1.8486543574832632, + "learning_rate": 7.689886591617574e-06, + "loss": 0.7227, + "step": 11081 + }, + { + "epoch": 0.33964692901802135, + "grad_norm": 1.5081010537793134, + "learning_rate": 7.689468202621623e-06, + "loss": 0.632, + "step": 11082 + }, + { + "epoch": 0.33967757754076255, + "grad_norm": 1.5630538128417175, + "learning_rate": 7.689049787125654e-06, + "loss": 0.6473, + "step": 11083 + }, + { + "epoch": 0.33970822606350376, + "grad_norm": 1.5638371908583333, + "learning_rate": 7.688631345133787e-06, + "loss": 0.6622, + "step": 11084 + }, + { + "epoch": 0.33973887458624497, + "grad_norm": 1.7375581523178951, + "learning_rate": 7.688212876650149e-06, + "loss": 0.6923, + "step": 11085 + }, + { + "epoch": 0.33976952310898617, + "grad_norm": 1.670385915483066, + "learning_rate": 7.687794381678861e-06, + "loss": 0.6044, + "step": 11086 + }, + { + "epoch": 0.3398001716317274, + "grad_norm": 1.6806788261095233, + "learning_rate": 7.687375860224047e-06, + "loss": 0.6928, + "step": 11087 + }, + { + "epoch": 0.3398308201544685, + "grad_norm": 1.6920839315588807, + "learning_rate": 7.686957312289833e-06, + "loss": 0.7236, + "step": 11088 + }, + { + "epoch": 0.33986146867720973, + "grad_norm": 1.740509596828907, + "learning_rate": 7.686538737880339e-06, + "loss": 0.6714, + "step": 11089 + }, + { + "epoch": 0.33989211719995094, + "grad_norm": 1.5824933987289274, + "learning_rate": 7.686120136999692e-06, + "loss": 0.6785, + "step": 11090 + }, + { + "epoch": 0.33992276572269214, + "grad_norm": 1.6664154507297788, + "learning_rate": 7.685701509652017e-06, + "loss": 0.671, + "step": 11091 + }, + { + "epoch": 0.33995341424543335, + "grad_norm": 1.74542695466802, + "learning_rate": 7.685282855841438e-06, + "loss": 0.771, + "step": 11092 + }, + { + "epoch": 0.33998406276817456, + "grad_norm": 1.5899692702800852, + "learning_rate": 7.684864175572078e-06, + "loss": 0.6624, + "step": 11093 + }, + { + "epoch": 0.34001471129091576, + "grad_norm": 1.6621410236092826, + "learning_rate": 7.684445468848064e-06, + "loss": 0.6856, + "step": 11094 + }, + { + "epoch": 0.34004535981365697, + "grad_norm": 1.6006377463941728, + "learning_rate": 7.684026735673525e-06, + "loss": 0.5953, + "step": 11095 + }, + { + "epoch": 0.3400760083363982, + "grad_norm": 1.6821620702049884, + "learning_rate": 7.68360797605258e-06, + "loss": 0.7056, + "step": 11096 + }, + { + "epoch": 0.3401066568591394, + "grad_norm": 1.6027185380059208, + "learning_rate": 7.683189189989364e-06, + "loss": 0.6624, + "step": 11097 + }, + { + "epoch": 0.3401373053818806, + "grad_norm": 1.9763245156848697, + "learning_rate": 7.682770377487995e-06, + "loss": 0.8134, + "step": 11098 + }, + { + "epoch": 0.3401679539046218, + "grad_norm": 1.4652055005147422, + "learning_rate": 7.682351538552603e-06, + "loss": 0.6422, + "step": 11099 + }, + { + "epoch": 
0.340198602427363, + "grad_norm": 1.7646402224971085, + "learning_rate": 7.681932673187315e-06, + "loss": 0.7096, + "step": 11100 + }, + { + "epoch": 0.3402292509501042, + "grad_norm": 1.5436284653648507, + "learning_rate": 7.68151378139626e-06, + "loss": 0.6933, + "step": 11101 + }, + { + "epoch": 0.3402598994728454, + "grad_norm": 1.608012936590729, + "learning_rate": 7.681094863183562e-06, + "loss": 0.6762, + "step": 11102 + }, + { + "epoch": 0.3402905479955866, + "grad_norm": 1.0338711693257274, + "learning_rate": 7.68067591855335e-06, + "loss": 0.501, + "step": 11103 + }, + { + "epoch": 0.3403211965183278, + "grad_norm": 1.671225466903337, + "learning_rate": 7.680256947509754e-06, + "loss": 0.7255, + "step": 11104 + }, + { + "epoch": 0.340351845041069, + "grad_norm": 1.8356457416550225, + "learning_rate": 7.679837950056899e-06, + "loss": 0.8258, + "step": 11105 + }, + { + "epoch": 0.34038249356381023, + "grad_norm": 1.599606885480282, + "learning_rate": 7.679418926198915e-06, + "loss": 0.6606, + "step": 11106 + }, + { + "epoch": 0.34041314208655143, + "grad_norm": 1.8983377904660461, + "learning_rate": 7.678999875939931e-06, + "loss": 0.7418, + "step": 11107 + }, + { + "epoch": 0.34044379060929264, + "grad_norm": 1.725760680720646, + "learning_rate": 7.678580799284077e-06, + "loss": 0.6567, + "step": 11108 + }, + { + "epoch": 0.34047443913203385, + "grad_norm": 1.52407638238097, + "learning_rate": 7.67816169623548e-06, + "loss": 0.6137, + "step": 11109 + }, + { + "epoch": 0.34050508765477505, + "grad_norm": 0.8538324024888082, + "learning_rate": 7.67774256679827e-06, + "loss": 0.4676, + "step": 11110 + }, + { + "epoch": 0.34053573617751626, + "grad_norm": 1.4905283417880677, + "learning_rate": 7.677323410976577e-06, + "loss": 0.6484, + "step": 11111 + }, + { + "epoch": 0.34056638470025746, + "grad_norm": 1.722948494786767, + "learning_rate": 7.676904228774533e-06, + "loss": 0.649, + "step": 11112 + }, + { + "epoch": 0.34059703322299867, + "grad_norm": 1.6941780734064091, + "learning_rate": 7.676485020196266e-06, + "loss": 0.79, + "step": 11113 + }, + { + "epoch": 0.3406276817457399, + "grad_norm": 1.6920734799373895, + "learning_rate": 7.676065785245909e-06, + "loss": 0.7558, + "step": 11114 + }, + { + "epoch": 0.3406583302684811, + "grad_norm": 2.0035044886509157, + "learning_rate": 7.675646523927588e-06, + "loss": 0.7107, + "step": 11115 + }, + { + "epoch": 0.3406889787912223, + "grad_norm": 1.5349587768039148, + "learning_rate": 7.67522723624544e-06, + "loss": 0.6487, + "step": 11116 + }, + { + "epoch": 0.3407196273139635, + "grad_norm": 1.6855081359376913, + "learning_rate": 7.674807922203593e-06, + "loss": 0.7259, + "step": 11117 + }, + { + "epoch": 0.3407502758367047, + "grad_norm": 1.8363487549238153, + "learning_rate": 7.674388581806179e-06, + "loss": 0.6156, + "step": 11118 + }, + { + "epoch": 0.34078092435944585, + "grad_norm": 1.7374937476784598, + "learning_rate": 7.673969215057328e-06, + "loss": 0.6018, + "step": 11119 + }, + { + "epoch": 0.34081157288218705, + "grad_norm": 1.5774245689742366, + "learning_rate": 7.673549821961176e-06, + "loss": 0.6551, + "step": 11120 + }, + { + "epoch": 0.34084222140492826, + "grad_norm": 1.8924494323856604, + "learning_rate": 7.673130402521854e-06, + "loss": 0.7299, + "step": 11121 + }, + { + "epoch": 0.34087286992766946, + "grad_norm": 1.6960609696518707, + "learning_rate": 7.672710956743491e-06, + "loss": 0.6322, + "step": 11122 + }, + { + "epoch": 0.34090351845041067, + "grad_norm": 0.8282379259645308, + "learning_rate": 
7.672291484630226e-06, + "loss": 0.4875, + "step": 11123 + }, + { + "epoch": 0.3409341669731519, + "grad_norm": 1.6872949410244151, + "learning_rate": 7.67187198618619e-06, + "loss": 0.6881, + "step": 11124 + }, + { + "epoch": 0.3409648154958931, + "grad_norm": 1.8598970851796894, + "learning_rate": 7.671452461415514e-06, + "loss": 0.684, + "step": 11125 + }, + { + "epoch": 0.3409954640186343, + "grad_norm": 1.628384062017471, + "learning_rate": 7.671032910322333e-06, + "loss": 0.6683, + "step": 11126 + }, + { + "epoch": 0.3410261125413755, + "grad_norm": 1.892427540088753, + "learning_rate": 7.670613332910784e-06, + "loss": 0.9176, + "step": 11127 + }, + { + "epoch": 0.3410567610641167, + "grad_norm": 1.6460629146465666, + "learning_rate": 7.670193729184997e-06, + "loss": 0.7361, + "step": 11128 + }, + { + "epoch": 0.3410874095868579, + "grad_norm": 1.782371871643998, + "learning_rate": 7.669774099149107e-06, + "loss": 0.6957, + "step": 11129 + }, + { + "epoch": 0.3411180581095991, + "grad_norm": 0.8004039608944472, + "learning_rate": 7.66935444280725e-06, + "loss": 0.4684, + "step": 11130 + }, + { + "epoch": 0.3411487066323403, + "grad_norm": 1.590199108029428, + "learning_rate": 7.66893476016356e-06, + "loss": 0.6149, + "step": 11131 + }, + { + "epoch": 0.3411793551550815, + "grad_norm": 1.5758789102892312, + "learning_rate": 7.668515051222175e-06, + "loss": 0.6003, + "step": 11132 + }, + { + "epoch": 0.3412100036778227, + "grad_norm": 1.8270477586136262, + "learning_rate": 7.668095315987227e-06, + "loss": 0.7854, + "step": 11133 + }, + { + "epoch": 0.34124065220056393, + "grad_norm": 1.9925345128258245, + "learning_rate": 7.667675554462854e-06, + "loss": 0.6697, + "step": 11134 + }, + { + "epoch": 0.34127130072330514, + "grad_norm": 1.5154261549402217, + "learning_rate": 7.66725576665319e-06, + "loss": 0.6959, + "step": 11135 + }, + { + "epoch": 0.34130194924604634, + "grad_norm": 1.434709907530207, + "learning_rate": 7.666835952562375e-06, + "loss": 0.5269, + "step": 11136 + }, + { + "epoch": 0.34133259776878755, + "grad_norm": 1.8021842078742836, + "learning_rate": 7.66641611219454e-06, + "loss": 0.684, + "step": 11137 + }, + { + "epoch": 0.34136324629152875, + "grad_norm": 1.7375953566362994, + "learning_rate": 7.665996245553826e-06, + "loss": 0.705, + "step": 11138 + }, + { + "epoch": 0.34139389481426996, + "grad_norm": 0.8189114207890699, + "learning_rate": 7.665576352644369e-06, + "loss": 0.4685, + "step": 11139 + }, + { + "epoch": 0.34142454333701117, + "grad_norm": 1.905248513244333, + "learning_rate": 7.665156433470307e-06, + "loss": 0.6783, + "step": 11140 + }, + { + "epoch": 0.34145519185975237, + "grad_norm": 1.6398129635803431, + "learning_rate": 7.664736488035776e-06, + "loss": 0.6119, + "step": 11141 + }, + { + "epoch": 0.3414858403824936, + "grad_norm": 0.7497994074318924, + "learning_rate": 7.664316516344914e-06, + "loss": 0.4447, + "step": 11142 + }, + { + "epoch": 0.3415164889052348, + "grad_norm": 1.7095470237878994, + "learning_rate": 7.66389651840186e-06, + "loss": 0.6114, + "step": 11143 + }, + { + "epoch": 0.341547137427976, + "grad_norm": 1.7394905523908921, + "learning_rate": 7.663476494210753e-06, + "loss": 0.6664, + "step": 11144 + }, + { + "epoch": 0.3415777859507172, + "grad_norm": 1.6169133129591904, + "learning_rate": 7.663056443775729e-06, + "loss": 0.6253, + "step": 11145 + }, + { + "epoch": 0.3416084344734584, + "grad_norm": 1.737893489206989, + "learning_rate": 7.66263636710093e-06, + "loss": 0.6025, + "step": 11146 + }, + { + "epoch": 
0.3416390829961996, + "grad_norm": 1.6143176854591519, + "learning_rate": 7.662216264190494e-06, + "loss": 0.5955, + "step": 11147 + }, + { + "epoch": 0.3416697315189408, + "grad_norm": 1.7283209763272815, + "learning_rate": 7.661796135048559e-06, + "loss": 0.6872, + "step": 11148 + }, + { + "epoch": 0.341700380041682, + "grad_norm": 0.8808422204733303, + "learning_rate": 7.661375979679265e-06, + "loss": 0.4599, + "step": 11149 + }, + { + "epoch": 0.34173102856442317, + "grad_norm": 2.1564342971160775, + "learning_rate": 7.660955798086754e-06, + "loss": 0.7548, + "step": 11150 + }, + { + "epoch": 0.3417616770871644, + "grad_norm": 1.7533229520146796, + "learning_rate": 7.660535590275163e-06, + "loss": 0.6626, + "step": 11151 + }, + { + "epoch": 0.3417923256099056, + "grad_norm": 1.8564411526135365, + "learning_rate": 7.660115356248635e-06, + "loss": 0.6717, + "step": 11152 + }, + { + "epoch": 0.3418229741326468, + "grad_norm": 1.7337131418935101, + "learning_rate": 7.659695096011308e-06, + "loss": 0.5979, + "step": 11153 + }, + { + "epoch": 0.341853622655388, + "grad_norm": 1.699466562440792, + "learning_rate": 7.659274809567327e-06, + "loss": 0.7249, + "step": 11154 + }, + { + "epoch": 0.3418842711781292, + "grad_norm": 1.6674604618390232, + "learning_rate": 7.658854496920831e-06, + "loss": 0.7498, + "step": 11155 + }, + { + "epoch": 0.3419149197008704, + "grad_norm": 1.9260123423042603, + "learning_rate": 7.658434158075961e-06, + "loss": 0.6861, + "step": 11156 + }, + { + "epoch": 0.3419455682236116, + "grad_norm": 1.9139127995756124, + "learning_rate": 7.658013793036858e-06, + "loss": 0.6719, + "step": 11157 + }, + { + "epoch": 0.3419762167463528, + "grad_norm": 1.8335412600630638, + "learning_rate": 7.657593401807667e-06, + "loss": 0.6888, + "step": 11158 + }, + { + "epoch": 0.342006865269094, + "grad_norm": 1.8273033218231878, + "learning_rate": 7.657172984392526e-06, + "loss": 0.7403, + "step": 11159 + }, + { + "epoch": 0.3420375137918352, + "grad_norm": 1.7995316675730761, + "learning_rate": 7.65675254079558e-06, + "loss": 0.612, + "step": 11160 + }, + { + "epoch": 0.34206816231457643, + "grad_norm": 1.598573798370139, + "learning_rate": 7.656332071020972e-06, + "loss": 0.6284, + "step": 11161 + }, + { + "epoch": 0.34209881083731764, + "grad_norm": 1.5978264735980567, + "learning_rate": 7.655911575072846e-06, + "loss": 0.6111, + "step": 11162 + }, + { + "epoch": 0.34212945936005884, + "grad_norm": 1.4822072548402252, + "learning_rate": 7.655491052955341e-06, + "loss": 0.6668, + "step": 11163 + }, + { + "epoch": 0.34216010788280005, + "grad_norm": 1.7468905210890169, + "learning_rate": 7.655070504672605e-06, + "loss": 0.682, + "step": 11164 + }, + { + "epoch": 0.34219075640554125, + "grad_norm": 1.6569722335595038, + "learning_rate": 7.65464993022878e-06, + "loss": 0.7527, + "step": 11165 + }, + { + "epoch": 0.34222140492828246, + "grad_norm": 1.807344333497844, + "learning_rate": 7.654229329628007e-06, + "loss": 0.6496, + "step": 11166 + }, + { + "epoch": 0.34225205345102366, + "grad_norm": 1.9747301723978958, + "learning_rate": 7.653808702874436e-06, + "loss": 0.7323, + "step": 11167 + }, + { + "epoch": 0.34228270197376487, + "grad_norm": 1.773379697276879, + "learning_rate": 7.65338804997221e-06, + "loss": 0.7685, + "step": 11168 + }, + { + "epoch": 0.3423133504965061, + "grad_norm": 1.8210076080901418, + "learning_rate": 7.652967370925471e-06, + "loss": 0.702, + "step": 11169 + }, + { + "epoch": 0.3423439990192473, + "grad_norm": 1.4683197433738575, + "learning_rate": 
7.652546665738368e-06, + "loss": 0.4292, + "step": 11170 + }, + { + "epoch": 0.3423746475419885, + "grad_norm": 1.8952134088294055, + "learning_rate": 7.65212593441504e-06, + "loss": 0.7056, + "step": 11171 + }, + { + "epoch": 0.3424052960647297, + "grad_norm": 1.0040228980702566, + "learning_rate": 7.651705176959638e-06, + "loss": 0.4651, + "step": 11172 + }, + { + "epoch": 0.3424359445874709, + "grad_norm": 1.6484613293961434, + "learning_rate": 7.651284393376307e-06, + "loss": 0.6689, + "step": 11173 + }, + { + "epoch": 0.3424665931102121, + "grad_norm": 1.5228097212206781, + "learning_rate": 7.650863583669193e-06, + "loss": 0.6213, + "step": 11174 + }, + { + "epoch": 0.3424972416329533, + "grad_norm": 1.8371722880376224, + "learning_rate": 7.650442747842442e-06, + "loss": 0.6678, + "step": 11175 + }, + { + "epoch": 0.3425278901556945, + "grad_norm": 1.776989673279219, + "learning_rate": 7.6500218859002e-06, + "loss": 0.6701, + "step": 11176 + }, + { + "epoch": 0.3425585386784357, + "grad_norm": 1.7983793136630981, + "learning_rate": 7.649600997846613e-06, + "loss": 0.7721, + "step": 11177 + }, + { + "epoch": 0.3425891872011769, + "grad_norm": 1.4905412188652498, + "learning_rate": 7.649180083685832e-06, + "loss": 0.6411, + "step": 11178 + }, + { + "epoch": 0.34261983572391813, + "grad_norm": 0.8019831205829266, + "learning_rate": 7.648759143422e-06, + "loss": 0.4711, + "step": 11179 + }, + { + "epoch": 0.34265048424665934, + "grad_norm": 1.7691016546878624, + "learning_rate": 7.648338177059266e-06, + "loss": 0.7194, + "step": 11180 + }, + { + "epoch": 0.3426811327694005, + "grad_norm": 1.6008229547585124, + "learning_rate": 7.64791718460178e-06, + "loss": 0.7655, + "step": 11181 + }, + { + "epoch": 0.3427117812921417, + "grad_norm": 1.546291089908468, + "learning_rate": 7.647496166053687e-06, + "loss": 0.612, + "step": 11182 + }, + { + "epoch": 0.3427424298148829, + "grad_norm": 1.7163742146014553, + "learning_rate": 7.647075121419139e-06, + "loss": 0.7335, + "step": 11183 + }, + { + "epoch": 0.3427730783376241, + "grad_norm": 1.8229332708710881, + "learning_rate": 7.646654050702281e-06, + "loss": 0.7054, + "step": 11184 + }, + { + "epoch": 0.3428037268603653, + "grad_norm": 1.7027643515907858, + "learning_rate": 7.646232953907262e-06, + "loss": 0.7089, + "step": 11185 + }, + { + "epoch": 0.3428343753831065, + "grad_norm": 1.7551863913431895, + "learning_rate": 7.645811831038235e-06, + "loss": 0.6691, + "step": 11186 + }, + { + "epoch": 0.3428650239058477, + "grad_norm": 1.694541940879516, + "learning_rate": 7.645390682099346e-06, + "loss": 0.6801, + "step": 11187 + }, + { + "epoch": 0.3428956724285889, + "grad_norm": 1.6926002675900795, + "learning_rate": 7.644969507094747e-06, + "loss": 0.6958, + "step": 11188 + }, + { + "epoch": 0.34292632095133013, + "grad_norm": 1.8557410168367854, + "learning_rate": 7.644548306028585e-06, + "loss": 0.681, + "step": 11189 + }, + { + "epoch": 0.34295696947407134, + "grad_norm": 0.7854484374378099, + "learning_rate": 7.644127078905013e-06, + "loss": 0.4397, + "step": 11190 + }, + { + "epoch": 0.34298761799681254, + "grad_norm": 1.8471081543125603, + "learning_rate": 7.643705825728178e-06, + "loss": 0.7279, + "step": 11191 + }, + { + "epoch": 0.34301826651955375, + "grad_norm": 1.6726387022343958, + "learning_rate": 7.643284546502237e-06, + "loss": 0.6214, + "step": 11192 + }, + { + "epoch": 0.34304891504229496, + "grad_norm": 1.6242554948239316, + "learning_rate": 7.642863241231332e-06, + "loss": 0.6234, + "step": 11193 + }, + { + "epoch": 
0.34307956356503616, + "grad_norm": 1.955176576039062, + "learning_rate": 7.642441909919625e-06, + "loss": 0.6317, + "step": 11194 + }, + { + "epoch": 0.34311021208777737, + "grad_norm": 1.8559282869189095, + "learning_rate": 7.64202055257126e-06, + "loss": 0.6734, + "step": 11195 + }, + { + "epoch": 0.34314086061051857, + "grad_norm": 1.892615602237088, + "learning_rate": 7.641599169190388e-06, + "loss": 0.7137, + "step": 11196 + }, + { + "epoch": 0.3431715091332598, + "grad_norm": 0.8137100182167522, + "learning_rate": 7.641177759781167e-06, + "loss": 0.4348, + "step": 11197 + }, + { + "epoch": 0.343202157656001, + "grad_norm": 1.6777014166548136, + "learning_rate": 7.640756324347743e-06, + "loss": 0.7013, + "step": 11198 + }, + { + "epoch": 0.3432328061787422, + "grad_norm": 1.853475959792435, + "learning_rate": 7.640334862894273e-06, + "loss": 0.757, + "step": 11199 + }, + { + "epoch": 0.3432634547014834, + "grad_norm": 2.0863734513945786, + "learning_rate": 7.639913375424906e-06, + "loss": 0.7054, + "step": 11200 + }, + { + "epoch": 0.3432941032242246, + "grad_norm": 1.6753178776678632, + "learning_rate": 7.639491861943799e-06, + "loss": 0.6689, + "step": 11201 + }, + { + "epoch": 0.3433247517469658, + "grad_norm": 1.7152942656015127, + "learning_rate": 7.639070322455101e-06, + "loss": 0.6891, + "step": 11202 + }, + { + "epoch": 0.343355400269707, + "grad_norm": 1.7818434549590572, + "learning_rate": 7.63864875696297e-06, + "loss": 0.6769, + "step": 11203 + }, + { + "epoch": 0.3433860487924482, + "grad_norm": 1.7546462928746582, + "learning_rate": 7.638227165471559e-06, + "loss": 0.723, + "step": 11204 + }, + { + "epoch": 0.3434166973151894, + "grad_norm": 0.8717464488434745, + "learning_rate": 7.637805547985018e-06, + "loss": 0.4758, + "step": 11205 + }, + { + "epoch": 0.34344734583793063, + "grad_norm": 1.4833496373635715, + "learning_rate": 7.637383904507505e-06, + "loss": 0.5926, + "step": 11206 + }, + { + "epoch": 0.34347799436067183, + "grad_norm": 0.797045017604631, + "learning_rate": 7.636962235043173e-06, + "loss": 0.4696, + "step": 11207 + }, + { + "epoch": 0.34350864288341304, + "grad_norm": 1.857985237604881, + "learning_rate": 7.636540539596178e-06, + "loss": 0.6714, + "step": 11208 + }, + { + "epoch": 0.34353929140615425, + "grad_norm": 1.5841141560217116, + "learning_rate": 7.636118818170675e-06, + "loss": 0.6824, + "step": 11209 + }, + { + "epoch": 0.34356993992889545, + "grad_norm": 1.6176099495622307, + "learning_rate": 7.635697070770818e-06, + "loss": 0.6766, + "step": 11210 + }, + { + "epoch": 0.34360058845163666, + "grad_norm": 1.6448016242088623, + "learning_rate": 7.635275297400764e-06, + "loss": 0.6744, + "step": 11211 + }, + { + "epoch": 0.3436312369743778, + "grad_norm": 1.6902634042274738, + "learning_rate": 7.634853498064667e-06, + "loss": 0.7661, + "step": 11212 + }, + { + "epoch": 0.343661885497119, + "grad_norm": 1.6834797742059024, + "learning_rate": 7.634431672766685e-06, + "loss": 0.6845, + "step": 11213 + }, + { + "epoch": 0.3436925340198602, + "grad_norm": 1.8851959462957972, + "learning_rate": 7.634009821510974e-06, + "loss": 0.6887, + "step": 11214 + }, + { + "epoch": 0.3437231825426014, + "grad_norm": 1.7849879838255127, + "learning_rate": 7.63358794430169e-06, + "loss": 0.6843, + "step": 11215 + }, + { + "epoch": 0.34375383106534263, + "grad_norm": 1.851904353015375, + "learning_rate": 7.63316604114299e-06, + "loss": 0.7076, + "step": 11216 + }, + { + "epoch": 0.34378447958808384, + "grad_norm": 1.6890471282252018, + "learning_rate": 
7.632744112039033e-06, + "loss": 0.6055, + "step": 11217 + }, + { + "epoch": 0.34381512811082504, + "grad_norm": 2.109381384365075, + "learning_rate": 7.632322156993972e-06, + "loss": 0.7203, + "step": 11218 + }, + { + "epoch": 0.34384577663356625, + "grad_norm": 1.7833693075449022, + "learning_rate": 7.631900176011968e-06, + "loss": 0.7151, + "step": 11219 + }, + { + "epoch": 0.34387642515630745, + "grad_norm": 1.8639258778861556, + "learning_rate": 7.63147816909718e-06, + "loss": 0.7455, + "step": 11220 + }, + { + "epoch": 0.34390707367904866, + "grad_norm": 1.863077722559882, + "learning_rate": 7.631056136253762e-06, + "loss": 0.7175, + "step": 11221 + }, + { + "epoch": 0.34393772220178986, + "grad_norm": 1.7024533893231002, + "learning_rate": 7.630634077485875e-06, + "loss": 0.7051, + "step": 11222 + }, + { + "epoch": 0.34396837072453107, + "grad_norm": 1.884778892812447, + "learning_rate": 7.630211992797679e-06, + "loss": 0.7566, + "step": 11223 + }, + { + "epoch": 0.3439990192472723, + "grad_norm": 1.6861992280117808, + "learning_rate": 7.62978988219333e-06, + "loss": 0.7735, + "step": 11224 + }, + { + "epoch": 0.3440296677700135, + "grad_norm": 2.2047777344939403, + "learning_rate": 7.629367745676989e-06, + "loss": 0.6976, + "step": 11225 + }, + { + "epoch": 0.3440603162927547, + "grad_norm": 1.5952898433624545, + "learning_rate": 7.628945583252814e-06, + "loss": 0.6195, + "step": 11226 + }, + { + "epoch": 0.3440909648154959, + "grad_norm": 2.157536043530882, + "learning_rate": 7.628523394924966e-06, + "loss": 0.7573, + "step": 11227 + }, + { + "epoch": 0.3441216133382371, + "grad_norm": 1.6783331023368468, + "learning_rate": 7.628101180697606e-06, + "loss": 0.7682, + "step": 11228 + }, + { + "epoch": 0.3441522618609783, + "grad_norm": 1.792987256920334, + "learning_rate": 7.62767894057489e-06, + "loss": 0.6466, + "step": 11229 + }, + { + "epoch": 0.3441829103837195, + "grad_norm": 1.5183810069158008, + "learning_rate": 7.627256674560983e-06, + "loss": 0.6676, + "step": 11230 + }, + { + "epoch": 0.3442135589064607, + "grad_norm": 1.6668413249270138, + "learning_rate": 7.626834382660042e-06, + "loss": 0.6386, + "step": 11231 + }, + { + "epoch": 0.3442442074292019, + "grad_norm": 1.5794200626085382, + "learning_rate": 7.626412064876231e-06, + "loss": 0.6218, + "step": 11232 + }, + { + "epoch": 0.3442748559519431, + "grad_norm": 1.685682985355828, + "learning_rate": 7.625989721213709e-06, + "loss": 0.6115, + "step": 11233 + }, + { + "epoch": 0.34430550447468433, + "grad_norm": 1.7093370493129183, + "learning_rate": 7.62556735167664e-06, + "loss": 0.7303, + "step": 11234 + }, + { + "epoch": 0.34433615299742554, + "grad_norm": 1.7483745161871416, + "learning_rate": 7.625144956269183e-06, + "loss": 0.6729, + "step": 11235 + }, + { + "epoch": 0.34436680152016674, + "grad_norm": 1.0998739469092653, + "learning_rate": 7.6247225349955016e-06, + "loss": 0.4938, + "step": 11236 + }, + { + "epoch": 0.34439745004290795, + "grad_norm": 1.9063462749931108, + "learning_rate": 7.624300087859757e-06, + "loss": 0.7332, + "step": 11237 + }, + { + "epoch": 0.34442809856564915, + "grad_norm": 1.9124901915485282, + "learning_rate": 7.623877614866111e-06, + "loss": 0.7072, + "step": 11238 + }, + { + "epoch": 0.34445874708839036, + "grad_norm": 1.7183967165174303, + "learning_rate": 7.62345511601873e-06, + "loss": 0.7157, + "step": 11239 + }, + { + "epoch": 0.34448939561113157, + "grad_norm": 1.7064226926043515, + "learning_rate": 7.623032591321773e-06, + "loss": 0.7003, + "step": 11240 + }, + { + "epoch": 
0.34452004413387277, + "grad_norm": 1.8949753152296882, + "learning_rate": 7.6226100407794055e-06, + "loss": 0.7257, + "step": 11241 + }, + { + "epoch": 0.344550692656614, + "grad_norm": 1.8313304030939062, + "learning_rate": 7.62218746439579e-06, + "loss": 0.7283, + "step": 11242 + }, + { + "epoch": 0.3445813411793552, + "grad_norm": 1.882303900026205, + "learning_rate": 7.62176486217509e-06, + "loss": 0.6752, + "step": 11243 + }, + { + "epoch": 0.34461198970209633, + "grad_norm": 1.5001993595952627, + "learning_rate": 7.6213422341214695e-06, + "loss": 0.6246, + "step": 11244 + }, + { + "epoch": 0.34464263822483754, + "grad_norm": 1.905830614887603, + "learning_rate": 7.620919580239094e-06, + "loss": 0.6558, + "step": 11245 + }, + { + "epoch": 0.34467328674757874, + "grad_norm": 1.756450552361871, + "learning_rate": 7.6204969005321284e-06, + "loss": 0.7107, + "step": 11246 + }, + { + "epoch": 0.34470393527031995, + "grad_norm": 1.8921951120361817, + "learning_rate": 7.620074195004734e-06, + "loss": 0.7012, + "step": 11247 + }, + { + "epoch": 0.34473458379306116, + "grad_norm": 1.9980415688320834, + "learning_rate": 7.619651463661081e-06, + "loss": 0.7348, + "step": 11248 + }, + { + "epoch": 0.34476523231580236, + "grad_norm": 1.6521019128764212, + "learning_rate": 7.619228706505329e-06, + "loss": 0.6353, + "step": 11249 + }, + { + "epoch": 0.34479588083854357, + "grad_norm": 1.044535156259894, + "learning_rate": 7.618805923541648e-06, + "loss": 0.4802, + "step": 11250 + }, + { + "epoch": 0.3448265293612848, + "grad_norm": 1.7732960057078044, + "learning_rate": 7.618383114774203e-06, + "loss": 0.6974, + "step": 11251 + }, + { + "epoch": 0.344857177884026, + "grad_norm": 2.152178696757823, + "learning_rate": 7.617960280207158e-06, + "loss": 0.7371, + "step": 11252 + }, + { + "epoch": 0.3448878264067672, + "grad_norm": 1.713323735496478, + "learning_rate": 7.6175374198446805e-06, + "loss": 0.7237, + "step": 11253 + }, + { + "epoch": 0.3449184749295084, + "grad_norm": 1.8138684603139514, + "learning_rate": 7.617114533690937e-06, + "loss": 0.6629, + "step": 11254 + }, + { + "epoch": 0.3449491234522496, + "grad_norm": 0.7956160379006225, + "learning_rate": 7.6166916217500945e-06, + "loss": 0.4737, + "step": 11255 + }, + { + "epoch": 0.3449797719749908, + "grad_norm": 1.5755568114158751, + "learning_rate": 7.6162686840263204e-06, + "loss": 0.6593, + "step": 11256 + }, + { + "epoch": 0.345010420497732, + "grad_norm": 1.8232631073238903, + "learning_rate": 7.6158457205237804e-06, + "loss": 0.7371, + "step": 11257 + }, + { + "epoch": 0.3450410690204732, + "grad_norm": 2.12309400290454, + "learning_rate": 7.6154227312466445e-06, + "loss": 0.8057, + "step": 11258 + }, + { + "epoch": 0.3450717175432144, + "grad_norm": 1.577034581724824, + "learning_rate": 7.61499971619908e-06, + "loss": 0.5747, + "step": 11259 + }, + { + "epoch": 0.3451023660659556, + "grad_norm": 1.8194257987018498, + "learning_rate": 7.614576675385253e-06, + "loss": 0.6705, + "step": 11260 + }, + { + "epoch": 0.34513301458869683, + "grad_norm": 1.8599574023644543, + "learning_rate": 7.614153608809332e-06, + "loss": 0.7115, + "step": 11261 + }, + { + "epoch": 0.34516366311143803, + "grad_norm": 2.066718560896306, + "learning_rate": 7.613730516475487e-06, + "loss": 0.6131, + "step": 11262 + }, + { + "epoch": 0.34519431163417924, + "grad_norm": 1.5205604708822802, + "learning_rate": 7.613307398387888e-06, + "loss": 0.6316, + "step": 11263 + }, + { + "epoch": 0.34522496015692045, + "grad_norm": 1.5075248868912772, + "learning_rate": 
7.612884254550701e-06, + "loss": 0.6393, + "step": 11264 + }, + { + "epoch": 0.34525560867966165, + "grad_norm": 1.8311976453338075, + "learning_rate": 7.612461084968099e-06, + "loss": 0.7615, + "step": 11265 + }, + { + "epoch": 0.34528625720240286, + "grad_norm": 1.6117682541855822, + "learning_rate": 7.612037889644247e-06, + "loss": 0.608, + "step": 11266 + }, + { + "epoch": 0.34531690572514406, + "grad_norm": 1.9654201621930845, + "learning_rate": 7.611614668583321e-06, + "loss": 0.7594, + "step": 11267 + }, + { + "epoch": 0.34534755424788527, + "grad_norm": 0.9045653857456901, + "learning_rate": 7.611191421789483e-06, + "loss": 0.4444, + "step": 11268 + }, + { + "epoch": 0.3453782027706265, + "grad_norm": 1.6256503660097037, + "learning_rate": 7.61076814926691e-06, + "loss": 0.6075, + "step": 11269 + }, + { + "epoch": 0.3454088512933677, + "grad_norm": 1.7453132159625118, + "learning_rate": 7.61034485101977e-06, + "loss": 0.643, + "step": 11270 + }, + { + "epoch": 0.3454394998161089, + "grad_norm": 1.5729414038003164, + "learning_rate": 7.6099215270522344e-06, + "loss": 0.6323, + "step": 11271 + }, + { + "epoch": 0.3454701483388501, + "grad_norm": 1.9849732941102844, + "learning_rate": 7.609498177368475e-06, + "loss": 0.7375, + "step": 11272 + }, + { + "epoch": 0.3455007968615913, + "grad_norm": 1.876005718854759, + "learning_rate": 7.6090748019726624e-06, + "loss": 0.7173, + "step": 11273 + }, + { + "epoch": 0.3455314453843325, + "grad_norm": 1.6928737032949492, + "learning_rate": 7.608651400868967e-06, + "loss": 0.7757, + "step": 11274 + }, + { + "epoch": 0.34556209390707365, + "grad_norm": 1.666829040332369, + "learning_rate": 7.608227974061562e-06, + "loss": 0.6846, + "step": 11275 + }, + { + "epoch": 0.34559274242981486, + "grad_norm": 0.877546836704234, + "learning_rate": 7.60780452155462e-06, + "loss": 0.4852, + "step": 11276 + }, + { + "epoch": 0.34562339095255606, + "grad_norm": 1.8036546394874573, + "learning_rate": 7.6073810433523125e-06, + "loss": 0.6865, + "step": 11277 + }, + { + "epoch": 0.34565403947529727, + "grad_norm": 1.6821052037647786, + "learning_rate": 7.606957539458813e-06, + "loss": 0.5368, + "step": 11278 + }, + { + "epoch": 0.3456846879980385, + "grad_norm": 1.5960842021382757, + "learning_rate": 7.606534009878293e-06, + "loss": 0.5922, + "step": 11279 + }, + { + "epoch": 0.3457153365207797, + "grad_norm": 0.7694587975139161, + "learning_rate": 7.606110454614928e-06, + "loss": 0.4436, + "step": 11280 + }, + { + "epoch": 0.3457459850435209, + "grad_norm": 1.6832305021057121, + "learning_rate": 7.605686873672887e-06, + "loss": 0.6981, + "step": 11281 + }, + { + "epoch": 0.3457766335662621, + "grad_norm": 0.8174607608322992, + "learning_rate": 7.605263267056349e-06, + "loss": 0.4644, + "step": 11282 + }, + { + "epoch": 0.3458072820890033, + "grad_norm": 0.7670142181193865, + "learning_rate": 7.604839634769485e-06, + "loss": 0.4543, + "step": 11283 + }, + { + "epoch": 0.3458379306117445, + "grad_norm": 1.5473757212118047, + "learning_rate": 7.60441597681647e-06, + "loss": 0.6685, + "step": 11284 + }, + { + "epoch": 0.3458685791344857, + "grad_norm": 1.7696675505723434, + "learning_rate": 7.603992293201476e-06, + "loss": 0.6728, + "step": 11285 + }, + { + "epoch": 0.3458992276572269, + "grad_norm": 1.6086878721310487, + "learning_rate": 7.603568583928682e-06, + "loss": 0.5733, + "step": 11286 + }, + { + "epoch": 0.3459298761799681, + "grad_norm": 1.8672245509423466, + "learning_rate": 7.6031448490022595e-06, + "loss": 0.7585, + "step": 11287 + }, + { + "epoch": 
0.3459605247027093, + "grad_norm": 1.648820107308981, + "learning_rate": 7.602721088426385e-06, + "loss": 0.7067, + "step": 11288 + }, + { + "epoch": 0.34599117322545053, + "grad_norm": 1.5089885450207945, + "learning_rate": 7.602297302205234e-06, + "loss": 0.5514, + "step": 11289 + }, + { + "epoch": 0.34602182174819174, + "grad_norm": 2.1050255951856127, + "learning_rate": 7.601873490342982e-06, + "loss": 0.6695, + "step": 11290 + }, + { + "epoch": 0.34605247027093294, + "grad_norm": 0.7933629657164045, + "learning_rate": 7.601449652843804e-06, + "loss": 0.443, + "step": 11291 + }, + { + "epoch": 0.34608311879367415, + "grad_norm": 1.6583064187850665, + "learning_rate": 7.601025789711877e-06, + "loss": 0.6764, + "step": 11292 + }, + { + "epoch": 0.34611376731641535, + "grad_norm": 1.904345410268112, + "learning_rate": 7.6006019009513775e-06, + "loss": 0.7692, + "step": 11293 + }, + { + "epoch": 0.34614441583915656, + "grad_norm": 1.6414651817957748, + "learning_rate": 7.600177986566483e-06, + "loss": 0.6262, + "step": 11294 + }, + { + "epoch": 0.34617506436189777, + "grad_norm": 1.7860072134196971, + "learning_rate": 7.5997540465613686e-06, + "loss": 0.6744, + "step": 11295 + }, + { + "epoch": 0.34620571288463897, + "grad_norm": 1.7626659441422396, + "learning_rate": 7.599330080940212e-06, + "loss": 0.6612, + "step": 11296 + }, + { + "epoch": 0.3462363614073802, + "grad_norm": 1.8965679374991975, + "learning_rate": 7.598906089707192e-06, + "loss": 0.7526, + "step": 11297 + }, + { + "epoch": 0.3462670099301214, + "grad_norm": 1.548051017842533, + "learning_rate": 7.598482072866485e-06, + "loss": 0.6569, + "step": 11298 + }, + { + "epoch": 0.3462976584528626, + "grad_norm": 1.7463886668026842, + "learning_rate": 7.598058030422269e-06, + "loss": 0.6775, + "step": 11299 + }, + { + "epoch": 0.3463283069756038, + "grad_norm": 1.6774661980118757, + "learning_rate": 7.597633962378722e-06, + "loss": 0.6284, + "step": 11300 + }, + { + "epoch": 0.346358955498345, + "grad_norm": 1.50166496626409, + "learning_rate": 7.597209868740024e-06, + "loss": 0.6366, + "step": 11301 + }, + { + "epoch": 0.3463896040210862, + "grad_norm": 1.54510780418444, + "learning_rate": 7.596785749510352e-06, + "loss": 0.6962, + "step": 11302 + }, + { + "epoch": 0.3464202525438274, + "grad_norm": 1.6537518291435982, + "learning_rate": 7.596361604693886e-06, + "loss": 0.7018, + "step": 11303 + }, + { + "epoch": 0.3464509010665686, + "grad_norm": 1.688430262825934, + "learning_rate": 7.595937434294804e-06, + "loss": 0.7363, + "step": 11304 + }, + { + "epoch": 0.3464815495893098, + "grad_norm": 1.7708634789065774, + "learning_rate": 7.595513238317288e-06, + "loss": 0.7017, + "step": 11305 + }, + { + "epoch": 0.346512198112051, + "grad_norm": 1.5374665647281163, + "learning_rate": 7.595089016765516e-06, + "loss": 0.5786, + "step": 11306 + }, + { + "epoch": 0.3465428466347922, + "grad_norm": 1.7234383248021359, + "learning_rate": 7.5946647696436645e-06, + "loss": 0.7208, + "step": 11307 + }, + { + "epoch": 0.3465734951575334, + "grad_norm": 1.7429622886741951, + "learning_rate": 7.59424049695592e-06, + "loss": 0.6845, + "step": 11308 + }, + { + "epoch": 0.3466041436802746, + "grad_norm": 1.6002843226132293, + "learning_rate": 7.593816198706461e-06, + "loss": 0.6684, + "step": 11309 + }, + { + "epoch": 0.3466347922030158, + "grad_norm": 1.700779479344675, + "learning_rate": 7.593391874899465e-06, + "loss": 0.6875, + "step": 11310 + }, + { + "epoch": 0.346665440725757, + "grad_norm": 0.8782847603848577, + "learning_rate": 
7.592967525539117e-06, + "loss": 0.4769, + "step": 11311 + }, + { + "epoch": 0.3466960892484982, + "grad_norm": 1.6096284418474365, + "learning_rate": 7.592543150629595e-06, + "loss": 0.6978, + "step": 11312 + }, + { + "epoch": 0.3467267377712394, + "grad_norm": 1.6043734194571106, + "learning_rate": 7.5921187501750836e-06, + "loss": 0.7211, + "step": 11313 + }, + { + "epoch": 0.3467573862939806, + "grad_norm": 0.8317817991956592, + "learning_rate": 7.591694324179761e-06, + "loss": 0.4615, + "step": 11314 + }, + { + "epoch": 0.3467880348167218, + "grad_norm": 1.6676328006076309, + "learning_rate": 7.591269872647813e-06, + "loss": 0.6871, + "step": 11315 + }, + { + "epoch": 0.34681868333946303, + "grad_norm": 1.776525733695272, + "learning_rate": 7.590845395583418e-06, + "loss": 0.7347, + "step": 11316 + }, + { + "epoch": 0.34684933186220424, + "grad_norm": 1.6821913928312757, + "learning_rate": 7.590420892990763e-06, + "loss": 0.7344, + "step": 11317 + }, + { + "epoch": 0.34687998038494544, + "grad_norm": 1.7913779734832382, + "learning_rate": 7.589996364874026e-06, + "loss": 0.6712, + "step": 11318 + }, + { + "epoch": 0.34691062890768665, + "grad_norm": 1.7037952168288997, + "learning_rate": 7.5895718112373925e-06, + "loss": 0.7966, + "step": 11319 + }, + { + "epoch": 0.34694127743042785, + "grad_norm": 1.812165742744796, + "learning_rate": 7.589147232085046e-06, + "loss": 0.7497, + "step": 11320 + }, + { + "epoch": 0.34697192595316906, + "grad_norm": 1.5198608888965377, + "learning_rate": 7.58872262742117e-06, + "loss": 0.681, + "step": 11321 + }, + { + "epoch": 0.34700257447591026, + "grad_norm": 0.9336950989015755, + "learning_rate": 7.588297997249946e-06, + "loss": 0.476, + "step": 11322 + }, + { + "epoch": 0.34703322299865147, + "grad_norm": 0.8626131205819154, + "learning_rate": 7.58787334157556e-06, + "loss": 0.4555, + "step": 11323 + }, + { + "epoch": 0.3470638715213927, + "grad_norm": 1.7328149193762499, + "learning_rate": 7.587448660402197e-06, + "loss": 0.7092, + "step": 11324 + }, + { + "epoch": 0.3470945200441339, + "grad_norm": 0.7512390326092421, + "learning_rate": 7.587023953734041e-06, + "loss": 0.462, + "step": 11325 + }, + { + "epoch": 0.3471251685668751, + "grad_norm": 1.6355780823749482, + "learning_rate": 7.5865992215752725e-06, + "loss": 0.569, + "step": 11326 + }, + { + "epoch": 0.3471558170896163, + "grad_norm": 1.5631621789722578, + "learning_rate": 7.5861744639300825e-06, + "loss": 0.6101, + "step": 11327 + }, + { + "epoch": 0.3471864656123575, + "grad_norm": 1.5387732612211418, + "learning_rate": 7.585749680802654e-06, + "loss": 0.6555, + "step": 11328 + }, + { + "epoch": 0.3472171141350987, + "grad_norm": 1.7299661097512433, + "learning_rate": 7.58532487219717e-06, + "loss": 0.6766, + "step": 11329 + }, + { + "epoch": 0.3472477626578399, + "grad_norm": 1.6909784092741988, + "learning_rate": 7.584900038117822e-06, + "loss": 0.6892, + "step": 11330 + }, + { + "epoch": 0.3472784111805811, + "grad_norm": 0.9712284983998182, + "learning_rate": 7.584475178568789e-06, + "loss": 0.4914, + "step": 11331 + }, + { + "epoch": 0.3473090597033223, + "grad_norm": 1.681175567438718, + "learning_rate": 7.584050293554264e-06, + "loss": 0.7217, + "step": 11332 + }, + { + "epoch": 0.3473397082260635, + "grad_norm": 1.6204231557044884, + "learning_rate": 7.583625383078428e-06, + "loss": 0.717, + "step": 11333 + }, + { + "epoch": 0.34737035674880473, + "grad_norm": 1.719187443707247, + "learning_rate": 7.583200447145471e-06, + "loss": 0.6409, + "step": 11334 + }, + { + "epoch": 
0.34740100527154594, + "grad_norm": 1.8925486699723455, + "learning_rate": 7.582775485759579e-06, + "loss": 0.6426, + "step": 11335 + }, + { + "epoch": 0.34743165379428714, + "grad_norm": 1.5593743585916835, + "learning_rate": 7.58235049892494e-06, + "loss": 0.5896, + "step": 11336 + }, + { + "epoch": 0.3474623023170283, + "grad_norm": 1.7304122869633263, + "learning_rate": 7.58192548664574e-06, + "loss": 0.7402, + "step": 11337 + }, + { + "epoch": 0.3474929508397695, + "grad_norm": 1.6406855593099157, + "learning_rate": 7.581500448926168e-06, + "loss": 0.803, + "step": 11338 + }, + { + "epoch": 0.3475235993625107, + "grad_norm": 1.876963135767928, + "learning_rate": 7.58107538577041e-06, + "loss": 0.6226, + "step": 11339 + }, + { + "epoch": 0.3475542478852519, + "grad_norm": 1.819181921275411, + "learning_rate": 7.580650297182658e-06, + "loss": 0.709, + "step": 11340 + }, + { + "epoch": 0.3475848964079931, + "grad_norm": 1.6399305421750654, + "learning_rate": 7.580225183167098e-06, + "loss": 0.6378, + "step": 11341 + }, + { + "epoch": 0.3476155449307343, + "grad_norm": 1.7131472511515196, + "learning_rate": 7.579800043727917e-06, + "loss": 0.6466, + "step": 11342 + }, + { + "epoch": 0.3476461934534755, + "grad_norm": 1.9076531032986057, + "learning_rate": 7.579374878869308e-06, + "loss": 0.6924, + "step": 11343 + }, + { + "epoch": 0.34767684197621673, + "grad_norm": 1.7706258062932363, + "learning_rate": 7.5789496885954585e-06, + "loss": 0.7933, + "step": 11344 + }, + { + "epoch": 0.34770749049895794, + "grad_norm": 0.8305982415033486, + "learning_rate": 7.578524472910558e-06, + "loss": 0.4598, + "step": 11345 + }, + { + "epoch": 0.34773813902169914, + "grad_norm": 1.7127295728150926, + "learning_rate": 7.578099231818794e-06, + "loss": 0.7329, + "step": 11346 + }, + { + "epoch": 0.34776878754444035, + "grad_norm": 0.8059600445020093, + "learning_rate": 7.57767396532436e-06, + "loss": 0.4695, + "step": 11347 + }, + { + "epoch": 0.34779943606718156, + "grad_norm": 1.4929293758085826, + "learning_rate": 7.5772486734314455e-06, + "loss": 0.7042, + "step": 11348 + }, + { + "epoch": 0.34783008458992276, + "grad_norm": 1.6740662787599263, + "learning_rate": 7.576823356144241e-06, + "loss": 0.758, + "step": 11349 + }, + { + "epoch": 0.34786073311266397, + "grad_norm": 0.7674531737114149, + "learning_rate": 7.576398013466935e-06, + "loss": 0.4499, + "step": 11350 + }, + { + "epoch": 0.3478913816354052, + "grad_norm": 1.8448648003779704, + "learning_rate": 7.575972645403722e-06, + "loss": 0.7956, + "step": 11351 + }, + { + "epoch": 0.3479220301581464, + "grad_norm": 1.8026510407003669, + "learning_rate": 7.575547251958788e-06, + "loss": 0.6786, + "step": 11352 + }, + { + "epoch": 0.3479526786808876, + "grad_norm": 0.8071193675165715, + "learning_rate": 7.57512183313633e-06, + "loss": 0.4622, + "step": 11353 + }, + { + "epoch": 0.3479833272036288, + "grad_norm": 1.5110023585014465, + "learning_rate": 7.5746963889405374e-06, + "loss": 0.6773, + "step": 11354 + }, + { + "epoch": 0.34801397572637, + "grad_norm": 1.5893585258487084, + "learning_rate": 7.574270919375604e-06, + "loss": 0.6292, + "step": 11355 + }, + { + "epoch": 0.3480446242491112, + "grad_norm": 1.5620435310214071, + "learning_rate": 7.57384542444572e-06, + "loss": 0.5969, + "step": 11356 + }, + { + "epoch": 0.3480752727718524, + "grad_norm": 1.5702541213791887, + "learning_rate": 7.573419904155077e-06, + "loss": 0.7586, + "step": 11357 + }, + { + "epoch": 0.3481059212945936, + "grad_norm": 1.617637707893616, + "learning_rate": 
7.57299435850787e-06, + "loss": 0.7321, + "step": 11358 + }, + { + "epoch": 0.3481365698173348, + "grad_norm": 1.7334276112484386, + "learning_rate": 7.572568787508292e-06, + "loss": 0.6784, + "step": 11359 + }, + { + "epoch": 0.348167218340076, + "grad_norm": 0.8189916458344001, + "learning_rate": 7.572143191160535e-06, + "loss": 0.4538, + "step": 11360 + }, + { + "epoch": 0.34819786686281723, + "grad_norm": 1.7067942461695496, + "learning_rate": 7.5717175694687925e-06, + "loss": 0.6854, + "step": 11361 + }, + { + "epoch": 0.34822851538555843, + "grad_norm": 1.7817941440046672, + "learning_rate": 7.57129192243726e-06, + "loss": 0.7481, + "step": 11362 + }, + { + "epoch": 0.34825916390829964, + "grad_norm": 1.952093199764622, + "learning_rate": 7.57086625007013e-06, + "loss": 0.6866, + "step": 11363 + }, + { + "epoch": 0.34828981243104085, + "grad_norm": 1.6523043189985689, + "learning_rate": 7.570440552371596e-06, + "loss": 0.6886, + "step": 11364 + }, + { + "epoch": 0.34832046095378205, + "grad_norm": 2.0954209261534253, + "learning_rate": 7.570014829345854e-06, + "loss": 0.6914, + "step": 11365 + }, + { + "epoch": 0.34835110947652326, + "grad_norm": 1.7224366911912885, + "learning_rate": 7.5695890809971e-06, + "loss": 0.7365, + "step": 11366 + }, + { + "epoch": 0.34838175799926446, + "grad_norm": 1.953297165813685, + "learning_rate": 7.569163307329526e-06, + "loss": 0.835, + "step": 11367 + }, + { + "epoch": 0.3484124065220056, + "grad_norm": 1.6569299456222328, + "learning_rate": 7.568737508347327e-06, + "loss": 0.6525, + "step": 11368 + }, + { + "epoch": 0.3484430550447468, + "grad_norm": 0.7801975608285545, + "learning_rate": 7.568311684054701e-06, + "loss": 0.4647, + "step": 11369 + }, + { + "epoch": 0.348473703567488, + "grad_norm": 1.6139541147858858, + "learning_rate": 7.5678858344558436e-06, + "loss": 0.6998, + "step": 11370 + }, + { + "epoch": 0.34850435209022923, + "grad_norm": 1.6772555458553677, + "learning_rate": 7.567459959554951e-06, + "loss": 0.6343, + "step": 11371 + }, + { + "epoch": 0.34853500061297044, + "grad_norm": 0.8108612134421567, + "learning_rate": 7.567034059356216e-06, + "loss": 0.4708, + "step": 11372 + }, + { + "epoch": 0.34856564913571164, + "grad_norm": 1.5851887160321907, + "learning_rate": 7.56660813386384e-06, + "loss": 0.6612, + "step": 11373 + }, + { + "epoch": 0.34859629765845285, + "grad_norm": 0.764884276410695, + "learning_rate": 7.566182183082016e-06, + "loss": 0.4659, + "step": 11374 + }, + { + "epoch": 0.34862694618119405, + "grad_norm": 1.9274897753197784, + "learning_rate": 7.565756207014942e-06, + "loss": 0.7326, + "step": 11375 + }, + { + "epoch": 0.34865759470393526, + "grad_norm": 1.8526177414441598, + "learning_rate": 7.565330205666817e-06, + "loss": 0.7295, + "step": 11376 + }, + { + "epoch": 0.34868824322667646, + "grad_norm": 1.5971411234019859, + "learning_rate": 7.564904179041837e-06, + "loss": 0.6309, + "step": 11377 + }, + { + "epoch": 0.34871889174941767, + "grad_norm": 1.65398958827608, + "learning_rate": 7.564478127144199e-06, + "loss": 0.7646, + "step": 11378 + }, + { + "epoch": 0.3487495402721589, + "grad_norm": 1.699476157992531, + "learning_rate": 7.564052049978102e-06, + "loss": 0.685, + "step": 11379 + }, + { + "epoch": 0.3487801887949001, + "grad_norm": 1.9520430042160621, + "learning_rate": 7.563625947547743e-06, + "loss": 0.7107, + "step": 11380 + }, + { + "epoch": 0.3488108373176413, + "grad_norm": 1.6570412591437305, + "learning_rate": 7.5631998198573245e-06, + "loss": 0.7224, + "step": 11381 + }, + { + "epoch": 
0.3488414858403825, + "grad_norm": 1.778664397064262, + "learning_rate": 7.5627736669110405e-06, + "loss": 0.6358, + "step": 11382 + }, + { + "epoch": 0.3488721343631237, + "grad_norm": 0.9508370782797205, + "learning_rate": 7.562347488713091e-06, + "loss": 0.4907, + "step": 11383 + }, + { + "epoch": 0.3489027828858649, + "grad_norm": 1.72655728663921, + "learning_rate": 7.561921285267677e-06, + "loss": 0.703, + "step": 11384 + }, + { + "epoch": 0.3489334314086061, + "grad_norm": 1.6484064547966484, + "learning_rate": 7.561495056578998e-06, + "loss": 0.6733, + "step": 11385 + }, + { + "epoch": 0.3489640799313473, + "grad_norm": 0.79226911555872, + "learning_rate": 7.561068802651253e-06, + "loss": 0.4516, + "step": 11386 + }, + { + "epoch": 0.3489947284540885, + "grad_norm": 1.7814202363865634, + "learning_rate": 7.560642523488639e-06, + "loss": 0.7362, + "step": 11387 + }, + { + "epoch": 0.3490253769768297, + "grad_norm": 1.9781032028763168, + "learning_rate": 7.560216219095362e-06, + "loss": 0.7378, + "step": 11388 + }, + { + "epoch": 0.34905602549957093, + "grad_norm": 1.7271543654755548, + "learning_rate": 7.5597898894756176e-06, + "loss": 0.6137, + "step": 11389 + }, + { + "epoch": 0.34908667402231214, + "grad_norm": 1.5923681992312784, + "learning_rate": 7.55936353463361e-06, + "loss": 0.6335, + "step": 11390 + }, + { + "epoch": 0.34911732254505334, + "grad_norm": 1.91320537691559, + "learning_rate": 7.558937154573538e-06, + "loss": 0.6678, + "step": 11391 + }, + { + "epoch": 0.34914797106779455, + "grad_norm": 1.6051421444813134, + "learning_rate": 7.558510749299604e-06, + "loss": 0.6673, + "step": 11392 + }, + { + "epoch": 0.34917861959053575, + "grad_norm": 1.621612804135462, + "learning_rate": 7.5580843188160084e-06, + "loss": 0.6748, + "step": 11393 + }, + { + "epoch": 0.34920926811327696, + "grad_norm": 1.8131482251215973, + "learning_rate": 7.557657863126954e-06, + "loss": 0.7028, + "step": 11394 + }, + { + "epoch": 0.34923991663601817, + "grad_norm": 1.532849125325236, + "learning_rate": 7.557231382236641e-06, + "loss": 0.6528, + "step": 11395 + }, + { + "epoch": 0.34927056515875937, + "grad_norm": 1.6196021385144945, + "learning_rate": 7.556804876149273e-06, + "loss": 0.6869, + "step": 11396 + }, + { + "epoch": 0.3493012136815006, + "grad_norm": 1.674040823677964, + "learning_rate": 7.556378344869053e-06, + "loss": 0.7547, + "step": 11397 + }, + { + "epoch": 0.3493318622042418, + "grad_norm": 1.7135597172685708, + "learning_rate": 7.5559517884001834e-06, + "loss": 0.7592, + "step": 11398 + }, + { + "epoch": 0.34936251072698293, + "grad_norm": 1.6147770766112153, + "learning_rate": 7.555525206746868e-06, + "loss": 0.6825, + "step": 11399 + }, + { + "epoch": 0.34939315924972414, + "grad_norm": 1.6856738255084414, + "learning_rate": 7.555098599913306e-06, + "loss": 0.668, + "step": 11400 + }, + { + "epoch": 0.34942380777246534, + "grad_norm": 1.8476950348277263, + "learning_rate": 7.5546719679037066e-06, + "loss": 0.7085, + "step": 11401 + }, + { + "epoch": 0.34945445629520655, + "grad_norm": 1.839559623174642, + "learning_rate": 7.554245310722268e-06, + "loss": 0.7037, + "step": 11402 + }, + { + "epoch": 0.34948510481794776, + "grad_norm": 1.7740451761458815, + "learning_rate": 7.553818628373198e-06, + "loss": 0.6977, + "step": 11403 + }, + { + "epoch": 0.34951575334068896, + "grad_norm": 1.905024500459886, + "learning_rate": 7.5533919208607e-06, + "loss": 0.4674, + "step": 11404 + }, + { + "epoch": 0.34954640186343017, + "grad_norm": 1.7030274509983612, + "learning_rate": 
7.552965188188979e-06, + "loss": 0.7263, + "step": 11405 + }, + { + "epoch": 0.3495770503861714, + "grad_norm": 1.8063727228924362, + "learning_rate": 7.552538430362236e-06, + "loss": 0.7185, + "step": 11406 + }, + { + "epoch": 0.3496076989089126, + "grad_norm": 0.97020909694521, + "learning_rate": 7.552111647384681e-06, + "loss": 0.466, + "step": 11407 + }, + { + "epoch": 0.3496383474316538, + "grad_norm": 1.792790014005606, + "learning_rate": 7.551684839260516e-06, + "loss": 0.6789, + "step": 11408 + }, + { + "epoch": 0.349668995954395, + "grad_norm": 1.538442761982762, + "learning_rate": 7.5512580059939475e-06, + "loss": 0.6266, + "step": 11409 + }, + { + "epoch": 0.3496996444771362, + "grad_norm": 1.6566214153835692, + "learning_rate": 7.550831147589182e-06, + "loss": 0.6281, + "step": 11410 + }, + { + "epoch": 0.3497302929998774, + "grad_norm": 1.8118479822051448, + "learning_rate": 7.550404264050423e-06, + "loss": 0.6803, + "step": 11411 + }, + { + "epoch": 0.3497609415226186, + "grad_norm": 0.8084450175254454, + "learning_rate": 7.549977355381879e-06, + "loss": 0.4632, + "step": 11412 + }, + { + "epoch": 0.3497915900453598, + "grad_norm": 1.6813058246465835, + "learning_rate": 7.549550421587757e-06, + "loss": 0.6717, + "step": 11413 + }, + { + "epoch": 0.349822238568101, + "grad_norm": 1.7998250749333973, + "learning_rate": 7.549123462672261e-06, + "loss": 0.6863, + "step": 11414 + }, + { + "epoch": 0.3498528870908422, + "grad_norm": 1.5250261269720702, + "learning_rate": 7.548696478639599e-06, + "loss": 0.6141, + "step": 11415 + }, + { + "epoch": 0.34988353561358343, + "grad_norm": 1.5986276715608276, + "learning_rate": 7.548269469493978e-06, + "loss": 0.6598, + "step": 11416 + }, + { + "epoch": 0.34991418413632464, + "grad_norm": 1.6104724774710066, + "learning_rate": 7.547842435239608e-06, + "loss": 0.659, + "step": 11417 + }, + { + "epoch": 0.34994483265906584, + "grad_norm": 1.7366489468379782, + "learning_rate": 7.5474153758806935e-06, + "loss": 0.7213, + "step": 11418 + }, + { + "epoch": 0.34997548118180705, + "grad_norm": 1.7660592084258842, + "learning_rate": 7.546988291421442e-06, + "loss": 0.6351, + "step": 11419 + }, + { + "epoch": 0.35000612970454825, + "grad_norm": 1.715201510279455, + "learning_rate": 7.546561181866065e-06, + "loss": 0.7636, + "step": 11420 + }, + { + "epoch": 0.35003677822728946, + "grad_norm": 1.81460382087414, + "learning_rate": 7.546134047218769e-06, + "loss": 0.5843, + "step": 11421 + }, + { + "epoch": 0.35006742675003066, + "grad_norm": 0.9496076679442147, + "learning_rate": 7.545706887483764e-06, + "loss": 0.4865, + "step": 11422 + }, + { + "epoch": 0.35009807527277187, + "grad_norm": 1.5229618224793613, + "learning_rate": 7.5452797026652544e-06, + "loss": 0.7117, + "step": 11423 + }, + { + "epoch": 0.3501287237955131, + "grad_norm": 1.667713236464221, + "learning_rate": 7.544852492767455e-06, + "loss": 0.7514, + "step": 11424 + }, + { + "epoch": 0.3501593723182543, + "grad_norm": 0.8024918046993864, + "learning_rate": 7.544425257794572e-06, + "loss": 0.4743, + "step": 11425 + }, + { + "epoch": 0.3501900208409955, + "grad_norm": 1.696231445618546, + "learning_rate": 7.543997997750816e-06, + "loss": 0.6947, + "step": 11426 + }, + { + "epoch": 0.3502206693637367, + "grad_norm": 1.77292678641442, + "learning_rate": 7.543570712640398e-06, + "loss": 0.758, + "step": 11427 + }, + { + "epoch": 0.3502513178864779, + "grad_norm": 1.9264937555063708, + "learning_rate": 7.543143402467525e-06, + "loss": 0.8201, + "step": 11428 + }, + { + "epoch": 
0.3502819664092191, + "grad_norm": 1.589107010196626, + "learning_rate": 7.542716067236411e-06, + "loss": 0.6782, + "step": 11429 + }, + { + "epoch": 0.35031261493196025, + "grad_norm": 1.7527072754780761, + "learning_rate": 7.542288706951264e-06, + "loss": 0.7433, + "step": 11430 + }, + { + "epoch": 0.35034326345470146, + "grad_norm": 1.7575091291230056, + "learning_rate": 7.541861321616297e-06, + "loss": 0.6372, + "step": 11431 + }, + { + "epoch": 0.35037391197744266, + "grad_norm": 1.5611545666343447, + "learning_rate": 7.541433911235719e-06, + "loss": 0.6264, + "step": 11432 + }, + { + "epoch": 0.35040456050018387, + "grad_norm": 1.7716748573201042, + "learning_rate": 7.541006475813744e-06, + "loss": 0.7652, + "step": 11433 + }, + { + "epoch": 0.3504352090229251, + "grad_norm": 1.8017955860799642, + "learning_rate": 7.540579015354579e-06, + "loss": 0.711, + "step": 11434 + }, + { + "epoch": 0.3504658575456663, + "grad_norm": 1.7534263850873835, + "learning_rate": 7.540151529862441e-06, + "loss": 0.7258, + "step": 11435 + }, + { + "epoch": 0.3504965060684075, + "grad_norm": 1.6320762561310378, + "learning_rate": 7.5397240193415415e-06, + "loss": 0.6937, + "step": 11436 + }, + { + "epoch": 0.3505271545911487, + "grad_norm": 1.6320308990067531, + "learning_rate": 7.539296483796088e-06, + "loss": 0.6829, + "step": 11437 + }, + { + "epoch": 0.3505578031138899, + "grad_norm": 1.8170083533437893, + "learning_rate": 7.538868923230298e-06, + "loss": 0.6099, + "step": 11438 + }, + { + "epoch": 0.3505884516366311, + "grad_norm": 1.9772144736689845, + "learning_rate": 7.538441337648383e-06, + "loss": 0.7692, + "step": 11439 + }, + { + "epoch": 0.3506191001593723, + "grad_norm": 0.8346239449011975, + "learning_rate": 7.538013727054557e-06, + "loss": 0.4463, + "step": 11440 + }, + { + "epoch": 0.3506497486821135, + "grad_norm": 2.017205711585814, + "learning_rate": 7.537586091453031e-06, + "loss": 0.6528, + "step": 11441 + }, + { + "epoch": 0.3506803972048547, + "grad_norm": 1.8295114341565275, + "learning_rate": 7.537158430848022e-06, + "loss": 0.6224, + "step": 11442 + }, + { + "epoch": 0.3507110457275959, + "grad_norm": 1.6225207200942493, + "learning_rate": 7.536730745243739e-06, + "loss": 0.6476, + "step": 11443 + }, + { + "epoch": 0.35074169425033713, + "grad_norm": 1.8772068136892393, + "learning_rate": 7.5363030346444014e-06, + "loss": 0.6549, + "step": 11444 + }, + { + "epoch": 0.35077234277307834, + "grad_norm": 1.6502766504568094, + "learning_rate": 7.535875299054218e-06, + "loss": 0.7194, + "step": 11445 + }, + { + "epoch": 0.35080299129581954, + "grad_norm": 1.7064956225709258, + "learning_rate": 7.535447538477409e-06, + "loss": 0.6089, + "step": 11446 + }, + { + "epoch": 0.35083363981856075, + "grad_norm": 1.7253715647515562, + "learning_rate": 7.535019752918186e-06, + "loss": 0.6242, + "step": 11447 + }, + { + "epoch": 0.35086428834130196, + "grad_norm": 1.764414358018767, + "learning_rate": 7.5345919423807655e-06, + "loss": 0.7836, + "step": 11448 + }, + { + "epoch": 0.35089493686404316, + "grad_norm": 1.747047141753099, + "learning_rate": 7.534164106869361e-06, + "loss": 0.6919, + "step": 11449 + }, + { + "epoch": 0.35092558538678437, + "grad_norm": 1.6847265805453844, + "learning_rate": 7.53373624638819e-06, + "loss": 0.7343, + "step": 11450 + }, + { + "epoch": 0.3509562339095256, + "grad_norm": 1.5907026096707517, + "learning_rate": 7.533308360941466e-06, + "loss": 0.6134, + "step": 11451 + }, + { + "epoch": 0.3509868824322668, + "grad_norm": 0.8681308766302552, + "learning_rate": 
7.532880450533408e-06, + "loss": 0.4554, + "step": 11452 + }, + { + "epoch": 0.351017530955008, + "grad_norm": 1.8807056730129377, + "learning_rate": 7.53245251516823e-06, + "loss": 0.6965, + "step": 11453 + }, + { + "epoch": 0.3510481794777492, + "grad_norm": 2.170667274143468, + "learning_rate": 7.53202455485015e-06, + "loss": 0.6751, + "step": 11454 + }, + { + "epoch": 0.3510788280004904, + "grad_norm": 1.6218763583259161, + "learning_rate": 7.531596569583385e-06, + "loss": 0.6484, + "step": 11455 + }, + { + "epoch": 0.3511094765232316, + "grad_norm": 1.5748163231067256, + "learning_rate": 7.53116855937215e-06, + "loss": 0.7103, + "step": 11456 + }, + { + "epoch": 0.3511401250459728, + "grad_norm": 1.8956358803390392, + "learning_rate": 7.530740524220663e-06, + "loss": 0.7289, + "step": 11457 + }, + { + "epoch": 0.351170773568714, + "grad_norm": 1.8053427891717324, + "learning_rate": 7.530312464133143e-06, + "loss": 0.6851, + "step": 11458 + }, + { + "epoch": 0.3512014220914552, + "grad_norm": 2.135487965184047, + "learning_rate": 7.529884379113808e-06, + "loss": 0.7505, + "step": 11459 + }, + { + "epoch": 0.3512320706141964, + "grad_norm": 0.8594001014326774, + "learning_rate": 7.529456269166872e-06, + "loss": 0.4708, + "step": 11460 + }, + { + "epoch": 0.3512627191369376, + "grad_norm": 1.675873894996503, + "learning_rate": 7.529028134296559e-06, + "loss": 0.7047, + "step": 11461 + }, + { + "epoch": 0.3512933676596788, + "grad_norm": 1.9543806955179022, + "learning_rate": 7.5285999745070825e-06, + "loss": 0.6061, + "step": 11462 + }, + { + "epoch": 0.35132401618242, + "grad_norm": 2.0650936319087814, + "learning_rate": 7.528171789802665e-06, + "loss": 0.6731, + "step": 11463 + }, + { + "epoch": 0.3513546647051612, + "grad_norm": 1.7745616607812758, + "learning_rate": 7.527743580187525e-06, + "loss": 0.6357, + "step": 11464 + }, + { + "epoch": 0.3513853132279024, + "grad_norm": 1.9856771972517575, + "learning_rate": 7.527315345665879e-06, + "loss": 0.6877, + "step": 11465 + }, + { + "epoch": 0.3514159617506436, + "grad_norm": 1.740883464501038, + "learning_rate": 7.526887086241947e-06, + "loss": 0.6352, + "step": 11466 + }, + { + "epoch": 0.3514466102733848, + "grad_norm": 0.822084328284019, + "learning_rate": 7.526458801919954e-06, + "loss": 0.4703, + "step": 11467 + }, + { + "epoch": 0.351477258796126, + "grad_norm": 1.7838214686395422, + "learning_rate": 7.526030492704113e-06, + "loss": 0.7083, + "step": 11468 + }, + { + "epoch": 0.3515079073188672, + "grad_norm": 1.639553441444486, + "learning_rate": 7.525602158598647e-06, + "loss": 0.614, + "step": 11469 + }, + { + "epoch": 0.3515385558416084, + "grad_norm": 1.9393593566493537, + "learning_rate": 7.525173799607778e-06, + "loss": 0.7143, + "step": 11470 + }, + { + "epoch": 0.35156920436434963, + "grad_norm": 0.8145277886693462, + "learning_rate": 7.5247454157357254e-06, + "loss": 0.4801, + "step": 11471 + }, + { + "epoch": 0.35159985288709084, + "grad_norm": 1.8273658871984977, + "learning_rate": 7.52431700698671e-06, + "loss": 0.6965, + "step": 11472 + }, + { + "epoch": 0.35163050140983204, + "grad_norm": 1.9958343480203775, + "learning_rate": 7.523888573364953e-06, + "loss": 0.8387, + "step": 11473 + }, + { + "epoch": 0.35166114993257325, + "grad_norm": 1.6287248420306897, + "learning_rate": 7.523460114874677e-06, + "loss": 0.6896, + "step": 11474 + }, + { + "epoch": 0.35169179845531445, + "grad_norm": 2.665527712983616, + "learning_rate": 7.523031631520101e-06, + "loss": 0.6682, + "step": 11475 + }, + { + "epoch": 
0.35172244697805566, + "grad_norm": 1.7739599117273548, + "learning_rate": 7.52260312330545e-06, + "loss": 0.6289, + "step": 11476 + }, + { + "epoch": 0.35175309550079686, + "grad_norm": 1.7801635111572465, + "learning_rate": 7.522174590234945e-06, + "loss": 0.6409, + "step": 11477 + }, + { + "epoch": 0.35178374402353807, + "grad_norm": 1.7521594650834111, + "learning_rate": 7.521746032312809e-06, + "loss": 0.768, + "step": 11478 + }, + { + "epoch": 0.3518143925462793, + "grad_norm": 1.6793466177062144, + "learning_rate": 7.521317449543263e-06, + "loss": 0.6873, + "step": 11479 + }, + { + "epoch": 0.3518450410690205, + "grad_norm": 1.7685644006632135, + "learning_rate": 7.520888841930532e-06, + "loss": 0.6978, + "step": 11480 + }, + { + "epoch": 0.3518756895917617, + "grad_norm": 1.571812427092118, + "learning_rate": 7.520460209478837e-06, + "loss": 0.6439, + "step": 11481 + }, + { + "epoch": 0.3519063381145029, + "grad_norm": 1.5889915217234905, + "learning_rate": 7.520031552192404e-06, + "loss": 0.6921, + "step": 11482 + }, + { + "epoch": 0.3519369866372441, + "grad_norm": 1.7806736915120018, + "learning_rate": 7.519602870075455e-06, + "loss": 0.6477, + "step": 11483 + }, + { + "epoch": 0.3519676351599853, + "grad_norm": 1.6982767196302293, + "learning_rate": 7.519174163132212e-06, + "loss": 0.7168, + "step": 11484 + }, + { + "epoch": 0.3519982836827265, + "grad_norm": 1.6249313177476208, + "learning_rate": 7.518745431366905e-06, + "loss": 0.5841, + "step": 11485 + }, + { + "epoch": 0.3520289322054677, + "grad_norm": 1.7301409121799092, + "learning_rate": 7.5183166747837534e-06, + "loss": 0.5982, + "step": 11486 + }, + { + "epoch": 0.3520595807282089, + "grad_norm": 1.65781341944439, + "learning_rate": 7.517887893386984e-06, + "loss": 0.5984, + "step": 11487 + }, + { + "epoch": 0.3520902292509501, + "grad_norm": 1.5612348235037676, + "learning_rate": 7.517459087180819e-06, + "loss": 0.6071, + "step": 11488 + }, + { + "epoch": 0.35212087777369133, + "grad_norm": 1.5387781967786345, + "learning_rate": 7.517030256169486e-06, + "loss": 0.5937, + "step": 11489 + }, + { + "epoch": 0.35215152629643254, + "grad_norm": 1.7757232352861334, + "learning_rate": 7.516601400357211e-06, + "loss": 0.7016, + "step": 11490 + }, + { + "epoch": 0.35218217481917374, + "grad_norm": 1.6716386475950529, + "learning_rate": 7.516172519748218e-06, + "loss": 0.6081, + "step": 11491 + }, + { + "epoch": 0.3522128233419149, + "grad_norm": 1.6001061950393538, + "learning_rate": 7.515743614346734e-06, + "loss": 0.6975, + "step": 11492 + }, + { + "epoch": 0.3522434718646561, + "grad_norm": 1.7216272854340722, + "learning_rate": 7.5153146841569825e-06, + "loss": 0.664, + "step": 11493 + }, + { + "epoch": 0.3522741203873973, + "grad_norm": 0.9104537840978975, + "learning_rate": 7.514885729183195e-06, + "loss": 0.4725, + "step": 11494 + }, + { + "epoch": 0.3523047689101385, + "grad_norm": 1.7847175505967632, + "learning_rate": 7.514456749429592e-06, + "loss": 0.766, + "step": 11495 + }, + { + "epoch": 0.3523354174328797, + "grad_norm": 1.9022721849673196, + "learning_rate": 7.514027744900404e-06, + "loss": 0.6111, + "step": 11496 + }, + { + "epoch": 0.3523660659556209, + "grad_norm": 1.575498281225898, + "learning_rate": 7.513598715599857e-06, + "loss": 0.6344, + "step": 11497 + }, + { + "epoch": 0.3523967144783621, + "grad_norm": 1.6331737009621388, + "learning_rate": 7.51316966153218e-06, + "loss": 0.5958, + "step": 11498 + }, + { + "epoch": 0.35242736300110333, + "grad_norm": 1.7249736684236623, + "learning_rate": 
7.512740582701598e-06, + "loss": 0.7251, + "step": 11499 + }, + { + "epoch": 0.35245801152384454, + "grad_norm": 1.820602645318811, + "learning_rate": 7.512311479112342e-06, + "loss": 0.7119, + "step": 11500 + }, + { + "epoch": 0.35248866004658574, + "grad_norm": 0.7977080850338382, + "learning_rate": 7.511882350768636e-06, + "loss": 0.4807, + "step": 11501 + }, + { + "epoch": 0.35251930856932695, + "grad_norm": 1.6361909666884584, + "learning_rate": 7.511453197674714e-06, + "loss": 0.6599, + "step": 11502 + }, + { + "epoch": 0.35254995709206816, + "grad_norm": 0.8331475704820773, + "learning_rate": 7.511024019834798e-06, + "loss": 0.4584, + "step": 11503 + }, + { + "epoch": 0.35258060561480936, + "grad_norm": 0.7852891642350198, + "learning_rate": 7.51059481725312e-06, + "loss": 0.4426, + "step": 11504 + }, + { + "epoch": 0.35261125413755057, + "grad_norm": 1.5764130253158848, + "learning_rate": 7.510165589933911e-06, + "loss": 0.685, + "step": 11505 + }, + { + "epoch": 0.3526419026602918, + "grad_norm": 1.6135032672413598, + "learning_rate": 7.509736337881395e-06, + "loss": 0.6593, + "step": 11506 + }, + { + "epoch": 0.352672551183033, + "grad_norm": 0.7766728447706246, + "learning_rate": 7.509307061099806e-06, + "loss": 0.4699, + "step": 11507 + }, + { + "epoch": 0.3527031997057742, + "grad_norm": 0.7846030024922993, + "learning_rate": 7.508877759593373e-06, + "loss": 0.4401, + "step": 11508 + }, + { + "epoch": 0.3527338482285154, + "grad_norm": 1.7371215277171819, + "learning_rate": 7.508448433366326e-06, + "loss": 0.5609, + "step": 11509 + }, + { + "epoch": 0.3527644967512566, + "grad_norm": 1.822417826692636, + "learning_rate": 7.508019082422893e-06, + "loss": 0.6703, + "step": 11510 + }, + { + "epoch": 0.3527951452739978, + "grad_norm": 1.636929803066855, + "learning_rate": 7.507589706767309e-06, + "loss": 0.7077, + "step": 11511 + }, + { + "epoch": 0.352825793796739, + "grad_norm": 1.6307178598608998, + "learning_rate": 7.5071603064038e-06, + "loss": 0.7366, + "step": 11512 + }, + { + "epoch": 0.3528564423194802, + "grad_norm": 1.5967246435125362, + "learning_rate": 7.5067308813366e-06, + "loss": 0.7462, + "step": 11513 + }, + { + "epoch": 0.3528870908422214, + "grad_norm": 1.64729152022403, + "learning_rate": 7.506301431569938e-06, + "loss": 0.7352, + "step": 11514 + }, + { + "epoch": 0.3529177393649626, + "grad_norm": 1.6968999539677785, + "learning_rate": 7.505871957108049e-06, + "loss": 0.703, + "step": 11515 + }, + { + "epoch": 0.35294838788770383, + "grad_norm": 1.6707756841286392, + "learning_rate": 7.505442457955159e-06, + "loss": 0.6545, + "step": 11516 + }, + { + "epoch": 0.35297903641044504, + "grad_norm": 0.8646169115591696, + "learning_rate": 7.505012934115506e-06, + "loss": 0.4379, + "step": 11517 + }, + { + "epoch": 0.35300968493318624, + "grad_norm": 1.8269410607267949, + "learning_rate": 7.504583385593318e-06, + "loss": 0.6641, + "step": 11518 + }, + { + "epoch": 0.35304033345592745, + "grad_norm": 1.5696208205974542, + "learning_rate": 7.504153812392832e-06, + "loss": 0.6442, + "step": 11519 + }, + { + "epoch": 0.35307098197866865, + "grad_norm": 1.9961449937930107, + "learning_rate": 7.503724214518275e-06, + "loss": 0.6647, + "step": 11520 + }, + { + "epoch": 0.35310163050140986, + "grad_norm": 1.8136114742620981, + "learning_rate": 7.503294591973883e-06, + "loss": 0.5898, + "step": 11521 + }, + { + "epoch": 0.35313227902415106, + "grad_norm": 0.8385022822540775, + "learning_rate": 7.50286494476389e-06, + "loss": 0.489, + "step": 11522 + }, + { + "epoch": 
0.3531629275468922, + "grad_norm": 1.536324633828694, + "learning_rate": 7.502435272892527e-06, + "loss": 0.6665, + "step": 11523 + }, + { + "epoch": 0.3531935760696334, + "grad_norm": 1.8619185268873737, + "learning_rate": 7.50200557636403e-06, + "loss": 0.7498, + "step": 11524 + }, + { + "epoch": 0.3532242245923746, + "grad_norm": 1.9954772785342152, + "learning_rate": 7.501575855182631e-06, + "loss": 0.6801, + "step": 11525 + }, + { + "epoch": 0.35325487311511583, + "grad_norm": 1.573652658339017, + "learning_rate": 7.501146109352566e-06, + "loss": 0.5985, + "step": 11526 + }, + { + "epoch": 0.35328552163785704, + "grad_norm": 0.8160655569598101, + "learning_rate": 7.500716338878068e-06, + "loss": 0.471, + "step": 11527 + }, + { + "epoch": 0.35331617016059824, + "grad_norm": 1.7897795574160613, + "learning_rate": 7.500286543763372e-06, + "loss": 0.6816, + "step": 11528 + }, + { + "epoch": 0.35334681868333945, + "grad_norm": 1.6072053020184927, + "learning_rate": 7.499856724012713e-06, + "loss": 0.6781, + "step": 11529 + }, + { + "epoch": 0.35337746720608065, + "grad_norm": 1.8161389294298538, + "learning_rate": 7.4994268796303275e-06, + "loss": 0.7143, + "step": 11530 + }, + { + "epoch": 0.35340811572882186, + "grad_norm": 1.7061376863277748, + "learning_rate": 7.498997010620447e-06, + "loss": 0.6641, + "step": 11531 + }, + { + "epoch": 0.35343876425156306, + "grad_norm": 1.8201608438375971, + "learning_rate": 7.498567116987311e-06, + "loss": 0.6838, + "step": 11532 + }, + { + "epoch": 0.35346941277430427, + "grad_norm": 1.9175811547233572, + "learning_rate": 7.498137198735153e-06, + "loss": 0.7904, + "step": 11533 + }, + { + "epoch": 0.3535000612970455, + "grad_norm": 1.681686081795786, + "learning_rate": 7.4977072558682104e-06, + "loss": 0.6137, + "step": 11534 + }, + { + "epoch": 0.3535307098197867, + "grad_norm": 1.7951524125526113, + "learning_rate": 7.497277288390719e-06, + "loss": 0.7353, + "step": 11535 + }, + { + "epoch": 0.3535613583425279, + "grad_norm": 1.6203213026860488, + "learning_rate": 7.496847296306917e-06, + "loss": 0.6703, + "step": 11536 + }, + { + "epoch": 0.3535920068652691, + "grad_norm": 1.8116215048671631, + "learning_rate": 7.496417279621039e-06, + "loss": 0.7456, + "step": 11537 + }, + { + "epoch": 0.3536226553880103, + "grad_norm": 1.8081705396122012, + "learning_rate": 7.495987238337321e-06, + "loss": 0.7373, + "step": 11538 + }, + { + "epoch": 0.3536533039107515, + "grad_norm": 1.6266718006144087, + "learning_rate": 7.495557172460004e-06, + "loss": 0.6565, + "step": 11539 + }, + { + "epoch": 0.3536839524334927, + "grad_norm": 1.8695362795312345, + "learning_rate": 7.495127081993323e-06, + "loss": 0.6285, + "step": 11540 + }, + { + "epoch": 0.3537146009562339, + "grad_norm": 1.6194557032570933, + "learning_rate": 7.494696966941517e-06, + "loss": 0.6686, + "step": 11541 + }, + { + "epoch": 0.3537452494789751, + "grad_norm": 1.6884560811547316, + "learning_rate": 7.494266827308823e-06, + "loss": 0.6828, + "step": 11542 + }, + { + "epoch": 0.3537758980017163, + "grad_norm": 1.6912695398119963, + "learning_rate": 7.4938366630994805e-06, + "loss": 0.6983, + "step": 11543 + }, + { + "epoch": 0.35380654652445753, + "grad_norm": 2.002398296430419, + "learning_rate": 7.493406474317726e-06, + "loss": 0.6877, + "step": 11544 + }, + { + "epoch": 0.35383719504719874, + "grad_norm": 1.8183020707661715, + "learning_rate": 7.492976260967801e-06, + "loss": 0.6688, + "step": 11545 + }, + { + "epoch": 0.35386784356993994, + "grad_norm": 1.6855873117672848, + "learning_rate": 
7.492546023053941e-06, + "loss": 0.6308, + "step": 11546 + }, + { + "epoch": 0.35389849209268115, + "grad_norm": 1.6249546331224052, + "learning_rate": 7.49211576058039e-06, + "loss": 0.6074, + "step": 11547 + }, + { + "epoch": 0.35392914061542236, + "grad_norm": 1.7905327920465453, + "learning_rate": 7.491685473551384e-06, + "loss": 0.7201, + "step": 11548 + }, + { + "epoch": 0.35395978913816356, + "grad_norm": 1.5999403170752127, + "learning_rate": 7.4912551619711614e-06, + "loss": 0.631, + "step": 11549 + }, + { + "epoch": 0.35399043766090477, + "grad_norm": 1.9142260730969012, + "learning_rate": 7.490824825843965e-06, + "loss": 0.6763, + "step": 11550 + }, + { + "epoch": 0.35402108618364597, + "grad_norm": 2.0288307996504087, + "learning_rate": 7.490394465174036e-06, + "loss": 0.7325, + "step": 11551 + }, + { + "epoch": 0.3540517347063872, + "grad_norm": 1.549024979617377, + "learning_rate": 7.489964079965613e-06, + "loss": 0.6619, + "step": 11552 + }, + { + "epoch": 0.3540823832291284, + "grad_norm": 0.9190885024854405, + "learning_rate": 7.489533670222935e-06, + "loss": 0.4741, + "step": 11553 + }, + { + "epoch": 0.35411303175186953, + "grad_norm": 1.8391967850689488, + "learning_rate": 7.489103235950246e-06, + "loss": 0.6966, + "step": 11554 + }, + { + "epoch": 0.35414368027461074, + "grad_norm": 1.6211849751147696, + "learning_rate": 7.488672777151786e-06, + "loss": 0.5722, + "step": 11555 + }, + { + "epoch": 0.35417432879735194, + "grad_norm": 0.8117928839429486, + "learning_rate": 7.488242293831795e-06, + "loss": 0.4703, + "step": 11556 + }, + { + "epoch": 0.35420497732009315, + "grad_norm": 1.7054393163971326, + "learning_rate": 7.487811785994518e-06, + "loss": 0.6188, + "step": 11557 + }, + { + "epoch": 0.35423562584283436, + "grad_norm": 0.751975358976021, + "learning_rate": 7.487381253644193e-06, + "loss": 0.4491, + "step": 11558 + }, + { + "epoch": 0.35426627436557556, + "grad_norm": 1.9008345748263107, + "learning_rate": 7.486950696785066e-06, + "loss": 0.6799, + "step": 11559 + }, + { + "epoch": 0.35429692288831677, + "grad_norm": 1.7515405362746963, + "learning_rate": 7.4865201154213765e-06, + "loss": 0.598, + "step": 11560 + }, + { + "epoch": 0.354327571411058, + "grad_norm": 1.8483190864527228, + "learning_rate": 7.486089509557368e-06, + "loss": 0.792, + "step": 11561 + }, + { + "epoch": 0.3543582199337992, + "grad_norm": 0.7840131735797282, + "learning_rate": 7.485658879197284e-06, + "loss": 0.4594, + "step": 11562 + }, + { + "epoch": 0.3543888684565404, + "grad_norm": 0.8235559139544832, + "learning_rate": 7.4852282243453665e-06, + "loss": 0.4775, + "step": 11563 + }, + { + "epoch": 0.3544195169792816, + "grad_norm": 1.5646020864524388, + "learning_rate": 7.4847975450058596e-06, + "loss": 0.6929, + "step": 11564 + }, + { + "epoch": 0.3544501655020228, + "grad_norm": 1.812354445812359, + "learning_rate": 7.4843668411830076e-06, + "loss": 0.7436, + "step": 11565 + }, + { + "epoch": 0.354480814024764, + "grad_norm": 1.890227758800528, + "learning_rate": 7.483936112881052e-06, + "loss": 0.7275, + "step": 11566 + }, + { + "epoch": 0.3545114625475052, + "grad_norm": 1.6021352928150805, + "learning_rate": 7.48350536010424e-06, + "loss": 0.5744, + "step": 11567 + }, + { + "epoch": 0.3545421110702464, + "grad_norm": 1.8625177179043693, + "learning_rate": 7.483074582856812e-06, + "loss": 0.7647, + "step": 11568 + }, + { + "epoch": 0.3545727595929876, + "grad_norm": 1.812591108217456, + "learning_rate": 7.482643781143016e-06, + "loss": 0.6366, + "step": 11569 + }, + { + "epoch": 
0.3546034081157288, + "grad_norm": 1.769639015508177, + "learning_rate": 7.482212954967095e-06, + "loss": 0.6404, + "step": 11570 + }, + { + "epoch": 0.35463405663847003, + "grad_norm": 1.700512543178736, + "learning_rate": 7.481782104333297e-06, + "loss": 0.5974, + "step": 11571 + }, + { + "epoch": 0.35466470516121124, + "grad_norm": 1.6314356675338788, + "learning_rate": 7.481351229245862e-06, + "loss": 0.7222, + "step": 11572 + }, + { + "epoch": 0.35469535368395244, + "grad_norm": 1.5207909745684254, + "learning_rate": 7.4809203297090405e-06, + "loss": 0.6637, + "step": 11573 + }, + { + "epoch": 0.35472600220669365, + "grad_norm": 1.5742764175188213, + "learning_rate": 7.480489405727075e-06, + "loss": 0.6345, + "step": 11574 + }, + { + "epoch": 0.35475665072943485, + "grad_norm": 1.6304422649658734, + "learning_rate": 7.480058457304214e-06, + "loss": 0.5993, + "step": 11575 + }, + { + "epoch": 0.35478729925217606, + "grad_norm": 1.8607597894148566, + "learning_rate": 7.479627484444701e-06, + "loss": 0.7531, + "step": 11576 + }, + { + "epoch": 0.35481794777491726, + "grad_norm": 1.6423509963450464, + "learning_rate": 7.479196487152784e-06, + "loss": 0.6692, + "step": 11577 + }, + { + "epoch": 0.35484859629765847, + "grad_norm": 1.0546201238381399, + "learning_rate": 7.478765465432712e-06, + "loss": 0.495, + "step": 11578 + }, + { + "epoch": 0.3548792448203997, + "grad_norm": 1.7107206739507406, + "learning_rate": 7.478334419288726e-06, + "loss": 0.6193, + "step": 11579 + }, + { + "epoch": 0.3549098933431409, + "grad_norm": 1.5851073401602482, + "learning_rate": 7.4779033487250796e-06, + "loss": 0.5442, + "step": 11580 + }, + { + "epoch": 0.3549405418658821, + "grad_norm": 1.707975379473729, + "learning_rate": 7.477472253746017e-06, + "loss": 0.601, + "step": 11581 + }, + { + "epoch": 0.3549711903886233, + "grad_norm": 1.6152203731041903, + "learning_rate": 7.477041134355787e-06, + "loss": 0.6315, + "step": 11582 + }, + { + "epoch": 0.3550018389113645, + "grad_norm": 1.6806908040263806, + "learning_rate": 7.476609990558636e-06, + "loss": 0.7617, + "step": 11583 + }, + { + "epoch": 0.3550324874341057, + "grad_norm": 0.7786381463097348, + "learning_rate": 7.476178822358813e-06, + "loss": 0.4588, + "step": 11584 + }, + { + "epoch": 0.35506313595684685, + "grad_norm": 1.7402228775699837, + "learning_rate": 7.4757476297605654e-06, + "loss": 0.7551, + "step": 11585 + }, + { + "epoch": 0.35509378447958806, + "grad_norm": 1.6216796027646538, + "learning_rate": 7.475316412768145e-06, + "loss": 0.6998, + "step": 11586 + }, + { + "epoch": 0.35512443300232927, + "grad_norm": 1.95091302872517, + "learning_rate": 7.474885171385797e-06, + "loss": 0.7294, + "step": 11587 + }, + { + "epoch": 0.35515508152507047, + "grad_norm": 0.8659523725684465, + "learning_rate": 7.4744539056177735e-06, + "loss": 0.4804, + "step": 11588 + }, + { + "epoch": 0.3551857300478117, + "grad_norm": 1.876965361264726, + "learning_rate": 7.4740226154683215e-06, + "loss": 0.6961, + "step": 11589 + }, + { + "epoch": 0.3552163785705529, + "grad_norm": 1.7013120183700576, + "learning_rate": 7.473591300941692e-06, + "loss": 0.762, + "step": 11590 + }, + { + "epoch": 0.3552470270932941, + "grad_norm": 1.487944029699359, + "learning_rate": 7.473159962042136e-06, + "loss": 0.5129, + "step": 11591 + }, + { + "epoch": 0.3552776756160353, + "grad_norm": 1.7019548096031836, + "learning_rate": 7.4727285987738995e-06, + "loss": 0.6674, + "step": 11592 + }, + { + "epoch": 0.3553083241387765, + "grad_norm": 1.6714334645108642, + "learning_rate": 
7.472297211141237e-06, + "loss": 0.652, + "step": 11593 + }, + { + "epoch": 0.3553389726615177, + "grad_norm": 1.8973097769159681, + "learning_rate": 7.471865799148398e-06, + "loss": 0.6297, + "step": 11594 + }, + { + "epoch": 0.3553696211842589, + "grad_norm": 1.5419817637402538, + "learning_rate": 7.471434362799632e-06, + "loss": 0.625, + "step": 11595 + }, + { + "epoch": 0.3554002697070001, + "grad_norm": 1.4272647066523545, + "learning_rate": 7.47100290209919e-06, + "loss": 0.6916, + "step": 11596 + }, + { + "epoch": 0.3554309182297413, + "grad_norm": 0.8276034732007527, + "learning_rate": 7.470571417051327e-06, + "loss": 0.4789, + "step": 11597 + }, + { + "epoch": 0.3554615667524825, + "grad_norm": 1.8069693670211748, + "learning_rate": 7.470139907660288e-06, + "loss": 0.6227, + "step": 11598 + }, + { + "epoch": 0.35549221527522373, + "grad_norm": 1.75546383442252, + "learning_rate": 7.4697083739303315e-06, + "loss": 0.7052, + "step": 11599 + }, + { + "epoch": 0.35552286379796494, + "grad_norm": 1.7005328054719464, + "learning_rate": 7.469276815865705e-06, + "loss": 0.7193, + "step": 11600 + }, + { + "epoch": 0.35555351232070614, + "grad_norm": 1.5967382569194908, + "learning_rate": 7.468845233470663e-06, + "loss": 0.668, + "step": 11601 + }, + { + "epoch": 0.35558416084344735, + "grad_norm": 1.6656133972216702, + "learning_rate": 7.468413626749457e-06, + "loss": 0.6001, + "step": 11602 + }, + { + "epoch": 0.35561480936618856, + "grad_norm": 2.0845430972598766, + "learning_rate": 7.467981995706339e-06, + "loss": 0.6604, + "step": 11603 + }, + { + "epoch": 0.35564545788892976, + "grad_norm": 1.690675580335757, + "learning_rate": 7.467550340345564e-06, + "loss": 0.6465, + "step": 11604 + }, + { + "epoch": 0.35567610641167097, + "grad_norm": 1.6411408214555958, + "learning_rate": 7.467118660671383e-06, + "loss": 0.6777, + "step": 11605 + }, + { + "epoch": 0.3557067549344122, + "grad_norm": 0.8331927018839651, + "learning_rate": 7.466686956688053e-06, + "loss": 0.4663, + "step": 11606 + }, + { + "epoch": 0.3557374034571534, + "grad_norm": 0.8331391402626344, + "learning_rate": 7.466255228399824e-06, + "loss": 0.4604, + "step": 11607 + }, + { + "epoch": 0.3557680519798946, + "grad_norm": 1.663042447995613, + "learning_rate": 7.4658234758109505e-06, + "loss": 0.655, + "step": 11608 + }, + { + "epoch": 0.3557987005026358, + "grad_norm": 1.6023563730202857, + "learning_rate": 7.465391698925688e-06, + "loss": 0.6521, + "step": 11609 + }, + { + "epoch": 0.355829349025377, + "grad_norm": 1.5302993704400532, + "learning_rate": 7.4649598977482894e-06, + "loss": 0.698, + "step": 11610 + }, + { + "epoch": 0.3558599975481182, + "grad_norm": 1.623637301236848, + "learning_rate": 7.46452807228301e-06, + "loss": 0.6079, + "step": 11611 + }, + { + "epoch": 0.3558906460708594, + "grad_norm": 0.8634783038784498, + "learning_rate": 7.464096222534107e-06, + "loss": 0.4618, + "step": 11612 + }, + { + "epoch": 0.3559212945936006, + "grad_norm": 1.7107072399123349, + "learning_rate": 7.463664348505832e-06, + "loss": 0.6729, + "step": 11613 + }, + { + "epoch": 0.3559519431163418, + "grad_norm": 1.636793641182195, + "learning_rate": 7.463232450202443e-06, + "loss": 0.6768, + "step": 11614 + }, + { + "epoch": 0.355982591639083, + "grad_norm": 0.8792798630539294, + "learning_rate": 7.462800527628193e-06, + "loss": 0.4795, + "step": 11615 + }, + { + "epoch": 0.3560132401618242, + "grad_norm": 1.710344544405129, + "learning_rate": 7.462368580787341e-06, + "loss": 0.6986, + "step": 11616 + }, + { + "epoch": 
0.3560438886845654, + "grad_norm": 0.8104013315825509, + "learning_rate": 7.46193660968414e-06, + "loss": 0.4539, + "step": 11617 + }, + { + "epoch": 0.3560745372073066, + "grad_norm": 1.669885723418072, + "learning_rate": 7.461504614322848e-06, + "loss": 0.6714, + "step": 11618 + }, + { + "epoch": 0.3561051857300478, + "grad_norm": 1.7259351129942682, + "learning_rate": 7.46107259470772e-06, + "loss": 0.7379, + "step": 11619 + }, + { + "epoch": 0.356135834252789, + "grad_norm": 1.5633331497472034, + "learning_rate": 7.460640550843015e-06, + "loss": 0.7105, + "step": 11620 + }, + { + "epoch": 0.3561664827755302, + "grad_norm": 1.6115477097054687, + "learning_rate": 7.46020848273299e-06, + "loss": 0.6685, + "step": 11621 + }, + { + "epoch": 0.3561971312982714, + "grad_norm": 1.6704200938320615, + "learning_rate": 7.459776390381901e-06, + "loss": 0.6615, + "step": 11622 + }, + { + "epoch": 0.3562277798210126, + "grad_norm": 1.6443037461239731, + "learning_rate": 7.459344273794004e-06, + "loss": 0.729, + "step": 11623 + }, + { + "epoch": 0.3562584283437538, + "grad_norm": 1.8027573000183872, + "learning_rate": 7.4589121329735604e-06, + "loss": 0.6762, + "step": 11624 + }, + { + "epoch": 0.356289076866495, + "grad_norm": 1.6543193855371827, + "learning_rate": 7.4584799679248275e-06, + "loss": 0.6837, + "step": 11625 + }, + { + "epoch": 0.35631972538923623, + "grad_norm": 1.506689520889283, + "learning_rate": 7.45804777865206e-06, + "loss": 0.7575, + "step": 11626 + }, + { + "epoch": 0.35635037391197744, + "grad_norm": 2.020036747174492, + "learning_rate": 7.457615565159521e-06, + "loss": 0.7668, + "step": 11627 + }, + { + "epoch": 0.35638102243471864, + "grad_norm": 1.5672536173331542, + "learning_rate": 7.457183327451465e-06, + "loss": 0.6147, + "step": 11628 + }, + { + "epoch": 0.35641167095745985, + "grad_norm": 1.8335877677423122, + "learning_rate": 7.456751065532153e-06, + "loss": 0.7291, + "step": 11629 + }, + { + "epoch": 0.35644231948020105, + "grad_norm": 1.1412993704195857, + "learning_rate": 7.456318779405845e-06, + "loss": 0.464, + "step": 11630 + }, + { + "epoch": 0.35647296800294226, + "grad_norm": 1.8619977626677557, + "learning_rate": 7.4558864690768e-06, + "loss": 0.6459, + "step": 11631 + }, + { + "epoch": 0.35650361652568346, + "grad_norm": 1.900572168036094, + "learning_rate": 7.455454134549278e-06, + "loss": 0.6322, + "step": 11632 + }, + { + "epoch": 0.35653426504842467, + "grad_norm": 0.8713776903275755, + "learning_rate": 7.455021775827536e-06, + "loss": 0.4664, + "step": 11633 + }, + { + "epoch": 0.3565649135711659, + "grad_norm": 1.6482902119801084, + "learning_rate": 7.454589392915838e-06, + "loss": 0.6895, + "step": 11634 + }, + { + "epoch": 0.3565955620939071, + "grad_norm": 1.6672597348325522, + "learning_rate": 7.454156985818441e-06, + "loss": 0.6856, + "step": 11635 + }, + { + "epoch": 0.3566262106166483, + "grad_norm": 1.8795221582324282, + "learning_rate": 7.453724554539609e-06, + "loss": 0.7449, + "step": 11636 + }, + { + "epoch": 0.3566568591393895, + "grad_norm": 1.487729927570375, + "learning_rate": 7.4532920990836e-06, + "loss": 0.5559, + "step": 11637 + }, + { + "epoch": 0.3566875076621307, + "grad_norm": 1.7059678331524135, + "learning_rate": 7.4528596194546775e-06, + "loss": 0.7335, + "step": 11638 + }, + { + "epoch": 0.3567181561848719, + "grad_norm": 1.706430980146084, + "learning_rate": 7.4524271156570996e-06, + "loss": 0.6692, + "step": 11639 + }, + { + "epoch": 0.3567488047076131, + "grad_norm": 1.7229600920938677, + "learning_rate": 
7.451994587695133e-06, + "loss": 0.682, + "step": 11640 + }, + { + "epoch": 0.3567794532303543, + "grad_norm": 1.604056136596775, + "learning_rate": 7.4515620355730334e-06, + "loss": 0.6484, + "step": 11641 + }, + { + "epoch": 0.3568101017530955, + "grad_norm": 1.6281978392066858, + "learning_rate": 7.451129459295066e-06, + "loss": 0.6283, + "step": 11642 + }, + { + "epoch": 0.3568407502758367, + "grad_norm": 1.8884438664382392, + "learning_rate": 7.450696858865494e-06, + "loss": 0.7055, + "step": 11643 + }, + { + "epoch": 0.35687139879857793, + "grad_norm": 1.6251361786550427, + "learning_rate": 7.4502642342885775e-06, + "loss": 0.6091, + "step": 11644 + }, + { + "epoch": 0.35690204732131914, + "grad_norm": 1.702356729245703, + "learning_rate": 7.4498315855685835e-06, + "loss": 0.6423, + "step": 11645 + }, + { + "epoch": 0.35693269584406034, + "grad_norm": 1.1651568737667457, + "learning_rate": 7.449398912709768e-06, + "loss": 0.4436, + "step": 11646 + }, + { + "epoch": 0.3569633443668015, + "grad_norm": 1.6273361324262092, + "learning_rate": 7.448966215716401e-06, + "loss": 0.5841, + "step": 11647 + }, + { + "epoch": 0.3569939928895427, + "grad_norm": 1.8808769572255137, + "learning_rate": 7.448533494592743e-06, + "loss": 0.6946, + "step": 11648 + }, + { + "epoch": 0.3570246414122839, + "grad_norm": 0.9310667950306141, + "learning_rate": 7.448100749343059e-06, + "loss": 0.4647, + "step": 11649 + }, + { + "epoch": 0.3570552899350251, + "grad_norm": 1.7566759229377036, + "learning_rate": 7.447667979971609e-06, + "loss": 0.684, + "step": 11650 + }, + { + "epoch": 0.3570859384577663, + "grad_norm": 1.670385288307715, + "learning_rate": 7.447235186482662e-06, + "loss": 0.7101, + "step": 11651 + }, + { + "epoch": 0.3571165869805075, + "grad_norm": 1.746604416821685, + "learning_rate": 7.446802368880481e-06, + "loss": 0.694, + "step": 11652 + }, + { + "epoch": 0.35714723550324873, + "grad_norm": 1.8473340806044423, + "learning_rate": 7.44636952716933e-06, + "loss": 0.6701, + "step": 11653 + }, + { + "epoch": 0.35717788402598993, + "grad_norm": 1.6049912415347145, + "learning_rate": 7.445936661353472e-06, + "loss": 0.6156, + "step": 11654 + }, + { + "epoch": 0.35720853254873114, + "grad_norm": 1.7322573211938996, + "learning_rate": 7.445503771437177e-06, + "loss": 0.7593, + "step": 11655 + }, + { + "epoch": 0.35723918107147234, + "grad_norm": 1.489575960182561, + "learning_rate": 7.445070857424706e-06, + "loss": 0.5869, + "step": 11656 + }, + { + "epoch": 0.35726982959421355, + "grad_norm": 1.8339539606992972, + "learning_rate": 7.4446379193203265e-06, + "loss": 0.6394, + "step": 11657 + }, + { + "epoch": 0.35730047811695476, + "grad_norm": 1.499792996270755, + "learning_rate": 7.444204957128305e-06, + "loss": 0.6516, + "step": 11658 + }, + { + "epoch": 0.35733112663969596, + "grad_norm": 1.6190070136002814, + "learning_rate": 7.443771970852907e-06, + "loss": 0.6511, + "step": 11659 + }, + { + "epoch": 0.35736177516243717, + "grad_norm": 1.6859261911732035, + "learning_rate": 7.443338960498398e-06, + "loss": 0.6746, + "step": 11660 + }, + { + "epoch": 0.3573924236851784, + "grad_norm": 1.7328176436121998, + "learning_rate": 7.4429059260690425e-06, + "loss": 0.7041, + "step": 11661 + }, + { + "epoch": 0.3574230722079196, + "grad_norm": 1.8246883908405414, + "learning_rate": 7.442472867569112e-06, + "loss": 0.7081, + "step": 11662 + }, + { + "epoch": 0.3574537207306608, + "grad_norm": 1.6920086867171547, + "learning_rate": 7.442039785002872e-06, + "loss": 0.4697, + "step": 11663 + }, + { + 
"epoch": 0.357484369253402, + "grad_norm": 1.8796911058435575, + "learning_rate": 7.441606678374589e-06, + "loss": 0.709, + "step": 11664 + }, + { + "epoch": 0.3575150177761432, + "grad_norm": 0.8906052079677012, + "learning_rate": 7.441173547688529e-06, + "loss": 0.4744, + "step": 11665 + }, + { + "epoch": 0.3575456662988844, + "grad_norm": 1.8234440391156852, + "learning_rate": 7.440740392948964e-06, + "loss": 0.6585, + "step": 11666 + }, + { + "epoch": 0.3575763148216256, + "grad_norm": 1.7742711987014865, + "learning_rate": 7.4403072141601585e-06, + "loss": 0.6819, + "step": 11667 + }, + { + "epoch": 0.3576069633443668, + "grad_norm": 1.6701536867268716, + "learning_rate": 7.439874011326381e-06, + "loss": 0.7131, + "step": 11668 + }, + { + "epoch": 0.357637611867108, + "grad_norm": 2.0093544767595213, + "learning_rate": 7.439440784451901e-06, + "loss": 0.8084, + "step": 11669 + }, + { + "epoch": 0.3576682603898492, + "grad_norm": 1.744410670423588, + "learning_rate": 7.439007533540986e-06, + "loss": 0.6967, + "step": 11670 + }, + { + "epoch": 0.35769890891259043, + "grad_norm": 3.5050390034295607, + "learning_rate": 7.438574258597907e-06, + "loss": 0.6776, + "step": 11671 + }, + { + "epoch": 0.35772955743533164, + "grad_norm": 1.8629290678880424, + "learning_rate": 7.438140959626931e-06, + "loss": 0.656, + "step": 11672 + }, + { + "epoch": 0.35776020595807284, + "grad_norm": 2.021802473978107, + "learning_rate": 7.437707636632329e-06, + "loss": 0.7146, + "step": 11673 + }, + { + "epoch": 0.35779085448081405, + "grad_norm": 1.5662819824956624, + "learning_rate": 7.437274289618368e-06, + "loss": 0.6249, + "step": 11674 + }, + { + "epoch": 0.35782150300355525, + "grad_norm": 1.622144245169312, + "learning_rate": 7.436840918589323e-06, + "loss": 0.6368, + "step": 11675 + }, + { + "epoch": 0.35785215152629646, + "grad_norm": 1.734709978145341, + "learning_rate": 7.436407523549458e-06, + "loss": 0.688, + "step": 11676 + }, + { + "epoch": 0.35788280004903766, + "grad_norm": 1.7698030494434414, + "learning_rate": 7.435974104503048e-06, + "loss": 0.628, + "step": 11677 + }, + { + "epoch": 0.3579134485717788, + "grad_norm": 2.0653609029571367, + "learning_rate": 7.435540661454361e-06, + "loss": 0.5135, + "step": 11678 + }, + { + "epoch": 0.35794409709452, + "grad_norm": 1.749560636168231, + "learning_rate": 7.43510719440767e-06, + "loss": 0.624, + "step": 11679 + }, + { + "epoch": 0.3579747456172612, + "grad_norm": 1.7666587469474175, + "learning_rate": 7.434673703367243e-06, + "loss": 0.6713, + "step": 11680 + }, + { + "epoch": 0.35800539414000243, + "grad_norm": 1.7346319396335559, + "learning_rate": 7.434240188337355e-06, + "loss": 0.6542, + "step": 11681 + }, + { + "epoch": 0.35803604266274364, + "grad_norm": 0.9361569226920551, + "learning_rate": 7.433806649322274e-06, + "loss": 0.4733, + "step": 11682 + }, + { + "epoch": 0.35806669118548484, + "grad_norm": 2.0333593973416466, + "learning_rate": 7.433373086326274e-06, + "loss": 0.7028, + "step": 11683 + }, + { + "epoch": 0.35809733970822605, + "grad_norm": 2.091401143616636, + "learning_rate": 7.432939499353627e-06, + "loss": 0.7234, + "step": 11684 + }, + { + "epoch": 0.35812798823096725, + "grad_norm": 1.9615321714800125, + "learning_rate": 7.432505888408603e-06, + "loss": 0.7152, + "step": 11685 + }, + { + "epoch": 0.35815863675370846, + "grad_norm": 1.8000814638908997, + "learning_rate": 7.432072253495478e-06, + "loss": 0.6051, + "step": 11686 + }, + { + "epoch": 0.35818928527644966, + "grad_norm": 1.7536649996071698, + "learning_rate": 
7.431638594618521e-06, + "loss": 0.6742, + "step": 11687 + }, + { + "epoch": 0.35821993379919087, + "grad_norm": 1.193788237045127, + "learning_rate": 7.431204911782009e-06, + "loss": 0.4726, + "step": 11688 + }, + { + "epoch": 0.3582505823219321, + "grad_norm": 1.9755724287618635, + "learning_rate": 7.4307712049902105e-06, + "loss": 0.7541, + "step": 11689 + }, + { + "epoch": 0.3582812308446733, + "grad_norm": 1.7476113091713474, + "learning_rate": 7.430337474247403e-06, + "loss": 0.7717, + "step": 11690 + }, + { + "epoch": 0.3583118793674145, + "grad_norm": 1.8755295909905776, + "learning_rate": 7.429903719557857e-06, + "loss": 0.728, + "step": 11691 + }, + { + "epoch": 0.3583425278901557, + "grad_norm": 2.157062261981498, + "learning_rate": 7.42946994092585e-06, + "loss": 0.6891, + "step": 11692 + }, + { + "epoch": 0.3583731764128969, + "grad_norm": 1.6535549075334186, + "learning_rate": 7.429036138355652e-06, + "loss": 0.7061, + "step": 11693 + }, + { + "epoch": 0.3584038249356381, + "grad_norm": 1.589439744105541, + "learning_rate": 7.428602311851542e-06, + "loss": 0.6647, + "step": 11694 + }, + { + "epoch": 0.3584344734583793, + "grad_norm": 0.8514630694112959, + "learning_rate": 7.42816846141779e-06, + "loss": 0.435, + "step": 11695 + }, + { + "epoch": 0.3584651219811205, + "grad_norm": 1.8713592182126215, + "learning_rate": 7.4277345870586725e-06, + "loss": 0.7719, + "step": 11696 + }, + { + "epoch": 0.3584957705038617, + "grad_norm": 1.7500009655456195, + "learning_rate": 7.427300688778465e-06, + "loss": 0.683, + "step": 11697 + }, + { + "epoch": 0.3585264190266029, + "grad_norm": 1.5376357559581828, + "learning_rate": 7.426866766581444e-06, + "loss": 0.6987, + "step": 11698 + }, + { + "epoch": 0.35855706754934413, + "grad_norm": 1.6823607755061114, + "learning_rate": 7.4264328204718835e-06, + "loss": 0.6752, + "step": 11699 + }, + { + "epoch": 0.35858771607208534, + "grad_norm": 1.8826446887534303, + "learning_rate": 7.425998850454059e-06, + "loss": 0.7262, + "step": 11700 + }, + { + "epoch": 0.35861836459482654, + "grad_norm": 1.8396172173564178, + "learning_rate": 7.425564856532248e-06, + "loss": 0.6885, + "step": 11701 + }, + { + "epoch": 0.35864901311756775, + "grad_norm": 1.4814253960086428, + "learning_rate": 7.4251308387107256e-06, + "loss": 0.634, + "step": 11702 + }, + { + "epoch": 0.35867966164030896, + "grad_norm": 1.4784877119788193, + "learning_rate": 7.424696796993769e-06, + "loss": 0.6608, + "step": 11703 + }, + { + "epoch": 0.35871031016305016, + "grad_norm": 1.7430366950624436, + "learning_rate": 7.424262731385653e-06, + "loss": 0.7758, + "step": 11704 + }, + { + "epoch": 0.35874095868579137, + "grad_norm": 0.879839568296998, + "learning_rate": 7.423828641890657e-06, + "loss": 0.4695, + "step": 11705 + }, + { + "epoch": 0.3587716072085326, + "grad_norm": 1.6650555851992757, + "learning_rate": 7.423394528513057e-06, + "loss": 0.7449, + "step": 11706 + }, + { + "epoch": 0.3588022557312738, + "grad_norm": 1.7293470513338305, + "learning_rate": 7.422960391257131e-06, + "loss": 0.6085, + "step": 11707 + }, + { + "epoch": 0.358832904254015, + "grad_norm": 1.4980524363038863, + "learning_rate": 7.422526230127156e-06, + "loss": 0.7375, + "step": 11708 + }, + { + "epoch": 0.35886355277675613, + "grad_norm": 1.6214439035974346, + "learning_rate": 7.42209204512741e-06, + "loss": 0.7459, + "step": 11709 + }, + { + "epoch": 0.35889420129949734, + "grad_norm": 1.7382453365031623, + "learning_rate": 7.421657836262172e-06, + "loss": 0.7497, + "step": 11710 + }, + { + "epoch": 
0.35892484982223855, + "grad_norm": 1.7368588251382793, + "learning_rate": 7.42122360353572e-06, + "loss": 0.7225, + "step": 11711 + }, + { + "epoch": 0.35895549834497975, + "grad_norm": 1.8274880969467713, + "learning_rate": 7.4207893469523304e-06, + "loss": 0.733, + "step": 11712 + }, + { + "epoch": 0.35898614686772096, + "grad_norm": 1.4210488933590335, + "learning_rate": 7.420355066516286e-06, + "loss": 0.5975, + "step": 11713 + }, + { + "epoch": 0.35901679539046216, + "grad_norm": 1.5980891549794982, + "learning_rate": 7.419920762231864e-06, + "loss": 0.6376, + "step": 11714 + }, + { + "epoch": 0.35904744391320337, + "grad_norm": 1.7477119390207045, + "learning_rate": 7.419486434103341e-06, + "loss": 0.6425, + "step": 11715 + }, + { + "epoch": 0.3590780924359446, + "grad_norm": 1.5940158063927434, + "learning_rate": 7.419052082135001e-06, + "loss": 0.6914, + "step": 11716 + }, + { + "epoch": 0.3591087409586858, + "grad_norm": 1.7660345242211477, + "learning_rate": 7.418617706331123e-06, + "loss": 0.7765, + "step": 11717 + }, + { + "epoch": 0.359139389481427, + "grad_norm": 1.5038337406828026, + "learning_rate": 7.418183306695984e-06, + "loss": 0.7713, + "step": 11718 + }, + { + "epoch": 0.3591700380041682, + "grad_norm": 1.4469598725015715, + "learning_rate": 7.417748883233866e-06, + "loss": 0.6392, + "step": 11719 + }, + { + "epoch": 0.3592006865269094, + "grad_norm": 1.8515242464926172, + "learning_rate": 7.417314435949051e-06, + "loss": 0.7521, + "step": 11720 + }, + { + "epoch": 0.3592313350496506, + "grad_norm": 0.8148521796105942, + "learning_rate": 7.416879964845818e-06, + "loss": 0.4665, + "step": 11721 + }, + { + "epoch": 0.3592619835723918, + "grad_norm": 0.8376915371649019, + "learning_rate": 7.416445469928448e-06, + "loss": 0.4661, + "step": 11722 + }, + { + "epoch": 0.359292632095133, + "grad_norm": 1.7072326936475204, + "learning_rate": 7.416010951201222e-06, + "loss": 0.7823, + "step": 11723 + }, + { + "epoch": 0.3593232806178742, + "grad_norm": 1.6524891767128629, + "learning_rate": 7.415576408668423e-06, + "loss": 0.5893, + "step": 11724 + }, + { + "epoch": 0.3593539291406154, + "grad_norm": 1.490749959994526, + "learning_rate": 7.415141842334333e-06, + "loss": 0.6036, + "step": 11725 + }, + { + "epoch": 0.35938457766335663, + "grad_norm": 1.788772352339334, + "learning_rate": 7.4147072522032294e-06, + "loss": 0.7276, + "step": 11726 + }, + { + "epoch": 0.35941522618609784, + "grad_norm": 1.7075567107443836, + "learning_rate": 7.4142726382793984e-06, + "loss": 0.708, + "step": 11727 + }, + { + "epoch": 0.35944587470883904, + "grad_norm": 1.5805269599261371, + "learning_rate": 7.4138380005671214e-06, + "loss": 0.6694, + "step": 11728 + }, + { + "epoch": 0.35947652323158025, + "grad_norm": 1.7691985893947506, + "learning_rate": 7.413403339070682e-06, + "loss": 0.6925, + "step": 11729 + }, + { + "epoch": 0.35950717175432145, + "grad_norm": 1.7559831647726398, + "learning_rate": 7.412968653794362e-06, + "loss": 0.6342, + "step": 11730 + }, + { + "epoch": 0.35953782027706266, + "grad_norm": 1.6763470576594879, + "learning_rate": 7.412533944742443e-06, + "loss": 0.7245, + "step": 11731 + }, + { + "epoch": 0.35956846879980386, + "grad_norm": 1.8992546885154449, + "learning_rate": 7.412099211919209e-06, + "loss": 0.6545, + "step": 11732 + }, + { + "epoch": 0.35959911732254507, + "grad_norm": 1.5646727515105538, + "learning_rate": 7.411664455328948e-06, + "loss": 0.5714, + "step": 11733 + }, + { + "epoch": 0.3596297658452863, + "grad_norm": 1.7898941174484266, + 
"learning_rate": 7.411229674975937e-06, + "loss": 0.7741, + "step": 11734 + }, + { + "epoch": 0.3596604143680275, + "grad_norm": 1.6083417028056464, + "learning_rate": 7.410794870864464e-06, + "loss": 0.6777, + "step": 11735 + }, + { + "epoch": 0.3596910628907687, + "grad_norm": 1.5925479916846652, + "learning_rate": 7.410360042998812e-06, + "loss": 0.7249, + "step": 11736 + }, + { + "epoch": 0.3597217114135099, + "grad_norm": 1.6151729297194641, + "learning_rate": 7.409925191383267e-06, + "loss": 0.6795, + "step": 11737 + }, + { + "epoch": 0.3597523599362511, + "grad_norm": 1.6859992265420911, + "learning_rate": 7.40949031602211e-06, + "loss": 0.7046, + "step": 11738 + }, + { + "epoch": 0.3597830084589923, + "grad_norm": 1.7817084446840878, + "learning_rate": 7.4090554169196304e-06, + "loss": 0.6979, + "step": 11739 + }, + { + "epoch": 0.35981365698173345, + "grad_norm": 1.6976663280356061, + "learning_rate": 7.408620494080111e-06, + "loss": 0.6204, + "step": 11740 + }, + { + "epoch": 0.35984430550447466, + "grad_norm": 1.687572703997558, + "learning_rate": 7.408185547507837e-06, + "loss": 0.6886, + "step": 11741 + }, + { + "epoch": 0.35987495402721587, + "grad_norm": 1.6450351213899723, + "learning_rate": 7.4077505772070955e-06, + "loss": 0.676, + "step": 11742 + }, + { + "epoch": 0.35990560254995707, + "grad_norm": 1.5217156522571769, + "learning_rate": 7.407315583182171e-06, + "loss": 0.6132, + "step": 11743 + }, + { + "epoch": 0.3599362510726983, + "grad_norm": 0.8897906688643556, + "learning_rate": 7.406880565437351e-06, + "loss": 0.4595, + "step": 11744 + }, + { + "epoch": 0.3599668995954395, + "grad_norm": 1.727606868006014, + "learning_rate": 7.4064455239769195e-06, + "loss": 0.586, + "step": 11745 + }, + { + "epoch": 0.3599975481181807, + "grad_norm": 1.591072835372839, + "learning_rate": 7.406010458805165e-06, + "loss": 0.5661, + "step": 11746 + }, + { + "epoch": 0.3600281966409219, + "grad_norm": 1.8004804076472056, + "learning_rate": 7.405575369926374e-06, + "loss": 0.6542, + "step": 11747 + }, + { + "epoch": 0.3600588451636631, + "grad_norm": 1.6697298071183584, + "learning_rate": 7.405140257344835e-06, + "loss": 0.6588, + "step": 11748 + }, + { + "epoch": 0.3600894936864043, + "grad_norm": 1.5722053404512888, + "learning_rate": 7.404705121064831e-06, + "loss": 0.666, + "step": 11749 + }, + { + "epoch": 0.3601201422091455, + "grad_norm": 1.6908684149664086, + "learning_rate": 7.404269961090653e-06, + "loss": 0.6762, + "step": 11750 + }, + { + "epoch": 0.3601507907318867, + "grad_norm": 1.7782219559021692, + "learning_rate": 7.403834777426588e-06, + "loss": 0.6659, + "step": 11751 + }, + { + "epoch": 0.3601814392546279, + "grad_norm": 1.7422385676644414, + "learning_rate": 7.403399570076924e-06, + "loss": 0.7418, + "step": 11752 + }, + { + "epoch": 0.3602120877773691, + "grad_norm": 1.5466598688649502, + "learning_rate": 7.40296433904595e-06, + "loss": 0.7043, + "step": 11753 + }, + { + "epoch": 0.36024273630011033, + "grad_norm": 1.652303137370934, + "learning_rate": 7.4025290843379525e-06, + "loss": 0.6324, + "step": 11754 + }, + { + "epoch": 0.36027338482285154, + "grad_norm": 1.5471915495187951, + "learning_rate": 7.402093805957221e-06, + "loss": 0.6303, + "step": 11755 + }, + { + "epoch": 0.36030403334559274, + "grad_norm": 1.6202347298509956, + "learning_rate": 7.401658503908046e-06, + "loss": 0.6821, + "step": 11756 + }, + { + "epoch": 0.36033468186833395, + "grad_norm": 1.929181587608493, + "learning_rate": 7.401223178194714e-06, + "loss": 0.7274, + "step": 11757 + }, 
+ { + "epoch": 0.36036533039107516, + "grad_norm": 1.6143222714209413, + "learning_rate": 7.400787828821516e-06, + "loss": 0.6269, + "step": 11758 + }, + { + "epoch": 0.36039597891381636, + "grad_norm": 1.6919991176857205, + "learning_rate": 7.400352455792741e-06, + "loss": 0.6368, + "step": 11759 + }, + { + "epoch": 0.36042662743655757, + "grad_norm": 0.8515313743486554, + "learning_rate": 7.39991705911268e-06, + "loss": 0.4567, + "step": 11760 + }, + { + "epoch": 0.3604572759592988, + "grad_norm": 1.728933482663062, + "learning_rate": 7.399481638785622e-06, + "loss": 0.6983, + "step": 11761 + }, + { + "epoch": 0.36048792448204, + "grad_norm": 0.7873718447785923, + "learning_rate": 7.3990461948158565e-06, + "loss": 0.442, + "step": 11762 + }, + { + "epoch": 0.3605185730047812, + "grad_norm": 2.0350770013173385, + "learning_rate": 7.398610727207677e-06, + "loss": 0.6406, + "step": 11763 + }, + { + "epoch": 0.3605492215275224, + "grad_norm": 1.8305035789685364, + "learning_rate": 7.39817523596537e-06, + "loss": 0.7188, + "step": 11764 + }, + { + "epoch": 0.3605798700502636, + "grad_norm": 1.4941683209938579, + "learning_rate": 7.39773972109323e-06, + "loss": 0.6627, + "step": 11765 + }, + { + "epoch": 0.3606105185730048, + "grad_norm": 0.7907457920346234, + "learning_rate": 7.397304182595546e-06, + "loss": 0.4726, + "step": 11766 + }, + { + "epoch": 0.360641167095746, + "grad_norm": 1.6950802007842276, + "learning_rate": 7.396868620476611e-06, + "loss": 0.6534, + "step": 11767 + }, + { + "epoch": 0.3606718156184872, + "grad_norm": 1.819303962534574, + "learning_rate": 7.396433034740718e-06, + "loss": 0.6499, + "step": 11768 + }, + { + "epoch": 0.3607024641412284, + "grad_norm": 1.6048135553299738, + "learning_rate": 7.395997425392154e-06, + "loss": 0.6894, + "step": 11769 + }, + { + "epoch": 0.3607331126639696, + "grad_norm": 1.6137692558266916, + "learning_rate": 7.395561792435216e-06, + "loss": 0.6325, + "step": 11770 + }, + { + "epoch": 0.3607637611867108, + "grad_norm": 1.7690230572504906, + "learning_rate": 7.395126135874196e-06, + "loss": 0.551, + "step": 11771 + }, + { + "epoch": 0.360794409709452, + "grad_norm": 1.8304570905958577, + "learning_rate": 7.394690455713383e-06, + "loss": 0.6823, + "step": 11772 + }, + { + "epoch": 0.3608250582321932, + "grad_norm": 1.8341693510695645, + "learning_rate": 7.394254751957073e-06, + "loss": 0.7466, + "step": 11773 + }, + { + "epoch": 0.3608557067549344, + "grad_norm": 0.881838618961353, + "learning_rate": 7.393819024609559e-06, + "loss": 0.4679, + "step": 11774 + }, + { + "epoch": 0.3608863552776756, + "grad_norm": 0.8380057310428838, + "learning_rate": 7.3933832736751335e-06, + "loss": 0.4647, + "step": 11775 + }, + { + "epoch": 0.3609170038004168, + "grad_norm": 1.7709156587881854, + "learning_rate": 7.39294749915809e-06, + "loss": 0.7188, + "step": 11776 + }, + { + "epoch": 0.360947652323158, + "grad_norm": 1.9201534870862416, + "learning_rate": 7.392511701062721e-06, + "loss": 0.7592, + "step": 11777 + }, + { + "epoch": 0.3609783008458992, + "grad_norm": 1.6801179452120625, + "learning_rate": 7.392075879393324e-06, + "loss": 0.6283, + "step": 11778 + }, + { + "epoch": 0.3610089493686404, + "grad_norm": 1.5859915994292149, + "learning_rate": 7.391640034154192e-06, + "loss": 0.703, + "step": 11779 + }, + { + "epoch": 0.3610395978913816, + "grad_norm": 2.0266018461787274, + "learning_rate": 7.391204165349618e-06, + "loss": 0.692, + "step": 11780 + }, + { + "epoch": 0.36107024641412283, + "grad_norm": 1.8068630434393183, + "learning_rate": 
7.390768272983896e-06, + "loss": 0.6324, + "step": 11781 + }, + { + "epoch": 0.36110089493686404, + "grad_norm": 1.486958452065328, + "learning_rate": 7.390332357061324e-06, + "loss": 0.6807, + "step": 11782 + }, + { + "epoch": 0.36113154345960524, + "grad_norm": 1.6847781667906452, + "learning_rate": 7.389896417586195e-06, + "loss": 0.6928, + "step": 11783 + }, + { + "epoch": 0.36116219198234645, + "grad_norm": 1.7342589344351254, + "learning_rate": 7.389460454562806e-06, + "loss": 0.6821, + "step": 11784 + }, + { + "epoch": 0.36119284050508765, + "grad_norm": 1.90780228211002, + "learning_rate": 7.3890244679954516e-06, + "loss": 0.7093, + "step": 11785 + }, + { + "epoch": 0.36122348902782886, + "grad_norm": 1.618240400119155, + "learning_rate": 7.3885884578884256e-06, + "loss": 0.7523, + "step": 11786 + }, + { + "epoch": 0.36125413755057006, + "grad_norm": 1.5422623503703698, + "learning_rate": 7.388152424246031e-06, + "loss": 0.6795, + "step": 11787 + }, + { + "epoch": 0.36128478607331127, + "grad_norm": 1.5514006045261939, + "learning_rate": 7.387716367072556e-06, + "loss": 0.6068, + "step": 11788 + }, + { + "epoch": 0.3613154345960525, + "grad_norm": 1.6220981249002688, + "learning_rate": 7.387280286372302e-06, + "loss": 0.7613, + "step": 11789 + }, + { + "epoch": 0.3613460831187937, + "grad_norm": 1.5311129148234055, + "learning_rate": 7.386844182149564e-06, + "loss": 0.6625, + "step": 11790 + }, + { + "epoch": 0.3613767316415349, + "grad_norm": 1.761859527611915, + "learning_rate": 7.38640805440864e-06, + "loss": 0.6776, + "step": 11791 + }, + { + "epoch": 0.3614073801642761, + "grad_norm": 1.6815795944385727, + "learning_rate": 7.385971903153826e-06, + "loss": 0.7018, + "step": 11792 + }, + { + "epoch": 0.3614380286870173, + "grad_norm": 0.9455953547229894, + "learning_rate": 7.3855357283894216e-06, + "loss": 0.4803, + "step": 11793 + }, + { + "epoch": 0.3614686772097585, + "grad_norm": 0.8549566638728768, + "learning_rate": 7.385099530119723e-06, + "loss": 0.4659, + "step": 11794 + }, + { + "epoch": 0.3614993257324997, + "grad_norm": 1.9608432996467833, + "learning_rate": 7.384663308349027e-06, + "loss": 0.6101, + "step": 11795 + }, + { + "epoch": 0.3615299742552409, + "grad_norm": 1.5614156815549416, + "learning_rate": 7.3842270630816345e-06, + "loss": 0.6693, + "step": 11796 + }, + { + "epoch": 0.3615606227779821, + "grad_norm": 1.6404937122578007, + "learning_rate": 7.383790794321841e-06, + "loss": 0.6893, + "step": 11797 + }, + { + "epoch": 0.3615912713007233, + "grad_norm": 1.940344804858651, + "learning_rate": 7.383354502073951e-06, + "loss": 0.6762, + "step": 11798 + }, + { + "epoch": 0.36162191982346453, + "grad_norm": 1.9416624275362635, + "learning_rate": 7.382918186342256e-06, + "loss": 0.7399, + "step": 11799 + }, + { + "epoch": 0.36165256834620574, + "grad_norm": 1.5883028412595501, + "learning_rate": 7.382481847131059e-06, + "loss": 0.7412, + "step": 11800 + }, + { + "epoch": 0.36168321686894694, + "grad_norm": 1.62407953563252, + "learning_rate": 7.382045484444658e-06, + "loss": 0.6964, + "step": 11801 + }, + { + "epoch": 0.3617138653916881, + "grad_norm": 1.794602925651006, + "learning_rate": 7.3816090982873554e-06, + "loss": 0.7186, + "step": 11802 + }, + { + "epoch": 0.3617445139144293, + "grad_norm": 1.6358070476611657, + "learning_rate": 7.381172688663447e-06, + "loss": 0.7136, + "step": 11803 + }, + { + "epoch": 0.3617751624371705, + "grad_norm": 1.6715699634553867, + "learning_rate": 7.380736255577236e-06, + "loss": 0.642, + "step": 11804 + }, + { + "epoch": 
0.3618058109599117, + "grad_norm": 1.991969436581181, + "learning_rate": 7.380299799033022e-06, + "loss": 0.72, + "step": 11805 + }, + { + "epoch": 0.3618364594826529, + "grad_norm": 1.660626196136167, + "learning_rate": 7.379863319035104e-06, + "loss": 0.6347, + "step": 11806 + }, + { + "epoch": 0.3618671080053941, + "grad_norm": 1.982664310076471, + "learning_rate": 7.3794268155877845e-06, + "loss": 0.6973, + "step": 11807 + }, + { + "epoch": 0.36189775652813533, + "grad_norm": 1.7378638877274504, + "learning_rate": 7.3789902886953636e-06, + "loss": 0.7692, + "step": 11808 + }, + { + "epoch": 0.36192840505087653, + "grad_norm": 1.7607176483896434, + "learning_rate": 7.378553738362142e-06, + "loss": 0.6181, + "step": 11809 + }, + { + "epoch": 0.36195905357361774, + "grad_norm": 1.6120675718295905, + "learning_rate": 7.378117164592422e-06, + "loss": 0.6094, + "step": 11810 + }, + { + "epoch": 0.36198970209635895, + "grad_norm": 1.5999828881251499, + "learning_rate": 7.377680567390506e-06, + "loss": 0.6404, + "step": 11811 + }, + { + "epoch": 0.36202035061910015, + "grad_norm": 1.592790413349076, + "learning_rate": 7.3772439467606934e-06, + "loss": 0.6497, + "step": 11812 + }, + { + "epoch": 0.36205099914184136, + "grad_norm": 1.8079217601809667, + "learning_rate": 7.376807302707291e-06, + "loss": 0.71, + "step": 11813 + }, + { + "epoch": 0.36208164766458256, + "grad_norm": 1.1138256430912026, + "learning_rate": 7.376370635234596e-06, + "loss": 0.5016, + "step": 11814 + }, + { + "epoch": 0.36211229618732377, + "grad_norm": 2.09488125697356, + "learning_rate": 7.375933944346913e-06, + "loss": 0.6341, + "step": 11815 + }, + { + "epoch": 0.362142944710065, + "grad_norm": 1.6438130109529314, + "learning_rate": 7.375497230048544e-06, + "loss": 0.6193, + "step": 11816 + }, + { + "epoch": 0.3621735932328062, + "grad_norm": 1.9348851893446348, + "learning_rate": 7.375060492343796e-06, + "loss": 0.7968, + "step": 11817 + }, + { + "epoch": 0.3622042417555474, + "grad_norm": 1.7561187614775478, + "learning_rate": 7.374623731236966e-06, + "loss": 0.6797, + "step": 11818 + }, + { + "epoch": 0.3622348902782886, + "grad_norm": 1.658140178486378, + "learning_rate": 7.374186946732362e-06, + "loss": 0.6661, + "step": 11819 + }, + { + "epoch": 0.3622655388010298, + "grad_norm": 1.6857158408795387, + "learning_rate": 7.373750138834287e-06, + "loss": 0.7433, + "step": 11820 + }, + { + "epoch": 0.362296187323771, + "grad_norm": 1.7878082418411756, + "learning_rate": 7.373313307547044e-06, + "loss": 0.5904, + "step": 11821 + }, + { + "epoch": 0.3623268358465122, + "grad_norm": 1.6759628652775214, + "learning_rate": 7.372876452874938e-06, + "loss": 0.6824, + "step": 11822 + }, + { + "epoch": 0.3623574843692534, + "grad_norm": 1.6586337305358108, + "learning_rate": 7.372439574822273e-06, + "loss": 0.6627, + "step": 11823 + }, + { + "epoch": 0.3623881328919946, + "grad_norm": 1.5774134779187323, + "learning_rate": 7.372002673393355e-06, + "loss": 0.6559, + "step": 11824 + }, + { + "epoch": 0.3624187814147358, + "grad_norm": 1.7153665611151745, + "learning_rate": 7.371565748592487e-06, + "loss": 0.7217, + "step": 11825 + }, + { + "epoch": 0.36244942993747703, + "grad_norm": 0.7897293084245643, + "learning_rate": 7.371128800423975e-06, + "loss": 0.4544, + "step": 11826 + }, + { + "epoch": 0.36248007846021824, + "grad_norm": 1.7466127505848505, + "learning_rate": 7.370691828892124e-06, + "loss": 0.6965, + "step": 11827 + }, + { + "epoch": 0.36251072698295944, + "grad_norm": 0.7561964566746053, + "learning_rate": 
7.370254834001241e-06, + "loss": 0.4657, + "step": 11828 + }, + { + "epoch": 0.36254137550570065, + "grad_norm": 1.486393998854381, + "learning_rate": 7.369817815755629e-06, + "loss": 0.7252, + "step": 11829 + }, + { + "epoch": 0.36257202402844185, + "grad_norm": 1.8371031171886416, + "learning_rate": 7.3693807741595955e-06, + "loss": 0.6699, + "step": 11830 + }, + { + "epoch": 0.36260267255118306, + "grad_norm": 1.9085158969642777, + "learning_rate": 7.368943709217448e-06, + "loss": 0.7271, + "step": 11831 + }, + { + "epoch": 0.36263332107392426, + "grad_norm": 1.7189517430297285, + "learning_rate": 7.368506620933491e-06, + "loss": 0.8163, + "step": 11832 + }, + { + "epoch": 0.3626639695966654, + "grad_norm": 1.8740664319038198, + "learning_rate": 7.3680695093120334e-06, + "loss": 0.6761, + "step": 11833 + }, + { + "epoch": 0.3626946181194066, + "grad_norm": 0.7947866935943672, + "learning_rate": 7.3676323743573805e-06, + "loss": 0.4726, + "step": 11834 + }, + { + "epoch": 0.3627252666421478, + "grad_norm": 1.9244114762963553, + "learning_rate": 7.36719521607384e-06, + "loss": 0.6876, + "step": 11835 + }, + { + "epoch": 0.36275591516488903, + "grad_norm": 1.862571028119381, + "learning_rate": 7.366758034465719e-06, + "loss": 0.6438, + "step": 11836 + }, + { + "epoch": 0.36278656368763024, + "grad_norm": 0.7976529477457022, + "learning_rate": 7.366320829537328e-06, + "loss": 0.4973, + "step": 11837 + }, + { + "epoch": 0.36281721221037144, + "grad_norm": 1.8026143357468725, + "learning_rate": 7.365883601292969e-06, + "loss": 0.7318, + "step": 11838 + }, + { + "epoch": 0.36284786073311265, + "grad_norm": 0.788116206085327, + "learning_rate": 7.365446349736955e-06, + "loss": 0.4707, + "step": 11839 + }, + { + "epoch": 0.36287850925585385, + "grad_norm": 1.8058657590479295, + "learning_rate": 7.365009074873594e-06, + "loss": 0.5932, + "step": 11840 + }, + { + "epoch": 0.36290915777859506, + "grad_norm": 1.629146759753325, + "learning_rate": 7.364571776707192e-06, + "loss": 0.6967, + "step": 11841 + }, + { + "epoch": 0.36293980630133627, + "grad_norm": 0.7940838859739818, + "learning_rate": 7.3641344552420605e-06, + "loss": 0.4768, + "step": 11842 + }, + { + "epoch": 0.36297045482407747, + "grad_norm": 1.6361326801200538, + "learning_rate": 7.363697110482506e-06, + "loss": 0.7501, + "step": 11843 + }, + { + "epoch": 0.3630011033468187, + "grad_norm": 1.6301507336233525, + "learning_rate": 7.36325974243284e-06, + "loss": 0.6452, + "step": 11844 + }, + { + "epoch": 0.3630317518695599, + "grad_norm": 1.4415200889257511, + "learning_rate": 7.362822351097371e-06, + "loss": 0.6442, + "step": 11845 + }, + { + "epoch": 0.3630624003923011, + "grad_norm": 1.9245679898862527, + "learning_rate": 7.362384936480407e-06, + "loss": 0.6552, + "step": 11846 + }, + { + "epoch": 0.3630930489150423, + "grad_norm": 1.6880347695006948, + "learning_rate": 7.361947498586262e-06, + "loss": 0.7192, + "step": 11847 + }, + { + "epoch": 0.3631236974377835, + "grad_norm": 1.6167056832517435, + "learning_rate": 7.361510037419244e-06, + "loss": 0.5677, + "step": 11848 + }, + { + "epoch": 0.3631543459605247, + "grad_norm": 1.8356570883786905, + "learning_rate": 7.361072552983661e-06, + "loss": 0.6669, + "step": 11849 + }, + { + "epoch": 0.3631849944832659, + "grad_norm": 1.703845902987877, + "learning_rate": 7.360635045283828e-06, + "loss": 0.7157, + "step": 11850 + }, + { + "epoch": 0.3632156430060071, + "grad_norm": 1.7165975182612068, + "learning_rate": 7.360197514324055e-06, + "loss": 0.5678, + "step": 11851 + }, + { + 
"epoch": 0.3632462915287483, + "grad_norm": 1.8213408085483755, + "learning_rate": 7.35975996010865e-06, + "loss": 0.5974, + "step": 11852 + }, + { + "epoch": 0.3632769400514895, + "grad_norm": 1.954926020510581, + "learning_rate": 7.359322382641926e-06, + "loss": 0.7565, + "step": 11853 + }, + { + "epoch": 0.36330758857423073, + "grad_norm": 1.8796740538709942, + "learning_rate": 7.358884781928196e-06, + "loss": 0.7562, + "step": 11854 + }, + { + "epoch": 0.36333823709697194, + "grad_norm": 1.6583272087126517, + "learning_rate": 7.35844715797177e-06, + "loss": 0.6824, + "step": 11855 + }, + { + "epoch": 0.36336888561971314, + "grad_norm": 1.8476515270061806, + "learning_rate": 7.358009510776963e-06, + "loss": 0.6871, + "step": 11856 + }, + { + "epoch": 0.36339953414245435, + "grad_norm": 1.7901582041491726, + "learning_rate": 7.357571840348082e-06, + "loss": 0.6864, + "step": 11857 + }, + { + "epoch": 0.36343018266519556, + "grad_norm": 1.7347553407694762, + "learning_rate": 7.357134146689444e-06, + "loss": 0.6764, + "step": 11858 + }, + { + "epoch": 0.36346083118793676, + "grad_norm": 1.6405186342426716, + "learning_rate": 7.356696429805359e-06, + "loss": 0.6775, + "step": 11859 + }, + { + "epoch": 0.36349147971067797, + "grad_norm": 1.6101181671329419, + "learning_rate": 7.356258689700143e-06, + "loss": 0.7071, + "step": 11860 + }, + { + "epoch": 0.3635221282334192, + "grad_norm": 1.5860156516014632, + "learning_rate": 7.355820926378105e-06, + "loss": 0.6924, + "step": 11861 + }, + { + "epoch": 0.3635527767561604, + "grad_norm": 0.9464929242409337, + "learning_rate": 7.3553831398435625e-06, + "loss": 0.4603, + "step": 11862 + }, + { + "epoch": 0.3635834252789016, + "grad_norm": 0.8855267457652044, + "learning_rate": 7.354945330100827e-06, + "loss": 0.4385, + "step": 11863 + }, + { + "epoch": 0.36361407380164273, + "grad_norm": 1.6036724507451803, + "learning_rate": 7.354507497154212e-06, + "loss": 0.7352, + "step": 11864 + }, + { + "epoch": 0.36364472232438394, + "grad_norm": 1.615557727326425, + "learning_rate": 7.354069641008032e-06, + "loss": 0.6806, + "step": 11865 + }, + { + "epoch": 0.36367537084712515, + "grad_norm": 1.7549470931866713, + "learning_rate": 7.353631761666602e-06, + "loss": 0.6898, + "step": 11866 + }, + { + "epoch": 0.36370601936986635, + "grad_norm": 1.6064214842170736, + "learning_rate": 7.353193859134237e-06, + "loss": 0.6082, + "step": 11867 + }, + { + "epoch": 0.36373666789260756, + "grad_norm": 1.7418202543674604, + "learning_rate": 7.352755933415249e-06, + "loss": 0.7859, + "step": 11868 + }, + { + "epoch": 0.36376731641534876, + "grad_norm": 1.8067364818357825, + "learning_rate": 7.352317984513956e-06, + "loss": 0.7891, + "step": 11869 + }, + { + "epoch": 0.36379796493808997, + "grad_norm": 1.72920322246423, + "learning_rate": 7.351880012434671e-06, + "loss": 0.6301, + "step": 11870 + }, + { + "epoch": 0.3638286134608312, + "grad_norm": 1.7810048096664512, + "learning_rate": 7.351442017181711e-06, + "loss": 0.7437, + "step": 11871 + }, + { + "epoch": 0.3638592619835724, + "grad_norm": 1.5372968160725653, + "learning_rate": 7.351003998759392e-06, + "loss": 0.6373, + "step": 11872 + }, + { + "epoch": 0.3638899105063136, + "grad_norm": 1.6641493032515744, + "learning_rate": 7.350565957172028e-06, + "loss": 0.6357, + "step": 11873 + }, + { + "epoch": 0.3639205590290548, + "grad_norm": 1.6208010720241492, + "learning_rate": 7.350127892423936e-06, + "loss": 0.687, + "step": 11874 + }, + { + "epoch": 0.363951207551796, + "grad_norm": 1.834496105404569, + 
"learning_rate": 7.349689804519433e-06, + "loss": 0.6447, + "step": 11875 + }, + { + "epoch": 0.3639818560745372, + "grad_norm": 1.4747443117858556, + "learning_rate": 7.349251693462836e-06, + "loss": 0.6103, + "step": 11876 + }, + { + "epoch": 0.3640125045972784, + "grad_norm": 1.7704716796684985, + "learning_rate": 7.348813559258461e-06, + "loss": 0.6465, + "step": 11877 + }, + { + "epoch": 0.3640431531200196, + "grad_norm": 1.8361796321251835, + "learning_rate": 7.348375401910624e-06, + "loss": 0.6786, + "step": 11878 + }, + { + "epoch": 0.3640738016427608, + "grad_norm": 1.5829071913180992, + "learning_rate": 7.3479372214236425e-06, + "loss": 0.6936, + "step": 11879 + }, + { + "epoch": 0.364104450165502, + "grad_norm": 1.642265358667297, + "learning_rate": 7.347499017801837e-06, + "loss": 0.6851, + "step": 11880 + }, + { + "epoch": 0.36413509868824323, + "grad_norm": 1.7132039984291716, + "learning_rate": 7.347060791049521e-06, + "loss": 0.7251, + "step": 11881 + }, + { + "epoch": 0.36416574721098444, + "grad_norm": 1.922915281693989, + "learning_rate": 7.3466225411710156e-06, + "loss": 0.6789, + "step": 11882 + }, + { + "epoch": 0.36419639573372564, + "grad_norm": 1.6338897184926406, + "learning_rate": 7.3461842681706375e-06, + "loss": 0.68, + "step": 11883 + }, + { + "epoch": 0.36422704425646685, + "grad_norm": 1.5786844143855063, + "learning_rate": 7.345745972052707e-06, + "loss": 0.6298, + "step": 11884 + }, + { + "epoch": 0.36425769277920805, + "grad_norm": 1.0859927793823447, + "learning_rate": 7.345307652821538e-06, + "loss": 0.4811, + "step": 11885 + }, + { + "epoch": 0.36428834130194926, + "grad_norm": 1.5376424216030362, + "learning_rate": 7.344869310481455e-06, + "loss": 0.6938, + "step": 11886 + }, + { + "epoch": 0.36431898982469046, + "grad_norm": 1.756991299058162, + "learning_rate": 7.344430945036776e-06, + "loss": 0.6654, + "step": 11887 + }, + { + "epoch": 0.36434963834743167, + "grad_norm": 1.730035579508242, + "learning_rate": 7.343992556491817e-06, + "loss": 0.7723, + "step": 11888 + }, + { + "epoch": 0.3643802868701729, + "grad_norm": 1.7742228672622329, + "learning_rate": 7.3435541448509e-06, + "loss": 0.732, + "step": 11889 + }, + { + "epoch": 0.3644109353929141, + "grad_norm": 1.503438555589714, + "learning_rate": 7.343115710118344e-06, + "loss": 0.6881, + "step": 11890 + }, + { + "epoch": 0.3644415839156553, + "grad_norm": 1.6756952871322308, + "learning_rate": 7.34267725229847e-06, + "loss": 0.6888, + "step": 11891 + }, + { + "epoch": 0.3644722324383965, + "grad_norm": 1.5456475466880972, + "learning_rate": 7.3422387713955965e-06, + "loss": 0.5731, + "step": 11892 + }, + { + "epoch": 0.3645028809611377, + "grad_norm": 1.749973964614965, + "learning_rate": 7.341800267414047e-06, + "loss": 0.674, + "step": 11893 + }, + { + "epoch": 0.3645335294838789, + "grad_norm": 1.8381869009324765, + "learning_rate": 7.341361740358139e-06, + "loss": 0.645, + "step": 11894 + }, + { + "epoch": 0.36456417800662005, + "grad_norm": 1.7458080958285467, + "learning_rate": 7.340923190232195e-06, + "loss": 0.6767, + "step": 11895 + }, + { + "epoch": 0.36459482652936126, + "grad_norm": 1.756992362918409, + "learning_rate": 7.3404846170405355e-06, + "loss": 0.7311, + "step": 11896 + }, + { + "epoch": 0.36462547505210247, + "grad_norm": 1.509947390783055, + "learning_rate": 7.340046020787484e-06, + "loss": 0.6717, + "step": 11897 + }, + { + "epoch": 0.36465612357484367, + "grad_norm": 1.639876141331752, + "learning_rate": 7.339607401477357e-06, + "loss": 0.6308, + "step": 11898 + }, + { 
+ "epoch": 0.3646867720975849, + "grad_norm": 0.8399284776690098, + "learning_rate": 7.339168759114483e-06, + "loss": 0.4503, + "step": 11899 + }, + { + "epoch": 0.3647174206203261, + "grad_norm": 1.7471549363017784, + "learning_rate": 7.338730093703179e-06, + "loss": 0.7777, + "step": 11900 + }, + { + "epoch": 0.3647480691430673, + "grad_norm": 1.5364278189264926, + "learning_rate": 7.33829140524777e-06, + "loss": 0.6842, + "step": 11901 + }, + { + "epoch": 0.3647787176658085, + "grad_norm": 1.4744136455479242, + "learning_rate": 7.337852693752576e-06, + "loss": 0.6823, + "step": 11902 + }, + { + "epoch": 0.3648093661885497, + "grad_norm": 1.8691972427308234, + "learning_rate": 7.337413959221923e-06, + "loss": 0.6968, + "step": 11903 + }, + { + "epoch": 0.3648400147112909, + "grad_norm": 1.6480161567037255, + "learning_rate": 7.33697520166013e-06, + "loss": 0.7452, + "step": 11904 + }, + { + "epoch": 0.3648706632340321, + "grad_norm": 1.7543391305515357, + "learning_rate": 7.336536421071524e-06, + "loss": 0.7191, + "step": 11905 + }, + { + "epoch": 0.3649013117567733, + "grad_norm": 1.9525188909633007, + "learning_rate": 7.336097617460427e-06, + "loss": 0.726, + "step": 11906 + }, + { + "epoch": 0.3649319602795145, + "grad_norm": 1.8658384885045007, + "learning_rate": 7.335658790831162e-06, + "loss": 0.7154, + "step": 11907 + }, + { + "epoch": 0.36496260880225573, + "grad_norm": 1.7884443590595949, + "learning_rate": 7.335219941188052e-06, + "loss": 0.6676, + "step": 11908 + }, + { + "epoch": 0.36499325732499693, + "grad_norm": 1.6337570432786692, + "learning_rate": 7.334781068535424e-06, + "loss": 0.6983, + "step": 11909 + }, + { + "epoch": 0.36502390584773814, + "grad_norm": 1.8219007730622787, + "learning_rate": 7.334342172877601e-06, + "loss": 0.623, + "step": 11910 + }, + { + "epoch": 0.36505455437047934, + "grad_norm": 1.6300918565622353, + "learning_rate": 7.333903254218906e-06, + "loss": 0.654, + "step": 11911 + }, + { + "epoch": 0.36508520289322055, + "grad_norm": 1.7504391315817085, + "learning_rate": 7.333464312563666e-06, + "loss": 0.6468, + "step": 11912 + }, + { + "epoch": 0.36511585141596176, + "grad_norm": 1.6833014576451293, + "learning_rate": 7.333025347916205e-06, + "loss": 0.7118, + "step": 11913 + }, + { + "epoch": 0.36514649993870296, + "grad_norm": 1.8532396773608157, + "learning_rate": 7.3325863602808486e-06, + "loss": 0.6909, + "step": 11914 + }, + { + "epoch": 0.36517714846144417, + "grad_norm": 1.6941924698720634, + "learning_rate": 7.332147349661921e-06, + "loss": 0.7235, + "step": 11915 + }, + { + "epoch": 0.3652077969841854, + "grad_norm": 1.671897571315355, + "learning_rate": 7.33170831606375e-06, + "loss": 0.7312, + "step": 11916 + }, + { + "epoch": 0.3652384455069266, + "grad_norm": 1.7294827482778758, + "learning_rate": 7.33126925949066e-06, + "loss": 0.6136, + "step": 11917 + }, + { + "epoch": 0.3652690940296678, + "grad_norm": 1.6943974701989306, + "learning_rate": 7.330830179946977e-06, + "loss": 0.633, + "step": 11918 + }, + { + "epoch": 0.365299742552409, + "grad_norm": 0.8286690448982382, + "learning_rate": 7.330391077437028e-06, + "loss": 0.4707, + "step": 11919 + }, + { + "epoch": 0.3653303910751502, + "grad_norm": 0.8043578523488317, + "learning_rate": 7.32995195196514e-06, + "loss": 0.4658, + "step": 11920 + }, + { + "epoch": 0.3653610395978914, + "grad_norm": 1.5817409845451522, + "learning_rate": 7.329512803535639e-06, + "loss": 0.7165, + "step": 11921 + }, + { + "epoch": 0.3653916881206326, + "grad_norm": 1.7825513699186044, + 
"learning_rate": 7.329073632152852e-06, + "loss": 0.8032, + "step": 11922 + }, + { + "epoch": 0.3654223366433738, + "grad_norm": 2.1996009308821614, + "learning_rate": 7.328634437821107e-06, + "loss": 0.746, + "step": 11923 + }, + { + "epoch": 0.365452985166115, + "grad_norm": 1.7162844568685178, + "learning_rate": 7.328195220544731e-06, + "loss": 0.6603, + "step": 11924 + }, + { + "epoch": 0.3654836336888562, + "grad_norm": 1.712839446623128, + "learning_rate": 7.327755980328053e-06, + "loss": 0.7013, + "step": 11925 + }, + { + "epoch": 0.3655142822115974, + "grad_norm": 1.830441082503169, + "learning_rate": 7.3273167171753965e-06, + "loss": 0.6925, + "step": 11926 + }, + { + "epoch": 0.3655449307343386, + "grad_norm": 1.7169093183793545, + "learning_rate": 7.326877431091095e-06, + "loss": 0.6483, + "step": 11927 + }, + { + "epoch": 0.3655755792570798, + "grad_norm": 1.7508347413753376, + "learning_rate": 7.326438122079474e-06, + "loss": 0.716, + "step": 11928 + }, + { + "epoch": 0.365606227779821, + "grad_norm": 0.9667735753717682, + "learning_rate": 7.325998790144866e-06, + "loss": 0.4612, + "step": 11929 + }, + { + "epoch": 0.3656368763025622, + "grad_norm": 1.7192894886698737, + "learning_rate": 7.325559435291594e-06, + "loss": 0.6867, + "step": 11930 + }, + { + "epoch": 0.3656675248253034, + "grad_norm": 1.655569366881228, + "learning_rate": 7.3251200575239904e-06, + "loss": 0.635, + "step": 11931 + }, + { + "epoch": 0.3656981733480446, + "grad_norm": 1.8249199479931724, + "learning_rate": 7.324680656846382e-06, + "loss": 0.6175, + "step": 11932 + }, + { + "epoch": 0.3657288218707858, + "grad_norm": 1.6641456346886139, + "learning_rate": 7.324241233263103e-06, + "loss": 0.6431, + "step": 11933 + }, + { + "epoch": 0.365759470393527, + "grad_norm": 1.7752808469092285, + "learning_rate": 7.32380178677848e-06, + "loss": 0.6409, + "step": 11934 + }, + { + "epoch": 0.3657901189162682, + "grad_norm": 1.6760271361881416, + "learning_rate": 7.323362317396843e-06, + "loss": 0.7503, + "step": 11935 + }, + { + "epoch": 0.36582076743900943, + "grad_norm": 1.7585486373502144, + "learning_rate": 7.322922825122522e-06, + "loss": 0.6819, + "step": 11936 + }, + { + "epoch": 0.36585141596175064, + "grad_norm": 1.6938054208211488, + "learning_rate": 7.322483309959849e-06, + "loss": 0.6469, + "step": 11937 + }, + { + "epoch": 0.36588206448449184, + "grad_norm": 0.8470614423660866, + "learning_rate": 7.322043771913154e-06, + "loss": 0.4777, + "step": 11938 + }, + { + "epoch": 0.36591271300723305, + "grad_norm": 1.667021723019065, + "learning_rate": 7.321604210986766e-06, + "loss": 0.7073, + "step": 11939 + }, + { + "epoch": 0.36594336152997425, + "grad_norm": 1.6527680038245707, + "learning_rate": 7.321164627185019e-06, + "loss": 0.7027, + "step": 11940 + }, + { + "epoch": 0.36597401005271546, + "grad_norm": 1.6209807777375556, + "learning_rate": 7.320725020512243e-06, + "loss": 0.6142, + "step": 11941 + }, + { + "epoch": 0.36600465857545667, + "grad_norm": 1.815724125797066, + "learning_rate": 7.32028539097277e-06, + "loss": 0.687, + "step": 11942 + }, + { + "epoch": 0.36603530709819787, + "grad_norm": 1.7395456061008094, + "learning_rate": 7.31984573857093e-06, + "loss": 0.7291, + "step": 11943 + }, + { + "epoch": 0.3660659556209391, + "grad_norm": 1.6344927384972794, + "learning_rate": 7.319406063311056e-06, + "loss": 0.5912, + "step": 11944 + }, + { + "epoch": 0.3660966041436803, + "grad_norm": 0.7823425548902037, + "learning_rate": 7.318966365197483e-06, + "loss": 0.4677, + "step": 11945 + }, + { + 
"epoch": 0.3661272526664215, + "grad_norm": 1.7103674749435072, + "learning_rate": 7.318526644234538e-06, + "loss": 0.6688, + "step": 11946 + }, + { + "epoch": 0.3661579011891627, + "grad_norm": 0.8064031478976332, + "learning_rate": 7.31808690042656e-06, + "loss": 0.4711, + "step": 11947 + }, + { + "epoch": 0.3661885497119039, + "grad_norm": 0.7854862355726725, + "learning_rate": 7.317647133777877e-06, + "loss": 0.4767, + "step": 11948 + }, + { + "epoch": 0.3662191982346451, + "grad_norm": 1.5997923350789176, + "learning_rate": 7.317207344292825e-06, + "loss": 0.6463, + "step": 11949 + }, + { + "epoch": 0.3662498467573863, + "grad_norm": 1.884846224819618, + "learning_rate": 7.316767531975734e-06, + "loss": 0.6519, + "step": 11950 + }, + { + "epoch": 0.3662804952801275, + "grad_norm": 0.7719245962882257, + "learning_rate": 7.31632769683094e-06, + "loss": 0.4748, + "step": 11951 + }, + { + "epoch": 0.3663111438028687, + "grad_norm": 0.7558982304670666, + "learning_rate": 7.315887838862778e-06, + "loss": 0.4633, + "step": 11952 + }, + { + "epoch": 0.3663417923256099, + "grad_norm": 1.7649388448386407, + "learning_rate": 7.315447958075581e-06, + "loss": 0.6977, + "step": 11953 + }, + { + "epoch": 0.36637244084835113, + "grad_norm": 1.5270002828650828, + "learning_rate": 7.315008054473681e-06, + "loss": 0.5891, + "step": 11954 + }, + { + "epoch": 0.36640308937109234, + "grad_norm": 1.723956446743994, + "learning_rate": 7.314568128061416e-06, + "loss": 0.6318, + "step": 11955 + }, + { + "epoch": 0.36643373789383354, + "grad_norm": 1.8959170401012302, + "learning_rate": 7.314128178843118e-06, + "loss": 0.739, + "step": 11956 + }, + { + "epoch": 0.3664643864165747, + "grad_norm": 1.659905663785205, + "learning_rate": 7.313688206823123e-06, + "loss": 0.7229, + "step": 11957 + }, + { + "epoch": 0.3664950349393159, + "grad_norm": 1.8230607569126343, + "learning_rate": 7.3132482120057654e-06, + "loss": 0.6973, + "step": 11958 + }, + { + "epoch": 0.3665256834620571, + "grad_norm": 1.7102567357234906, + "learning_rate": 7.312808194395382e-06, + "loss": 0.6926, + "step": 11959 + }, + { + "epoch": 0.3665563319847983, + "grad_norm": 1.5966721688037933, + "learning_rate": 7.312368153996309e-06, + "loss": 0.6981, + "step": 11960 + }, + { + "epoch": 0.3665869805075395, + "grad_norm": 1.5602094192014102, + "learning_rate": 7.311928090812878e-06, + "loss": 0.6567, + "step": 11961 + }, + { + "epoch": 0.3666176290302807, + "grad_norm": 1.430178218801929, + "learning_rate": 7.31148800484943e-06, + "loss": 0.6365, + "step": 11962 + }, + { + "epoch": 0.36664827755302193, + "grad_norm": 1.6210857310552254, + "learning_rate": 7.311047896110299e-06, + "loss": 0.6084, + "step": 11963 + }, + { + "epoch": 0.36667892607576313, + "grad_norm": 1.63049110940471, + "learning_rate": 7.310607764599823e-06, + "loss": 0.6238, + "step": 11964 + }, + { + "epoch": 0.36670957459850434, + "grad_norm": 0.9124571482046374, + "learning_rate": 7.310167610322337e-06, + "loss": 0.4548, + "step": 11965 + }, + { + "epoch": 0.36674022312124555, + "grad_norm": 1.7532335226275393, + "learning_rate": 7.309727433282177e-06, + "loss": 0.7797, + "step": 11966 + }, + { + "epoch": 0.36677087164398675, + "grad_norm": 1.1695966712247374, + "learning_rate": 7.3092872334836814e-06, + "loss": 0.4661, + "step": 11967 + }, + { + "epoch": 0.36680152016672796, + "grad_norm": 2.1168865177536698, + "learning_rate": 7.30884701093119e-06, + "loss": 0.6638, + "step": 11968 + }, + { + "epoch": 0.36683216868946916, + "grad_norm": 1.4781182599939093, + 
"learning_rate": 7.308406765629037e-06, + "loss": 0.6608, + "step": 11969 + }, + { + "epoch": 0.36686281721221037, + "grad_norm": 1.7390752619601275, + "learning_rate": 7.307966497581562e-06, + "loss": 0.7419, + "step": 11970 + }, + { + "epoch": 0.3668934657349516, + "grad_norm": 1.6166711067405735, + "learning_rate": 7.307526206793102e-06, + "loss": 0.6261, + "step": 11971 + }, + { + "epoch": 0.3669241142576928, + "grad_norm": 1.8074198867381708, + "learning_rate": 7.3070858932679956e-06, + "loss": 0.6586, + "step": 11972 + }, + { + "epoch": 0.366954762780434, + "grad_norm": 1.8709770138093789, + "learning_rate": 7.3066455570105824e-06, + "loss": 0.8503, + "step": 11973 + }, + { + "epoch": 0.3669854113031752, + "grad_norm": 0.8581936428860965, + "learning_rate": 7.3062051980252e-06, + "loss": 0.4691, + "step": 11974 + }, + { + "epoch": 0.3670160598259164, + "grad_norm": 1.5600908310330608, + "learning_rate": 7.305764816316188e-06, + "loss": 0.6785, + "step": 11975 + }, + { + "epoch": 0.3670467083486576, + "grad_norm": 1.7544586666692585, + "learning_rate": 7.305324411887884e-06, + "loss": 0.731, + "step": 11976 + }, + { + "epoch": 0.3670773568713988, + "grad_norm": 1.7289641517620526, + "learning_rate": 7.3048839847446305e-06, + "loss": 0.6796, + "step": 11977 + }, + { + "epoch": 0.36710800539414, + "grad_norm": 0.7863724856499854, + "learning_rate": 7.304443534890764e-06, + "loss": 0.4801, + "step": 11978 + }, + { + "epoch": 0.3671386539168812, + "grad_norm": 1.8142925599200521, + "learning_rate": 7.304003062330627e-06, + "loss": 0.6999, + "step": 11979 + }, + { + "epoch": 0.3671693024396224, + "grad_norm": 0.8112577881771054, + "learning_rate": 7.303562567068557e-06, + "loss": 0.492, + "step": 11980 + }, + { + "epoch": 0.36719995096236363, + "grad_norm": 0.7549940987925461, + "learning_rate": 7.303122049108897e-06, + "loss": 0.4571, + "step": 11981 + }, + { + "epoch": 0.36723059948510484, + "grad_norm": 1.5200419980016946, + "learning_rate": 7.302681508455985e-06, + "loss": 0.6778, + "step": 11982 + }, + { + "epoch": 0.36726124800784604, + "grad_norm": 1.7159513212061295, + "learning_rate": 7.302240945114164e-06, + "loss": 0.6355, + "step": 11983 + }, + { + "epoch": 0.36729189653058725, + "grad_norm": 1.7279448557503554, + "learning_rate": 7.301800359087773e-06, + "loss": 0.8405, + "step": 11984 + }, + { + "epoch": 0.36732254505332845, + "grad_norm": 1.506937533078609, + "learning_rate": 7.301359750381154e-06, + "loss": 0.6001, + "step": 11985 + }, + { + "epoch": 0.36735319357606966, + "grad_norm": 1.7049074706573764, + "learning_rate": 7.300919118998648e-06, + "loss": 0.7185, + "step": 11986 + }, + { + "epoch": 0.36738384209881086, + "grad_norm": 1.6981258408726976, + "learning_rate": 7.300478464944599e-06, + "loss": 0.6687, + "step": 11987 + }, + { + "epoch": 0.367414490621552, + "grad_norm": 1.8453057334856953, + "learning_rate": 7.300037788223346e-06, + "loss": 0.738, + "step": 11988 + }, + { + "epoch": 0.3674451391442932, + "grad_norm": 1.6098595358252636, + "learning_rate": 7.299597088839232e-06, + "loss": 0.6409, + "step": 11989 + }, + { + "epoch": 0.3674757876670344, + "grad_norm": 1.7855708316082364, + "learning_rate": 7.2991563667966004e-06, + "loss": 0.7257, + "step": 11990 + }, + { + "epoch": 0.36750643618977563, + "grad_norm": 0.9438077339869825, + "learning_rate": 7.298715622099793e-06, + "loss": 0.4725, + "step": 11991 + }, + { + "epoch": 0.36753708471251684, + "grad_norm": 1.7239306973415156, + "learning_rate": 7.298274854753153e-06, + "loss": 0.6152, + "step": 11992 + 
}, + { + "epoch": 0.36756773323525804, + "grad_norm": 1.6255295597419779, + "learning_rate": 7.29783406476102e-06, + "loss": 0.6999, + "step": 11993 + }, + { + "epoch": 0.36759838175799925, + "grad_norm": 1.910990809469809, + "learning_rate": 7.297393252127744e-06, + "loss": 0.7288, + "step": 11994 + }, + { + "epoch": 0.36762903028074045, + "grad_norm": 1.7236496673355202, + "learning_rate": 7.2969524168576615e-06, + "loss": 0.6875, + "step": 11995 + }, + { + "epoch": 0.36765967880348166, + "grad_norm": 1.7488384536656245, + "learning_rate": 7.29651155895512e-06, + "loss": 0.6526, + "step": 11996 + }, + { + "epoch": 0.36769032732622287, + "grad_norm": 0.8060225927716155, + "learning_rate": 7.296070678424461e-06, + "loss": 0.4641, + "step": 11997 + }, + { + "epoch": 0.36772097584896407, + "grad_norm": 1.7944227365395153, + "learning_rate": 7.295629775270033e-06, + "loss": 0.709, + "step": 11998 + }, + { + "epoch": 0.3677516243717053, + "grad_norm": 1.7045135937984934, + "learning_rate": 7.2951888494961755e-06, + "loss": 0.7342, + "step": 11999 + }, + { + "epoch": 0.3677822728944465, + "grad_norm": 1.7735203190489142, + "learning_rate": 7.294747901107235e-06, + "loss": 0.7215, + "step": 12000 + }, + { + "epoch": 0.3678129214171877, + "grad_norm": 1.6248115246448727, + "learning_rate": 7.294306930107556e-06, + "loss": 0.5861, + "step": 12001 + }, + { + "epoch": 0.3678435699399289, + "grad_norm": 1.5727490610092845, + "learning_rate": 7.293865936501485e-06, + "loss": 0.6411, + "step": 12002 + }, + { + "epoch": 0.3678742184626701, + "grad_norm": 1.6214322125982272, + "learning_rate": 7.293424920293366e-06, + "loss": 0.7747, + "step": 12003 + }, + { + "epoch": 0.3679048669854113, + "grad_norm": 2.231141327042331, + "learning_rate": 7.2929838814875434e-06, + "loss": 0.6673, + "step": 12004 + }, + { + "epoch": 0.3679355155081525, + "grad_norm": 0.7893231853390528, + "learning_rate": 7.292542820088364e-06, + "loss": 0.457, + "step": 12005 + }, + { + "epoch": 0.3679661640308937, + "grad_norm": 1.765272853599741, + "learning_rate": 7.292101736100175e-06, + "loss": 0.7359, + "step": 12006 + }, + { + "epoch": 0.3679968125536349, + "grad_norm": 1.7068570760296162, + "learning_rate": 7.291660629527321e-06, + "loss": 0.6468, + "step": 12007 + }, + { + "epoch": 0.36802746107637613, + "grad_norm": 1.7313151485109948, + "learning_rate": 7.291219500374147e-06, + "loss": 0.6665, + "step": 12008 + }, + { + "epoch": 0.36805810959911733, + "grad_norm": 1.7359734585312312, + "learning_rate": 7.2907783486450016e-06, + "loss": 0.5823, + "step": 12009 + }, + { + "epoch": 0.36808875812185854, + "grad_norm": 1.8136066199693734, + "learning_rate": 7.290337174344231e-06, + "loss": 0.7444, + "step": 12010 + }, + { + "epoch": 0.36811940664459974, + "grad_norm": 0.779069838708038, + "learning_rate": 7.289895977476184e-06, + "loss": 0.4659, + "step": 12011 + }, + { + "epoch": 0.36815005516734095, + "grad_norm": 1.5046625872449784, + "learning_rate": 7.289454758045203e-06, + "loss": 0.6906, + "step": 12012 + }, + { + "epoch": 0.36818070369008216, + "grad_norm": 1.6889450584711594, + "learning_rate": 7.289013516055639e-06, + "loss": 0.7128, + "step": 12013 + }, + { + "epoch": 0.36821135221282336, + "grad_norm": 3.1276504096141466, + "learning_rate": 7.288572251511842e-06, + "loss": 0.7061, + "step": 12014 + }, + { + "epoch": 0.36824200073556457, + "grad_norm": 1.670444446646396, + "learning_rate": 7.2881309644181546e-06, + "loss": 0.6388, + "step": 12015 + }, + { + "epoch": 0.3682726492583058, + "grad_norm": 
0.7831171423663651, + "learning_rate": 7.287689654778928e-06, + "loss": 0.4669, + "step": 12016 + }, + { + "epoch": 0.368303297781047, + "grad_norm": 1.8563976038665644, + "learning_rate": 7.287248322598509e-06, + "loss": 0.6587, + "step": 12017 + }, + { + "epoch": 0.3683339463037882, + "grad_norm": 1.5322474778764954, + "learning_rate": 7.286806967881248e-06, + "loss": 0.6008, + "step": 12018 + }, + { + "epoch": 0.36836459482652933, + "grad_norm": 1.7301494793333687, + "learning_rate": 7.286365590631492e-06, + "loss": 0.727, + "step": 12019 + }, + { + "epoch": 0.36839524334927054, + "grad_norm": 1.6593758876330031, + "learning_rate": 7.285924190853593e-06, + "loss": 0.6594, + "step": 12020 + }, + { + "epoch": 0.36842589187201175, + "grad_norm": 0.8403587590012654, + "learning_rate": 7.285482768551897e-06, + "loss": 0.4794, + "step": 12021 + }, + { + "epoch": 0.36845654039475295, + "grad_norm": 1.6899003117371312, + "learning_rate": 7.285041323730754e-06, + "loss": 0.6722, + "step": 12022 + }, + { + "epoch": 0.36848718891749416, + "grad_norm": 1.67282333035223, + "learning_rate": 7.284599856394513e-06, + "loss": 0.694, + "step": 12023 + }, + { + "epoch": 0.36851783744023536, + "grad_norm": 1.4785213882549788, + "learning_rate": 7.284158366547527e-06, + "loss": 0.6011, + "step": 12024 + }, + { + "epoch": 0.36854848596297657, + "grad_norm": 1.6013121472470704, + "learning_rate": 7.283716854194144e-06, + "loss": 0.5672, + "step": 12025 + }, + { + "epoch": 0.3685791344857178, + "grad_norm": 1.6596705116124484, + "learning_rate": 7.283275319338714e-06, + "loss": 0.602, + "step": 12026 + }, + { + "epoch": 0.368609783008459, + "grad_norm": 1.8334245997000658, + "learning_rate": 7.282833761985588e-06, + "loss": 0.5861, + "step": 12027 + }, + { + "epoch": 0.3686404315312002, + "grad_norm": 1.7290431744554877, + "learning_rate": 7.282392182139117e-06, + "loss": 0.5904, + "step": 12028 + }, + { + "epoch": 0.3686710800539414, + "grad_norm": 1.730673162488494, + "learning_rate": 7.2819505798036525e-06, + "loss": 0.6733, + "step": 12029 + }, + { + "epoch": 0.3687017285766826, + "grad_norm": 1.7623209253852177, + "learning_rate": 7.281508954983544e-06, + "loss": 0.6663, + "step": 12030 + }, + { + "epoch": 0.3687323770994238, + "grad_norm": 1.811747679083268, + "learning_rate": 7.281067307683144e-06, + "loss": 0.7045, + "step": 12031 + }, + { + "epoch": 0.368763025622165, + "grad_norm": 1.5515157096985661, + "learning_rate": 7.280625637906804e-06, + "loss": 0.6983, + "step": 12032 + }, + { + "epoch": 0.3687936741449062, + "grad_norm": 1.8768504510819328, + "learning_rate": 7.280183945658879e-06, + "loss": 0.6511, + "step": 12033 + }, + { + "epoch": 0.3688243226676474, + "grad_norm": 0.8793150656243185, + "learning_rate": 7.279742230943714e-06, + "loss": 0.4537, + "step": 12034 + }, + { + "epoch": 0.3688549711903886, + "grad_norm": 0.819350298768413, + "learning_rate": 7.2793004937656686e-06, + "loss": 0.4749, + "step": 12035 + }, + { + "epoch": 0.36888561971312983, + "grad_norm": 0.813318553484185, + "learning_rate": 7.27885873412909e-06, + "loss": 0.4872, + "step": 12036 + }, + { + "epoch": 0.36891626823587104, + "grad_norm": 1.7323520888001567, + "learning_rate": 7.278416952038335e-06, + "loss": 0.69, + "step": 12037 + }, + { + "epoch": 0.36894691675861224, + "grad_norm": 1.6562346240664423, + "learning_rate": 7.277975147497753e-06, + "loss": 0.6206, + "step": 12038 + }, + { + "epoch": 0.36897756528135345, + "grad_norm": 0.7991343093717975, + "learning_rate": 7.277533320511702e-06, + "loss": 0.4684, + 
"step": 12039 + }, + { + "epoch": 0.36900821380409465, + "grad_norm": 1.8674440987090777, + "learning_rate": 7.27709147108453e-06, + "loss": 0.8086, + "step": 12040 + }, + { + "epoch": 0.36903886232683586, + "grad_norm": 1.665449640485241, + "learning_rate": 7.276649599220594e-06, + "loss": 0.6876, + "step": 12041 + }, + { + "epoch": 0.36906951084957706, + "grad_norm": 1.7393215154366302, + "learning_rate": 7.276207704924247e-06, + "loss": 0.6801, + "step": 12042 + }, + { + "epoch": 0.36910015937231827, + "grad_norm": 1.9231469163517698, + "learning_rate": 7.275765788199842e-06, + "loss": 0.6246, + "step": 12043 + }, + { + "epoch": 0.3691308078950595, + "grad_norm": 1.8177328790588514, + "learning_rate": 7.275323849051734e-06, + "loss": 0.7178, + "step": 12044 + }, + { + "epoch": 0.3691614564178007, + "grad_norm": 1.580081866715608, + "learning_rate": 7.27488188748428e-06, + "loss": 0.6655, + "step": 12045 + }, + { + "epoch": 0.3691921049405419, + "grad_norm": 1.6746154783966114, + "learning_rate": 7.274439903501832e-06, + "loss": 0.7126, + "step": 12046 + }, + { + "epoch": 0.3692227534632831, + "grad_norm": 1.7443469448191071, + "learning_rate": 7.273997897108744e-06, + "loss": 0.6685, + "step": 12047 + }, + { + "epoch": 0.3692534019860243, + "grad_norm": 1.6182430571737634, + "learning_rate": 7.273555868309377e-06, + "loss": 0.6965, + "step": 12048 + }, + { + "epoch": 0.3692840505087655, + "grad_norm": 1.8013251965099661, + "learning_rate": 7.273113817108078e-06, + "loss": 0.6606, + "step": 12049 + }, + { + "epoch": 0.36931469903150665, + "grad_norm": 1.885781309257011, + "learning_rate": 7.2726717435092095e-06, + "loss": 0.5874, + "step": 12050 + }, + { + "epoch": 0.36934534755424786, + "grad_norm": 1.4265225749524721, + "learning_rate": 7.272229647517124e-06, + "loss": 0.7493, + "step": 12051 + }, + { + "epoch": 0.36937599607698907, + "grad_norm": 1.6147386889644988, + "learning_rate": 7.2717875291361796e-06, + "loss": 0.6053, + "step": 12052 + }, + { + "epoch": 0.36940664459973027, + "grad_norm": 1.5409606590535647, + "learning_rate": 7.2713453883707294e-06, + "loss": 0.7133, + "step": 12053 + }, + { + "epoch": 0.3694372931224715, + "grad_norm": 1.8361290535705972, + "learning_rate": 7.270903225225132e-06, + "loss": 0.6958, + "step": 12054 + }, + { + "epoch": 0.3694679416452127, + "grad_norm": 1.7489003086461905, + "learning_rate": 7.2704610397037445e-06, + "loss": 0.5812, + "step": 12055 + }, + { + "epoch": 0.3694985901679539, + "grad_norm": 1.804652618541021, + "learning_rate": 7.270018831810924e-06, + "loss": 0.856, + "step": 12056 + }, + { + "epoch": 0.3695292386906951, + "grad_norm": 1.552057049896232, + "learning_rate": 7.269576601551027e-06, + "loss": 0.6758, + "step": 12057 + }, + { + "epoch": 0.3695598872134363, + "grad_norm": 1.545593675829641, + "learning_rate": 7.269134348928411e-06, + "loss": 0.6033, + "step": 12058 + }, + { + "epoch": 0.3695905357361775, + "grad_norm": 1.492961672795765, + "learning_rate": 7.268692073947434e-06, + "loss": 0.6431, + "step": 12059 + }, + { + "epoch": 0.3696211842589187, + "grad_norm": 1.5830277514796482, + "learning_rate": 7.268249776612453e-06, + "loss": 0.6563, + "step": 12060 + }, + { + "epoch": 0.3696518327816599, + "grad_norm": 1.707902930550058, + "learning_rate": 7.267807456927828e-06, + "loss": 0.6273, + "step": 12061 + }, + { + "epoch": 0.3696824813044011, + "grad_norm": 0.9132648474099486, + "learning_rate": 7.267365114897914e-06, + "loss": 0.4822, + "step": 12062 + }, + { + "epoch": 0.36971312982714233, + "grad_norm": 
0.9300949762765142, + "learning_rate": 7.266922750527073e-06, + "loss": 0.4851, + "step": 12063 + }, + { + "epoch": 0.36974377834988353, + "grad_norm": 1.6407514824999754, + "learning_rate": 7.26648036381966e-06, + "loss": 0.6281, + "step": 12064 + }, + { + "epoch": 0.36977442687262474, + "grad_norm": 0.7299502461727269, + "learning_rate": 7.266037954780038e-06, + "loss": 0.4509, + "step": 12065 + }, + { + "epoch": 0.36980507539536595, + "grad_norm": 1.7871522188534559, + "learning_rate": 7.265595523412563e-06, + "loss": 0.6684, + "step": 12066 + }, + { + "epoch": 0.36983572391810715, + "grad_norm": 1.676666659112615, + "learning_rate": 7.265153069721597e-06, + "loss": 0.7402, + "step": 12067 + }, + { + "epoch": 0.36986637244084836, + "grad_norm": 1.759043567968306, + "learning_rate": 7.264710593711497e-06, + "loss": 0.6944, + "step": 12068 + }, + { + "epoch": 0.36989702096358956, + "grad_norm": 1.7728016326027969, + "learning_rate": 7.264268095386625e-06, + "loss": 0.6145, + "step": 12069 + }, + { + "epoch": 0.36992766948633077, + "grad_norm": 1.7923656393257283, + "learning_rate": 7.263825574751339e-06, + "loss": 0.6593, + "step": 12070 + }, + { + "epoch": 0.369958318009072, + "grad_norm": 0.8628118337246518, + "learning_rate": 7.26338303181e-06, + "loss": 0.4378, + "step": 12071 + }, + { + "epoch": 0.3699889665318132, + "grad_norm": 1.9282693519428937, + "learning_rate": 7.262940466566971e-06, + "loss": 0.6497, + "step": 12072 + }, + { + "epoch": 0.3700196150545544, + "grad_norm": 1.8537772203432865, + "learning_rate": 7.262497879026609e-06, + "loss": 0.7453, + "step": 12073 + }, + { + "epoch": 0.3700502635772956, + "grad_norm": 1.8241426803892673, + "learning_rate": 7.2620552691932766e-06, + "loss": 0.7283, + "step": 12074 + }, + { + "epoch": 0.3700809121000368, + "grad_norm": 0.8610869651180998, + "learning_rate": 7.2616126370713355e-06, + "loss": 0.4657, + "step": 12075 + }, + { + "epoch": 0.370111560622778, + "grad_norm": 0.803036721310234, + "learning_rate": 7.261169982665146e-06, + "loss": 0.441, + "step": 12076 + }, + { + "epoch": 0.3701422091455192, + "grad_norm": 0.7761469116427766, + "learning_rate": 7.26072730597907e-06, + "loss": 0.4517, + "step": 12077 + }, + { + "epoch": 0.3701728576682604, + "grad_norm": 1.8731518241162082, + "learning_rate": 7.26028460701747e-06, + "loss": 0.6965, + "step": 12078 + }, + { + "epoch": 0.3702035061910016, + "grad_norm": 0.7923206852397874, + "learning_rate": 7.259841885784707e-06, + "loss": 0.4625, + "step": 12079 + }, + { + "epoch": 0.3702341547137428, + "grad_norm": 1.6634616015667814, + "learning_rate": 7.259399142285145e-06, + "loss": 0.6647, + "step": 12080 + }, + { + "epoch": 0.370264803236484, + "grad_norm": 1.7509294081442095, + "learning_rate": 7.258956376523143e-06, + "loss": 0.6992, + "step": 12081 + }, + { + "epoch": 0.3702954517592252, + "grad_norm": 1.6528775226026273, + "learning_rate": 7.258513588503067e-06, + "loss": 0.6711, + "step": 12082 + }, + { + "epoch": 0.3703261002819664, + "grad_norm": 1.737100963674655, + "learning_rate": 7.258070778229279e-06, + "loss": 0.6895, + "step": 12083 + }, + { + "epoch": 0.3703567488047076, + "grad_norm": 1.774539912195661, + "learning_rate": 7.257627945706141e-06, + "loss": 0.7002, + "step": 12084 + }, + { + "epoch": 0.3703873973274488, + "grad_norm": 1.6199519005972736, + "learning_rate": 7.257185090938017e-06, + "loss": 0.7178, + "step": 12085 + }, + { + "epoch": 0.37041804585019, + "grad_norm": 1.789315533494675, + "learning_rate": 7.2567422139292706e-06, + "loss": 0.7141, + "step": 
12086 + }, + { + "epoch": 0.3704486943729312, + "grad_norm": 0.9679257582691889, + "learning_rate": 7.256299314684269e-06, + "loss": 0.4747, + "step": 12087 + }, + { + "epoch": 0.3704793428956724, + "grad_norm": 1.827713734910388, + "learning_rate": 7.25585639320737e-06, + "loss": 0.6878, + "step": 12088 + }, + { + "epoch": 0.3705099914184136, + "grad_norm": 1.7130442112005113, + "learning_rate": 7.255413449502942e-06, + "loss": 0.6438, + "step": 12089 + }, + { + "epoch": 0.3705406399411548, + "grad_norm": 1.6548357721609477, + "learning_rate": 7.254970483575345e-06, + "loss": 0.6033, + "step": 12090 + }, + { + "epoch": 0.37057128846389603, + "grad_norm": 0.8285253211577897, + "learning_rate": 7.254527495428951e-06, + "loss": 0.4595, + "step": 12091 + }, + { + "epoch": 0.37060193698663724, + "grad_norm": 2.1577909642034174, + "learning_rate": 7.254084485068119e-06, + "loss": 0.7897, + "step": 12092 + }, + { + "epoch": 0.37063258550937844, + "grad_norm": 2.0717266027524524, + "learning_rate": 7.2536414524972154e-06, + "loss": 0.7508, + "step": 12093 + }, + { + "epoch": 0.37066323403211965, + "grad_norm": 1.7263214705709007, + "learning_rate": 7.253198397720607e-06, + "loss": 0.7366, + "step": 12094 + }, + { + "epoch": 0.37069388255486085, + "grad_norm": 0.7932951091270575, + "learning_rate": 7.252755320742658e-06, + "loss": 0.4477, + "step": 12095 + }, + { + "epoch": 0.37072453107760206, + "grad_norm": 1.7834779327427344, + "learning_rate": 7.252312221567734e-06, + "loss": 0.7141, + "step": 12096 + }, + { + "epoch": 0.37075517960034327, + "grad_norm": 1.8455865852615192, + "learning_rate": 7.2518691002002014e-06, + "loss": 0.6834, + "step": 12097 + }, + { + "epoch": 0.37078582812308447, + "grad_norm": 1.6856037435370421, + "learning_rate": 7.251425956644426e-06, + "loss": 0.5863, + "step": 12098 + }, + { + "epoch": 0.3708164766458257, + "grad_norm": 0.8040019455942432, + "learning_rate": 7.250982790904776e-06, + "loss": 0.4457, + "step": 12099 + }, + { + "epoch": 0.3708471251685669, + "grad_norm": 2.052058039978196, + "learning_rate": 7.250539602985616e-06, + "loss": 0.7852, + "step": 12100 + }, + { + "epoch": 0.3708777736913081, + "grad_norm": 0.8228225798442527, + "learning_rate": 7.250096392891312e-06, + "loss": 0.469, + "step": 12101 + }, + { + "epoch": 0.3709084222140493, + "grad_norm": 1.7629590595099163, + "learning_rate": 7.249653160626236e-06, + "loss": 0.6504, + "step": 12102 + }, + { + "epoch": 0.3709390707367905, + "grad_norm": 1.6144373575732587, + "learning_rate": 7.249209906194748e-06, + "loss": 0.5829, + "step": 12103 + }, + { + "epoch": 0.3709697192595317, + "grad_norm": 1.8083619105252715, + "learning_rate": 7.248766629601221e-06, + "loss": 0.5598, + "step": 12104 + }, + { + "epoch": 0.3710003677822729, + "grad_norm": 1.863457809858878, + "learning_rate": 7.2483233308500215e-06, + "loss": 0.6742, + "step": 12105 + }, + { + "epoch": 0.3710310163050141, + "grad_norm": 1.7360764310479921, + "learning_rate": 7.247880009945517e-06, + "loss": 0.5653, + "step": 12106 + }, + { + "epoch": 0.3710616648277553, + "grad_norm": 1.7571824608841802, + "learning_rate": 7.247436666892075e-06, + "loss": 0.6584, + "step": 12107 + }, + { + "epoch": 0.3710923133504965, + "grad_norm": 1.598615909180718, + "learning_rate": 7.246993301694064e-06, + "loss": 0.7191, + "step": 12108 + }, + { + "epoch": 0.37112296187323773, + "grad_norm": 1.704509968903115, + "learning_rate": 7.246549914355853e-06, + "loss": 0.6883, + "step": 12109 + }, + { + "epoch": 0.37115361039597894, + "grad_norm": 
1.8086405012082, + "learning_rate": 7.246106504881811e-06, + "loss": 0.6047, + "step": 12110 + }, + { + "epoch": 0.37118425891872014, + "grad_norm": 1.6963583018810418, + "learning_rate": 7.245663073276309e-06, + "loss": 0.6817, + "step": 12111 + }, + { + "epoch": 0.3712149074414613, + "grad_norm": 1.9519030169905156, + "learning_rate": 7.245219619543712e-06, + "loss": 0.5797, + "step": 12112 + }, + { + "epoch": 0.3712455559642025, + "grad_norm": 1.9193061182452564, + "learning_rate": 7.244776143688392e-06, + "loss": 0.6801, + "step": 12113 + }, + { + "epoch": 0.3712762044869437, + "grad_norm": 1.4847265798243212, + "learning_rate": 7.244332645714719e-06, + "loss": 0.5649, + "step": 12114 + }, + { + "epoch": 0.3713068530096849, + "grad_norm": 1.5906427027286307, + "learning_rate": 7.2438891256270615e-06, + "loss": 0.6435, + "step": 12115 + }, + { + "epoch": 0.3713375015324261, + "grad_norm": 1.638487932275848, + "learning_rate": 7.24344558342979e-06, + "loss": 0.7449, + "step": 12116 + }, + { + "epoch": 0.3713681500551673, + "grad_norm": 1.857436338011162, + "learning_rate": 7.243002019127277e-06, + "loss": 0.7152, + "step": 12117 + }, + { + "epoch": 0.37139879857790853, + "grad_norm": 1.7057296551417844, + "learning_rate": 7.2425584327238895e-06, + "loss": 0.5611, + "step": 12118 + }, + { + "epoch": 0.37142944710064973, + "grad_norm": 1.6440381371712822, + "learning_rate": 7.2421148242240005e-06, + "loss": 0.621, + "step": 12119 + }, + { + "epoch": 0.37146009562339094, + "grad_norm": 1.787982788750262, + "learning_rate": 7.241671193631979e-06, + "loss": 0.6864, + "step": 12120 + }, + { + "epoch": 0.37149074414613215, + "grad_norm": 1.7840073202509992, + "learning_rate": 7.2412275409522e-06, + "loss": 0.7318, + "step": 12121 + }, + { + "epoch": 0.37152139266887335, + "grad_norm": 1.8759841094697238, + "learning_rate": 7.240783866189031e-06, + "loss": 0.6936, + "step": 12122 + }, + { + "epoch": 0.37155204119161456, + "grad_norm": 1.7441736323628707, + "learning_rate": 7.240340169346847e-06, + "loss": 0.5183, + "step": 12123 + }, + { + "epoch": 0.37158268971435576, + "grad_norm": 1.509326220741127, + "learning_rate": 7.239896450430016e-06, + "loss": 0.6236, + "step": 12124 + }, + { + "epoch": 0.37161333823709697, + "grad_norm": 1.727299159164586, + "learning_rate": 7.239452709442914e-06, + "loss": 0.7294, + "step": 12125 + }, + { + "epoch": 0.3716439867598382, + "grad_norm": 1.9037548300859821, + "learning_rate": 7.23900894638991e-06, + "loss": 0.6828, + "step": 12126 + }, + { + "epoch": 0.3716746352825794, + "grad_norm": 1.7548974870050595, + "learning_rate": 7.238565161275379e-06, + "loss": 0.7289, + "step": 12127 + }, + { + "epoch": 0.3717052838053206, + "grad_norm": 0.9648829492058526, + "learning_rate": 7.2381213541036925e-06, + "loss": 0.4562, + "step": 12128 + }, + { + "epoch": 0.3717359323280618, + "grad_norm": 1.6556072047485082, + "learning_rate": 7.237677524879223e-06, + "loss": 0.6734, + "step": 12129 + }, + { + "epoch": 0.371766580850803, + "grad_norm": 1.7741237665998855, + "learning_rate": 7.2372336736063456e-06, + "loss": 0.652, + "step": 12130 + }, + { + "epoch": 0.3717972293735442, + "grad_norm": 1.9094276161421677, + "learning_rate": 7.23678980028943e-06, + "loss": 0.6318, + "step": 12131 + }, + { + "epoch": 0.3718278778962854, + "grad_norm": 1.6289079144767822, + "learning_rate": 7.2363459049328545e-06, + "loss": 0.6752, + "step": 12132 + }, + { + "epoch": 0.3718585264190266, + "grad_norm": 1.752744009746961, + "learning_rate": 7.23590198754099e-06, + "loss": 0.7195, + 
"step": 12133 + }, + { + "epoch": 0.3718891749417678, + "grad_norm": 1.652232636916924, + "learning_rate": 7.235458048118211e-06, + "loss": 0.7197, + "step": 12134 + }, + { + "epoch": 0.371919823464509, + "grad_norm": 1.633314452395313, + "learning_rate": 7.235014086668892e-06, + "loss": 0.6519, + "step": 12135 + }, + { + "epoch": 0.37195047198725023, + "grad_norm": 1.7675590979614435, + "learning_rate": 7.234570103197407e-06, + "loss": 0.7377, + "step": 12136 + }, + { + "epoch": 0.37198112050999144, + "grad_norm": 1.8701314894336365, + "learning_rate": 7.2341260977081314e-06, + "loss": 0.6025, + "step": 12137 + }, + { + "epoch": 0.37201176903273264, + "grad_norm": 1.6142795763795903, + "learning_rate": 7.233682070205439e-06, + "loss": 0.6618, + "step": 12138 + }, + { + "epoch": 0.37204241755547385, + "grad_norm": 1.659996589768685, + "learning_rate": 7.2332380206937055e-06, + "loss": 0.64, + "step": 12139 + }, + { + "epoch": 0.37207306607821505, + "grad_norm": 0.8417480388927286, + "learning_rate": 7.232793949177308e-06, + "loss": 0.4832, + "step": 12140 + }, + { + "epoch": 0.37210371460095626, + "grad_norm": 0.9048126206986115, + "learning_rate": 7.23234985566062e-06, + "loss": 0.4722, + "step": 12141 + }, + { + "epoch": 0.37213436312369746, + "grad_norm": 1.8846394617046065, + "learning_rate": 7.231905740148017e-06, + "loss": 0.6098, + "step": 12142 + }, + { + "epoch": 0.3721650116464386, + "grad_norm": 1.844771884788096, + "learning_rate": 7.231461602643876e-06, + "loss": 0.6583, + "step": 12143 + }, + { + "epoch": 0.3721956601691798, + "grad_norm": 1.6331649552024132, + "learning_rate": 7.2310174431525715e-06, + "loss": 0.7469, + "step": 12144 + }, + { + "epoch": 0.372226308691921, + "grad_norm": 1.9444999987435725, + "learning_rate": 7.230573261678484e-06, + "loss": 0.7352, + "step": 12145 + }, + { + "epoch": 0.37225695721466223, + "grad_norm": 1.685122110824292, + "learning_rate": 7.230129058225986e-06, + "loss": 0.6251, + "step": 12146 + }, + { + "epoch": 0.37228760573740344, + "grad_norm": 1.8986392105644143, + "learning_rate": 7.229684832799455e-06, + "loss": 0.8284, + "step": 12147 + }, + { + "epoch": 0.37231825426014464, + "grad_norm": 1.8373923928884404, + "learning_rate": 7.22924058540327e-06, + "loss": 0.6646, + "step": 12148 + }, + { + "epoch": 0.37234890278288585, + "grad_norm": 0.8946413936221389, + "learning_rate": 7.228796316041807e-06, + "loss": 0.4703, + "step": 12149 + }, + { + "epoch": 0.37237955130562705, + "grad_norm": 1.6993520071327004, + "learning_rate": 7.228352024719442e-06, + "loss": 0.6207, + "step": 12150 + }, + { + "epoch": 0.37241019982836826, + "grad_norm": 1.6065093251966975, + "learning_rate": 7.2279077114405575e-06, + "loss": 0.6534, + "step": 12151 + }, + { + "epoch": 0.37244084835110947, + "grad_norm": 1.5674762068005441, + "learning_rate": 7.227463376209527e-06, + "loss": 0.6724, + "step": 12152 + }, + { + "epoch": 0.37247149687385067, + "grad_norm": 1.4692549870974585, + "learning_rate": 7.227019019030729e-06, + "loss": 0.6352, + "step": 12153 + }, + { + "epoch": 0.3725021453965919, + "grad_norm": 1.8236354999132107, + "learning_rate": 7.226574639908543e-06, + "loss": 0.7856, + "step": 12154 + }, + { + "epoch": 0.3725327939193331, + "grad_norm": 1.6808674343188152, + "learning_rate": 7.226130238847347e-06, + "loss": 0.7058, + "step": 12155 + }, + { + "epoch": 0.3725634424420743, + "grad_norm": 1.5692838704948626, + "learning_rate": 7.225685815851522e-06, + "loss": 0.6589, + "step": 12156 + }, + { + "epoch": 0.3725940909648155, + "grad_norm": 
1.9086327413358086, + "learning_rate": 7.225241370925444e-06, + "loss": 0.6763, + "step": 12157 + }, + { + "epoch": 0.3726247394875567, + "grad_norm": 1.7048211722127806, + "learning_rate": 7.224796904073493e-06, + "loss": 0.5647, + "step": 12158 + }, + { + "epoch": 0.3726553880102979, + "grad_norm": 0.794714610782924, + "learning_rate": 7.224352415300049e-06, + "loss": 0.4851, + "step": 12159 + }, + { + "epoch": 0.3726860365330391, + "grad_norm": 1.5530540452682566, + "learning_rate": 7.223907904609493e-06, + "loss": 0.5805, + "step": 12160 + }, + { + "epoch": 0.3727166850557803, + "grad_norm": 1.844796884566749, + "learning_rate": 7.223463372006202e-06, + "loss": 0.7534, + "step": 12161 + }, + { + "epoch": 0.3727473335785215, + "grad_norm": 1.7690419251990455, + "learning_rate": 7.223018817494558e-06, + "loss": 0.6745, + "step": 12162 + }, + { + "epoch": 0.37277798210126273, + "grad_norm": 1.818568177445544, + "learning_rate": 7.222574241078939e-06, + "loss": 0.7357, + "step": 12163 + }, + { + "epoch": 0.37280863062400393, + "grad_norm": 1.9876624200556456, + "learning_rate": 7.22212964276373e-06, + "loss": 0.703, + "step": 12164 + }, + { + "epoch": 0.37283927914674514, + "grad_norm": 1.6357554201987023, + "learning_rate": 7.221685022553309e-06, + "loss": 0.6652, + "step": 12165 + }, + { + "epoch": 0.37286992766948635, + "grad_norm": 1.6542059130849291, + "learning_rate": 7.221240380452055e-06, + "loss": 0.6068, + "step": 12166 + }, + { + "epoch": 0.37290057619222755, + "grad_norm": 0.8475096330126426, + "learning_rate": 7.220795716464352e-06, + "loss": 0.4635, + "step": 12167 + }, + { + "epoch": 0.37293122471496876, + "grad_norm": 0.77028219801398, + "learning_rate": 7.2203510305945815e-06, + "loss": 0.4277, + "step": 12168 + }, + { + "epoch": 0.37296187323770996, + "grad_norm": 1.8048418160529354, + "learning_rate": 7.2199063228471235e-06, + "loss": 0.6478, + "step": 12169 + }, + { + "epoch": 0.37299252176045117, + "grad_norm": 1.866765099764495, + "learning_rate": 7.21946159322636e-06, + "loss": 0.7256, + "step": 12170 + }, + { + "epoch": 0.3730231702831924, + "grad_norm": 1.6575013088328656, + "learning_rate": 7.219016841736675e-06, + "loss": 0.6638, + "step": 12171 + }, + { + "epoch": 0.3730538188059336, + "grad_norm": 1.6084180354810498, + "learning_rate": 7.218572068382448e-06, + "loss": 0.7129, + "step": 12172 + }, + { + "epoch": 0.3730844673286748, + "grad_norm": 1.7117962900756194, + "learning_rate": 7.218127273168063e-06, + "loss": 0.6495, + "step": 12173 + }, + { + "epoch": 0.37311511585141593, + "grad_norm": 1.4751430312704048, + "learning_rate": 7.217682456097902e-06, + "loss": 0.58, + "step": 12174 + }, + { + "epoch": 0.37314576437415714, + "grad_norm": 1.7920261678669223, + "learning_rate": 7.21723761717635e-06, + "loss": 0.6605, + "step": 12175 + }, + { + "epoch": 0.37317641289689835, + "grad_norm": 1.6051159153231207, + "learning_rate": 7.216792756407787e-06, + "loss": 0.5819, + "step": 12176 + }, + { + "epoch": 0.37320706141963955, + "grad_norm": 1.7399247717610302, + "learning_rate": 7.216347873796598e-06, + "loss": 0.7089, + "step": 12177 + }, + { + "epoch": 0.37323770994238076, + "grad_norm": 1.8261016759931141, + "learning_rate": 7.215902969347166e-06, + "loss": 0.6695, + "step": 12178 + }, + { + "epoch": 0.37326835846512196, + "grad_norm": 1.7033711744826416, + "learning_rate": 7.215458043063877e-06, + "loss": 0.6235, + "step": 12179 + }, + { + "epoch": 0.37329900698786317, + "grad_norm": 1.5694427053139612, + "learning_rate": 7.215013094951111e-06, + "loss": 
0.6958, + "step": 12180 + }, + { + "epoch": 0.3733296555106044, + "grad_norm": 1.853190843812496, + "learning_rate": 7.214568125013254e-06, + "loss": 0.7668, + "step": 12181 + }, + { + "epoch": 0.3733603040333456, + "grad_norm": 1.548984555538895, + "learning_rate": 7.214123133254691e-06, + "loss": 0.6976, + "step": 12182 + }, + { + "epoch": 0.3733909525560868, + "grad_norm": 1.904960514652845, + "learning_rate": 7.2136781196798075e-06, + "loss": 0.7471, + "step": 12183 + }, + { + "epoch": 0.373421601078828, + "grad_norm": 1.561760791574482, + "learning_rate": 7.213233084292986e-06, + "loss": 0.6554, + "step": 12184 + }, + { + "epoch": 0.3734522496015692, + "grad_norm": 1.806355019460574, + "learning_rate": 7.212788027098613e-06, + "loss": 0.6746, + "step": 12185 + }, + { + "epoch": 0.3734828981243104, + "grad_norm": 1.7064177674473195, + "learning_rate": 7.212342948101075e-06, + "loss": 0.6428, + "step": 12186 + }, + { + "epoch": 0.3735135466470516, + "grad_norm": 1.7694766926822996, + "learning_rate": 7.211897847304753e-06, + "loss": 0.6888, + "step": 12187 + }, + { + "epoch": 0.3735441951697928, + "grad_norm": 1.9784468122016903, + "learning_rate": 7.211452724714037e-06, + "loss": 0.7545, + "step": 12188 + }, + { + "epoch": 0.373574843692534, + "grad_norm": 0.9977351678945053, + "learning_rate": 7.211007580333311e-06, + "loss": 0.4492, + "step": 12189 + }, + { + "epoch": 0.3736054922152752, + "grad_norm": 1.7777922886061523, + "learning_rate": 7.2105624141669615e-06, + "loss": 0.6991, + "step": 12190 + }, + { + "epoch": 0.37363614073801643, + "grad_norm": 0.9124647174756735, + "learning_rate": 7.210117226219377e-06, + "loss": 0.4834, + "step": 12191 + }, + { + "epoch": 0.37366678926075764, + "grad_norm": 1.662050114692412, + "learning_rate": 7.20967201649494e-06, + "loss": 0.7135, + "step": 12192 + }, + { + "epoch": 0.37369743778349884, + "grad_norm": 0.7755058546210565, + "learning_rate": 7.209226784998039e-06, + "loss": 0.4465, + "step": 12193 + }, + { + "epoch": 0.37372808630624005, + "grad_norm": 1.6890621431683097, + "learning_rate": 7.2087815317330625e-06, + "loss": 0.6538, + "step": 12194 + }, + { + "epoch": 0.37375873482898125, + "grad_norm": 0.8342648960597374, + "learning_rate": 7.2083362567043955e-06, + "loss": 0.4912, + "step": 12195 + }, + { + "epoch": 0.37378938335172246, + "grad_norm": 0.8389124427377364, + "learning_rate": 7.207890959916426e-06, + "loss": 0.4562, + "step": 12196 + }, + { + "epoch": 0.37382003187446367, + "grad_norm": 1.557364974744378, + "learning_rate": 7.207445641373543e-06, + "loss": 0.6791, + "step": 12197 + }, + { + "epoch": 0.37385068039720487, + "grad_norm": 2.2052418529320414, + "learning_rate": 7.207000301080132e-06, + "loss": 0.7087, + "step": 12198 + }, + { + "epoch": 0.3738813289199461, + "grad_norm": 1.6989396472977563, + "learning_rate": 7.206554939040585e-06, + "loss": 0.6013, + "step": 12199 + }, + { + "epoch": 0.3739119774426873, + "grad_norm": 0.7710098715686627, + "learning_rate": 7.206109555259284e-06, + "loss": 0.4505, + "step": 12200 + }, + { + "epoch": 0.3739426259654285, + "grad_norm": 1.5995561452725473, + "learning_rate": 7.205664149740623e-06, + "loss": 0.6877, + "step": 12201 + }, + { + "epoch": 0.3739732744881697, + "grad_norm": 1.7826446143394137, + "learning_rate": 7.205218722488989e-06, + "loss": 0.7341, + "step": 12202 + }, + { + "epoch": 0.3740039230109109, + "grad_norm": 1.5735645986240705, + "learning_rate": 7.204773273508772e-06, + "loss": 0.6395, + "step": 12203 + }, + { + "epoch": 0.3740345715336521, + "grad_norm": 
1.4692051306131362, + "learning_rate": 7.2043278028043565e-06, + "loss": 0.6186, + "step": 12204 + }, + { + "epoch": 0.37406522005639326, + "grad_norm": 1.7015653038411356, + "learning_rate": 7.203882310380137e-06, + "loss": 0.7037, + "step": 12205 + }, + { + "epoch": 0.37409586857913446, + "grad_norm": 1.5925858373464934, + "learning_rate": 7.203436796240502e-06, + "loss": 0.6555, + "step": 12206 + }, + { + "epoch": 0.37412651710187567, + "grad_norm": 1.3624479572966426, + "learning_rate": 7.202991260389839e-06, + "loss": 0.5989, + "step": 12207 + }, + { + "epoch": 0.37415716562461687, + "grad_norm": 1.8380187290335452, + "learning_rate": 7.202545702832539e-06, + "loss": 0.6666, + "step": 12208 + }, + { + "epoch": 0.3741878141473581, + "grad_norm": 1.6400631592086337, + "learning_rate": 7.202100123572994e-06, + "loss": 0.5384, + "step": 12209 + }, + { + "epoch": 0.3742184626700993, + "grad_norm": 1.8578257322528031, + "learning_rate": 7.201654522615593e-06, + "loss": 0.6018, + "step": 12210 + }, + { + "epoch": 0.3742491111928405, + "grad_norm": 1.602248678339458, + "learning_rate": 7.2012088999647264e-06, + "loss": 0.6123, + "step": 12211 + }, + { + "epoch": 0.3742797597155817, + "grad_norm": 1.985984480499888, + "learning_rate": 7.200763255624785e-06, + "loss": 0.6688, + "step": 12212 + }, + { + "epoch": 0.3743104082383229, + "grad_norm": 1.7326279014719825, + "learning_rate": 7.200317589600161e-06, + "loss": 0.6805, + "step": 12213 + }, + { + "epoch": 0.3743410567610641, + "grad_norm": 1.7470244399709207, + "learning_rate": 7.199871901895244e-06, + "loss": 0.6002, + "step": 12214 + }, + { + "epoch": 0.3743717052838053, + "grad_norm": 1.7849958673874666, + "learning_rate": 7.199426192514427e-06, + "loss": 0.6985, + "step": 12215 + }, + { + "epoch": 0.3744023538065465, + "grad_norm": 1.6904637666715694, + "learning_rate": 7.198980461462101e-06, + "loss": 0.7227, + "step": 12216 + }, + { + "epoch": 0.3744330023292877, + "grad_norm": 1.7449475900569589, + "learning_rate": 7.198534708742656e-06, + "loss": 0.6166, + "step": 12217 + }, + { + "epoch": 0.37446365085202893, + "grad_norm": 1.5772600536631989, + "learning_rate": 7.198088934360488e-06, + "loss": 0.7723, + "step": 12218 + }, + { + "epoch": 0.37449429937477013, + "grad_norm": 1.5842821617922467, + "learning_rate": 7.197643138319988e-06, + "loss": 0.727, + "step": 12219 + }, + { + "epoch": 0.37452494789751134, + "grad_norm": 1.8922293358507671, + "learning_rate": 7.197197320625546e-06, + "loss": 0.7291, + "step": 12220 + }, + { + "epoch": 0.37455559642025255, + "grad_norm": 1.5927583190457357, + "learning_rate": 7.196751481281556e-06, + "loss": 0.6146, + "step": 12221 + }, + { + "epoch": 0.37458624494299375, + "grad_norm": 1.6727548846961353, + "learning_rate": 7.196305620292413e-06, + "loss": 0.6792, + "step": 12222 + }, + { + "epoch": 0.37461689346573496, + "grad_norm": 1.7564438099084825, + "learning_rate": 7.195859737662509e-06, + "loss": 0.7347, + "step": 12223 + }, + { + "epoch": 0.37464754198847616, + "grad_norm": 1.6069439138117396, + "learning_rate": 7.195413833396236e-06, + "loss": 0.7009, + "step": 12224 + }, + { + "epoch": 0.37467819051121737, + "grad_norm": 1.8520611902143735, + "learning_rate": 7.19496790749799e-06, + "loss": 0.6897, + "step": 12225 + }, + { + "epoch": 0.3747088390339586, + "grad_norm": 1.656721062359851, + "learning_rate": 7.194521959972163e-06, + "loss": 0.5846, + "step": 12226 + }, + { + "epoch": 0.3747394875566998, + "grad_norm": 1.7707955155123105, + "learning_rate": 7.194075990823151e-06, + "loss": 
0.6862, + "step": 12227 + }, + { + "epoch": 0.374770136079441, + "grad_norm": 1.58158656088169, + "learning_rate": 7.193630000055344e-06, + "loss": 0.5989, + "step": 12228 + }, + { + "epoch": 0.3748007846021822, + "grad_norm": 1.673172495369431, + "learning_rate": 7.193183987673143e-06, + "loss": 0.6879, + "step": 12229 + }, + { + "epoch": 0.3748314331249234, + "grad_norm": 1.810609849487826, + "learning_rate": 7.192737953680936e-06, + "loss": 0.7421, + "step": 12230 + }, + { + "epoch": 0.3748620816476646, + "grad_norm": 1.6946112852458386, + "learning_rate": 7.192291898083122e-06, + "loss": 0.6885, + "step": 12231 + }, + { + "epoch": 0.3748927301704058, + "grad_norm": 1.7730191993545037, + "learning_rate": 7.191845820884093e-06, + "loss": 0.6121, + "step": 12232 + }, + { + "epoch": 0.374923378693147, + "grad_norm": 1.7048826438194529, + "learning_rate": 7.191399722088249e-06, + "loss": 0.6371, + "step": 12233 + }, + { + "epoch": 0.3749540272158882, + "grad_norm": 1.599824175952047, + "learning_rate": 7.190953601699983e-06, + "loss": 0.5357, + "step": 12234 + }, + { + "epoch": 0.3749846757386294, + "grad_norm": 1.7096434677225085, + "learning_rate": 7.190507459723689e-06, + "loss": 0.6854, + "step": 12235 + }, + { + "epoch": 0.3750153242613706, + "grad_norm": 1.5961888447059378, + "learning_rate": 7.190061296163765e-06, + "loss": 0.7056, + "step": 12236 + }, + { + "epoch": 0.3750459727841118, + "grad_norm": 1.6326185339636918, + "learning_rate": 7.189615111024608e-06, + "loss": 0.6613, + "step": 12237 + }, + { + "epoch": 0.375076621306853, + "grad_norm": 1.7370351108601472, + "learning_rate": 7.189168904310612e-06, + "loss": 0.6647, + "step": 12238 + }, + { + "epoch": 0.3751072698295942, + "grad_norm": 1.6921644562818914, + "learning_rate": 7.188722676026174e-06, + "loss": 0.7222, + "step": 12239 + }, + { + "epoch": 0.3751379183523354, + "grad_norm": 2.1487484337072753, + "learning_rate": 7.1882764261756925e-06, + "loss": 0.6839, + "step": 12240 + }, + { + "epoch": 0.3751685668750766, + "grad_norm": 0.889709275915457, + "learning_rate": 7.187830154763563e-06, + "loss": 0.4654, + "step": 12241 + }, + { + "epoch": 0.3751992153978178, + "grad_norm": 1.7454109201369838, + "learning_rate": 7.187383861794184e-06, + "loss": 0.7285, + "step": 12242 + }, + { + "epoch": 0.375229863920559, + "grad_norm": 0.8485411002347986, + "learning_rate": 7.186937547271951e-06, + "loss": 0.4789, + "step": 12243 + }, + { + "epoch": 0.3752605124433002, + "grad_norm": 1.869859717608598, + "learning_rate": 7.186491211201263e-06, + "loss": 0.8004, + "step": 12244 + }, + { + "epoch": 0.3752911609660414, + "grad_norm": 1.7741989365541018, + "learning_rate": 7.186044853586518e-06, + "loss": 0.7161, + "step": 12245 + }, + { + "epoch": 0.37532180948878263, + "grad_norm": 1.8871070498914557, + "learning_rate": 7.1855984744321135e-06, + "loss": 0.5849, + "step": 12246 + }, + { + "epoch": 0.37535245801152384, + "grad_norm": 1.795438277352554, + "learning_rate": 7.185152073742448e-06, + "loss": 0.7452, + "step": 12247 + }, + { + "epoch": 0.37538310653426504, + "grad_norm": 1.7531971320323172, + "learning_rate": 7.184705651521919e-06, + "loss": 0.7176, + "step": 12248 + }, + { + "epoch": 0.37541375505700625, + "grad_norm": 0.7939381171289878, + "learning_rate": 7.184259207774928e-06, + "loss": 0.4696, + "step": 12249 + }, + { + "epoch": 0.37544440357974745, + "grad_norm": 2.427780982709156, + "learning_rate": 7.183812742505871e-06, + "loss": 0.6613, + "step": 12250 + }, + { + "epoch": 0.37547505210248866, + "grad_norm": 
1.8551126707940566, + "learning_rate": 7.183366255719149e-06, + "loss": 0.6786, + "step": 12251 + }, + { + "epoch": 0.37550570062522987, + "grad_norm": 1.6243889492553216, + "learning_rate": 7.182919747419161e-06, + "loss": 0.6347, + "step": 12252 + }, + { + "epoch": 0.37553634914797107, + "grad_norm": 1.7283830322786773, + "learning_rate": 7.182473217610306e-06, + "loss": 0.6413, + "step": 12253 + }, + { + "epoch": 0.3755669976707123, + "grad_norm": 1.5511335648258389, + "learning_rate": 7.182026666296983e-06, + "loss": 0.6814, + "step": 12254 + }, + { + "epoch": 0.3755976461934535, + "grad_norm": 1.7313201272197662, + "learning_rate": 7.1815800934835945e-06, + "loss": 0.6625, + "step": 12255 + }, + { + "epoch": 0.3756282947161947, + "grad_norm": 1.554803314915936, + "learning_rate": 7.181133499174538e-06, + "loss": 0.6804, + "step": 12256 + }, + { + "epoch": 0.3756589432389359, + "grad_norm": 1.7965505033045461, + "learning_rate": 7.180686883374216e-06, + "loss": 0.6285, + "step": 12257 + }, + { + "epoch": 0.3756895917616771, + "grad_norm": 1.5713416942395428, + "learning_rate": 7.180240246087027e-06, + "loss": 0.6295, + "step": 12258 + }, + { + "epoch": 0.3757202402844183, + "grad_norm": 1.5587181325791992, + "learning_rate": 7.179793587317374e-06, + "loss": 0.6535, + "step": 12259 + }, + { + "epoch": 0.3757508888071595, + "grad_norm": 1.6783997050877024, + "learning_rate": 7.179346907069657e-06, + "loss": 0.7504, + "step": 12260 + }, + { + "epoch": 0.3757815373299007, + "grad_norm": 1.8422705032192241, + "learning_rate": 7.178900205348276e-06, + "loss": 0.6697, + "step": 12261 + }, + { + "epoch": 0.3758121858526419, + "grad_norm": 1.5408050705016831, + "learning_rate": 7.178453482157635e-06, + "loss": 0.6205, + "step": 12262 + }, + { + "epoch": 0.37584283437538313, + "grad_norm": 0.8959067136802906, + "learning_rate": 7.178006737502135e-06, + "loss": 0.4723, + "step": 12263 + }, + { + "epoch": 0.37587348289812433, + "grad_norm": 1.692910914789313, + "learning_rate": 7.177559971386175e-06, + "loss": 0.6696, + "step": 12264 + }, + { + "epoch": 0.37590413142086554, + "grad_norm": 1.9872834766159848, + "learning_rate": 7.177113183814162e-06, + "loss": 0.6678, + "step": 12265 + }, + { + "epoch": 0.37593477994360674, + "grad_norm": 0.8346872260975212, + "learning_rate": 7.176666374790494e-06, + "loss": 0.4694, + "step": 12266 + }, + { + "epoch": 0.3759654284663479, + "grad_norm": 1.7522836245104776, + "learning_rate": 7.176219544319576e-06, + "loss": 0.625, + "step": 12267 + }, + { + "epoch": 0.3759960769890891, + "grad_norm": 1.8516315682125493, + "learning_rate": 7.175772692405811e-06, + "loss": 0.6803, + "step": 12268 + }, + { + "epoch": 0.3760267255118303, + "grad_norm": 1.9196571594562213, + "learning_rate": 7.175325819053599e-06, + "loss": 0.7704, + "step": 12269 + }, + { + "epoch": 0.3760573740345715, + "grad_norm": 1.6853447752130095, + "learning_rate": 7.174878924267346e-06, + "loss": 0.6064, + "step": 12270 + }, + { + "epoch": 0.3760880225573127, + "grad_norm": 1.7829723727545426, + "learning_rate": 7.174432008051454e-06, + "loss": 0.7571, + "step": 12271 + }, + { + "epoch": 0.3761186710800539, + "grad_norm": 1.9022757127083019, + "learning_rate": 7.1739850704103295e-06, + "loss": 0.6514, + "step": 12272 + }, + { + "epoch": 0.37614931960279513, + "grad_norm": 1.7588526871566006, + "learning_rate": 7.17353811134837e-06, + "loss": 0.7704, + "step": 12273 + }, + { + "epoch": 0.37617996812553633, + "grad_norm": 1.431842050037238, + "learning_rate": 7.1730911308699865e-06, + "loss": 
0.6822, + "step": 12274 + }, + { + "epoch": 0.37621061664827754, + "grad_norm": 1.621047262186365, + "learning_rate": 7.172644128979578e-06, + "loss": 0.6665, + "step": 12275 + }, + { + "epoch": 0.37624126517101875, + "grad_norm": 1.8177763059754255, + "learning_rate": 7.172197105681553e-06, + "loss": 0.6562, + "step": 12276 + }, + { + "epoch": 0.37627191369375995, + "grad_norm": 1.868428007710165, + "learning_rate": 7.171750060980314e-06, + "loss": 0.7066, + "step": 12277 + }, + { + "epoch": 0.37630256221650116, + "grad_norm": 1.5919495865981623, + "learning_rate": 7.171302994880264e-06, + "loss": 0.5965, + "step": 12278 + }, + { + "epoch": 0.37633321073924236, + "grad_norm": 1.9057117662871794, + "learning_rate": 7.170855907385812e-06, + "loss": 0.6897, + "step": 12279 + }, + { + "epoch": 0.37636385926198357, + "grad_norm": 1.7752591099552695, + "learning_rate": 7.17040879850136e-06, + "loss": 0.6559, + "step": 12280 + }, + { + "epoch": 0.3763945077847248, + "grad_norm": 1.6786092098915262, + "learning_rate": 7.169961668231316e-06, + "loss": 0.6473, + "step": 12281 + }, + { + "epoch": 0.376425156307466, + "grad_norm": 1.4857559897086887, + "learning_rate": 7.169514516580083e-06, + "loss": 0.5915, + "step": 12282 + }, + { + "epoch": 0.3764558048302072, + "grad_norm": 1.028929832544415, + "learning_rate": 7.169067343552069e-06, + "loss": 0.4588, + "step": 12283 + }, + { + "epoch": 0.3764864533529484, + "grad_norm": 1.6817048497608615, + "learning_rate": 7.1686201491516795e-06, + "loss": 0.6829, + "step": 12284 + }, + { + "epoch": 0.3765171018756896, + "grad_norm": 0.8294081331416995, + "learning_rate": 7.168172933383322e-06, + "loss": 0.4631, + "step": 12285 + }, + { + "epoch": 0.3765477503984308, + "grad_norm": 1.7591912779927352, + "learning_rate": 7.167725696251399e-06, + "loss": 0.6844, + "step": 12286 + }, + { + "epoch": 0.376578398921172, + "grad_norm": 1.714890188077214, + "learning_rate": 7.167278437760322e-06, + "loss": 0.7981, + "step": 12287 + }, + { + "epoch": 0.3766090474439132, + "grad_norm": 0.7624142794517267, + "learning_rate": 7.1668311579144966e-06, + "loss": 0.4523, + "step": 12288 + }, + { + "epoch": 0.3766396959666544, + "grad_norm": 1.9947290391661965, + "learning_rate": 7.166383856718328e-06, + "loss": 0.6882, + "step": 12289 + }, + { + "epoch": 0.3766703444893956, + "grad_norm": 1.8190084787875156, + "learning_rate": 7.165936534176225e-06, + "loss": 0.7698, + "step": 12290 + }, + { + "epoch": 0.37670099301213683, + "grad_norm": 2.0393362915549127, + "learning_rate": 7.165489190292596e-06, + "loss": 0.6938, + "step": 12291 + }, + { + "epoch": 0.37673164153487804, + "grad_norm": 1.5657707432613284, + "learning_rate": 7.1650418250718475e-06, + "loss": 0.6871, + "step": 12292 + }, + { + "epoch": 0.37676229005761924, + "grad_norm": 1.761597815358482, + "learning_rate": 7.164594438518389e-06, + "loss": 0.6967, + "step": 12293 + }, + { + "epoch": 0.37679293858036045, + "grad_norm": 1.899295658726107, + "learning_rate": 7.164147030636627e-06, + "loss": 0.7913, + "step": 12294 + }, + { + "epoch": 0.37682358710310165, + "grad_norm": 1.5943513018841664, + "learning_rate": 7.16369960143097e-06, + "loss": 0.7233, + "step": 12295 + }, + { + "epoch": 0.37685423562584286, + "grad_norm": 1.6629008638712799, + "learning_rate": 7.163252150905828e-06, + "loss": 0.7536, + "step": 12296 + }, + { + "epoch": 0.37688488414858407, + "grad_norm": 1.6875771231090297, + "learning_rate": 7.162804679065608e-06, + "loss": 0.7384, + "step": 12297 + }, + { + "epoch": 0.3769155326713252, + 
"grad_norm": 1.8817942823634677, + "learning_rate": 7.162357185914721e-06, + "loss": 0.6644, + "step": 12298 + }, + { + "epoch": 0.3769461811940664, + "grad_norm": 1.6688958136064083, + "learning_rate": 7.161909671457576e-06, + "loss": 0.7279, + "step": 12299 + }, + { + "epoch": 0.3769768297168076, + "grad_norm": 1.0550269941111907, + "learning_rate": 7.161462135698581e-06, + "loss": 0.4417, + "step": 12300 + }, + { + "epoch": 0.37700747823954883, + "grad_norm": 1.7992460872574563, + "learning_rate": 7.161014578642146e-06, + "loss": 0.8152, + "step": 12301 + }, + { + "epoch": 0.37703812676229004, + "grad_norm": 1.5184879019247883, + "learning_rate": 7.160567000292682e-06, + "loss": 0.7346, + "step": 12302 + }, + { + "epoch": 0.37706877528503124, + "grad_norm": 1.5260594577440585, + "learning_rate": 7.160119400654599e-06, + "loss": 0.6351, + "step": 12303 + }, + { + "epoch": 0.37709942380777245, + "grad_norm": 1.5297961492462506, + "learning_rate": 7.159671779732305e-06, + "loss": 0.6564, + "step": 12304 + }, + { + "epoch": 0.37713007233051365, + "grad_norm": 1.6503519050663444, + "learning_rate": 7.1592241375302145e-06, + "loss": 0.6643, + "step": 12305 + }, + { + "epoch": 0.37716072085325486, + "grad_norm": 2.101681317587911, + "learning_rate": 7.158776474052735e-06, + "loss": 0.7216, + "step": 12306 + }, + { + "epoch": 0.37719136937599607, + "grad_norm": 1.6953213773302869, + "learning_rate": 7.1583287893042795e-06, + "loss": 0.6703, + "step": 12307 + }, + { + "epoch": 0.37722201789873727, + "grad_norm": 1.6856320899463715, + "learning_rate": 7.157881083289257e-06, + "loss": 0.7377, + "step": 12308 + }, + { + "epoch": 0.3772526664214785, + "grad_norm": 1.7024485802004206, + "learning_rate": 7.157433356012081e-06, + "loss": 0.6442, + "step": 12309 + }, + { + "epoch": 0.3772833149442197, + "grad_norm": 1.8181205872008646, + "learning_rate": 7.156985607477163e-06, + "loss": 0.6335, + "step": 12310 + }, + { + "epoch": 0.3773139634669609, + "grad_norm": 0.9358350604114135, + "learning_rate": 7.156537837688913e-06, + "loss": 0.4667, + "step": 12311 + }, + { + "epoch": 0.3773446119897021, + "grad_norm": 1.622937222969028, + "learning_rate": 7.156090046651742e-06, + "loss": 0.632, + "step": 12312 + }, + { + "epoch": 0.3773752605124433, + "grad_norm": 1.8746280049740438, + "learning_rate": 7.155642234370066e-06, + "loss": 0.6346, + "step": 12313 + }, + { + "epoch": 0.3774059090351845, + "grad_norm": 1.8316090366530027, + "learning_rate": 7.1551944008482964e-06, + "loss": 0.6738, + "step": 12314 + }, + { + "epoch": 0.3774365575579257, + "grad_norm": 1.7782137501010642, + "learning_rate": 7.154746546090844e-06, + "loss": 0.6827, + "step": 12315 + }, + { + "epoch": 0.3774672060806669, + "grad_norm": 1.6607620341814284, + "learning_rate": 7.154298670102122e-06, + "loss": 0.6336, + "step": 12316 + }, + { + "epoch": 0.3774978546034081, + "grad_norm": 0.8262582590540148, + "learning_rate": 7.1538507728865445e-06, + "loss": 0.4426, + "step": 12317 + }, + { + "epoch": 0.37752850312614933, + "grad_norm": 1.6870177761494392, + "learning_rate": 7.153402854448525e-06, + "loss": 0.7256, + "step": 12318 + }, + { + "epoch": 0.37755915164889053, + "grad_norm": 1.6376723307112229, + "learning_rate": 7.1529549147924735e-06, + "loss": 0.6734, + "step": 12319 + }, + { + "epoch": 0.37758980017163174, + "grad_norm": 1.7996724270448752, + "learning_rate": 7.152506953922808e-06, + "loss": 0.6785, + "step": 12320 + }, + { + "epoch": 0.37762044869437295, + "grad_norm": 1.4197222395541735, + "learning_rate": 
7.152058971843941e-06, + "loss": 0.5508, + "step": 12321 + }, + { + "epoch": 0.37765109721711415, + "grad_norm": 0.8593405251631275, + "learning_rate": 7.151610968560287e-06, + "loss": 0.4884, + "step": 12322 + }, + { + "epoch": 0.37768174573985536, + "grad_norm": 1.7082622732563988, + "learning_rate": 7.151162944076259e-06, + "loss": 0.6518, + "step": 12323 + }, + { + "epoch": 0.37771239426259656, + "grad_norm": 1.611025204455271, + "learning_rate": 7.150714898396273e-06, + "loss": 0.7375, + "step": 12324 + }, + { + "epoch": 0.37774304278533777, + "grad_norm": 1.731125596729317, + "learning_rate": 7.15026683152474e-06, + "loss": 0.7222, + "step": 12325 + }, + { + "epoch": 0.377773691308079, + "grad_norm": 1.5595506141329998, + "learning_rate": 7.149818743466081e-06, + "loss": 0.7058, + "step": 12326 + }, + { + "epoch": 0.3778043398308202, + "grad_norm": 1.7207782502731408, + "learning_rate": 7.149370634224706e-06, + "loss": 0.7852, + "step": 12327 + }, + { + "epoch": 0.3778349883535614, + "grad_norm": 1.5958096671709945, + "learning_rate": 7.148922503805034e-06, + "loss": 0.6619, + "step": 12328 + }, + { + "epoch": 0.37786563687630254, + "grad_norm": 0.7805895292248324, + "learning_rate": 7.1484743522114766e-06, + "loss": 0.457, + "step": 12329 + }, + { + "epoch": 0.37789628539904374, + "grad_norm": 0.7828674183099353, + "learning_rate": 7.148026179448452e-06, + "loss": 0.4504, + "step": 12330 + }, + { + "epoch": 0.37792693392178495, + "grad_norm": 1.7739410851620634, + "learning_rate": 7.1475779855203764e-06, + "loss": 0.7228, + "step": 12331 + }, + { + "epoch": 0.37795758244452615, + "grad_norm": 1.7162599843213766, + "learning_rate": 7.147129770431666e-06, + "loss": 0.6639, + "step": 12332 + }, + { + "epoch": 0.37798823096726736, + "grad_norm": 1.5053089072392654, + "learning_rate": 7.146681534186737e-06, + "loss": 0.6477, + "step": 12333 + }, + { + "epoch": 0.37801887949000856, + "grad_norm": 1.530569170398407, + "learning_rate": 7.146233276790003e-06, + "loss": 0.7345, + "step": 12334 + }, + { + "epoch": 0.37804952801274977, + "grad_norm": 0.8502336964046631, + "learning_rate": 7.145784998245886e-06, + "loss": 0.4683, + "step": 12335 + }, + { + "epoch": 0.378080176535491, + "grad_norm": 1.3880414678221906, + "learning_rate": 7.145336698558798e-06, + "loss": 0.5823, + "step": 12336 + }, + { + "epoch": 0.3781108250582322, + "grad_norm": 1.9134885233710546, + "learning_rate": 7.144888377733161e-06, + "loss": 0.6994, + "step": 12337 + }, + { + "epoch": 0.3781414735809734, + "grad_norm": 1.7912757384475453, + "learning_rate": 7.144440035773388e-06, + "loss": 0.6221, + "step": 12338 + }, + { + "epoch": 0.3781721221037146, + "grad_norm": 1.6115889893075566, + "learning_rate": 7.143991672683901e-06, + "loss": 0.6643, + "step": 12339 + }, + { + "epoch": 0.3782027706264558, + "grad_norm": 1.953866038569664, + "learning_rate": 7.143543288469113e-06, + "loss": 0.5856, + "step": 12340 + }, + { + "epoch": 0.378233419149197, + "grad_norm": 0.7943356663229123, + "learning_rate": 7.143094883133447e-06, + "loss": 0.4318, + "step": 12341 + }, + { + "epoch": 0.3782640676719382, + "grad_norm": 1.6844570887058734, + "learning_rate": 7.142646456681317e-06, + "loss": 0.6271, + "step": 12342 + }, + { + "epoch": 0.3782947161946794, + "grad_norm": 0.8065344140472965, + "learning_rate": 7.142198009117143e-06, + "loss": 0.4695, + "step": 12343 + }, + { + "epoch": 0.3783253647174206, + "grad_norm": 1.9054634171796125, + "learning_rate": 7.141749540445344e-06, + "loss": 0.6883, + "step": 12344 + }, + { + "epoch": 
0.3783560132401618, + "grad_norm": 1.7683066667893688, + "learning_rate": 7.141301050670339e-06, + "loss": 0.5817, + "step": 12345 + }, + { + "epoch": 0.37838666176290303, + "grad_norm": 1.6903437119867994, + "learning_rate": 7.140852539796548e-06, + "loss": 0.6517, + "step": 12346 + }, + { + "epoch": 0.37841731028564424, + "grad_norm": 1.6614249572985496, + "learning_rate": 7.140404007828387e-06, + "loss": 0.6228, + "step": 12347 + }, + { + "epoch": 0.37844795880838544, + "grad_norm": 1.7376354769287032, + "learning_rate": 7.1399554547702806e-06, + "loss": 0.7096, + "step": 12348 + }, + { + "epoch": 0.37847860733112665, + "grad_norm": 1.658284257622475, + "learning_rate": 7.139506880626645e-06, + "loss": 0.7049, + "step": 12349 + }, + { + "epoch": 0.37850925585386785, + "grad_norm": 0.8081207031737456, + "learning_rate": 7.1390582854018995e-06, + "loss": 0.4622, + "step": 12350 + }, + { + "epoch": 0.37853990437660906, + "grad_norm": 2.326003964280761, + "learning_rate": 7.138609669100465e-06, + "loss": 0.5933, + "step": 12351 + }, + { + "epoch": 0.37857055289935027, + "grad_norm": 0.7712066733787885, + "learning_rate": 7.138161031726762e-06, + "loss": 0.4408, + "step": 12352 + }, + { + "epoch": 0.37860120142209147, + "grad_norm": 1.9173667543210478, + "learning_rate": 7.137712373285213e-06, + "loss": 0.7634, + "step": 12353 + }, + { + "epoch": 0.3786318499448327, + "grad_norm": 0.7778575594235201, + "learning_rate": 7.1372636937802365e-06, + "loss": 0.456, + "step": 12354 + }, + { + "epoch": 0.3786624984675739, + "grad_norm": 1.8179556403743535, + "learning_rate": 7.136814993216253e-06, + "loss": 0.6722, + "step": 12355 + }, + { + "epoch": 0.3786931469903151, + "grad_norm": 1.725602326833194, + "learning_rate": 7.136366271597685e-06, + "loss": 0.6234, + "step": 12356 + }, + { + "epoch": 0.3787237955130563, + "grad_norm": 1.7165610475269242, + "learning_rate": 7.135917528928955e-06, + "loss": 0.7384, + "step": 12357 + }, + { + "epoch": 0.3787544440357975, + "grad_norm": 1.854210243886279, + "learning_rate": 7.135468765214481e-06, + "loss": 0.7133, + "step": 12358 + }, + { + "epoch": 0.3787850925585387, + "grad_norm": 1.887607521652132, + "learning_rate": 7.135019980458688e-06, + "loss": 0.6188, + "step": 12359 + }, + { + "epoch": 0.37881574108127986, + "grad_norm": 0.8284288421760376, + "learning_rate": 7.1345711746659975e-06, + "loss": 0.4538, + "step": 12360 + }, + { + "epoch": 0.37884638960402106, + "grad_norm": 1.805271899038268, + "learning_rate": 7.134122347840831e-06, + "loss": 0.7666, + "step": 12361 + }, + { + "epoch": 0.37887703812676227, + "grad_norm": 1.7326357979975653, + "learning_rate": 7.133673499987609e-06, + "loss": 0.6567, + "step": 12362 + }, + { + "epoch": 0.3789076866495035, + "grad_norm": 1.674793261810419, + "learning_rate": 7.133224631110758e-06, + "loss": 0.646, + "step": 12363 + }, + { + "epoch": 0.3789383351722447, + "grad_norm": 1.7597014434365528, + "learning_rate": 7.132775741214698e-06, + "loss": 0.6575, + "step": 12364 + }, + { + "epoch": 0.3789689836949859, + "grad_norm": 1.6617214862364742, + "learning_rate": 7.132326830303853e-06, + "loss": 0.6889, + "step": 12365 + }, + { + "epoch": 0.3789996322177271, + "grad_norm": 1.7523869801341134, + "learning_rate": 7.131877898382647e-06, + "loss": 0.7224, + "step": 12366 + }, + { + "epoch": 0.3790302807404683, + "grad_norm": 2.2357166957810444, + "learning_rate": 7.131428945455501e-06, + "loss": 0.6212, + "step": 12367 + }, + { + "epoch": 0.3790609292632095, + "grad_norm": 1.704083170293474, + "learning_rate": 
7.130979971526841e-06, + "loss": 0.6992, + "step": 12368 + }, + { + "epoch": 0.3790915777859507, + "grad_norm": 2.009703934801357, + "learning_rate": 7.130530976601091e-06, + "loss": 0.7697, + "step": 12369 + }, + { + "epoch": 0.3791222263086919, + "grad_norm": 1.9866144695737096, + "learning_rate": 7.130081960682673e-06, + "loss": 0.7044, + "step": 12370 + }, + { + "epoch": 0.3791528748314331, + "grad_norm": 1.670503375010645, + "learning_rate": 7.129632923776013e-06, + "loss": 0.6491, + "step": 12371 + }, + { + "epoch": 0.3791835233541743, + "grad_norm": 1.572501048319628, + "learning_rate": 7.129183865885535e-06, + "loss": 0.648, + "step": 12372 + }, + { + "epoch": 0.37921417187691553, + "grad_norm": 1.73313045336625, + "learning_rate": 7.128734787015662e-06, + "loss": 0.6597, + "step": 12373 + }, + { + "epoch": 0.37924482039965673, + "grad_norm": 1.5023923666415393, + "learning_rate": 7.128285687170823e-06, + "loss": 0.6777, + "step": 12374 + }, + { + "epoch": 0.37927546892239794, + "grad_norm": 2.0552875124132846, + "learning_rate": 7.127836566355438e-06, + "loss": 0.6699, + "step": 12375 + }, + { + "epoch": 0.37930611744513915, + "grad_norm": 1.5954375306976218, + "learning_rate": 7.1273874245739375e-06, + "loss": 0.739, + "step": 12376 + }, + { + "epoch": 0.37933676596788035, + "grad_norm": 1.676575223546246, + "learning_rate": 7.126938261830743e-06, + "loss": 0.6796, + "step": 12377 + }, + { + "epoch": 0.37936741449062156, + "grad_norm": 1.5827047781091284, + "learning_rate": 7.126489078130282e-06, + "loss": 0.6632, + "step": 12378 + }, + { + "epoch": 0.37939806301336276, + "grad_norm": 1.5985653311853818, + "learning_rate": 7.126039873476979e-06, + "loss": 0.7009, + "step": 12379 + }, + { + "epoch": 0.37942871153610397, + "grad_norm": 0.8584481024517451, + "learning_rate": 7.125590647875263e-06, + "loss": 0.4761, + "step": 12380 + }, + { + "epoch": 0.3794593600588452, + "grad_norm": 1.8714630515470925, + "learning_rate": 7.125141401329557e-06, + "loss": 0.7277, + "step": 12381 + }, + { + "epoch": 0.3794900085815864, + "grad_norm": 1.616011613113855, + "learning_rate": 7.12469213384429e-06, + "loss": 0.7565, + "step": 12382 + }, + { + "epoch": 0.3795206571043276, + "grad_norm": 1.9228843669872124, + "learning_rate": 7.124242845423887e-06, + "loss": 0.7539, + "step": 12383 + }, + { + "epoch": 0.3795513056270688, + "grad_norm": 0.7975487172620325, + "learning_rate": 7.123793536072776e-06, + "loss": 0.4434, + "step": 12384 + }, + { + "epoch": 0.37958195414981, + "grad_norm": 1.4997324745352771, + "learning_rate": 7.123344205795384e-06, + "loss": 0.6691, + "step": 12385 + }, + { + "epoch": 0.3796126026725512, + "grad_norm": 0.7839329350851566, + "learning_rate": 7.122894854596139e-06, + "loss": 0.4615, + "step": 12386 + }, + { + "epoch": 0.3796432511952924, + "grad_norm": 1.8298877831860474, + "learning_rate": 7.122445482479467e-06, + "loss": 0.6854, + "step": 12387 + }, + { + "epoch": 0.3796738997180336, + "grad_norm": 1.6759096235728765, + "learning_rate": 7.121996089449795e-06, + "loss": 0.757, + "step": 12388 + }, + { + "epoch": 0.3797045482407748, + "grad_norm": 1.9258989709261431, + "learning_rate": 7.121546675511555e-06, + "loss": 0.7656, + "step": 12389 + }, + { + "epoch": 0.379735196763516, + "grad_norm": 1.8296508449892346, + "learning_rate": 7.12109724066917e-06, + "loss": 0.7205, + "step": 12390 + }, + { + "epoch": 0.3797658452862572, + "grad_norm": 1.8045758431111045, + "learning_rate": 7.120647784927075e-06, + "loss": 0.7487, + "step": 12391 + }, + { + "epoch": 
0.3797964938089984, + "grad_norm": 1.7667775892396451, + "learning_rate": 7.120198308289693e-06, + "loss": 0.6922, + "step": 12392 + }, + { + "epoch": 0.3798271423317396, + "grad_norm": 1.85096283104568, + "learning_rate": 7.119748810761454e-06, + "loss": 0.6664, + "step": 12393 + }, + { + "epoch": 0.3798577908544808, + "grad_norm": 1.6724361627251294, + "learning_rate": 7.119299292346788e-06, + "loss": 0.6052, + "step": 12394 + }, + { + "epoch": 0.379888439377222, + "grad_norm": 1.6652444348635866, + "learning_rate": 7.118849753050126e-06, + "loss": 0.6326, + "step": 12395 + }, + { + "epoch": 0.3799190878999632, + "grad_norm": 1.657280300647117, + "learning_rate": 7.1184001928758915e-06, + "loss": 0.6703, + "step": 12396 + }, + { + "epoch": 0.3799497364227044, + "grad_norm": 1.5307207705837147, + "learning_rate": 7.11795061182852e-06, + "loss": 0.7076, + "step": 12397 + }, + { + "epoch": 0.3799803849454456, + "grad_norm": 1.8282204516053364, + "learning_rate": 7.117501009912437e-06, + "loss": 0.7863, + "step": 12398 + }, + { + "epoch": 0.3800110334681868, + "grad_norm": 0.8229131183042544, + "learning_rate": 7.117051387132078e-06, + "loss": 0.4472, + "step": 12399 + }, + { + "epoch": 0.380041681990928, + "grad_norm": 1.7927907215049994, + "learning_rate": 7.1166017434918685e-06, + "loss": 0.7266, + "step": 12400 + }, + { + "epoch": 0.38007233051366923, + "grad_norm": 1.6375243925465948, + "learning_rate": 7.11615207899624e-06, + "loss": 0.6371, + "step": 12401 + }, + { + "epoch": 0.38010297903641044, + "grad_norm": 1.5754559643758113, + "learning_rate": 7.115702393649625e-06, + "loss": 0.6867, + "step": 12402 + }, + { + "epoch": 0.38013362755915164, + "grad_norm": 2.003614227711755, + "learning_rate": 7.115252687456452e-06, + "loss": 0.7122, + "step": 12403 + }, + { + "epoch": 0.38016427608189285, + "grad_norm": 1.7018461090069774, + "learning_rate": 7.114802960421155e-06, + "loss": 0.6963, + "step": 12404 + }, + { + "epoch": 0.38019492460463405, + "grad_norm": 1.768506628793403, + "learning_rate": 7.114353212548159e-06, + "loss": 0.7402, + "step": 12405 + }, + { + "epoch": 0.38022557312737526, + "grad_norm": 1.7494175398725065, + "learning_rate": 7.113903443841904e-06, + "loss": 0.6402, + "step": 12406 + }, + { + "epoch": 0.38025622165011647, + "grad_norm": 1.7892297358579639, + "learning_rate": 7.113453654306815e-06, + "loss": 0.6825, + "step": 12407 + }, + { + "epoch": 0.38028687017285767, + "grad_norm": 1.6727310575571719, + "learning_rate": 7.113003843947328e-06, + "loss": 0.71, + "step": 12408 + }, + { + "epoch": 0.3803175186955989, + "grad_norm": 1.7607096935657418, + "learning_rate": 7.112554012767872e-06, + "loss": 0.6737, + "step": 12409 + }, + { + "epoch": 0.3803481672183401, + "grad_norm": 0.8060388702829894, + "learning_rate": 7.112104160772883e-06, + "loss": 0.4648, + "step": 12410 + }, + { + "epoch": 0.3803788157410813, + "grad_norm": 1.72372157148546, + "learning_rate": 7.11165428796679e-06, + "loss": 0.7317, + "step": 12411 + }, + { + "epoch": 0.3804094642638225, + "grad_norm": 1.578681999076581, + "learning_rate": 7.111204394354026e-06, + "loss": 0.6834, + "step": 12412 + }, + { + "epoch": 0.3804401127865637, + "grad_norm": 1.8215869692522333, + "learning_rate": 7.110754479939025e-06, + "loss": 0.6882, + "step": 12413 + }, + { + "epoch": 0.3804707613093049, + "grad_norm": 1.8535793490523635, + "learning_rate": 7.1103045447262205e-06, + "loss": 0.7543, + "step": 12414 + }, + { + "epoch": 0.3805014098320461, + "grad_norm": 1.8087871679022978, + "learning_rate": 
7.109854588720047e-06, + "loss": 0.686, + "step": 12415 + }, + { + "epoch": 0.3805320583547873, + "grad_norm": 1.860213921729575, + "learning_rate": 7.1094046119249345e-06, + "loss": 0.7278, + "step": 12416 + }, + { + "epoch": 0.3805627068775285, + "grad_norm": 1.7382352941568147, + "learning_rate": 7.108954614345319e-06, + "loss": 0.7006, + "step": 12417 + }, + { + "epoch": 0.38059335540026973, + "grad_norm": 1.8014857719126607, + "learning_rate": 7.108504595985635e-06, + "loss": 0.6963, + "step": 12418 + }, + { + "epoch": 0.38062400392301093, + "grad_norm": 0.7997844985317345, + "learning_rate": 7.108054556850316e-06, + "loss": 0.4539, + "step": 12419 + }, + { + "epoch": 0.38065465244575214, + "grad_norm": 1.5842885084113467, + "learning_rate": 7.107604496943794e-06, + "loss": 0.7401, + "step": 12420 + }, + { + "epoch": 0.38068530096849335, + "grad_norm": 1.8280568698137352, + "learning_rate": 7.107154416270508e-06, + "loss": 0.7942, + "step": 12421 + }, + { + "epoch": 0.3807159494912345, + "grad_norm": 1.6891393205127434, + "learning_rate": 7.106704314834889e-06, + "loss": 0.657, + "step": 12422 + }, + { + "epoch": 0.3807465980139757, + "grad_norm": 1.5979942348979694, + "learning_rate": 7.106254192641373e-06, + "loss": 0.6867, + "step": 12423 + }, + { + "epoch": 0.3807772465367169, + "grad_norm": 1.4449040919844118, + "learning_rate": 7.1058040496943955e-06, + "loss": 0.5254, + "step": 12424 + }, + { + "epoch": 0.3808078950594581, + "grad_norm": 1.614606752408276, + "learning_rate": 7.105353885998393e-06, + "loss": 0.7755, + "step": 12425 + }, + { + "epoch": 0.3808385435821993, + "grad_norm": 1.519699383521856, + "learning_rate": 7.104903701557799e-06, + "loss": 0.6442, + "step": 12426 + }, + { + "epoch": 0.3808691921049405, + "grad_norm": 1.6452857597546366, + "learning_rate": 7.104453496377051e-06, + "loss": 0.6795, + "step": 12427 + }, + { + "epoch": 0.38089984062768173, + "grad_norm": 1.6272944474350695, + "learning_rate": 7.104003270460585e-06, + "loss": 0.6952, + "step": 12428 + }, + { + "epoch": 0.38093048915042294, + "grad_norm": 1.7421337067435836, + "learning_rate": 7.103553023812834e-06, + "loss": 0.7297, + "step": 12429 + }, + { + "epoch": 0.38096113767316414, + "grad_norm": 1.7731909797919143, + "learning_rate": 7.10310275643824e-06, + "loss": 0.6683, + "step": 12430 + }, + { + "epoch": 0.38099178619590535, + "grad_norm": 0.8760956209456109, + "learning_rate": 7.102652468341234e-06, + "loss": 0.4678, + "step": 12431 + }, + { + "epoch": 0.38102243471864655, + "grad_norm": 1.6336629383074779, + "learning_rate": 7.102202159526256e-06, + "loss": 0.6328, + "step": 12432 + }, + { + "epoch": 0.38105308324138776, + "grad_norm": 1.7583305795454358, + "learning_rate": 7.101751829997743e-06, + "loss": 0.6956, + "step": 12433 + }, + { + "epoch": 0.38108373176412896, + "grad_norm": 1.4856429540663054, + "learning_rate": 7.10130147976013e-06, + "loss": 0.6899, + "step": 12434 + }, + { + "epoch": 0.38111438028687017, + "grad_norm": 1.7526233869305177, + "learning_rate": 7.100851108817857e-06, + "loss": 0.6777, + "step": 12435 + }, + { + "epoch": 0.3811450288096114, + "grad_norm": 1.8680383972717907, + "learning_rate": 7.100400717175359e-06, + "loss": 0.6888, + "step": 12436 + }, + { + "epoch": 0.3811756773323526, + "grad_norm": 1.7637412001953348, + "learning_rate": 7.099950304837078e-06, + "loss": 0.6498, + "step": 12437 + }, + { + "epoch": 0.3812063258550938, + "grad_norm": 1.7257604000448497, + "learning_rate": 7.099499871807447e-06, + "loss": 0.6193, + "step": 12438 + }, + { + 
"epoch": 0.381236974377835, + "grad_norm": 1.7066023200439515, + "learning_rate": 7.099049418090907e-06, + "loss": 0.6642, + "step": 12439 + }, + { + "epoch": 0.3812676229005762, + "grad_norm": 1.7407952705047296, + "learning_rate": 7.0985989436918965e-06, + "loss": 0.5681, + "step": 12440 + }, + { + "epoch": 0.3812982714233174, + "grad_norm": 1.7613482300379557, + "learning_rate": 7.098148448614855e-06, + "loss": 0.672, + "step": 12441 + }, + { + "epoch": 0.3813289199460586, + "grad_norm": 1.6333922601386994, + "learning_rate": 7.097697932864216e-06, + "loss": 0.6273, + "step": 12442 + }, + { + "epoch": 0.3813595684687998, + "grad_norm": 1.0495986547807423, + "learning_rate": 7.097247396444426e-06, + "loss": 0.4812, + "step": 12443 + }, + { + "epoch": 0.381390216991541, + "grad_norm": 1.7953085878091275, + "learning_rate": 7.096796839359918e-06, + "loss": 0.7077, + "step": 12444 + }, + { + "epoch": 0.3814208655142822, + "grad_norm": 1.7104426505876584, + "learning_rate": 7.096346261615139e-06, + "loss": 0.5877, + "step": 12445 + }, + { + "epoch": 0.38145151403702343, + "grad_norm": 0.7873064333547676, + "learning_rate": 7.0958956632145206e-06, + "loss": 0.4521, + "step": 12446 + }, + { + "epoch": 0.38148216255976464, + "grad_norm": 1.8090969761769622, + "learning_rate": 7.095445044162505e-06, + "loss": 0.7699, + "step": 12447 + }, + { + "epoch": 0.38151281108250584, + "grad_norm": 0.7560233646309934, + "learning_rate": 7.094994404463534e-06, + "loss": 0.4386, + "step": 12448 + }, + { + "epoch": 0.38154345960524705, + "grad_norm": 1.6907159216461878, + "learning_rate": 7.09454374412205e-06, + "loss": 0.727, + "step": 12449 + }, + { + "epoch": 0.38157410812798825, + "grad_norm": 1.810969686903362, + "learning_rate": 7.094093063142487e-06, + "loss": 0.7088, + "step": 12450 + }, + { + "epoch": 0.38160475665072946, + "grad_norm": 1.616519996771201, + "learning_rate": 7.093642361529291e-06, + "loss": 0.6999, + "step": 12451 + }, + { + "epoch": 0.38163540517347067, + "grad_norm": 1.7480791647219323, + "learning_rate": 7.0931916392869e-06, + "loss": 0.7075, + "step": 12452 + }, + { + "epoch": 0.3816660536962118, + "grad_norm": 1.9002452348966752, + "learning_rate": 7.092740896419757e-06, + "loss": 0.6471, + "step": 12453 + }, + { + "epoch": 0.381696702218953, + "grad_norm": 1.6464615008110597, + "learning_rate": 7.092290132932302e-06, + "loss": 0.7225, + "step": 12454 + }, + { + "epoch": 0.3817273507416942, + "grad_norm": 1.806500493529527, + "learning_rate": 7.09183934882898e-06, + "loss": 0.7677, + "step": 12455 + }, + { + "epoch": 0.38175799926443543, + "grad_norm": 1.5881018111588645, + "learning_rate": 7.091388544114225e-06, + "loss": 0.609, + "step": 12456 + }, + { + "epoch": 0.38178864778717664, + "grad_norm": 1.8193126909696673, + "learning_rate": 7.090937718792486e-06, + "loss": 0.6692, + "step": 12457 + }, + { + "epoch": 0.38181929630991784, + "grad_norm": 1.8163229684018667, + "learning_rate": 7.090486872868203e-06, + "loss": 0.7686, + "step": 12458 + }, + { + "epoch": 0.38184994483265905, + "grad_norm": 1.9161871243691158, + "learning_rate": 7.090036006345816e-06, + "loss": 0.6963, + "step": 12459 + }, + { + "epoch": 0.38188059335540026, + "grad_norm": 1.595539292158665, + "learning_rate": 7.089585119229772e-06, + "loss": 0.5727, + "step": 12460 + }, + { + "epoch": 0.38191124187814146, + "grad_norm": 1.5601455441797327, + "learning_rate": 7.089134211524508e-06, + "loss": 0.6051, + "step": 12461 + }, + { + "epoch": 0.38194189040088267, + "grad_norm": 1.7681141264914182, + 
"learning_rate": 7.088683283234474e-06, + "loss": 0.7475, + "step": 12462 + }, + { + "epoch": 0.38197253892362387, + "grad_norm": 1.8058768980592796, + "learning_rate": 7.088232334364107e-06, + "loss": 0.7386, + "step": 12463 + }, + { + "epoch": 0.3820031874463651, + "grad_norm": 1.5375811011650584, + "learning_rate": 7.087781364917853e-06, + "loss": 0.6797, + "step": 12464 + }, + { + "epoch": 0.3820338359691063, + "grad_norm": 2.014616949914649, + "learning_rate": 7.087330374900154e-06, + "loss": 0.6841, + "step": 12465 + }, + { + "epoch": 0.3820644844918475, + "grad_norm": 1.8363827569815447, + "learning_rate": 7.086879364315455e-06, + "loss": 0.715, + "step": 12466 + }, + { + "epoch": 0.3820951330145887, + "grad_norm": 1.810393711839318, + "learning_rate": 7.0864283331682e-06, + "loss": 0.7653, + "step": 12467 + }, + { + "epoch": 0.3821257815373299, + "grad_norm": 1.919366971321183, + "learning_rate": 7.085977281462834e-06, + "loss": 0.6349, + "step": 12468 + }, + { + "epoch": 0.3821564300600711, + "grad_norm": 1.7980559742204276, + "learning_rate": 7.085526209203799e-06, + "loss": 0.7029, + "step": 12469 + }, + { + "epoch": 0.3821870785828123, + "grad_norm": 1.6903610103212077, + "learning_rate": 7.08507511639554e-06, + "loss": 0.6727, + "step": 12470 + }, + { + "epoch": 0.3822177271055535, + "grad_norm": 1.7654483542169923, + "learning_rate": 7.084624003042504e-06, + "loss": 0.7154, + "step": 12471 + }, + { + "epoch": 0.3822483756282947, + "grad_norm": 1.6334576343125387, + "learning_rate": 7.084172869149133e-06, + "loss": 0.6541, + "step": 12472 + }, + { + "epoch": 0.38227902415103593, + "grad_norm": 1.8556168694383675, + "learning_rate": 7.083721714719874e-06, + "loss": 0.7083, + "step": 12473 + }, + { + "epoch": 0.38230967267377713, + "grad_norm": 1.6034061058664388, + "learning_rate": 7.0832705397591715e-06, + "loss": 0.6473, + "step": 12474 + }, + { + "epoch": 0.38234032119651834, + "grad_norm": 1.7435872314975704, + "learning_rate": 7.082819344271472e-06, + "loss": 0.6963, + "step": 12475 + }, + { + "epoch": 0.38237096971925955, + "grad_norm": 1.6868182516883854, + "learning_rate": 7.08236812826122e-06, + "loss": 0.6216, + "step": 12476 + }, + { + "epoch": 0.38240161824200075, + "grad_norm": 1.7542090520303264, + "learning_rate": 7.0819168917328625e-06, + "loss": 0.6218, + "step": 12477 + }, + { + "epoch": 0.38243226676474196, + "grad_norm": 1.6275742666916746, + "learning_rate": 7.081465634690844e-06, + "loss": 0.6557, + "step": 12478 + }, + { + "epoch": 0.38246291528748316, + "grad_norm": 1.6433742397652336, + "learning_rate": 7.081014357139613e-06, + "loss": 0.6513, + "step": 12479 + }, + { + "epoch": 0.38249356381022437, + "grad_norm": 0.9022823464163364, + "learning_rate": 7.080563059083616e-06, + "loss": 0.457, + "step": 12480 + }, + { + "epoch": 0.3825242123329656, + "grad_norm": 1.7587002562480312, + "learning_rate": 7.0801117405272975e-06, + "loss": 0.6565, + "step": 12481 + }, + { + "epoch": 0.3825548608557068, + "grad_norm": 0.8488887158532309, + "learning_rate": 7.079660401475106e-06, + "loss": 0.4805, + "step": 12482 + }, + { + "epoch": 0.382585509378448, + "grad_norm": 1.655140107753154, + "learning_rate": 7.079209041931489e-06, + "loss": 0.689, + "step": 12483 + }, + { + "epoch": 0.38261615790118914, + "grad_norm": 1.7587863976614244, + "learning_rate": 7.078757661900893e-06, + "loss": 0.7067, + "step": 12484 + }, + { + "epoch": 0.38264680642393034, + "grad_norm": 1.6107516512764437, + "learning_rate": 7.078306261387765e-06, + "loss": 0.6401, + "step": 12485 + }, 
+ { + "epoch": 0.38267745494667155, + "grad_norm": 0.8417742703755524, + "learning_rate": 7.077854840396554e-06, + "loss": 0.4566, + "step": 12486 + }, + { + "epoch": 0.38270810346941275, + "grad_norm": 1.6404464789717905, + "learning_rate": 7.077403398931709e-06, + "loss": 0.7478, + "step": 12487 + }, + { + "epoch": 0.38273875199215396, + "grad_norm": 1.8927734326231094, + "learning_rate": 7.0769519369976755e-06, + "loss": 0.6121, + "step": 12488 + }, + { + "epoch": 0.38276940051489516, + "grad_norm": 1.8636892032507673, + "learning_rate": 7.076500454598903e-06, + "loss": 0.7396, + "step": 12489 + }, + { + "epoch": 0.38280004903763637, + "grad_norm": 1.889860707327642, + "learning_rate": 7.076048951739841e-06, + "loss": 0.7319, + "step": 12490 + }, + { + "epoch": 0.3828306975603776, + "grad_norm": 1.9119639931355825, + "learning_rate": 7.075597428424939e-06, + "loss": 0.7919, + "step": 12491 + }, + { + "epoch": 0.3828613460831188, + "grad_norm": 1.6687845725232924, + "learning_rate": 7.075145884658642e-06, + "loss": 0.678, + "step": 12492 + }, + { + "epoch": 0.38289199460586, + "grad_norm": 1.7176264157898504, + "learning_rate": 7.074694320445401e-06, + "loss": 0.7624, + "step": 12493 + }, + { + "epoch": 0.3829226431286012, + "grad_norm": 1.5820956606812382, + "learning_rate": 7.074242735789668e-06, + "loss": 0.6563, + "step": 12494 + }, + { + "epoch": 0.3829532916513424, + "grad_norm": 1.521110187666586, + "learning_rate": 7.073791130695891e-06, + "loss": 0.7193, + "step": 12495 + }, + { + "epoch": 0.3829839401740836, + "grad_norm": 0.8329322975338033, + "learning_rate": 7.073339505168517e-06, + "loss": 0.4397, + "step": 12496 + }, + { + "epoch": 0.3830145886968248, + "grad_norm": 1.637648085542418, + "learning_rate": 7.072887859212001e-06, + "loss": 0.7062, + "step": 12497 + }, + { + "epoch": 0.383045237219566, + "grad_norm": 0.8291799806690053, + "learning_rate": 7.072436192830788e-06, + "loss": 0.4342, + "step": 12498 + }, + { + "epoch": 0.3830758857423072, + "grad_norm": 0.768568753416181, + "learning_rate": 7.071984506029333e-06, + "loss": 0.4739, + "step": 12499 + }, + { + "epoch": 0.3831065342650484, + "grad_norm": 1.6461777565306428, + "learning_rate": 7.071532798812084e-06, + "loss": 0.6374, + "step": 12500 + }, + { + "epoch": 0.38313718278778963, + "grad_norm": 1.866518680691406, + "learning_rate": 7.071081071183492e-06, + "loss": 0.7168, + "step": 12501 + }, + { + "epoch": 0.38316783131053084, + "grad_norm": 1.5824113883774278, + "learning_rate": 7.0706293231480074e-06, + "loss": 0.6139, + "step": 12502 + }, + { + "epoch": 0.38319847983327204, + "grad_norm": 1.6909818179385276, + "learning_rate": 7.070177554710085e-06, + "loss": 0.63, + "step": 12503 + }, + { + "epoch": 0.38322912835601325, + "grad_norm": 1.6675419821024484, + "learning_rate": 7.069725765874171e-06, + "loss": 0.7174, + "step": 12504 + }, + { + "epoch": 0.38325977687875445, + "grad_norm": 1.7617329560493045, + "learning_rate": 7.06927395664472e-06, + "loss": 0.6922, + "step": 12505 + }, + { + "epoch": 0.38329042540149566, + "grad_norm": 1.8697769920840686, + "learning_rate": 7.068822127026183e-06, + "loss": 0.6962, + "step": 12506 + }, + { + "epoch": 0.38332107392423687, + "grad_norm": 1.709579798093553, + "learning_rate": 7.0683702770230135e-06, + "loss": 0.7058, + "step": 12507 + }, + { + "epoch": 0.38335172244697807, + "grad_norm": 1.0777164750831127, + "learning_rate": 7.067918406639661e-06, + "loss": 0.4525, + "step": 12508 + }, + { + "epoch": 0.3833823709697193, + "grad_norm": 1.6969737787204937, + 
"learning_rate": 7.067466515880581e-06, + "loss": 0.6247, + "step": 12509 + }, + { + "epoch": 0.3834130194924605, + "grad_norm": 1.9555491681207569, + "learning_rate": 7.067014604750223e-06, + "loss": 0.6862, + "step": 12510 + }, + { + "epoch": 0.3834436680152017, + "grad_norm": 1.6581488466461414, + "learning_rate": 7.066562673253042e-06, + "loss": 0.7193, + "step": 12511 + }, + { + "epoch": 0.3834743165379429, + "grad_norm": 2.0140434199242065, + "learning_rate": 7.0661107213934885e-06, + "loss": 0.7126, + "step": 12512 + }, + { + "epoch": 0.3835049650606841, + "grad_norm": 1.616062127345154, + "learning_rate": 7.065658749176019e-06, + "loss": 0.6347, + "step": 12513 + }, + { + "epoch": 0.3835356135834253, + "grad_norm": 0.8198219439548036, + "learning_rate": 7.0652067566050865e-06, + "loss": 0.4559, + "step": 12514 + }, + { + "epoch": 0.38356626210616646, + "grad_norm": 0.8371310802599218, + "learning_rate": 7.064754743685141e-06, + "loss": 0.4429, + "step": 12515 + }, + { + "epoch": 0.38359691062890766, + "grad_norm": 1.9326329852640602, + "learning_rate": 7.064302710420641e-06, + "loss": 0.8029, + "step": 12516 + }, + { + "epoch": 0.38362755915164887, + "grad_norm": 1.697024558440496, + "learning_rate": 7.063850656816036e-06, + "loss": 0.6832, + "step": 12517 + }, + { + "epoch": 0.3836582076743901, + "grad_norm": 1.8170138341673014, + "learning_rate": 7.063398582875785e-06, + "loss": 0.688, + "step": 12518 + }, + { + "epoch": 0.3836888561971313, + "grad_norm": 1.845278668706287, + "learning_rate": 7.062946488604337e-06, + "loss": 0.5853, + "step": 12519 + }, + { + "epoch": 0.3837195047198725, + "grad_norm": 1.718421739917768, + "learning_rate": 7.06249437400615e-06, + "loss": 0.7509, + "step": 12520 + }, + { + "epoch": 0.3837501532426137, + "grad_norm": 1.651833523011401, + "learning_rate": 7.062042239085679e-06, + "loss": 0.5707, + "step": 12521 + }, + { + "epoch": 0.3837808017653549, + "grad_norm": 1.60074545936047, + "learning_rate": 7.0615900838473785e-06, + "loss": 0.6696, + "step": 12522 + }, + { + "epoch": 0.3838114502880961, + "grad_norm": 1.767908589014101, + "learning_rate": 7.061137908295703e-06, + "loss": 0.7158, + "step": 12523 + }, + { + "epoch": 0.3838420988108373, + "grad_norm": 1.8171516981162439, + "learning_rate": 7.060685712435107e-06, + "loss": 0.707, + "step": 12524 + }, + { + "epoch": 0.3838727473335785, + "grad_norm": 1.8563072569138648, + "learning_rate": 7.06023349627005e-06, + "loss": 0.7423, + "step": 12525 + }, + { + "epoch": 0.3839033958563197, + "grad_norm": 1.763668985300677, + "learning_rate": 7.059781259804984e-06, + "loss": 0.7021, + "step": 12526 + }, + { + "epoch": 0.3839340443790609, + "grad_norm": 1.733807263702497, + "learning_rate": 7.0593290030443665e-06, + "loss": 0.7098, + "step": 12527 + }, + { + "epoch": 0.38396469290180213, + "grad_norm": 1.677770025876056, + "learning_rate": 7.058876725992653e-06, + "loss": 0.5843, + "step": 12528 + }, + { + "epoch": 0.38399534142454333, + "grad_norm": 1.6615966998929084, + "learning_rate": 7.0584244286543e-06, + "loss": 0.7098, + "step": 12529 + }, + { + "epoch": 0.38402598994728454, + "grad_norm": 1.7618897854065645, + "learning_rate": 7.0579721110337655e-06, + "loss": 0.682, + "step": 12530 + }, + { + "epoch": 0.38405663847002575, + "grad_norm": 1.6971212574492094, + "learning_rate": 7.057519773135505e-06, + "loss": 0.5475, + "step": 12531 + }, + { + "epoch": 0.38408728699276695, + "grad_norm": 1.8711775776443746, + "learning_rate": 7.057067414963974e-06, + "loss": 0.6959, + "step": 12532 + }, + { + 
"epoch": 0.38411793551550816, + "grad_norm": 1.841501365408269, + "learning_rate": 7.056615036523633e-06, + "loss": 0.7913, + "step": 12533 + }, + { + "epoch": 0.38414858403824936, + "grad_norm": 1.8614122802526112, + "learning_rate": 7.056162637818939e-06, + "loss": 0.7925, + "step": 12534 + }, + { + "epoch": 0.38417923256099057, + "grad_norm": 1.772840188049756, + "learning_rate": 7.055710218854347e-06, + "loss": 0.665, + "step": 12535 + }, + { + "epoch": 0.3842098810837318, + "grad_norm": 1.9410148046073048, + "learning_rate": 7.055257779634316e-06, + "loss": 0.6287, + "step": 12536 + }, + { + "epoch": 0.384240529606473, + "grad_norm": 1.6811799987513574, + "learning_rate": 7.054805320163305e-06, + "loss": 0.7019, + "step": 12537 + }, + { + "epoch": 0.3842711781292142, + "grad_norm": 0.9624292001934165, + "learning_rate": 7.05435284044577e-06, + "loss": 0.453, + "step": 12538 + }, + { + "epoch": 0.3843018266519554, + "grad_norm": 1.6858894151479469, + "learning_rate": 7.053900340486172e-06, + "loss": 0.7415, + "step": 12539 + }, + { + "epoch": 0.3843324751746966, + "grad_norm": 1.5749982639072808, + "learning_rate": 7.053447820288968e-06, + "loss": 0.7041, + "step": 12540 + }, + { + "epoch": 0.3843631236974378, + "grad_norm": 0.8307311398119095, + "learning_rate": 7.052995279858619e-06, + "loss": 0.473, + "step": 12541 + }, + { + "epoch": 0.384393772220179, + "grad_norm": 1.550081710642525, + "learning_rate": 7.05254271919958e-06, + "loss": 0.6427, + "step": 12542 + }, + { + "epoch": 0.3844244207429202, + "grad_norm": 1.5514997715504673, + "learning_rate": 7.052090138316312e-06, + "loss": 0.6067, + "step": 12543 + }, + { + "epoch": 0.3844550692656614, + "grad_norm": 1.7995955167665494, + "learning_rate": 7.051637537213276e-06, + "loss": 0.7247, + "step": 12544 + }, + { + "epoch": 0.3844857177884026, + "grad_norm": 1.73143276213533, + "learning_rate": 7.051184915894932e-06, + "loss": 0.7208, + "step": 12545 + }, + { + "epoch": 0.3845163663111438, + "grad_norm": 0.8425183826717481, + "learning_rate": 7.050732274365737e-06, + "loss": 0.4671, + "step": 12546 + }, + { + "epoch": 0.384547014833885, + "grad_norm": 2.027752909847489, + "learning_rate": 7.050279612630151e-06, + "loss": 0.7239, + "step": 12547 + }, + { + "epoch": 0.3845776633566262, + "grad_norm": 1.7646663974697874, + "learning_rate": 7.049826930692636e-06, + "loss": 0.648, + "step": 12548 + }, + { + "epoch": 0.3846083118793674, + "grad_norm": 1.738681979440453, + "learning_rate": 7.0493742285576525e-06, + "loss": 0.664, + "step": 12549 + }, + { + "epoch": 0.3846389604021086, + "grad_norm": 1.7982734625105647, + "learning_rate": 7.048921506229659e-06, + "loss": 0.7678, + "step": 12550 + }, + { + "epoch": 0.3846696089248498, + "grad_norm": 1.7037233695184246, + "learning_rate": 7.0484687637131185e-06, + "loss": 0.7474, + "step": 12551 + }, + { + "epoch": 0.384700257447591, + "grad_norm": 1.8236339857462762, + "learning_rate": 7.048016001012492e-06, + "loss": 0.6512, + "step": 12552 + }, + { + "epoch": 0.3847309059703322, + "grad_norm": 1.7301884930789346, + "learning_rate": 7.047563218132238e-06, + "loss": 0.6322, + "step": 12553 + }, + { + "epoch": 0.3847615544930734, + "grad_norm": 0.8379653451488212, + "learning_rate": 7.047110415076821e-06, + "loss": 0.4559, + "step": 12554 + }, + { + "epoch": 0.3847922030158146, + "grad_norm": 1.6717616436506275, + "learning_rate": 7.0466575918507e-06, + "loss": 0.6032, + "step": 12555 + }, + { + "epoch": 0.38482285153855583, + "grad_norm": 2.272977812769102, + "learning_rate": 
7.04620474845834e-06, + "loss": 0.784, + "step": 12556 + }, + { + "epoch": 0.38485350006129704, + "grad_norm": 1.8637123565303662, + "learning_rate": 7.045751884904201e-06, + "loss": 0.6493, + "step": 12557 + }, + { + "epoch": 0.38488414858403824, + "grad_norm": 1.7015398751860138, + "learning_rate": 7.045299001192743e-06, + "loss": 0.5514, + "step": 12558 + }, + { + "epoch": 0.38491479710677945, + "grad_norm": 1.7683350963320437, + "learning_rate": 7.0448460973284325e-06, + "loss": 0.6679, + "step": 12559 + }, + { + "epoch": 0.38494544562952066, + "grad_norm": 1.8751361821406198, + "learning_rate": 7.0443931733157285e-06, + "loss": 0.7275, + "step": 12560 + }, + { + "epoch": 0.38497609415226186, + "grad_norm": 1.5634251551928733, + "learning_rate": 7.043940229159099e-06, + "loss": 0.6187, + "step": 12561 + }, + { + "epoch": 0.38500674267500307, + "grad_norm": 1.7180800114449941, + "learning_rate": 7.043487264863e-06, + "loss": 0.6291, + "step": 12562 + }, + { + "epoch": 0.38503739119774427, + "grad_norm": 1.7792759257116328, + "learning_rate": 7.0430342804318996e-06, + "loss": 0.6535, + "step": 12563 + }, + { + "epoch": 0.3850680397204855, + "grad_norm": 1.9326708655173868, + "learning_rate": 7.0425812758702595e-06, + "loss": 0.6495, + "step": 12564 + }, + { + "epoch": 0.3850986882432267, + "grad_norm": 1.7540754632871818, + "learning_rate": 7.042128251182543e-06, + "loss": 0.7112, + "step": 12565 + }, + { + "epoch": 0.3851293367659679, + "grad_norm": 1.551495813345524, + "learning_rate": 7.041675206373215e-06, + "loss": 0.7218, + "step": 12566 + }, + { + "epoch": 0.3851599852887091, + "grad_norm": 1.6413644575546644, + "learning_rate": 7.041222141446737e-06, + "loss": 0.6757, + "step": 12567 + }, + { + "epoch": 0.3851906338114503, + "grad_norm": 1.778317274477018, + "learning_rate": 7.0407690564075795e-06, + "loss": 0.7715, + "step": 12568 + }, + { + "epoch": 0.3852212823341915, + "grad_norm": 1.600529927817669, + "learning_rate": 7.0403159512601985e-06, + "loss": 0.5951, + "step": 12569 + }, + { + "epoch": 0.3852519308569327, + "grad_norm": 1.7722837413419656, + "learning_rate": 7.039862826009063e-06, + "loss": 0.595, + "step": 12570 + }, + { + "epoch": 0.3852825793796739, + "grad_norm": 1.998855305110881, + "learning_rate": 7.039409680658636e-06, + "loss": 0.7228, + "step": 12571 + }, + { + "epoch": 0.3853132279024151, + "grad_norm": 1.8655436387442537, + "learning_rate": 7.038956515213387e-06, + "loss": 0.7431, + "step": 12572 + }, + { + "epoch": 0.38534387642515633, + "grad_norm": 1.8459385923118228, + "learning_rate": 7.038503329677775e-06, + "loss": 0.7295, + "step": 12573 + }, + { + "epoch": 0.38537452494789753, + "grad_norm": 1.748002082382606, + "learning_rate": 7.03805012405627e-06, + "loss": 0.6903, + "step": 12574 + }, + { + "epoch": 0.38540517347063874, + "grad_norm": 1.655190732148016, + "learning_rate": 7.037596898353333e-06, + "loss": 0.7454, + "step": 12575 + }, + { + "epoch": 0.38543582199337995, + "grad_norm": 0.8419412356737042, + "learning_rate": 7.037143652573435e-06, + "loss": 0.4794, + "step": 12576 + }, + { + "epoch": 0.3854664705161211, + "grad_norm": 1.7640900437885103, + "learning_rate": 7.036690386721038e-06, + "loss": 0.6029, + "step": 12577 + }, + { + "epoch": 0.3854971190388623, + "grad_norm": 1.5644186586223745, + "learning_rate": 7.0362371008006104e-06, + "loss": 0.5066, + "step": 12578 + }, + { + "epoch": 0.3855277675616035, + "grad_norm": 1.7626361677934304, + "learning_rate": 7.035783794816616e-06, + "loss": 0.6925, + "step": 12579 + }, + { + "epoch": 
0.3855584160843447, + "grad_norm": 1.7303293788654877, + "learning_rate": 7.035330468773524e-06, + "loss": 0.6327, + "step": 12580 + }, + { + "epoch": 0.3855890646070859, + "grad_norm": 1.976113691102881, + "learning_rate": 7.034877122675801e-06, + "loss": 0.7108, + "step": 12581 + }, + { + "epoch": 0.3856197131298271, + "grad_norm": 1.722097434483835, + "learning_rate": 7.034423756527912e-06, + "loss": 0.6614, + "step": 12582 + }, + { + "epoch": 0.38565036165256833, + "grad_norm": 1.5610378215511926, + "learning_rate": 7.033970370334325e-06, + "loss": 0.6201, + "step": 12583 + }, + { + "epoch": 0.38568101017530954, + "grad_norm": 1.5582772459777205, + "learning_rate": 7.033516964099508e-06, + "loss": 0.6646, + "step": 12584 + }, + { + "epoch": 0.38571165869805074, + "grad_norm": 1.9135013785401147, + "learning_rate": 7.033063537827929e-06, + "loss": 0.6632, + "step": 12585 + }, + { + "epoch": 0.38574230722079195, + "grad_norm": 1.769540098352752, + "learning_rate": 7.032610091524052e-06, + "loss": 0.6736, + "step": 12586 + }, + { + "epoch": 0.38577295574353315, + "grad_norm": 1.6619501359324254, + "learning_rate": 7.032156625192353e-06, + "loss": 0.6149, + "step": 12587 + }, + { + "epoch": 0.38580360426627436, + "grad_norm": 1.7646846507152667, + "learning_rate": 7.031703138837289e-06, + "loss": 0.7456, + "step": 12588 + }, + { + "epoch": 0.38583425278901556, + "grad_norm": 1.6394081794357291, + "learning_rate": 7.031249632463337e-06, + "loss": 0.7204, + "step": 12589 + }, + { + "epoch": 0.38586490131175677, + "grad_norm": 1.6778224339482113, + "learning_rate": 7.030796106074962e-06, + "loss": 0.6721, + "step": 12590 + }, + { + "epoch": 0.385895549834498, + "grad_norm": 1.9835748331513037, + "learning_rate": 7.030342559676633e-06, + "loss": 0.6082, + "step": 12591 + }, + { + "epoch": 0.3859261983572392, + "grad_norm": 1.6878754868052703, + "learning_rate": 7.029888993272821e-06, + "loss": 0.6887, + "step": 12592 + }, + { + "epoch": 0.3859568468799804, + "grad_norm": 1.7287728102659656, + "learning_rate": 7.02943540686799e-06, + "loss": 0.665, + "step": 12593 + }, + { + "epoch": 0.3859874954027216, + "grad_norm": 0.8829288936098492, + "learning_rate": 7.028981800466617e-06, + "loss": 0.4855, + "step": 12594 + }, + { + "epoch": 0.3860181439254628, + "grad_norm": 1.556559806179213, + "learning_rate": 7.028528174073165e-06, + "loss": 0.7073, + "step": 12595 + }, + { + "epoch": 0.386048792448204, + "grad_norm": 1.8027102129983112, + "learning_rate": 7.028074527692106e-06, + "loss": 0.7725, + "step": 12596 + }, + { + "epoch": 0.3860794409709452, + "grad_norm": 1.627757686894866, + "learning_rate": 7.027620861327908e-06, + "loss": 0.7713, + "step": 12597 + }, + { + "epoch": 0.3861100894936864, + "grad_norm": 1.83941173106393, + "learning_rate": 7.027167174985046e-06, + "loss": 0.7043, + "step": 12598 + }, + { + "epoch": 0.3861407380164276, + "grad_norm": 1.8508142512280314, + "learning_rate": 7.026713468667985e-06, + "loss": 0.7526, + "step": 12599 + }, + { + "epoch": 0.3861713865391688, + "grad_norm": 1.836115887403888, + "learning_rate": 7.026259742381199e-06, + "loss": 0.7051, + "step": 12600 + }, + { + "epoch": 0.38620203506191003, + "grad_norm": 1.7111623416513777, + "learning_rate": 7.0258059961291555e-06, + "loss": 0.6453, + "step": 12601 + }, + { + "epoch": 0.38623268358465124, + "grad_norm": 1.414312941868564, + "learning_rate": 7.025352229916329e-06, + "loss": 0.5395, + "step": 12602 + }, + { + "epoch": 0.38626333210739244, + "grad_norm": 0.8937469556090092, + "learning_rate": 
7.024898443747189e-06, + "loss": 0.4478, + "step": 12603 + }, + { + "epoch": 0.38629398063013365, + "grad_norm": 1.6226467573910117, + "learning_rate": 7.024444637626206e-06, + "loss": 0.7081, + "step": 12604 + }, + { + "epoch": 0.38632462915287485, + "grad_norm": 1.8707654380121206, + "learning_rate": 7.023990811557851e-06, + "loss": 0.5779, + "step": 12605 + }, + { + "epoch": 0.38635527767561606, + "grad_norm": 2.0979403246218147, + "learning_rate": 7.023536965546598e-06, + "loss": 0.6954, + "step": 12606 + }, + { + "epoch": 0.38638592619835727, + "grad_norm": 0.7573938656753694, + "learning_rate": 7.023083099596917e-06, + "loss": 0.4501, + "step": 12607 + }, + { + "epoch": 0.3864165747210984, + "grad_norm": 1.8267374778017103, + "learning_rate": 7.022629213713279e-06, + "loss": 0.7027, + "step": 12608 + }, + { + "epoch": 0.3864472232438396, + "grad_norm": 1.5591360205403832, + "learning_rate": 7.0221753079001595e-06, + "loss": 0.6445, + "step": 12609 + }, + { + "epoch": 0.3864778717665808, + "grad_norm": 1.5899005487954998, + "learning_rate": 7.021721382162029e-06, + "loss": 0.6092, + "step": 12610 + }, + { + "epoch": 0.38650852028932203, + "grad_norm": 1.5964472701529042, + "learning_rate": 7.021267436503362e-06, + "loss": 0.6564, + "step": 12611 + }, + { + "epoch": 0.38653916881206324, + "grad_norm": 1.8001735383825996, + "learning_rate": 7.0208134709286265e-06, + "loss": 0.6637, + "step": 12612 + }, + { + "epoch": 0.38656981733480444, + "grad_norm": 1.981768206014162, + "learning_rate": 7.020359485442302e-06, + "loss": 0.7071, + "step": 12613 + }, + { + "epoch": 0.38660046585754565, + "grad_norm": 1.73101842966499, + "learning_rate": 7.019905480048858e-06, + "loss": 0.6685, + "step": 12614 + }, + { + "epoch": 0.38663111438028686, + "grad_norm": 1.8001237199129818, + "learning_rate": 7.019451454752767e-06, + "loss": 0.6598, + "step": 12615 + }, + { + "epoch": 0.38666176290302806, + "grad_norm": 1.552309887193767, + "learning_rate": 7.018997409558504e-06, + "loss": 0.7581, + "step": 12616 + }, + { + "epoch": 0.38669241142576927, + "grad_norm": 1.7272448098922697, + "learning_rate": 7.018543344470544e-06, + "loss": 0.7331, + "step": 12617 + }, + { + "epoch": 0.3867230599485105, + "grad_norm": 1.7807824327963642, + "learning_rate": 7.01808925949336e-06, + "loss": 0.7205, + "step": 12618 + }, + { + "epoch": 0.3867537084712517, + "grad_norm": 1.605772548096926, + "learning_rate": 7.0176351546314256e-06, + "loss": 0.7487, + "step": 12619 + }, + { + "epoch": 0.3867843569939929, + "grad_norm": 1.699652098361673, + "learning_rate": 7.017181029889216e-06, + "loss": 0.6116, + "step": 12620 + }, + { + "epoch": 0.3868150055167341, + "grad_norm": 1.7448345883936498, + "learning_rate": 7.016726885271206e-06, + "loss": 0.6843, + "step": 12621 + }, + { + "epoch": 0.3868456540394753, + "grad_norm": 0.8624540706833134, + "learning_rate": 7.01627272078187e-06, + "loss": 0.4711, + "step": 12622 + }, + { + "epoch": 0.3868763025622165, + "grad_norm": 1.5813556883817963, + "learning_rate": 7.0158185364256825e-06, + "loss": 0.7346, + "step": 12623 + }, + { + "epoch": 0.3869069510849577, + "grad_norm": 1.5497509315779128, + "learning_rate": 7.0153643322071195e-06, + "loss": 0.6503, + "step": 12624 + }, + { + "epoch": 0.3869375996076989, + "grad_norm": 1.7705733342410763, + "learning_rate": 7.014910108130655e-06, + "loss": 0.687, + "step": 12625 + }, + { + "epoch": 0.3869682481304401, + "grad_norm": 1.7237011853557924, + "learning_rate": 7.014455864200768e-06, + "loss": 0.6654, + "step": 12626 + }, + { + 
"epoch": 0.3869988966531813, + "grad_norm": 0.8130074626865187, + "learning_rate": 7.01400160042193e-06, + "loss": 0.4656, + "step": 12627 + }, + { + "epoch": 0.38702954517592253, + "grad_norm": 2.037099733427609, + "learning_rate": 7.01354731679862e-06, + "loss": 0.7771, + "step": 12628 + }, + { + "epoch": 0.38706019369866373, + "grad_norm": 1.5363458471023497, + "learning_rate": 7.013093013335312e-06, + "loss": 0.6808, + "step": 12629 + }, + { + "epoch": 0.38709084222140494, + "grad_norm": 1.8109995045247582, + "learning_rate": 7.012638690036485e-06, + "loss": 0.5774, + "step": 12630 + }, + { + "epoch": 0.38712149074414615, + "grad_norm": 1.6303647276251971, + "learning_rate": 7.012184346906612e-06, + "loss": 0.6423, + "step": 12631 + }, + { + "epoch": 0.38715213926688735, + "grad_norm": 1.7080320246605136, + "learning_rate": 7.011729983950174e-06, + "loss": 0.7013, + "step": 12632 + }, + { + "epoch": 0.38718278778962856, + "grad_norm": 0.8082519643273688, + "learning_rate": 7.011275601171643e-06, + "loss": 0.4723, + "step": 12633 + }, + { + "epoch": 0.38721343631236976, + "grad_norm": 1.685914001140179, + "learning_rate": 7.010821198575501e-06, + "loss": 0.6458, + "step": 12634 + }, + { + "epoch": 0.38724408483511097, + "grad_norm": 1.5937640463719887, + "learning_rate": 7.010366776166224e-06, + "loss": 0.7147, + "step": 12635 + }, + { + "epoch": 0.3872747333578522, + "grad_norm": 1.8933941121665627, + "learning_rate": 7.009912333948287e-06, + "loss": 0.7071, + "step": 12636 + }, + { + "epoch": 0.3873053818805934, + "grad_norm": 1.6142178553101856, + "learning_rate": 7.009457871926169e-06, + "loss": 0.6595, + "step": 12637 + }, + { + "epoch": 0.3873360304033346, + "grad_norm": 1.6713093899768974, + "learning_rate": 7.009003390104351e-06, + "loss": 0.6901, + "step": 12638 + }, + { + "epoch": 0.38736667892607574, + "grad_norm": 1.6406824686689843, + "learning_rate": 7.008548888487308e-06, + "loss": 0.6916, + "step": 12639 + }, + { + "epoch": 0.38739732744881694, + "grad_norm": 1.681594349825039, + "learning_rate": 7.008094367079516e-06, + "loss": 0.576, + "step": 12640 + }, + { + "epoch": 0.38742797597155815, + "grad_norm": 1.5637778158829712, + "learning_rate": 7.00763982588546e-06, + "loss": 0.6646, + "step": 12641 + }, + { + "epoch": 0.38745862449429935, + "grad_norm": 1.8401102813821884, + "learning_rate": 7.007185264909613e-06, + "loss": 0.6319, + "step": 12642 + }, + { + "epoch": 0.38748927301704056, + "grad_norm": 1.661769133767752, + "learning_rate": 7.006730684156456e-06, + "loss": 0.6831, + "step": 12643 + }, + { + "epoch": 0.38751992153978176, + "grad_norm": 0.8469472334716449, + "learning_rate": 7.0062760836304685e-06, + "loss": 0.4907, + "step": 12644 + }, + { + "epoch": 0.38755057006252297, + "grad_norm": 1.659578378341949, + "learning_rate": 7.00582146333613e-06, + "loss": 0.64, + "step": 12645 + }, + { + "epoch": 0.3875812185852642, + "grad_norm": 1.674913681174053, + "learning_rate": 7.0053668232779195e-06, + "loss": 0.6602, + "step": 12646 + }, + { + "epoch": 0.3876118671080054, + "grad_norm": 1.6065001012428397, + "learning_rate": 7.004912163460316e-06, + "loss": 0.6134, + "step": 12647 + }, + { + "epoch": 0.3876425156307466, + "grad_norm": 1.6830729111569194, + "learning_rate": 7.004457483887799e-06, + "loss": 0.6313, + "step": 12648 + }, + { + "epoch": 0.3876731641534878, + "grad_norm": 0.8078558932645793, + "learning_rate": 7.004002784564852e-06, + "loss": 0.4593, + "step": 12649 + }, + { + "epoch": 0.387703812676229, + "grad_norm": 1.664093608196273, + 
"learning_rate": 7.003548065495951e-06, + "loss": 0.6482, + "step": 12650 + }, + { + "epoch": 0.3877344611989702, + "grad_norm": 1.7896244633782834, + "learning_rate": 7.003093326685578e-06, + "loss": 0.6502, + "step": 12651 + }, + { + "epoch": 0.3877651097217114, + "grad_norm": 1.6672678125382048, + "learning_rate": 7.002638568138214e-06, + "loss": 0.7447, + "step": 12652 + }, + { + "epoch": 0.3877957582444526, + "grad_norm": 1.531486537405969, + "learning_rate": 7.002183789858341e-06, + "loss": 0.698, + "step": 12653 + }, + { + "epoch": 0.3878264067671938, + "grad_norm": 1.6987061406330657, + "learning_rate": 7.00172899185044e-06, + "loss": 0.6601, + "step": 12654 + }, + { + "epoch": 0.387857055289935, + "grad_norm": 1.6560626277899524, + "learning_rate": 7.0012741741189886e-06, + "loss": 0.6354, + "step": 12655 + }, + { + "epoch": 0.38788770381267623, + "grad_norm": 1.5859928457495538, + "learning_rate": 7.0008193366684706e-06, + "loss": 0.646, + "step": 12656 + }, + { + "epoch": 0.38791835233541744, + "grad_norm": 1.8454804433206013, + "learning_rate": 7.00036447950337e-06, + "loss": 0.7207, + "step": 12657 + }, + { + "epoch": 0.38794900085815864, + "grad_norm": 1.7028600879656, + "learning_rate": 6.999909602628164e-06, + "loss": 0.6549, + "step": 12658 + }, + { + "epoch": 0.38797964938089985, + "grad_norm": 1.7788885358533122, + "learning_rate": 6.999454706047338e-06, + "loss": 0.6271, + "step": 12659 + }, + { + "epoch": 0.38801029790364105, + "grad_norm": 1.7742368043810637, + "learning_rate": 6.998999789765372e-06, + "loss": 0.7273, + "step": 12660 + }, + { + "epoch": 0.38804094642638226, + "grad_norm": 1.7914735916917235, + "learning_rate": 6.998544853786753e-06, + "loss": 0.7547, + "step": 12661 + }, + { + "epoch": 0.38807159494912347, + "grad_norm": 0.7947825446291579, + "learning_rate": 6.998089898115956e-06, + "loss": 0.4415, + "step": 12662 + }, + { + "epoch": 0.38810224347186467, + "grad_norm": 1.5818238262784583, + "learning_rate": 6.99763492275747e-06, + "loss": 0.6812, + "step": 12663 + }, + { + "epoch": 0.3881328919946059, + "grad_norm": 1.5880697671627038, + "learning_rate": 6.997179927715777e-06, + "loss": 0.7236, + "step": 12664 + }, + { + "epoch": 0.3881635405173471, + "grad_norm": 3.4177724780849768, + "learning_rate": 6.996724912995359e-06, + "loss": 0.6253, + "step": 12665 + }, + { + "epoch": 0.3881941890400883, + "grad_norm": 1.8453853513918317, + "learning_rate": 6.996269878600698e-06, + "loss": 0.6637, + "step": 12666 + }, + { + "epoch": 0.3882248375628295, + "grad_norm": 1.9440700866720277, + "learning_rate": 6.99581482453628e-06, + "loss": 0.8003, + "step": 12667 + }, + { + "epoch": 0.3882554860855707, + "grad_norm": 1.6520697549411745, + "learning_rate": 6.995359750806587e-06, + "loss": 0.7039, + "step": 12668 + }, + { + "epoch": 0.3882861346083119, + "grad_norm": 1.7573801284597406, + "learning_rate": 6.994904657416105e-06, + "loss": 0.6584, + "step": 12669 + }, + { + "epoch": 0.38831678313105306, + "grad_norm": 2.5418774115039593, + "learning_rate": 6.994449544369316e-06, + "loss": 0.7549, + "step": 12670 + }, + { + "epoch": 0.38834743165379426, + "grad_norm": 2.8642274543711292, + "learning_rate": 6.993994411670706e-06, + "loss": 0.6841, + "step": 12671 + }, + { + "epoch": 0.38837808017653547, + "grad_norm": 1.6931632049373855, + "learning_rate": 6.9935392593247595e-06, + "loss": 0.6555, + "step": 12672 + }, + { + "epoch": 0.3884087286992767, + "grad_norm": 0.871118242867026, + "learning_rate": 6.99308408733596e-06, + "loss": 0.4646, + "step": 12673 + }, + 
{ + "epoch": 0.3884393772220179, + "grad_norm": 1.833096875741303, + "learning_rate": 6.992628895708792e-06, + "loss": 0.7903, + "step": 12674 + }, + { + "epoch": 0.3884700257447591, + "grad_norm": 1.8070423393361357, + "learning_rate": 6.992173684447743e-06, + "loss": 0.7415, + "step": 12675 + }, + { + "epoch": 0.3885006742675003, + "grad_norm": 1.7002493024940741, + "learning_rate": 6.991718453557297e-06, + "loss": 0.7133, + "step": 12676 + }, + { + "epoch": 0.3885313227902415, + "grad_norm": 1.7035555127379378, + "learning_rate": 6.991263203041938e-06, + "loss": 0.6415, + "step": 12677 + }, + { + "epoch": 0.3885619713129827, + "grad_norm": 1.703624093279578, + "learning_rate": 6.990807932906154e-06, + "loss": 0.6455, + "step": 12678 + }, + { + "epoch": 0.3885926198357239, + "grad_norm": 1.9249162461553257, + "learning_rate": 6.99035264315443e-06, + "loss": 0.7155, + "step": 12679 + }, + { + "epoch": 0.3886232683584651, + "grad_norm": 0.8135470568899501, + "learning_rate": 6.9898973337912534e-06, + "loss": 0.4487, + "step": 12680 + }, + { + "epoch": 0.3886539168812063, + "grad_norm": 1.7775978075930614, + "learning_rate": 6.989442004821108e-06, + "loss": 0.6966, + "step": 12681 + }, + { + "epoch": 0.3886845654039475, + "grad_norm": 1.770894567604518, + "learning_rate": 6.988986656248482e-06, + "loss": 0.6175, + "step": 12682 + }, + { + "epoch": 0.38871521392668873, + "grad_norm": 1.6025021715367855, + "learning_rate": 6.98853128807786e-06, + "loss": 0.6698, + "step": 12683 + }, + { + "epoch": 0.38874586244942994, + "grad_norm": 1.6692246295185458, + "learning_rate": 6.988075900313734e-06, + "loss": 0.558, + "step": 12684 + }, + { + "epoch": 0.38877651097217114, + "grad_norm": 1.486427752157363, + "learning_rate": 6.987620492960584e-06, + "loss": 0.6306, + "step": 12685 + }, + { + "epoch": 0.38880715949491235, + "grad_norm": 1.4382756540372517, + "learning_rate": 6.987165066022902e-06, + "loss": 0.6636, + "step": 12686 + }, + { + "epoch": 0.38883780801765355, + "grad_norm": 1.5627652918928732, + "learning_rate": 6.986709619505173e-06, + "loss": 0.6089, + "step": 12687 + }, + { + "epoch": 0.38886845654039476, + "grad_norm": 1.7500102172331096, + "learning_rate": 6.986254153411888e-06, + "loss": 0.6296, + "step": 12688 + }, + { + "epoch": 0.38889910506313596, + "grad_norm": 1.7700675527155267, + "learning_rate": 6.985798667747531e-06, + "loss": 0.6312, + "step": 12689 + }, + { + "epoch": 0.38892975358587717, + "grad_norm": 1.4987137790332312, + "learning_rate": 6.985343162516591e-06, + "loss": 0.6931, + "step": 12690 + }, + { + "epoch": 0.3889604021086184, + "grad_norm": 1.9366387164104144, + "learning_rate": 6.98488763772356e-06, + "loss": 0.6134, + "step": 12691 + }, + { + "epoch": 0.3889910506313596, + "grad_norm": 1.5537458253085297, + "learning_rate": 6.9844320933729205e-06, + "loss": 0.6791, + "step": 12692 + }, + { + "epoch": 0.3890216991541008, + "grad_norm": 1.979792114480207, + "learning_rate": 6.983976529469165e-06, + "loss": 0.6357, + "step": 12693 + }, + { + "epoch": 0.389052347676842, + "grad_norm": 1.543856878971336, + "learning_rate": 6.983520946016779e-06, + "loss": 0.5921, + "step": 12694 + }, + { + "epoch": 0.3890829961995832, + "grad_norm": 1.8052283650489354, + "learning_rate": 6.983065343020258e-06, + "loss": 0.607, + "step": 12695 + }, + { + "epoch": 0.3891136447223244, + "grad_norm": 1.5734786468080972, + "learning_rate": 6.982609720484082e-06, + "loss": 0.7297, + "step": 12696 + }, + { + "epoch": 0.3891442932450656, + "grad_norm": 1.7242013828810228, + 
"learning_rate": 6.9821540784127485e-06, + "loss": 0.6966, + "step": 12697 + }, + { + "epoch": 0.3891749417678068, + "grad_norm": 0.7860915674902687, + "learning_rate": 6.981698416810742e-06, + "loss": 0.4571, + "step": 12698 + }, + { + "epoch": 0.389205590290548, + "grad_norm": 0.8032644671252684, + "learning_rate": 6.981242735682554e-06, + "loss": 0.4467, + "step": 12699 + }, + { + "epoch": 0.3892362388132892, + "grad_norm": 1.6603071269745728, + "learning_rate": 6.980787035032676e-06, + "loss": 0.7133, + "step": 12700 + }, + { + "epoch": 0.3892668873360304, + "grad_norm": 1.592393242184925, + "learning_rate": 6.980331314865596e-06, + "loss": 0.6236, + "step": 12701 + }, + { + "epoch": 0.3892975358587716, + "grad_norm": 1.7171673290029916, + "learning_rate": 6.9798755751858025e-06, + "loss": 0.7159, + "step": 12702 + }, + { + "epoch": 0.3893281843815128, + "grad_norm": 0.8351930567281197, + "learning_rate": 6.979419815997791e-06, + "loss": 0.459, + "step": 12703 + }, + { + "epoch": 0.389358832904254, + "grad_norm": 1.647060617403317, + "learning_rate": 6.9789640373060486e-06, + "loss": 0.6666, + "step": 12704 + }, + { + "epoch": 0.3893894814269952, + "grad_norm": 1.62916346123947, + "learning_rate": 6.978508239115067e-06, + "loss": 0.6997, + "step": 12705 + }, + { + "epoch": 0.3894201299497364, + "grad_norm": 0.8299804148891916, + "learning_rate": 6.9780524214293375e-06, + "loss": 0.4639, + "step": 12706 + }, + { + "epoch": 0.3894507784724776, + "grad_norm": 1.530051517010672, + "learning_rate": 6.977596584253352e-06, + "loss": 0.6777, + "step": 12707 + }, + { + "epoch": 0.3894814269952188, + "grad_norm": 0.798040044080886, + "learning_rate": 6.977140727591601e-06, + "loss": 0.4598, + "step": 12708 + }, + { + "epoch": 0.38951207551796, + "grad_norm": 0.7948781265124893, + "learning_rate": 6.976684851448577e-06, + "loss": 0.4419, + "step": 12709 + }, + { + "epoch": 0.3895427240407012, + "grad_norm": 1.7872733548260118, + "learning_rate": 6.976228955828771e-06, + "loss": 0.7294, + "step": 12710 + }, + { + "epoch": 0.38957337256344243, + "grad_norm": 1.716427587074317, + "learning_rate": 6.975773040736675e-06, + "loss": 0.7177, + "step": 12711 + }, + { + "epoch": 0.38960402108618364, + "grad_norm": 1.7349072889030157, + "learning_rate": 6.975317106176783e-06, + "loss": 0.7013, + "step": 12712 + }, + { + "epoch": 0.38963466960892484, + "grad_norm": 1.8261772982416566, + "learning_rate": 6.9748611521535845e-06, + "loss": 0.655, + "step": 12713 + }, + { + "epoch": 0.38966531813166605, + "grad_norm": 1.4957282437106152, + "learning_rate": 6.974405178671575e-06, + "loss": 0.6298, + "step": 12714 + }, + { + "epoch": 0.38969596665440726, + "grad_norm": 1.5944441726852834, + "learning_rate": 6.973949185735246e-06, + "loss": 0.6273, + "step": 12715 + }, + { + "epoch": 0.38972661517714846, + "grad_norm": 1.7337019312922026, + "learning_rate": 6.973493173349089e-06, + "loss": 0.6771, + "step": 12716 + }, + { + "epoch": 0.38975726369988967, + "grad_norm": 1.5935646553957434, + "learning_rate": 6.9730371415176014e-06, + "loss": 0.6296, + "step": 12717 + }, + { + "epoch": 0.3897879122226309, + "grad_norm": 1.5587222655206754, + "learning_rate": 6.9725810902452725e-06, + "loss": 0.6245, + "step": 12718 + }, + { + "epoch": 0.3898185607453721, + "grad_norm": 1.716027794347996, + "learning_rate": 6.972125019536599e-06, + "loss": 0.7065, + "step": 12719 + }, + { + "epoch": 0.3898492092681133, + "grad_norm": 2.0326883880709703, + "learning_rate": 6.971668929396071e-06, + "loss": 0.6701, + "step": 12720 + }, + 
{ + "epoch": 0.3898798577908545, + "grad_norm": 2.1746826269534547, + "learning_rate": 6.971212819828185e-06, + "loss": 0.6605, + "step": 12721 + }, + { + "epoch": 0.3899105063135957, + "grad_norm": 1.0809988153900127, + "learning_rate": 6.970756690837436e-06, + "loss": 0.47, + "step": 12722 + }, + { + "epoch": 0.3899411548363369, + "grad_norm": 0.9477486641902411, + "learning_rate": 6.970300542428315e-06, + "loss": 0.4384, + "step": 12723 + }, + { + "epoch": 0.3899718033590781, + "grad_norm": 1.6355719179612387, + "learning_rate": 6.96984437460532e-06, + "loss": 0.6132, + "step": 12724 + }, + { + "epoch": 0.3900024518818193, + "grad_norm": 0.7668267500342858, + "learning_rate": 6.969388187372944e-06, + "loss": 0.443, + "step": 12725 + }, + { + "epoch": 0.3900331004045605, + "grad_norm": 1.8060469126924077, + "learning_rate": 6.968931980735683e-06, + "loss": 0.5602, + "step": 12726 + }, + { + "epoch": 0.3900637489273017, + "grad_norm": 1.526959393389725, + "learning_rate": 6.968475754698032e-06, + "loss": 0.6396, + "step": 12727 + }, + { + "epoch": 0.39009439745004293, + "grad_norm": 1.7154481104983585, + "learning_rate": 6.968019509264483e-06, + "loss": 0.704, + "step": 12728 + }, + { + "epoch": 0.39012504597278413, + "grad_norm": 1.905471296706804, + "learning_rate": 6.967563244439537e-06, + "loss": 0.69, + "step": 12729 + }, + { + "epoch": 0.39015569449552534, + "grad_norm": 1.847946423826652, + "learning_rate": 6.9671069602276854e-06, + "loss": 0.6997, + "step": 12730 + }, + { + "epoch": 0.39018634301826655, + "grad_norm": 1.1117043262734296, + "learning_rate": 6.966650656633424e-06, + "loss": 0.4652, + "step": 12731 + }, + { + "epoch": 0.3902169915410077, + "grad_norm": 1.919954752068234, + "learning_rate": 6.966194333661254e-06, + "loss": 0.7087, + "step": 12732 + }, + { + "epoch": 0.3902476400637489, + "grad_norm": 1.9400851696526562, + "learning_rate": 6.965737991315667e-06, + "loss": 0.6533, + "step": 12733 + }, + { + "epoch": 0.3902782885864901, + "grad_norm": 1.7075700855251963, + "learning_rate": 6.965281629601161e-06, + "loss": 0.7162, + "step": 12734 + }, + { + "epoch": 0.3903089371092313, + "grad_norm": 1.7155443122058538, + "learning_rate": 6.9648252485222304e-06, + "loss": 0.6792, + "step": 12735 + }, + { + "epoch": 0.3903395856319725, + "grad_norm": 1.793830614571341, + "learning_rate": 6.9643688480833746e-06, + "loss": 0.6195, + "step": 12736 + }, + { + "epoch": 0.3903702341547137, + "grad_norm": 1.8627657470146337, + "learning_rate": 6.96391242828909e-06, + "loss": 0.65, + "step": 12737 + }, + { + "epoch": 0.39040088267745493, + "grad_norm": 0.8098942369013034, + "learning_rate": 6.963455989143876e-06, + "loss": 0.4488, + "step": 12738 + }, + { + "epoch": 0.39043153120019614, + "grad_norm": 0.7653368801458168, + "learning_rate": 6.9629995306522245e-06, + "loss": 0.4379, + "step": 12739 + }, + { + "epoch": 0.39046217972293734, + "grad_norm": 1.7425157222542564, + "learning_rate": 6.962543052818638e-06, + "loss": 0.6111, + "step": 12740 + }, + { + "epoch": 0.39049282824567855, + "grad_norm": 0.7754055021492906, + "learning_rate": 6.962086555647614e-06, + "loss": 0.4641, + "step": 12741 + }, + { + "epoch": 0.39052347676841975, + "grad_norm": 1.774002693705309, + "learning_rate": 6.9616300391436456e-06, + "loss": 0.6393, + "step": 12742 + }, + { + "epoch": 0.39055412529116096, + "grad_norm": 1.7746747358904598, + "learning_rate": 6.961173503311237e-06, + "loss": 0.6905, + "step": 12743 + }, + { + "epoch": 0.39058477381390216, + "grad_norm": 0.8153402098660923, + 
"learning_rate": 6.960716948154884e-06, + "loss": 0.438, + "step": 12744 + }, + { + "epoch": 0.39061542233664337, + "grad_norm": 1.7412768481976593, + "learning_rate": 6.960260373679085e-06, + "loss": 0.7125, + "step": 12745 + }, + { + "epoch": 0.3906460708593846, + "grad_norm": 1.864563060971494, + "learning_rate": 6.959803779888338e-06, + "loss": 0.6995, + "step": 12746 + }, + { + "epoch": 0.3906767193821258, + "grad_norm": 1.7203673224176361, + "learning_rate": 6.959347166787144e-06, + "loss": 0.6572, + "step": 12747 + }, + { + "epoch": 0.390707367904867, + "grad_norm": 1.5232167950193063, + "learning_rate": 6.95889053438e-06, + "loss": 0.5556, + "step": 12748 + }, + { + "epoch": 0.3907380164276082, + "grad_norm": 1.5896683537506235, + "learning_rate": 6.958433882671408e-06, + "loss": 0.6127, + "step": 12749 + }, + { + "epoch": 0.3907686649503494, + "grad_norm": 1.8382186320999734, + "learning_rate": 6.957977211665865e-06, + "loss": 0.7805, + "step": 12750 + }, + { + "epoch": 0.3907993134730906, + "grad_norm": 0.8185325856739444, + "learning_rate": 6.957520521367871e-06, + "loss": 0.4565, + "step": 12751 + }, + { + "epoch": 0.3908299619958318, + "grad_norm": 0.7778606345153953, + "learning_rate": 6.9570638117819266e-06, + "loss": 0.4504, + "step": 12752 + }, + { + "epoch": 0.390860610518573, + "grad_norm": 1.6459618694949483, + "learning_rate": 6.9566070829125345e-06, + "loss": 0.7205, + "step": 12753 + }, + { + "epoch": 0.3908912590413142, + "grad_norm": 1.8047470810102222, + "learning_rate": 6.956150334764188e-06, + "loss": 0.5629, + "step": 12754 + }, + { + "epoch": 0.3909219075640554, + "grad_norm": 2.0622597881034364, + "learning_rate": 6.9556935673413935e-06, + "loss": 0.7502, + "step": 12755 + }, + { + "epoch": 0.39095255608679663, + "grad_norm": 1.476840190359142, + "learning_rate": 6.95523678064865e-06, + "loss": 0.6769, + "step": 12756 + }, + { + "epoch": 0.39098320460953784, + "grad_norm": 1.728362493115384, + "learning_rate": 6.9547799746904575e-06, + "loss": 0.6309, + "step": 12757 + }, + { + "epoch": 0.39101385313227904, + "grad_norm": 1.7422526793158561, + "learning_rate": 6.954323149471319e-06, + "loss": 0.6189, + "step": 12758 + }, + { + "epoch": 0.39104450165502025, + "grad_norm": 0.8568792960875706, + "learning_rate": 6.953866304995733e-06, + "loss": 0.4699, + "step": 12759 + }, + { + "epoch": 0.39107515017776145, + "grad_norm": 0.8684078609714987, + "learning_rate": 6.953409441268204e-06, + "loss": 0.4749, + "step": 12760 + }, + { + "epoch": 0.39110579870050266, + "grad_norm": 2.1094195201843355, + "learning_rate": 6.952952558293231e-06, + "loss": 0.6761, + "step": 12761 + }, + { + "epoch": 0.39113644722324387, + "grad_norm": 1.6363448279486303, + "learning_rate": 6.952495656075318e-06, + "loss": 0.6322, + "step": 12762 + }, + { + "epoch": 0.391167095745985, + "grad_norm": 1.8772298049910563, + "learning_rate": 6.952038734618964e-06, + "loss": 0.6401, + "step": 12763 + }, + { + "epoch": 0.3911977442687262, + "grad_norm": 1.8000760826591178, + "learning_rate": 6.951581793928674e-06, + "loss": 0.6974, + "step": 12764 + }, + { + "epoch": 0.3912283927914674, + "grad_norm": 0.7894620216619117, + "learning_rate": 6.951124834008948e-06, + "loss": 0.4502, + "step": 12765 + }, + { + "epoch": 0.39125904131420863, + "grad_norm": 0.8056016697795857, + "learning_rate": 6.950667854864293e-06, + "loss": 0.4842, + "step": 12766 + }, + { + "epoch": 0.39128968983694984, + "grad_norm": 1.6156362023846293, + "learning_rate": 6.950210856499204e-06, + "loss": 0.5239, + "step": 12767 + }, 
+ { + "epoch": 0.39132033835969104, + "grad_norm": 1.8873954403762434, + "learning_rate": 6.949753838918192e-06, + "loss": 0.7155, + "step": 12768 + }, + { + "epoch": 0.39135098688243225, + "grad_norm": 1.7520178447802595, + "learning_rate": 6.949296802125755e-06, + "loss": 0.7118, + "step": 12769 + }, + { + "epoch": 0.39138163540517346, + "grad_norm": 1.910964658130452, + "learning_rate": 6.948839746126399e-06, + "loss": 0.5817, + "step": 12770 + }, + { + "epoch": 0.39141228392791466, + "grad_norm": 1.6999650657650454, + "learning_rate": 6.948382670924625e-06, + "loss": 0.6159, + "step": 12771 + }, + { + "epoch": 0.39144293245065587, + "grad_norm": 0.860422747312449, + "learning_rate": 6.947925576524939e-06, + "loss": 0.4492, + "step": 12772 + }, + { + "epoch": 0.3914735809733971, + "grad_norm": 1.7880058430905446, + "learning_rate": 6.947468462931843e-06, + "loss": 0.6654, + "step": 12773 + }, + { + "epoch": 0.3915042294961383, + "grad_norm": 1.716583844626467, + "learning_rate": 6.947011330149842e-06, + "loss": 0.7193, + "step": 12774 + }, + { + "epoch": 0.3915348780188795, + "grad_norm": 1.7880296510129128, + "learning_rate": 6.94655417818344e-06, + "loss": 0.6885, + "step": 12775 + }, + { + "epoch": 0.3915655265416207, + "grad_norm": 1.6891656944902296, + "learning_rate": 6.9460970070371425e-06, + "loss": 0.6431, + "step": 12776 + }, + { + "epoch": 0.3915961750643619, + "grad_norm": 1.4796139140458953, + "learning_rate": 6.945639816715454e-06, + "loss": 0.6366, + "step": 12777 + }, + { + "epoch": 0.3916268235871031, + "grad_norm": 1.7894918566377458, + "learning_rate": 6.945182607222876e-06, + "loss": 0.6828, + "step": 12778 + }, + { + "epoch": 0.3916574721098443, + "grad_norm": 1.8446242698084916, + "learning_rate": 6.944725378563918e-06, + "loss": 0.7392, + "step": 12779 + }, + { + "epoch": 0.3916881206325855, + "grad_norm": 1.6677522955941477, + "learning_rate": 6.944268130743083e-06, + "loss": 0.6327, + "step": 12780 + }, + { + "epoch": 0.3917187691553267, + "grad_norm": 1.6457542949724568, + "learning_rate": 6.943810863764877e-06, + "loss": 0.5945, + "step": 12781 + }, + { + "epoch": 0.3917494176780679, + "grad_norm": 1.8473250379479866, + "learning_rate": 6.943353577633803e-06, + "loss": 0.7271, + "step": 12782 + }, + { + "epoch": 0.39178006620080913, + "grad_norm": 1.8323651313782747, + "learning_rate": 6.9428962723543716e-06, + "loss": 0.7699, + "step": 12783 + }, + { + "epoch": 0.39181071472355034, + "grad_norm": 1.7040744038439128, + "learning_rate": 6.942438947931085e-06, + "loss": 0.6073, + "step": 12784 + }, + { + "epoch": 0.39184136324629154, + "grad_norm": 0.7975332287222682, + "learning_rate": 6.94198160436845e-06, + "loss": 0.4347, + "step": 12785 + }, + { + "epoch": 0.39187201176903275, + "grad_norm": 1.5618508728599232, + "learning_rate": 6.941524241670975e-06, + "loss": 0.669, + "step": 12786 + }, + { + "epoch": 0.39190266029177395, + "grad_norm": 1.651166647190108, + "learning_rate": 6.941066859843163e-06, + "loss": 0.5949, + "step": 12787 + }, + { + "epoch": 0.39193330881451516, + "grad_norm": 1.7138495462287429, + "learning_rate": 6.940609458889525e-06, + "loss": 0.6587, + "step": 12788 + }, + { + "epoch": 0.39196395733725636, + "grad_norm": 2.4088492549421585, + "learning_rate": 6.940152038814563e-06, + "loss": 0.4381, + "step": 12789 + }, + { + "epoch": 0.39199460585999757, + "grad_norm": 0.8283650991135721, + "learning_rate": 6.939694599622788e-06, + "loss": 0.4663, + "step": 12790 + }, + { + "epoch": 0.3920252543827388, + "grad_norm": 1.6558448777783836, + 
"learning_rate": 6.939237141318704e-06, + "loss": 0.6496, + "step": 12791 + }, + { + "epoch": 0.39205590290548, + "grad_norm": 1.8557509550626794, + "learning_rate": 6.9387796639068224e-06, + "loss": 0.7312, + "step": 12792 + }, + { + "epoch": 0.3920865514282212, + "grad_norm": 1.674859801184698, + "learning_rate": 6.9383221673916475e-06, + "loss": 0.6088, + "step": 12793 + }, + { + "epoch": 0.39211719995096234, + "grad_norm": 1.6955987594026192, + "learning_rate": 6.93786465177769e-06, + "loss": 0.624, + "step": 12794 + }, + { + "epoch": 0.39214784847370354, + "grad_norm": 1.735710910519497, + "learning_rate": 6.937407117069454e-06, + "loss": 0.5943, + "step": 12795 + }, + { + "epoch": 0.39217849699644475, + "grad_norm": 1.7002425276368625, + "learning_rate": 6.936949563271452e-06, + "loss": 0.6165, + "step": 12796 + }, + { + "epoch": 0.39220914551918595, + "grad_norm": 1.7982960705253035, + "learning_rate": 6.936491990388189e-06, + "loss": 0.7252, + "step": 12797 + }, + { + "epoch": 0.39223979404192716, + "grad_norm": 1.6488466389118879, + "learning_rate": 6.936034398424175e-06, + "loss": 0.6354, + "step": 12798 + }, + { + "epoch": 0.39227044256466836, + "grad_norm": 1.5702549822950276, + "learning_rate": 6.93557678738392e-06, + "loss": 0.7178, + "step": 12799 + }, + { + "epoch": 0.39230109108740957, + "grad_norm": 1.735117195791221, + "learning_rate": 6.9351191572719304e-06, + "loss": 0.6275, + "step": 12800 + }, + { + "epoch": 0.3923317396101508, + "grad_norm": 1.729155765169266, + "learning_rate": 6.9346615080927175e-06, + "loss": 0.7279, + "step": 12801 + }, + { + "epoch": 0.392362388132892, + "grad_norm": 0.906763635762934, + "learning_rate": 6.9342038398507875e-06, + "loss": 0.4582, + "step": 12802 + }, + { + "epoch": 0.3923930366556332, + "grad_norm": 0.8370077203268396, + "learning_rate": 6.933746152550655e-06, + "loss": 0.4704, + "step": 12803 + }, + { + "epoch": 0.3924236851783744, + "grad_norm": 1.6358095677475146, + "learning_rate": 6.933288446196825e-06, + "loss": 0.7186, + "step": 12804 + }, + { + "epoch": 0.3924543337011156, + "grad_norm": 0.815014318578277, + "learning_rate": 6.932830720793811e-06, + "loss": 0.4686, + "step": 12805 + }, + { + "epoch": 0.3924849822238568, + "grad_norm": 1.6217960683738073, + "learning_rate": 6.932372976346119e-06, + "loss": 0.676, + "step": 12806 + }, + { + "epoch": 0.392515630746598, + "grad_norm": 0.8749956222513517, + "learning_rate": 6.931915212858265e-06, + "loss": 0.4495, + "step": 12807 + }, + { + "epoch": 0.3925462792693392, + "grad_norm": 1.846153546163894, + "learning_rate": 6.931457430334753e-06, + "loss": 0.7735, + "step": 12808 + }, + { + "epoch": 0.3925769277920804, + "grad_norm": 1.7942164329945463, + "learning_rate": 6.930999628780097e-06, + "loss": 0.6428, + "step": 12809 + }, + { + "epoch": 0.3926075763148216, + "grad_norm": 1.8282447014359697, + "learning_rate": 6.930541808198809e-06, + "loss": 0.7487, + "step": 12810 + }, + { + "epoch": 0.39263822483756283, + "grad_norm": 1.7035169703975575, + "learning_rate": 6.930083968595398e-06, + "loss": 0.6839, + "step": 12811 + }, + { + "epoch": 0.39266887336030404, + "grad_norm": 1.9281776453044426, + "learning_rate": 6.929626109974377e-06, + "loss": 0.7545, + "step": 12812 + }, + { + "epoch": 0.39269952188304524, + "grad_norm": 1.6718661971462319, + "learning_rate": 6.929168232340253e-06, + "loss": 0.7312, + "step": 12813 + }, + { + "epoch": 0.39273017040578645, + "grad_norm": 1.8364944143778645, + "learning_rate": 6.928710335697544e-06, + "loss": 0.7256, + "step": 12814 + }, + 
{ + "epoch": 0.39276081892852766, + "grad_norm": 1.6699192381094436, + "learning_rate": 6.9282524200507585e-06, + "loss": 0.666, + "step": 12815 + }, + { + "epoch": 0.39279146745126886, + "grad_norm": 1.9766038908375703, + "learning_rate": 6.927794485404407e-06, + "loss": 0.7725, + "step": 12816 + }, + { + "epoch": 0.39282211597401007, + "grad_norm": 1.5427094322953856, + "learning_rate": 6.927336531763002e-06, + "loss": 0.6792, + "step": 12817 + }, + { + "epoch": 0.39285276449675127, + "grad_norm": 1.8253684867356315, + "learning_rate": 6.926878559131061e-06, + "loss": 0.6281, + "step": 12818 + }, + { + "epoch": 0.3928834130194925, + "grad_norm": 1.5093679376845308, + "learning_rate": 6.92642056751309e-06, + "loss": 0.6776, + "step": 12819 + }, + { + "epoch": 0.3929140615422337, + "grad_norm": 1.5316544055121653, + "learning_rate": 6.925962556913605e-06, + "loss": 0.5614, + "step": 12820 + }, + { + "epoch": 0.3929447100649749, + "grad_norm": 1.8253346400275936, + "learning_rate": 6.925504527337117e-06, + "loss": 0.7215, + "step": 12821 + }, + { + "epoch": 0.3929753585877161, + "grad_norm": 2.2109968619213833, + "learning_rate": 6.925046478788142e-06, + "loss": 0.6611, + "step": 12822 + }, + { + "epoch": 0.3930060071104573, + "grad_norm": 0.8744746741890788, + "learning_rate": 6.92458841127119e-06, + "loss": 0.4748, + "step": 12823 + }, + { + "epoch": 0.3930366556331985, + "grad_norm": 1.6885910501225752, + "learning_rate": 6.924130324790776e-06, + "loss": 0.6397, + "step": 12824 + }, + { + "epoch": 0.39306730415593966, + "grad_norm": 1.7675412106673032, + "learning_rate": 6.923672219351414e-06, + "loss": 0.7229, + "step": 12825 + }, + { + "epoch": 0.39309795267868086, + "grad_norm": 1.6381360654202766, + "learning_rate": 6.923214094957618e-06, + "loss": 0.64, + "step": 12826 + }, + { + "epoch": 0.39312860120142207, + "grad_norm": 1.5276315609202775, + "learning_rate": 6.922755951613901e-06, + "loss": 0.6471, + "step": 12827 + }, + { + "epoch": 0.3931592497241633, + "grad_norm": 1.7389559045197212, + "learning_rate": 6.922297789324777e-06, + "loss": 0.5621, + "step": 12828 + }, + { + "epoch": 0.3931898982469045, + "grad_norm": 1.8017707019161715, + "learning_rate": 6.921839608094761e-06, + "loss": 0.6837, + "step": 12829 + }, + { + "epoch": 0.3932205467696457, + "grad_norm": 1.604889268678593, + "learning_rate": 6.92138140792837e-06, + "loss": 0.7017, + "step": 12830 + }, + { + "epoch": 0.3932511952923869, + "grad_norm": 1.755390270194418, + "learning_rate": 6.920923188830113e-06, + "loss": 0.6892, + "step": 12831 + }, + { + "epoch": 0.3932818438151281, + "grad_norm": 1.6750370040788523, + "learning_rate": 6.9204649508045095e-06, + "loss": 0.7041, + "step": 12832 + }, + { + "epoch": 0.3933124923378693, + "grad_norm": 1.6407824556769819, + "learning_rate": 6.920006693856074e-06, + "loss": 0.6216, + "step": 12833 + }, + { + "epoch": 0.3933431408606105, + "grad_norm": 1.7481222160731953, + "learning_rate": 6.919548417989321e-06, + "loss": 0.7578, + "step": 12834 + }, + { + "epoch": 0.3933737893833517, + "grad_norm": 0.8498210067746387, + "learning_rate": 6.919090123208767e-06, + "loss": 0.4731, + "step": 12835 + }, + { + "epoch": 0.3934044379060929, + "grad_norm": 1.5208837032172386, + "learning_rate": 6.918631809518926e-06, + "loss": 0.5833, + "step": 12836 + }, + { + "epoch": 0.3934350864288341, + "grad_norm": 1.7995836035036437, + "learning_rate": 6.918173476924316e-06, + "loss": 0.7343, + "step": 12837 + }, + { + "epoch": 0.39346573495157533, + "grad_norm": 1.8029815237023334, + 
"learning_rate": 6.917715125429452e-06, + "loss": 0.7122, + "step": 12838 + }, + { + "epoch": 0.39349638347431654, + "grad_norm": 1.555929613905045, + "learning_rate": 6.917256755038848e-06, + "loss": 0.6443, + "step": 12839 + }, + { + "epoch": 0.39352703199705774, + "grad_norm": 1.6855063348680142, + "learning_rate": 6.916798365757025e-06, + "loss": 0.6976, + "step": 12840 + }, + { + "epoch": 0.39355768051979895, + "grad_norm": 1.877829324453549, + "learning_rate": 6.916339957588496e-06, + "loss": 0.703, + "step": 12841 + }, + { + "epoch": 0.39358832904254015, + "grad_norm": 1.8127994046274514, + "learning_rate": 6.91588153053778e-06, + "loss": 0.6791, + "step": 12842 + }, + { + "epoch": 0.39361897756528136, + "grad_norm": 1.727699929204338, + "learning_rate": 6.915423084609392e-06, + "loss": 0.6515, + "step": 12843 + }, + { + "epoch": 0.39364962608802256, + "grad_norm": 1.7096445625388936, + "learning_rate": 6.914964619807851e-06, + "loss": 0.7228, + "step": 12844 + }, + { + "epoch": 0.39368027461076377, + "grad_norm": 1.9199666392800885, + "learning_rate": 6.914506136137674e-06, + "loss": 0.7644, + "step": 12845 + }, + { + "epoch": 0.393710923133505, + "grad_norm": 1.9348219618895068, + "learning_rate": 6.914047633603378e-06, + "loss": 0.7129, + "step": 12846 + }, + { + "epoch": 0.3937415716562462, + "grad_norm": 0.9106743425410112, + "learning_rate": 6.91358911220948e-06, + "loss": 0.4753, + "step": 12847 + }, + { + "epoch": 0.3937722201789874, + "grad_norm": 1.7152729618215143, + "learning_rate": 6.913130571960499e-06, + "loss": 0.721, + "step": 12848 + }, + { + "epoch": 0.3938028687017286, + "grad_norm": 1.8330881128916736, + "learning_rate": 6.912672012860954e-06, + "loss": 0.769, + "step": 12849 + }, + { + "epoch": 0.3938335172244698, + "grad_norm": 0.7795835031878718, + "learning_rate": 6.912213434915362e-06, + "loss": 0.4722, + "step": 12850 + }, + { + "epoch": 0.393864165747211, + "grad_norm": 1.6237081313292945, + "learning_rate": 6.91175483812824e-06, + "loss": 0.5959, + "step": 12851 + }, + { + "epoch": 0.3938948142699522, + "grad_norm": 0.783607038016501, + "learning_rate": 6.911296222504111e-06, + "loss": 0.4341, + "step": 12852 + }, + { + "epoch": 0.3939254627926934, + "grad_norm": 1.7436865335420524, + "learning_rate": 6.910837588047491e-06, + "loss": 0.7171, + "step": 12853 + }, + { + "epoch": 0.3939561113154346, + "grad_norm": 1.6384395700305767, + "learning_rate": 6.910378934762898e-06, + "loss": 0.6949, + "step": 12854 + }, + { + "epoch": 0.3939867598381758, + "grad_norm": 1.9521798638149523, + "learning_rate": 6.909920262654852e-06, + "loss": 0.6925, + "step": 12855 + }, + { + "epoch": 0.394017408360917, + "grad_norm": 1.605679663451887, + "learning_rate": 6.909461571727874e-06, + "loss": 0.5139, + "step": 12856 + }, + { + "epoch": 0.3940480568836582, + "grad_norm": 1.6286419976001338, + "learning_rate": 6.909002861986485e-06, + "loss": 0.6907, + "step": 12857 + }, + { + "epoch": 0.3940787054063994, + "grad_norm": 1.6854299863175992, + "learning_rate": 6.908544133435199e-06, + "loss": 0.6986, + "step": 12858 + }, + { + "epoch": 0.3941093539291406, + "grad_norm": 1.6820807733073577, + "learning_rate": 6.90808538607854e-06, + "loss": 0.7011, + "step": 12859 + }, + { + "epoch": 0.3941400024518818, + "grad_norm": 1.333275035428145, + "learning_rate": 6.907626619921027e-06, + "loss": 0.5877, + "step": 12860 + }, + { + "epoch": 0.394170650974623, + "grad_norm": 1.7456204632439525, + "learning_rate": 6.907167834967183e-06, + "loss": 0.6229, + "step": 12861 + }, + { + 
"epoch": 0.3942012994973642, + "grad_norm": 1.7679834684235984, + "learning_rate": 6.906709031221524e-06, + "loss": 0.7108, + "step": 12862 + }, + { + "epoch": 0.3942319480201054, + "grad_norm": 1.8230435416470023, + "learning_rate": 6.906250208688575e-06, + "loss": 0.7062, + "step": 12863 + }, + { + "epoch": 0.3942625965428466, + "grad_norm": 1.7560050673429715, + "learning_rate": 6.9057913673728535e-06, + "loss": 0.6814, + "step": 12864 + }, + { + "epoch": 0.3942932450655878, + "grad_norm": 0.9188743807309934, + "learning_rate": 6.905332507278882e-06, + "loss": 0.453, + "step": 12865 + }, + { + "epoch": 0.39432389358832903, + "grad_norm": 2.2175165527211527, + "learning_rate": 6.904873628411184e-06, + "loss": 0.6916, + "step": 12866 + }, + { + "epoch": 0.39435454211107024, + "grad_norm": 1.6741034374258674, + "learning_rate": 6.904414730774277e-06, + "loss": 0.7004, + "step": 12867 + }, + { + "epoch": 0.39438519063381144, + "grad_norm": 2.065245077004732, + "learning_rate": 6.903955814372684e-06, + "loss": 0.7526, + "step": 12868 + }, + { + "epoch": 0.39441583915655265, + "grad_norm": 1.666574323240617, + "learning_rate": 6.903496879210927e-06, + "loss": 0.7019, + "step": 12869 + }, + { + "epoch": 0.39444648767929386, + "grad_norm": 1.4989705015924042, + "learning_rate": 6.90303792529353e-06, + "loss": 0.588, + "step": 12870 + }, + { + "epoch": 0.39447713620203506, + "grad_norm": 1.7419143662363756, + "learning_rate": 6.902578952625012e-06, + "loss": 0.6865, + "step": 12871 + }, + { + "epoch": 0.39450778472477627, + "grad_norm": 1.7854108155240758, + "learning_rate": 6.9021199612098976e-06, + "loss": 0.5822, + "step": 12872 + }, + { + "epoch": 0.3945384332475175, + "grad_norm": 1.9826734464155653, + "learning_rate": 6.901660951052707e-06, + "loss": 0.6644, + "step": 12873 + }, + { + "epoch": 0.3945690817702587, + "grad_norm": 1.9137795528426877, + "learning_rate": 6.901201922157967e-06, + "loss": 0.6543, + "step": 12874 + }, + { + "epoch": 0.3945997302929999, + "grad_norm": 1.685837959486255, + "learning_rate": 6.900742874530195e-06, + "loss": 0.68, + "step": 12875 + }, + { + "epoch": 0.3946303788157411, + "grad_norm": 1.5984631237108222, + "learning_rate": 6.90028380817392e-06, + "loss": 0.649, + "step": 12876 + }, + { + "epoch": 0.3946610273384823, + "grad_norm": 1.7245710826626266, + "learning_rate": 6.899824723093661e-06, + "loss": 0.6967, + "step": 12877 + }, + { + "epoch": 0.3946916758612235, + "grad_norm": 1.5758136480672293, + "learning_rate": 6.899365619293943e-06, + "loss": 0.6375, + "step": 12878 + }, + { + "epoch": 0.3947223243839647, + "grad_norm": 1.9027676977841994, + "learning_rate": 6.89890649677929e-06, + "loss": 0.7094, + "step": 12879 + }, + { + "epoch": 0.3947529729067059, + "grad_norm": 0.9410153286112082, + "learning_rate": 6.898447355554225e-06, + "loss": 0.4664, + "step": 12880 + }, + { + "epoch": 0.3947836214294471, + "grad_norm": 0.8955716710066053, + "learning_rate": 6.8979881956232734e-06, + "loss": 0.4595, + "step": 12881 + }, + { + "epoch": 0.3948142699521883, + "grad_norm": 1.5914866236594785, + "learning_rate": 6.8975290169909555e-06, + "loss": 0.6625, + "step": 12882 + }, + { + "epoch": 0.39484491847492953, + "grad_norm": 1.5122520827998696, + "learning_rate": 6.8970698196618016e-06, + "loss": 0.6532, + "step": 12883 + }, + { + "epoch": 0.39487556699767073, + "grad_norm": 1.9036342863965625, + "learning_rate": 6.896610603640332e-06, + "loss": 0.7152, + "step": 12884 + }, + { + "epoch": 0.39490621552041194, + "grad_norm": 1.669192250543987, + 
"learning_rate": 6.896151368931075e-06, + "loss": 0.7197, + "step": 12885 + }, + { + "epoch": 0.39493686404315315, + "grad_norm": 1.7488590635793642, + "learning_rate": 6.8956921155385505e-06, + "loss": 0.6758, + "step": 12886 + }, + { + "epoch": 0.3949675125658943, + "grad_norm": 1.5236374471987029, + "learning_rate": 6.895232843467289e-06, + "loss": 0.682, + "step": 12887 + }, + { + "epoch": 0.3949981610886355, + "grad_norm": 1.6384080814252058, + "learning_rate": 6.894773552721812e-06, + "loss": 0.5999, + "step": 12888 + }, + { + "epoch": 0.3950288096113767, + "grad_norm": 1.820885890265432, + "learning_rate": 6.8943142433066466e-06, + "loss": 0.7426, + "step": 12889 + }, + { + "epoch": 0.3950594581341179, + "grad_norm": 1.6629243196485897, + "learning_rate": 6.893854915226318e-06, + "loss": 0.6959, + "step": 12890 + }, + { + "epoch": 0.3950901066568591, + "grad_norm": 1.2785081352060492, + "learning_rate": 6.893395568485352e-06, + "loss": 0.464, + "step": 12891 + }, + { + "epoch": 0.3951207551796003, + "grad_norm": 1.4601229272345777, + "learning_rate": 6.892936203088278e-06, + "loss": 0.6401, + "step": 12892 + }, + { + "epoch": 0.39515140370234153, + "grad_norm": 1.5885845154015903, + "learning_rate": 6.892476819039616e-06, + "loss": 0.6696, + "step": 12893 + }, + { + "epoch": 0.39518205222508274, + "grad_norm": 0.8437906612355025, + "learning_rate": 6.892017416343897e-06, + "loss": 0.4472, + "step": 12894 + }, + { + "epoch": 0.39521270074782394, + "grad_norm": 1.7086300108218286, + "learning_rate": 6.891557995005646e-06, + "loss": 0.7603, + "step": 12895 + }, + { + "epoch": 0.39524334927056515, + "grad_norm": 0.7751484502057416, + "learning_rate": 6.891098555029389e-06, + "loss": 0.4513, + "step": 12896 + }, + { + "epoch": 0.39527399779330635, + "grad_norm": 1.523388820454628, + "learning_rate": 6.890639096419656e-06, + "loss": 0.6067, + "step": 12897 + }, + { + "epoch": 0.39530464631604756, + "grad_norm": 1.5941095002511114, + "learning_rate": 6.8901796191809715e-06, + "loss": 0.6911, + "step": 12898 + }, + { + "epoch": 0.39533529483878876, + "grad_norm": 1.5904466678801266, + "learning_rate": 6.889720123317863e-06, + "loss": 0.7125, + "step": 12899 + }, + { + "epoch": 0.39536594336152997, + "grad_norm": 1.770311751809896, + "learning_rate": 6.88926060883486e-06, + "loss": 0.589, + "step": 12900 + }, + { + "epoch": 0.3953965918842712, + "grad_norm": 1.0050177238140263, + "learning_rate": 6.888801075736487e-06, + "loss": 0.4337, + "step": 12901 + }, + { + "epoch": 0.3954272404070124, + "grad_norm": 1.8137234270619016, + "learning_rate": 6.888341524027275e-06, + "loss": 0.739, + "step": 12902 + }, + { + "epoch": 0.3954578889297536, + "grad_norm": 1.7377569011151421, + "learning_rate": 6.8878819537117514e-06, + "loss": 0.7483, + "step": 12903 + }, + { + "epoch": 0.3954885374524948, + "grad_norm": 1.5008542961774802, + "learning_rate": 6.887422364794443e-06, + "loss": 0.6285, + "step": 12904 + }, + { + "epoch": 0.395519185975236, + "grad_norm": 1.586429624942359, + "learning_rate": 6.886962757279878e-06, + "loss": 0.5438, + "step": 12905 + }, + { + "epoch": 0.3955498344979772, + "grad_norm": 1.7057851594239668, + "learning_rate": 6.8865031311725885e-06, + "loss": 0.7624, + "step": 12906 + }, + { + "epoch": 0.3955804830207184, + "grad_norm": 1.8690651124876176, + "learning_rate": 6.8860434864771e-06, + "loss": 0.5968, + "step": 12907 + }, + { + "epoch": 0.3956111315434596, + "grad_norm": 1.662370593737395, + "learning_rate": 6.885583823197941e-06, + "loss": 0.7683, + "step": 12908 + }, + 
{ + "epoch": 0.3956417800662008, + "grad_norm": 2.0056408703778543, + "learning_rate": 6.885124141339643e-06, + "loss": 0.6996, + "step": 12909 + }, + { + "epoch": 0.395672428588942, + "grad_norm": 1.8410299999697135, + "learning_rate": 6.8846644409067345e-06, + "loss": 0.7274, + "step": 12910 + }, + { + "epoch": 0.39570307711168323, + "grad_norm": 1.8025637756012949, + "learning_rate": 6.884204721903747e-06, + "loss": 0.7223, + "step": 12911 + }, + { + "epoch": 0.39573372563442444, + "grad_norm": 1.6720559982091334, + "learning_rate": 6.883744984335205e-06, + "loss": 0.7133, + "step": 12912 + }, + { + "epoch": 0.39576437415716564, + "grad_norm": 1.749670749127907, + "learning_rate": 6.883285228205645e-06, + "loss": 0.668, + "step": 12913 + }, + { + "epoch": 0.39579502267990685, + "grad_norm": 1.7739017443733622, + "learning_rate": 6.882825453519591e-06, + "loss": 0.6851, + "step": 12914 + }, + { + "epoch": 0.39582567120264806, + "grad_norm": 1.8420818013064661, + "learning_rate": 6.882365660281578e-06, + "loss": 0.6417, + "step": 12915 + }, + { + "epoch": 0.39585631972538926, + "grad_norm": 1.7782661058452631, + "learning_rate": 6.881905848496133e-06, + "loss": 0.6896, + "step": 12916 + }, + { + "epoch": 0.39588696824813047, + "grad_norm": 1.7329839025713918, + "learning_rate": 6.88144601816779e-06, + "loss": 0.6434, + "step": 12917 + }, + { + "epoch": 0.3959176167708716, + "grad_norm": 2.088411208348128, + "learning_rate": 6.880986169301076e-06, + "loss": 0.7372, + "step": 12918 + }, + { + "epoch": 0.3959482652936128, + "grad_norm": 1.545753598653505, + "learning_rate": 6.8805263019005265e-06, + "loss": 0.6757, + "step": 12919 + }, + { + "epoch": 0.39597891381635403, + "grad_norm": 0.9462790887586137, + "learning_rate": 6.880066415970668e-06, + "loss": 0.476, + "step": 12920 + }, + { + "epoch": 0.39600956233909523, + "grad_norm": 1.554014791487098, + "learning_rate": 6.879606511516035e-06, + "loss": 0.6226, + "step": 12921 + }, + { + "epoch": 0.39604021086183644, + "grad_norm": 1.7094581900398491, + "learning_rate": 6.879146588541158e-06, + "loss": 0.5895, + "step": 12922 + }, + { + "epoch": 0.39607085938457764, + "grad_norm": 1.6388170995963394, + "learning_rate": 6.878686647050567e-06, + "loss": 0.6446, + "step": 12923 + }, + { + "epoch": 0.39610150790731885, + "grad_norm": 0.781746559064236, + "learning_rate": 6.878226687048798e-06, + "loss": 0.4757, + "step": 12924 + }, + { + "epoch": 0.39613215643006006, + "grad_norm": 1.9197641414015658, + "learning_rate": 6.8777667085403795e-06, + "loss": 0.701, + "step": 12925 + }, + { + "epoch": 0.39616280495280126, + "grad_norm": 1.5428043218400278, + "learning_rate": 6.8773067115298476e-06, + "loss": 0.6352, + "step": 12926 + }, + { + "epoch": 0.39619345347554247, + "grad_norm": 1.6242689546660536, + "learning_rate": 6.8768466960217306e-06, + "loss": 0.7727, + "step": 12927 + }, + { + "epoch": 0.3962241019982837, + "grad_norm": 1.7839646962956388, + "learning_rate": 6.876386662020562e-06, + "loss": 0.5536, + "step": 12928 + }, + { + "epoch": 0.3962547505210249, + "grad_norm": 1.5952992077472954, + "learning_rate": 6.875926609530876e-06, + "loss": 0.5583, + "step": 12929 + }, + { + "epoch": 0.3962853990437661, + "grad_norm": 1.531461145811626, + "learning_rate": 6.875466538557207e-06, + "loss": 0.686, + "step": 12930 + }, + { + "epoch": 0.3963160475665073, + "grad_norm": 1.8111518715604313, + "learning_rate": 6.8750064491040845e-06, + "loss": 0.6882, + "step": 12931 + }, + { + "epoch": 0.3963466960892485, + "grad_norm": 1.7138153425755234, + 
"learning_rate": 6.874546341176045e-06, + "loss": 0.699, + "step": 12932 + }, + { + "epoch": 0.3963773446119897, + "grad_norm": 0.8018554139655781, + "learning_rate": 6.874086214777619e-06, + "loss": 0.4655, + "step": 12933 + }, + { + "epoch": 0.3964079931347309, + "grad_norm": 1.59872251534501, + "learning_rate": 6.873626069913344e-06, + "loss": 0.597, + "step": 12934 + }, + { + "epoch": 0.3964386416574721, + "grad_norm": 1.6967778625464622, + "learning_rate": 6.8731659065877505e-06, + "loss": 0.7017, + "step": 12935 + }, + { + "epoch": 0.3964692901802133, + "grad_norm": 1.8271645526437423, + "learning_rate": 6.8727057248053745e-06, + "loss": 0.8143, + "step": 12936 + }, + { + "epoch": 0.3964999387029545, + "grad_norm": 0.7730998746502482, + "learning_rate": 6.87224552457075e-06, + "loss": 0.463, + "step": 12937 + }, + { + "epoch": 0.39653058722569573, + "grad_norm": 0.775033336494515, + "learning_rate": 6.871785305888411e-06, + "loss": 0.4357, + "step": 12938 + }, + { + "epoch": 0.39656123574843694, + "grad_norm": 1.6606900680234702, + "learning_rate": 6.8713250687628926e-06, + "loss": 0.6074, + "step": 12939 + }, + { + "epoch": 0.39659188427117814, + "grad_norm": 1.7218883020368951, + "learning_rate": 6.87086481319873e-06, + "loss": 0.6362, + "step": 12940 + }, + { + "epoch": 0.39662253279391935, + "grad_norm": 1.6543587140423237, + "learning_rate": 6.870404539200457e-06, + "loss": 0.6541, + "step": 12941 + }, + { + "epoch": 0.39665318131666055, + "grad_norm": 0.8084369434832033, + "learning_rate": 6.869944246772611e-06, + "loss": 0.4603, + "step": 12942 + }, + { + "epoch": 0.39668382983940176, + "grad_norm": 1.608876052873772, + "learning_rate": 6.869483935919724e-06, + "loss": 0.6184, + "step": 12943 + }, + { + "epoch": 0.39671447836214296, + "grad_norm": 1.5987396487648584, + "learning_rate": 6.869023606646334e-06, + "loss": 0.7358, + "step": 12944 + }, + { + "epoch": 0.39674512688488417, + "grad_norm": 0.756207565796295, + "learning_rate": 6.868563258956976e-06, + "loss": 0.4543, + "step": 12945 + }, + { + "epoch": 0.3967757754076254, + "grad_norm": 1.7837120586751376, + "learning_rate": 6.868102892856186e-06, + "loss": 0.6524, + "step": 12946 + }, + { + "epoch": 0.3968064239303666, + "grad_norm": 1.6876004663558128, + "learning_rate": 6.867642508348502e-06, + "loss": 0.7189, + "step": 12947 + }, + { + "epoch": 0.3968370724531078, + "grad_norm": 1.8575847235608882, + "learning_rate": 6.867182105438457e-06, + "loss": 0.6629, + "step": 12948 + }, + { + "epoch": 0.39686772097584894, + "grad_norm": 1.706326267716556, + "learning_rate": 6.866721684130588e-06, + "loss": 0.7322, + "step": 12949 + }, + { + "epoch": 0.39689836949859014, + "grad_norm": 1.5173063521611692, + "learning_rate": 6.866261244429435e-06, + "loss": 0.6256, + "step": 12950 + }, + { + "epoch": 0.39692901802133135, + "grad_norm": 0.7921127382776443, + "learning_rate": 6.86580078633953e-06, + "loss": 0.4604, + "step": 12951 + }, + { + "epoch": 0.39695966654407255, + "grad_norm": 1.908977251018469, + "learning_rate": 6.865340309865413e-06, + "loss": 0.6964, + "step": 12952 + }, + { + "epoch": 0.39699031506681376, + "grad_norm": 1.589035696412955, + "learning_rate": 6.864879815011622e-06, + "loss": 0.674, + "step": 12953 + }, + { + "epoch": 0.39702096358955496, + "grad_norm": 0.7518638764251384, + "learning_rate": 6.8644193017826935e-06, + "loss": 0.4624, + "step": 12954 + }, + { + "epoch": 0.39705161211229617, + "grad_norm": 1.7132911208669286, + "learning_rate": 6.863958770183163e-06, + "loss": 0.6167, + "step": 12955 + 
}, + { + "epoch": 0.3970822606350374, + "grad_norm": 1.8006094712032847, + "learning_rate": 6.86349822021757e-06, + "loss": 0.6669, + "step": 12956 + }, + { + "epoch": 0.3971129091577786, + "grad_norm": 1.4942020232821083, + "learning_rate": 6.863037651890453e-06, + "loss": 0.5325, + "step": 12957 + }, + { + "epoch": 0.3971435576805198, + "grad_norm": 0.7743523897004215, + "learning_rate": 6.862577065206349e-06, + "loss": 0.4484, + "step": 12958 + }, + { + "epoch": 0.397174206203261, + "grad_norm": 1.4680055628914592, + "learning_rate": 6.862116460169796e-06, + "loss": 0.5805, + "step": 12959 + }, + { + "epoch": 0.3972048547260022, + "grad_norm": 1.5721388373589482, + "learning_rate": 6.8616558367853336e-06, + "loss": 0.6252, + "step": 12960 + }, + { + "epoch": 0.3972355032487434, + "grad_norm": 1.8026136334297993, + "learning_rate": 6.861195195057501e-06, + "loss": 0.7374, + "step": 12961 + }, + { + "epoch": 0.3972661517714846, + "grad_norm": 1.7425066422237543, + "learning_rate": 6.860734534990834e-06, + "loss": 0.6479, + "step": 12962 + }, + { + "epoch": 0.3972968002942258, + "grad_norm": 1.8484888196410263, + "learning_rate": 6.860273856589874e-06, + "loss": 0.7537, + "step": 12963 + }, + { + "epoch": 0.397327448816967, + "grad_norm": 1.981690174353681, + "learning_rate": 6.859813159859161e-06, + "loss": 0.6344, + "step": 12964 + }, + { + "epoch": 0.3973580973397082, + "grad_norm": 1.4852062600334204, + "learning_rate": 6.859352444803233e-06, + "loss": 0.6687, + "step": 12965 + }, + { + "epoch": 0.39738874586244943, + "grad_norm": 0.8288131306067621, + "learning_rate": 6.858891711426627e-06, + "loss": 0.4439, + "step": 12966 + }, + { + "epoch": 0.39741939438519064, + "grad_norm": 2.183390070150591, + "learning_rate": 6.858430959733888e-06, + "loss": 0.6246, + "step": 12967 + }, + { + "epoch": 0.39745004290793184, + "grad_norm": 1.7101276899866764, + "learning_rate": 6.857970189729552e-06, + "loss": 0.6732, + "step": 12968 + }, + { + "epoch": 0.39748069143067305, + "grad_norm": 1.8745831137416524, + "learning_rate": 6.857509401418161e-06, + "loss": 0.7734, + "step": 12969 + }, + { + "epoch": 0.39751133995341426, + "grad_norm": 1.8497503270011701, + "learning_rate": 6.857048594804254e-06, + "loss": 0.6946, + "step": 12970 + }, + { + "epoch": 0.39754198847615546, + "grad_norm": 1.650658178061215, + "learning_rate": 6.856587769892372e-06, + "loss": 0.7492, + "step": 12971 + }, + { + "epoch": 0.39757263699889667, + "grad_norm": 0.7900345305326703, + "learning_rate": 6.8561269266870555e-06, + "loss": 0.4436, + "step": 12972 + }, + { + "epoch": 0.3976032855216379, + "grad_norm": 2.0211115398435306, + "learning_rate": 6.855666065192848e-06, + "loss": 0.7162, + "step": 12973 + }, + { + "epoch": 0.3976339340443791, + "grad_norm": 1.8399005817430043, + "learning_rate": 6.855205185414284e-06, + "loss": 0.6507, + "step": 12974 + }, + { + "epoch": 0.3976645825671203, + "grad_norm": 1.7180561192506592, + "learning_rate": 6.854744287355912e-06, + "loss": 0.6749, + "step": 12975 + }, + { + "epoch": 0.3976952310898615, + "grad_norm": 1.9677204871443217, + "learning_rate": 6.854283371022269e-06, + "loss": 0.6268, + "step": 12976 + }, + { + "epoch": 0.3977258796126027, + "grad_norm": 1.553854855502085, + "learning_rate": 6.853822436417896e-06, + "loss": 0.6611, + "step": 12977 + }, + { + "epoch": 0.3977565281353439, + "grad_norm": 1.8565637632087322, + "learning_rate": 6.853361483547338e-06, + "loss": 0.6833, + "step": 12978 + }, + { + "epoch": 0.3977871766580851, + "grad_norm": 1.883474821725989, + 
"learning_rate": 6.852900512415134e-06, + "loss": 0.6916, + "step": 12979 + }, + { + "epoch": 0.39781782518082626, + "grad_norm": 1.688915933399336, + "learning_rate": 6.852439523025829e-06, + "loss": 0.7097, + "step": 12980 + }, + { + "epoch": 0.39784847370356746, + "grad_norm": 1.6932444089370782, + "learning_rate": 6.851978515383962e-06, + "loss": 0.6792, + "step": 12981 + }, + { + "epoch": 0.39787912222630867, + "grad_norm": 1.6402176642898385, + "learning_rate": 6.851517489494076e-06, + "loss": 0.5936, + "step": 12982 + }, + { + "epoch": 0.3979097707490499, + "grad_norm": 1.7404408703838945, + "learning_rate": 6.851056445360714e-06, + "loss": 0.6337, + "step": 12983 + }, + { + "epoch": 0.3979404192717911, + "grad_norm": 1.7703689343303428, + "learning_rate": 6.850595382988422e-06, + "loss": 0.7113, + "step": 12984 + }, + { + "epoch": 0.3979710677945323, + "grad_norm": 1.8065795909604536, + "learning_rate": 6.850134302381738e-06, + "loss": 0.7227, + "step": 12985 + }, + { + "epoch": 0.3980017163172735, + "grad_norm": 1.678078557746351, + "learning_rate": 6.849673203545208e-06, + "loss": 0.6599, + "step": 12986 + }, + { + "epoch": 0.3980323648400147, + "grad_norm": 1.6782954018232799, + "learning_rate": 6.849212086483374e-06, + "loss": 0.6977, + "step": 12987 + }, + { + "epoch": 0.3980630133627559, + "grad_norm": 1.9878754978299051, + "learning_rate": 6.848750951200782e-06, + "loss": 0.696, + "step": 12988 + }, + { + "epoch": 0.3980936618854971, + "grad_norm": 1.762454166312318, + "learning_rate": 6.848289797701972e-06, + "loss": 0.5908, + "step": 12989 + }, + { + "epoch": 0.3981243104082383, + "grad_norm": 1.7263492881943905, + "learning_rate": 6.847828625991492e-06, + "loss": 0.7575, + "step": 12990 + }, + { + "epoch": 0.3981549589309795, + "grad_norm": 1.6364275885613735, + "learning_rate": 6.847367436073881e-06, + "loss": 0.678, + "step": 12991 + }, + { + "epoch": 0.3981856074537207, + "grad_norm": 2.059943385640417, + "learning_rate": 6.8469062279536865e-06, + "loss": 0.7559, + "step": 12992 + }, + { + "epoch": 0.39821625597646193, + "grad_norm": 1.6450964324654618, + "learning_rate": 6.8464450016354546e-06, + "loss": 0.7743, + "step": 12993 + }, + { + "epoch": 0.39824690449920314, + "grad_norm": 1.8159516440915382, + "learning_rate": 6.845983757123726e-06, + "loss": 0.713, + "step": 12994 + }, + { + "epoch": 0.39827755302194434, + "grad_norm": 1.5273056183598364, + "learning_rate": 6.845522494423047e-06, + "loss": 0.6088, + "step": 12995 + }, + { + "epoch": 0.39830820154468555, + "grad_norm": 0.8598230916564741, + "learning_rate": 6.845061213537962e-06, + "loss": 0.4394, + "step": 12996 + }, + { + "epoch": 0.39833885006742675, + "grad_norm": 1.9291626814051133, + "learning_rate": 6.844599914473019e-06, + "loss": 0.6984, + "step": 12997 + }, + { + "epoch": 0.39836949859016796, + "grad_norm": 1.6362579028064301, + "learning_rate": 6.844138597232759e-06, + "loss": 0.6107, + "step": 12998 + }, + { + "epoch": 0.39840014711290916, + "grad_norm": 1.6654218183799068, + "learning_rate": 6.843677261821732e-06, + "loss": 0.7478, + "step": 12999 + }, + { + "epoch": 0.39843079563565037, + "grad_norm": 1.8537939287118292, + "learning_rate": 6.843215908244478e-06, + "loss": 0.7647, + "step": 13000 + }, + { + "epoch": 0.3984614441583916, + "grad_norm": 1.8626602728106398, + "learning_rate": 6.842754536505549e-06, + "loss": 0.7859, + "step": 13001 + }, + { + "epoch": 0.3984920926811328, + "grad_norm": 1.6632729562607353, + "learning_rate": 6.842293146609485e-06, + "loss": 0.6601, + "step": 13002 + 
}, + { + "epoch": 0.398522741203874, + "grad_norm": 1.8611420247551558, + "learning_rate": 6.841831738560838e-06, + "loss": 0.7491, + "step": 13003 + }, + { + "epoch": 0.3985533897266152, + "grad_norm": 1.661575283125316, + "learning_rate": 6.841370312364151e-06, + "loss": 0.6102, + "step": 13004 + }, + { + "epoch": 0.3985840382493564, + "grad_norm": 1.88652704803404, + "learning_rate": 6.84090886802397e-06, + "loss": 0.5895, + "step": 13005 + }, + { + "epoch": 0.3986146867720976, + "grad_norm": 1.7893980247076844, + "learning_rate": 6.8404474055448434e-06, + "loss": 0.6936, + "step": 13006 + }, + { + "epoch": 0.3986453352948388, + "grad_norm": 0.8574993398938129, + "learning_rate": 6.8399859249313186e-06, + "loss": 0.4403, + "step": 13007 + }, + { + "epoch": 0.39867598381758, + "grad_norm": 1.897484291644445, + "learning_rate": 6.839524426187941e-06, + "loss": 0.7647, + "step": 13008 + }, + { + "epoch": 0.3987066323403212, + "grad_norm": 0.8625286580783705, + "learning_rate": 6.839062909319258e-06, + "loss": 0.4722, + "step": 13009 + }, + { + "epoch": 0.3987372808630624, + "grad_norm": 1.6956135477838752, + "learning_rate": 6.838601374329819e-06, + "loss": 0.7519, + "step": 13010 + }, + { + "epoch": 0.3987679293858036, + "grad_norm": 1.7448842883292257, + "learning_rate": 6.838139821224169e-06, + "loss": 0.5911, + "step": 13011 + }, + { + "epoch": 0.3987985779085448, + "grad_norm": 1.8217058659477083, + "learning_rate": 6.837678250006859e-06, + "loss": 0.6664, + "step": 13012 + }, + { + "epoch": 0.398829226431286, + "grad_norm": 1.6939652553154845, + "learning_rate": 6.837216660682432e-06, + "loss": 0.7042, + "step": 13013 + }, + { + "epoch": 0.3988598749540272, + "grad_norm": 1.7386074208919993, + "learning_rate": 6.83675505325544e-06, + "loss": 0.6624, + "step": 13014 + }, + { + "epoch": 0.3988905234767684, + "grad_norm": 1.778435797999328, + "learning_rate": 6.836293427730431e-06, + "loss": 0.661, + "step": 13015 + }, + { + "epoch": 0.3989211719995096, + "grad_norm": 1.7502708116549321, + "learning_rate": 6.835831784111955e-06, + "loss": 0.7196, + "step": 13016 + }, + { + "epoch": 0.3989518205222508, + "grad_norm": 1.6998078400857266, + "learning_rate": 6.835370122404555e-06, + "loss": 0.6476, + "step": 13017 + }, + { + "epoch": 0.398982469044992, + "grad_norm": 0.9937784245882989, + "learning_rate": 6.834908442612786e-06, + "loss": 0.4612, + "step": 13018 + }, + { + "epoch": 0.3990131175677332, + "grad_norm": 1.6641774098811661, + "learning_rate": 6.834446744741195e-06, + "loss": 0.5851, + "step": 13019 + }, + { + "epoch": 0.3990437660904744, + "grad_norm": 1.4446723758082787, + "learning_rate": 6.8339850287943285e-06, + "loss": 0.5618, + "step": 13020 + }, + { + "epoch": 0.39907441461321563, + "grad_norm": 1.76113540375855, + "learning_rate": 6.83352329477674e-06, + "loss": 0.6845, + "step": 13021 + }, + { + "epoch": 0.39910506313595684, + "grad_norm": 1.7255454393971668, + "learning_rate": 6.833061542692976e-06, + "loss": 0.6441, + "step": 13022 + }, + { + "epoch": 0.39913571165869804, + "grad_norm": 1.7250313421398986, + "learning_rate": 6.83259977254759e-06, + "loss": 0.6134, + "step": 13023 + }, + { + "epoch": 0.39916636018143925, + "grad_norm": 1.5575736552163721, + "learning_rate": 6.8321379843451286e-06, + "loss": 0.6291, + "step": 13024 + }, + { + "epoch": 0.39919700870418046, + "grad_norm": 1.844784837086635, + "learning_rate": 6.831676178090142e-06, + "loss": 0.7399, + "step": 13025 + }, + { + "epoch": 0.39922765722692166, + "grad_norm": 1.6860327925221514, + 
"learning_rate": 6.831214353787182e-06, + "loss": 0.6129, + "step": 13026 + }, + { + "epoch": 0.39925830574966287, + "grad_norm": 1.638101427297558, + "learning_rate": 6.8307525114407994e-06, + "loss": 0.7425, + "step": 13027 + }, + { + "epoch": 0.3992889542724041, + "grad_norm": 1.7063342992369397, + "learning_rate": 6.830290651055541e-06, + "loss": 0.6067, + "step": 13028 + }, + { + "epoch": 0.3993196027951453, + "grad_norm": 0.8209143724709959, + "learning_rate": 6.8298287726359625e-06, + "loss": 0.4489, + "step": 13029 + }, + { + "epoch": 0.3993502513178865, + "grad_norm": 1.8404080589490102, + "learning_rate": 6.829366876186614e-06, + "loss": 0.6857, + "step": 13030 + }, + { + "epoch": 0.3993808998406277, + "grad_norm": 1.8194777927050798, + "learning_rate": 6.828904961712043e-06, + "loss": 0.6005, + "step": 13031 + }, + { + "epoch": 0.3994115483633689, + "grad_norm": 1.9474129474068074, + "learning_rate": 6.828443029216805e-06, + "loss": 0.7202, + "step": 13032 + }, + { + "epoch": 0.3994421968861101, + "grad_norm": 1.8061016185307353, + "learning_rate": 6.82798107870545e-06, + "loss": 0.6613, + "step": 13033 + }, + { + "epoch": 0.3994728454088513, + "grad_norm": 1.7344284919916453, + "learning_rate": 6.82751911018253e-06, + "loss": 0.7359, + "step": 13034 + }, + { + "epoch": 0.3995034939315925, + "grad_norm": 1.5502589220982734, + "learning_rate": 6.8270571236525955e-06, + "loss": 0.6905, + "step": 13035 + }, + { + "epoch": 0.3995341424543337, + "grad_norm": 1.713502602709454, + "learning_rate": 6.8265951191202005e-06, + "loss": 0.7964, + "step": 13036 + }, + { + "epoch": 0.3995647909770749, + "grad_norm": 1.6890644807933473, + "learning_rate": 6.826133096589895e-06, + "loss": 0.7575, + "step": 13037 + }, + { + "epoch": 0.39959543949981613, + "grad_norm": 1.759141681454577, + "learning_rate": 6.825671056066237e-06, + "loss": 0.6992, + "step": 13038 + }, + { + "epoch": 0.39962608802255734, + "grad_norm": 1.5371794394828213, + "learning_rate": 6.8252089975537705e-06, + "loss": 0.653, + "step": 13039 + }, + { + "epoch": 0.39965673654529854, + "grad_norm": 1.8371788377256324, + "learning_rate": 6.824746921057054e-06, + "loss": 0.6889, + "step": 13040 + }, + { + "epoch": 0.39968738506803975, + "grad_norm": 1.875803971447146, + "learning_rate": 6.824284826580639e-06, + "loss": 0.6855, + "step": 13041 + }, + { + "epoch": 0.3997180335907809, + "grad_norm": 1.7731766773963902, + "learning_rate": 6.82382271412908e-06, + "loss": 0.7116, + "step": 13042 + }, + { + "epoch": 0.3997486821135221, + "grad_norm": 1.6021878016450704, + "learning_rate": 6.823360583706928e-06, + "loss": 0.5207, + "step": 13043 + }, + { + "epoch": 0.3997793306362633, + "grad_norm": 0.8728681959479901, + "learning_rate": 6.822898435318739e-06, + "loss": 0.4454, + "step": 13044 + }, + { + "epoch": 0.3998099791590045, + "grad_norm": 1.5494970073404777, + "learning_rate": 6.822436268969064e-06, + "loss": 0.6635, + "step": 13045 + }, + { + "epoch": 0.3998406276817457, + "grad_norm": 1.702161862701147, + "learning_rate": 6.821974084662458e-06, + "loss": 0.5814, + "step": 13046 + }, + { + "epoch": 0.3998712762044869, + "grad_norm": 0.8043572499014161, + "learning_rate": 6.821511882403477e-06, + "loss": 0.4564, + "step": 13047 + }, + { + "epoch": 0.39990192472722813, + "grad_norm": 1.7467356312683537, + "learning_rate": 6.82104966219667e-06, + "loss": 0.6264, + "step": 13048 + }, + { + "epoch": 0.39993257324996934, + "grad_norm": 0.8003571633468967, + "learning_rate": 6.820587424046598e-06, + "loss": 0.4451, + "step": 13049 + }, 
+ { + "epoch": 0.39996322177271054, + "grad_norm": 1.7983831029662187, + "learning_rate": 6.820125167957812e-06, + "loss": 0.5625, + "step": 13050 + }, + { + "epoch": 0.39999387029545175, + "grad_norm": 1.7311635313134968, + "learning_rate": 6.819662893934866e-06, + "loss": 0.6657, + "step": 13051 + }, + { + "epoch": 0.40002451881819295, + "grad_norm": 0.7875613709469863, + "learning_rate": 6.819200601982316e-06, + "loss": 0.4408, + "step": 13052 + }, + { + "epoch": 0.40005516734093416, + "grad_norm": 0.7490424911182856, + "learning_rate": 6.818738292104719e-06, + "loss": 0.4644, + "step": 13053 + }, + { + "epoch": 0.40008581586367536, + "grad_norm": 0.7729336070045049, + "learning_rate": 6.818275964306624e-06, + "loss": 0.4515, + "step": 13054 + }, + { + "epoch": 0.40011646438641657, + "grad_norm": 0.7564066821874743, + "learning_rate": 6.817813618592595e-06, + "loss": 0.4691, + "step": 13055 + }, + { + "epoch": 0.4001471129091578, + "grad_norm": 1.6214130646804639, + "learning_rate": 6.817351254967179e-06, + "loss": 0.7572, + "step": 13056 + }, + { + "epoch": 0.400177761431899, + "grad_norm": 1.9187831115199396, + "learning_rate": 6.816888873434939e-06, + "loss": 0.7226, + "step": 13057 + }, + { + "epoch": 0.4002084099546402, + "grad_norm": 1.5423602688125984, + "learning_rate": 6.816426474000428e-06, + "loss": 0.69, + "step": 13058 + }, + { + "epoch": 0.4002390584773814, + "grad_norm": 1.9055019950719079, + "learning_rate": 6.815964056668203e-06, + "loss": 0.6823, + "step": 13059 + }, + { + "epoch": 0.4002697070001226, + "grad_norm": 1.7359815244571408, + "learning_rate": 6.815501621442817e-06, + "loss": 0.7111, + "step": 13060 + }, + { + "epoch": 0.4003003555228638, + "grad_norm": 1.6035980892954151, + "learning_rate": 6.815039168328831e-06, + "loss": 0.6965, + "step": 13061 + }, + { + "epoch": 0.400331004045605, + "grad_norm": 1.813344558195773, + "learning_rate": 6.814576697330799e-06, + "loss": 0.7263, + "step": 13062 + }, + { + "epoch": 0.4003616525683462, + "grad_norm": 1.696934507117339, + "learning_rate": 6.814114208453277e-06, + "loss": 0.6449, + "step": 13063 + }, + { + "epoch": 0.4003923010910874, + "grad_norm": 1.5499616498535271, + "learning_rate": 6.813651701700826e-06, + "loss": 0.6241, + "step": 13064 + }, + { + "epoch": 0.4004229496138286, + "grad_norm": 0.8896268300167294, + "learning_rate": 6.813189177078e-06, + "loss": 0.4634, + "step": 13065 + }, + { + "epoch": 0.40045359813656983, + "grad_norm": 1.7839860559208738, + "learning_rate": 6.812726634589357e-06, + "loss": 0.6493, + "step": 13066 + }, + { + "epoch": 0.40048424665931104, + "grad_norm": 1.7333850986098167, + "learning_rate": 6.812264074239454e-06, + "loss": 0.6278, + "step": 13067 + }, + { + "epoch": 0.40051489518205224, + "grad_norm": 2.0444617155470524, + "learning_rate": 6.8118014960328506e-06, + "loss": 0.705, + "step": 13068 + }, + { + "epoch": 0.40054554370479345, + "grad_norm": 2.031329340971062, + "learning_rate": 6.811338899974102e-06, + "loss": 0.7383, + "step": 13069 + }, + { + "epoch": 0.40057619222753466, + "grad_norm": 1.70493521572197, + "learning_rate": 6.8108762860677695e-06, + "loss": 0.6719, + "step": 13070 + }, + { + "epoch": 0.40060684075027586, + "grad_norm": 1.6619916193873505, + "learning_rate": 6.810413654318409e-06, + "loss": 0.6718, + "step": 13071 + }, + { + "epoch": 0.40063748927301707, + "grad_norm": 1.780990668701932, + "learning_rate": 6.809951004730578e-06, + "loss": 0.6308, + "step": 13072 + }, + { + "epoch": 0.4006681377957582, + "grad_norm": 1.51013577206518, + 
"learning_rate": 6.8094883373088385e-06, + "loss": 0.578, + "step": 13073 + }, + { + "epoch": 0.4006987863184994, + "grad_norm": 1.8588473097540712, + "learning_rate": 6.809025652057747e-06, + "loss": 0.6817, + "step": 13074 + }, + { + "epoch": 0.40072943484124063, + "grad_norm": 1.6806993194025774, + "learning_rate": 6.808562948981863e-06, + "loss": 0.6276, + "step": 13075 + }, + { + "epoch": 0.40076008336398183, + "grad_norm": 1.7041692080917294, + "learning_rate": 6.808100228085745e-06, + "loss": 0.731, + "step": 13076 + }, + { + "epoch": 0.40079073188672304, + "grad_norm": 1.4106432638266044, + "learning_rate": 6.807637489373954e-06, + "loss": 0.6215, + "step": 13077 + }, + { + "epoch": 0.40082138040946425, + "grad_norm": 1.8111906175342496, + "learning_rate": 6.807174732851046e-06, + "loss": 0.6583, + "step": 13078 + }, + { + "epoch": 0.40085202893220545, + "grad_norm": 1.4369668958900979, + "learning_rate": 6.806711958521584e-06, + "loss": 0.5576, + "step": 13079 + }, + { + "epoch": 0.40088267745494666, + "grad_norm": 1.5574388790799685, + "learning_rate": 6.806249166390129e-06, + "loss": 0.5684, + "step": 13080 + }, + { + "epoch": 0.40091332597768786, + "grad_norm": 1.8104553794928946, + "learning_rate": 6.805786356461237e-06, + "loss": 0.6858, + "step": 13081 + }, + { + "epoch": 0.40094397450042907, + "grad_norm": 1.5028954993452752, + "learning_rate": 6.80532352873947e-06, + "loss": 0.6268, + "step": 13082 + }, + { + "epoch": 0.4009746230231703, + "grad_norm": 1.5636233990816801, + "learning_rate": 6.804860683229387e-06, + "loss": 0.5345, + "step": 13083 + }, + { + "epoch": 0.4010052715459115, + "grad_norm": 0.9159089627719667, + "learning_rate": 6.804397819935552e-06, + "loss": 0.4433, + "step": 13084 + }, + { + "epoch": 0.4010359200686527, + "grad_norm": 1.6315295768980722, + "learning_rate": 6.803934938862523e-06, + "loss": 0.5718, + "step": 13085 + }, + { + "epoch": 0.4010665685913939, + "grad_norm": 1.6511192772551482, + "learning_rate": 6.803472040014862e-06, + "loss": 0.6734, + "step": 13086 + }, + { + "epoch": 0.4010972171141351, + "grad_norm": 1.8476404149690153, + "learning_rate": 6.803009123397128e-06, + "loss": 0.7064, + "step": 13087 + }, + { + "epoch": 0.4011278656368763, + "grad_norm": 1.705924952788557, + "learning_rate": 6.802546189013886e-06, + "loss": 0.69, + "step": 13088 + }, + { + "epoch": 0.4011585141596175, + "grad_norm": 2.0739031807771573, + "learning_rate": 6.802083236869692e-06, + "loss": 0.6466, + "step": 13089 + }, + { + "epoch": 0.4011891626823587, + "grad_norm": 1.7622407756291683, + "learning_rate": 6.801620266969113e-06, + "loss": 0.6361, + "step": 13090 + }, + { + "epoch": 0.4012198112050999, + "grad_norm": 1.93865344599298, + "learning_rate": 6.801157279316708e-06, + "loss": 0.6636, + "step": 13091 + }, + { + "epoch": 0.4012504597278411, + "grad_norm": 1.6307792140679065, + "learning_rate": 6.800694273917041e-06, + "loss": 0.6347, + "step": 13092 + }, + { + "epoch": 0.40128110825058233, + "grad_norm": 1.6534445050757947, + "learning_rate": 6.80023125077467e-06, + "loss": 0.6221, + "step": 13093 + }, + { + "epoch": 0.40131175677332354, + "grad_norm": 1.7444013634907214, + "learning_rate": 6.799768209894162e-06, + "loss": 0.733, + "step": 13094 + }, + { + "epoch": 0.40134240529606474, + "grad_norm": 1.7054061761188941, + "learning_rate": 6.799305151280076e-06, + "loss": 0.6809, + "step": 13095 + }, + { + "epoch": 0.40137305381880595, + "grad_norm": 1.644183950157889, + "learning_rate": 6.798842074936978e-06, + "loss": 0.6035, + "step": 13096 + }, 
+ { + "epoch": 0.40140370234154715, + "grad_norm": 1.7643164503674456, + "learning_rate": 6.7983789808694255e-06, + "loss": 0.6237, + "step": 13097 + }, + { + "epoch": 0.40143435086428836, + "grad_norm": 1.625295511252115, + "learning_rate": 6.7979158690819865e-06, + "loss": 0.6582, + "step": 13098 + }, + { + "epoch": 0.40146499938702956, + "grad_norm": 1.8284504102528012, + "learning_rate": 6.797452739579223e-06, + "loss": 0.6778, + "step": 13099 + }, + { + "epoch": 0.40149564790977077, + "grad_norm": 0.829551060694743, + "learning_rate": 6.796989592365697e-06, + "loss": 0.435, + "step": 13100 + }, + { + "epoch": 0.401526296432512, + "grad_norm": 1.6147139135245956, + "learning_rate": 6.796526427445973e-06, + "loss": 0.663, + "step": 13101 + }, + { + "epoch": 0.4015569449552532, + "grad_norm": 1.8337047680716523, + "learning_rate": 6.796063244824613e-06, + "loss": 0.737, + "step": 13102 + }, + { + "epoch": 0.4015875934779944, + "grad_norm": 4.6833788445246585, + "learning_rate": 6.7956000445061856e-06, + "loss": 0.808, + "step": 13103 + }, + { + "epoch": 0.40161824200073554, + "grad_norm": 1.592626984558874, + "learning_rate": 6.795136826495249e-06, + "loss": 0.6031, + "step": 13104 + }, + { + "epoch": 0.40164889052347674, + "grad_norm": 2.045579328424955, + "learning_rate": 6.7946735907963715e-06, + "loss": 0.7007, + "step": 13105 + }, + { + "epoch": 0.40167953904621795, + "grad_norm": 1.6891985683920399, + "learning_rate": 6.794210337414113e-06, + "loss": 0.6705, + "step": 13106 + }, + { + "epoch": 0.40171018756895915, + "grad_norm": 1.689439293100246, + "learning_rate": 6.793747066353044e-06, + "loss": 0.6739, + "step": 13107 + }, + { + "epoch": 0.40174083609170036, + "grad_norm": 1.5683335479028904, + "learning_rate": 6.793283777617725e-06, + "loss": 0.6942, + "step": 13108 + }, + { + "epoch": 0.40177148461444157, + "grad_norm": 1.8412092691209057, + "learning_rate": 6.792820471212724e-06, + "loss": 0.6969, + "step": 13109 + }, + { + "epoch": 0.40180213313718277, + "grad_norm": 1.5530699679550541, + "learning_rate": 6.792357147142601e-06, + "loss": 0.6588, + "step": 13110 + }, + { + "epoch": 0.401832781659924, + "grad_norm": 1.8641676545719184, + "learning_rate": 6.791893805411928e-06, + "loss": 0.7133, + "step": 13111 + }, + { + "epoch": 0.4018634301826652, + "grad_norm": 1.5491521358872868, + "learning_rate": 6.791430446025263e-06, + "loss": 0.5829, + "step": 13112 + }, + { + "epoch": 0.4018940787054064, + "grad_norm": 1.83865038318167, + "learning_rate": 6.790967068987177e-06, + "loss": 0.7299, + "step": 13113 + }, + { + "epoch": 0.4019247272281476, + "grad_norm": 1.7913210087438602, + "learning_rate": 6.790503674302235e-06, + "loss": 0.6473, + "step": 13114 + }, + { + "epoch": 0.4019553757508888, + "grad_norm": 0.8190066207272246, + "learning_rate": 6.7900402619750015e-06, + "loss": 0.4599, + "step": 13115 + }, + { + "epoch": 0.40198602427363, + "grad_norm": 1.8293188822195419, + "learning_rate": 6.789576832010044e-06, + "loss": 0.6249, + "step": 13116 + }, + { + "epoch": 0.4020166727963712, + "grad_norm": 1.7285397382819787, + "learning_rate": 6.7891133844119276e-06, + "loss": 0.661, + "step": 13117 + }, + { + "epoch": 0.4020473213191124, + "grad_norm": 0.7798714413558965, + "learning_rate": 6.788649919185218e-06, + "loss": 0.46, + "step": 13118 + }, + { + "epoch": 0.4020779698418536, + "grad_norm": 0.8039827744433375, + "learning_rate": 6.788186436334485e-06, + "loss": 0.4637, + "step": 13119 + }, + { + "epoch": 0.4021086183645948, + "grad_norm": 1.6603393091452212, + 
"learning_rate": 6.787722935864294e-06, + "loss": 0.645, + "step": 13120 + }, + { + "epoch": 0.40213926688733603, + "grad_norm": 1.6365932180094496, + "learning_rate": 6.787259417779209e-06, + "loss": 0.691, + "step": 13121 + }, + { + "epoch": 0.40216991541007724, + "grad_norm": 1.5383970535984646, + "learning_rate": 6.786795882083801e-06, + "loss": 0.641, + "step": 13122 + }, + { + "epoch": 0.40220056393281844, + "grad_norm": 0.7593807570276212, + "learning_rate": 6.7863323287826365e-06, + "loss": 0.4508, + "step": 13123 + }, + { + "epoch": 0.40223121245555965, + "grad_norm": 0.735686814293279, + "learning_rate": 6.785868757880283e-06, + "loss": 0.4497, + "step": 13124 + }, + { + "epoch": 0.40226186097830086, + "grad_norm": 0.758904134447392, + "learning_rate": 6.785405169381305e-06, + "loss": 0.4342, + "step": 13125 + }, + { + "epoch": 0.40229250950104206, + "grad_norm": 1.64565177456025, + "learning_rate": 6.784941563290276e-06, + "loss": 0.6369, + "step": 13126 + }, + { + "epoch": 0.40232315802378327, + "grad_norm": 1.641681436536184, + "learning_rate": 6.78447793961176e-06, + "loss": 0.6975, + "step": 13127 + }, + { + "epoch": 0.4023538065465245, + "grad_norm": 1.5894080044280543, + "learning_rate": 6.784014298350326e-06, + "loss": 0.681, + "step": 13128 + }, + { + "epoch": 0.4023844550692657, + "grad_norm": 1.7157154714801512, + "learning_rate": 6.783550639510542e-06, + "loss": 0.7338, + "step": 13129 + }, + { + "epoch": 0.4024151035920069, + "grad_norm": 0.7708733596407032, + "learning_rate": 6.783086963096979e-06, + "loss": 0.444, + "step": 13130 + }, + { + "epoch": 0.4024457521147481, + "grad_norm": 1.7941201196028298, + "learning_rate": 6.782623269114203e-06, + "loss": 0.6424, + "step": 13131 + }, + { + "epoch": 0.4024764006374893, + "grad_norm": 1.572758104961907, + "learning_rate": 6.782159557566783e-06, + "loss": 0.6229, + "step": 13132 + }, + { + "epoch": 0.4025070491602305, + "grad_norm": 1.8099694650369627, + "learning_rate": 6.7816958284592896e-06, + "loss": 0.6889, + "step": 13133 + }, + { + "epoch": 0.4025376976829717, + "grad_norm": 1.4951712363101872, + "learning_rate": 6.781232081796292e-06, + "loss": 0.6716, + "step": 13134 + }, + { + "epoch": 0.40256834620571286, + "grad_norm": 1.8312235381473265, + "learning_rate": 6.780768317582358e-06, + "loss": 0.6916, + "step": 13135 + }, + { + "epoch": 0.40259899472845406, + "grad_norm": 1.5987162893760773, + "learning_rate": 6.7803045358220575e-06, + "loss": 0.6676, + "step": 13136 + }, + { + "epoch": 0.40262964325119527, + "grad_norm": 0.8560043220228325, + "learning_rate": 6.7798407365199624e-06, + "loss": 0.4459, + "step": 13137 + }, + { + "epoch": 0.4026602917739365, + "grad_norm": 1.7950719842206233, + "learning_rate": 6.7793769196806414e-06, + "loss": 0.6415, + "step": 13138 + }, + { + "epoch": 0.4026909402966777, + "grad_norm": 0.7849427939497611, + "learning_rate": 6.778913085308663e-06, + "loss": 0.4565, + "step": 13139 + }, + { + "epoch": 0.4027215888194189, + "grad_norm": 1.6296121327080832, + "learning_rate": 6.7784492334086e-06, + "loss": 0.5987, + "step": 13140 + }, + { + "epoch": 0.4027522373421601, + "grad_norm": 1.815884723375786, + "learning_rate": 6.77798536398502e-06, + "loss": 0.7236, + "step": 13141 + }, + { + "epoch": 0.4027828858649013, + "grad_norm": 0.8080296428760723, + "learning_rate": 6.777521477042497e-06, + "loss": 0.4738, + "step": 13142 + }, + { + "epoch": 0.4028135343876425, + "grad_norm": 1.5696520957239188, + "learning_rate": 6.777057572585599e-06, + "loss": 0.5579, + "step": 13143 + }, + { 
+ "epoch": 0.4028441829103837, + "grad_norm": 1.749586819900728, + "learning_rate": 6.776593650618899e-06, + "loss": 0.755, + "step": 13144 + }, + { + "epoch": 0.4028748314331249, + "grad_norm": 1.5401494674590945, + "learning_rate": 6.776129711146966e-06, + "loss": 0.6903, + "step": 13145 + }, + { + "epoch": 0.4029054799558661, + "grad_norm": 1.8861435042468153, + "learning_rate": 6.775665754174374e-06, + "loss": 0.7099, + "step": 13146 + }, + { + "epoch": 0.4029361284786073, + "grad_norm": 1.8078953529255786, + "learning_rate": 6.775201779705692e-06, + "loss": 0.7252, + "step": 13147 + }, + { + "epoch": 0.40296677700134853, + "grad_norm": 1.79411448428115, + "learning_rate": 6.774737787745492e-06, + "loss": 0.6372, + "step": 13148 + }, + { + "epoch": 0.40299742552408974, + "grad_norm": 1.6989162063043743, + "learning_rate": 6.774273778298347e-06, + "loss": 0.7538, + "step": 13149 + }, + { + "epoch": 0.40302807404683094, + "grad_norm": 1.699374739139116, + "learning_rate": 6.773809751368831e-06, + "loss": 0.6303, + "step": 13150 + }, + { + "epoch": 0.40305872256957215, + "grad_norm": 0.8459699339891429, + "learning_rate": 6.773345706961509e-06, + "loss": 0.4462, + "step": 13151 + }, + { + "epoch": 0.40308937109231335, + "grad_norm": 1.5647768197370078, + "learning_rate": 6.772881645080962e-06, + "loss": 0.6008, + "step": 13152 + }, + { + "epoch": 0.40312001961505456, + "grad_norm": 1.6400122376607578, + "learning_rate": 6.772417565731756e-06, + "loss": 0.7365, + "step": 13153 + }, + { + "epoch": 0.40315066813779576, + "grad_norm": 1.58254744019157, + "learning_rate": 6.771953468918467e-06, + "loss": 0.586, + "step": 13154 + }, + { + "epoch": 0.40318131666053697, + "grad_norm": 1.6146232069751103, + "learning_rate": 6.771489354645668e-06, + "loss": 0.6608, + "step": 13155 + }, + { + "epoch": 0.4032119651832782, + "grad_norm": 1.4259223224525759, + "learning_rate": 6.771025222917931e-06, + "loss": 0.6365, + "step": 13156 + }, + { + "epoch": 0.4032426137060194, + "grad_norm": 1.613833307833611, + "learning_rate": 6.77056107373983e-06, + "loss": 0.6949, + "step": 13157 + }, + { + "epoch": 0.4032732622287606, + "grad_norm": 1.798732933766527, + "learning_rate": 6.770096907115935e-06, + "loss": 0.685, + "step": 13158 + }, + { + "epoch": 0.4033039107515018, + "grad_norm": 1.8086699520824714, + "learning_rate": 6.769632723050824e-06, + "loss": 0.6857, + "step": 13159 + }, + { + "epoch": 0.403334559274243, + "grad_norm": 1.8700447689767576, + "learning_rate": 6.769168521549069e-06, + "loss": 0.6899, + "step": 13160 + }, + { + "epoch": 0.4033652077969842, + "grad_norm": 1.678404272810012, + "learning_rate": 6.768704302615245e-06, + "loss": 0.6502, + "step": 13161 + }, + { + "epoch": 0.4033958563197254, + "grad_norm": 1.7549577277321664, + "learning_rate": 6.768240066253923e-06, + "loss": 0.6623, + "step": 13162 + }, + { + "epoch": 0.4034265048424666, + "grad_norm": 2.562266333354294, + "learning_rate": 6.767775812469679e-06, + "loss": 0.8049, + "step": 13163 + }, + { + "epoch": 0.4034571533652078, + "grad_norm": 1.6793618494775373, + "learning_rate": 6.767311541267089e-06, + "loss": 0.6975, + "step": 13164 + }, + { + "epoch": 0.403487801887949, + "grad_norm": 1.691178994326731, + "learning_rate": 6.766847252650726e-06, + "loss": 0.7346, + "step": 13165 + }, + { + "epoch": 0.4035184504106902, + "grad_norm": 0.7964964884897435, + "learning_rate": 6.766382946625164e-06, + "loss": 0.4487, + "step": 13166 + }, + { + "epoch": 0.4035490989334314, + "grad_norm": 1.7257499499499565, + "learning_rate": 
6.76591862319498e-06, + "loss": 0.6475, + "step": 13167 + }, + { + "epoch": 0.4035797474561726, + "grad_norm": 0.7976638258888686, + "learning_rate": 6.7654542823647475e-06, + "loss": 0.4866, + "step": 13168 + }, + { + "epoch": 0.4036103959789138, + "grad_norm": 1.6137274371943824, + "learning_rate": 6.764989924139043e-06, + "loss": 0.5307, + "step": 13169 + }, + { + "epoch": 0.403641044501655, + "grad_norm": 1.6070678736961206, + "learning_rate": 6.764525548522441e-06, + "loss": 0.653, + "step": 13170 + }, + { + "epoch": 0.4036716930243962, + "grad_norm": 1.714281447573636, + "learning_rate": 6.764061155519515e-06, + "loss": 0.6073, + "step": 13171 + }, + { + "epoch": 0.4037023415471374, + "grad_norm": 1.8238785441235241, + "learning_rate": 6.763596745134845e-06, + "loss": 0.6712, + "step": 13172 + }, + { + "epoch": 0.4037329900698786, + "grad_norm": 1.781152143535151, + "learning_rate": 6.763132317373004e-06, + "loss": 0.6888, + "step": 13173 + }, + { + "epoch": 0.4037636385926198, + "grad_norm": 1.6446167397508558, + "learning_rate": 6.762667872238572e-06, + "loss": 0.5877, + "step": 13174 + }, + { + "epoch": 0.40379428711536103, + "grad_norm": 1.835773836509297, + "learning_rate": 6.762203409736119e-06, + "loss": 0.705, + "step": 13175 + }, + { + "epoch": 0.40382493563810223, + "grad_norm": 1.6349288265352901, + "learning_rate": 6.761738929870227e-06, + "loss": 0.5993, + "step": 13176 + }, + { + "epoch": 0.40385558416084344, + "grad_norm": 1.7650793542408012, + "learning_rate": 6.761274432645471e-06, + "loss": 0.6581, + "step": 13177 + }, + { + "epoch": 0.40388623268358465, + "grad_norm": 0.8651740882776284, + "learning_rate": 6.7608099180664255e-06, + "loss": 0.4644, + "step": 13178 + }, + { + "epoch": 0.40391688120632585, + "grad_norm": 1.875813814045223, + "learning_rate": 6.76034538613767e-06, + "loss": 0.6612, + "step": 13179 + }, + { + "epoch": 0.40394752972906706, + "grad_norm": 1.741546335721029, + "learning_rate": 6.759880836863781e-06, + "loss": 0.6972, + "step": 13180 + }, + { + "epoch": 0.40397817825180826, + "grad_norm": 1.8095974986419052, + "learning_rate": 6.759416270249337e-06, + "loss": 0.7, + "step": 13181 + }, + { + "epoch": 0.40400882677454947, + "grad_norm": 1.6264604561359646, + "learning_rate": 6.758951686298913e-06, + "loss": 0.6519, + "step": 13182 + }, + { + "epoch": 0.4040394752972907, + "grad_norm": 1.689305225460867, + "learning_rate": 6.758487085017088e-06, + "loss": 0.6279, + "step": 13183 + }, + { + "epoch": 0.4040701238200319, + "grad_norm": 1.76748003915936, + "learning_rate": 6.7580224664084405e-06, + "loss": 0.6663, + "step": 13184 + }, + { + "epoch": 0.4041007723427731, + "grad_norm": 1.6885582776202277, + "learning_rate": 6.757557830477548e-06, + "loss": 0.671, + "step": 13185 + }, + { + "epoch": 0.4041314208655143, + "grad_norm": 1.7762058028965713, + "learning_rate": 6.757093177228987e-06, + "loss": 0.711, + "step": 13186 + }, + { + "epoch": 0.4041620693882555, + "grad_norm": 1.896831888335628, + "learning_rate": 6.756628506667339e-06, + "loss": 0.7011, + "step": 13187 + }, + { + "epoch": 0.4041927179109967, + "grad_norm": 1.7738965638981554, + "learning_rate": 6.7561638187971804e-06, + "loss": 0.7024, + "step": 13188 + }, + { + "epoch": 0.4042233664337379, + "grad_norm": 1.6424380991651415, + "learning_rate": 6.755699113623091e-06, + "loss": 0.6301, + "step": 13189 + }, + { + "epoch": 0.4042540149564791, + "grad_norm": 1.771734625431502, + "learning_rate": 6.755234391149646e-06, + "loss": 0.6966, + "step": 13190 + }, + { + "epoch": 
0.4042846634792203, + "grad_norm": 1.6937053844934418, + "learning_rate": 6.754769651381431e-06, + "loss": 0.6297, + "step": 13191 + }, + { + "epoch": 0.4043153120019615, + "grad_norm": 1.8480189774034506, + "learning_rate": 6.75430489432302e-06, + "loss": 0.6717, + "step": 13192 + }, + { + "epoch": 0.40434596052470273, + "grad_norm": 1.69339640517167, + "learning_rate": 6.753840119978995e-06, + "loss": 0.7144, + "step": 13193 + }, + { + "epoch": 0.40437660904744394, + "grad_norm": 1.673490915801232, + "learning_rate": 6.753375328353933e-06, + "loss": 0.622, + "step": 13194 + }, + { + "epoch": 0.40440725757018514, + "grad_norm": 2.9489384982161377, + "learning_rate": 6.752910519452417e-06, + "loss": 0.7755, + "step": 13195 + }, + { + "epoch": 0.40443790609292635, + "grad_norm": 1.7096252381833774, + "learning_rate": 6.752445693279024e-06, + "loss": 0.7465, + "step": 13196 + }, + { + "epoch": 0.4044685546156675, + "grad_norm": 1.7306979985950695, + "learning_rate": 6.751980849838336e-06, + "loss": 0.6955, + "step": 13197 + }, + { + "epoch": 0.4044992031384087, + "grad_norm": 1.6295103219005374, + "learning_rate": 6.7515159891349314e-06, + "loss": 0.6099, + "step": 13198 + }, + { + "epoch": 0.4045298516611499, + "grad_norm": 1.6475262035792766, + "learning_rate": 6.751051111173391e-06, + "loss": 0.6558, + "step": 13199 + }, + { + "epoch": 0.4045605001838911, + "grad_norm": 1.8604257555642811, + "learning_rate": 6.750586215958299e-06, + "loss": 0.6342, + "step": 13200 + }, + { + "epoch": 0.4045911487066323, + "grad_norm": 1.7803700258392503, + "learning_rate": 6.75012130349423e-06, + "loss": 0.6429, + "step": 13201 + }, + { + "epoch": 0.4046217972293735, + "grad_norm": 1.6624581099503128, + "learning_rate": 6.749656373785769e-06, + "loss": 0.727, + "step": 13202 + }, + { + "epoch": 0.40465244575211473, + "grad_norm": 1.74700233123295, + "learning_rate": 6.749191426837496e-06, + "loss": 0.5991, + "step": 13203 + }, + { + "epoch": 0.40468309427485594, + "grad_norm": 1.5371437827786494, + "learning_rate": 6.748726462653994e-06, + "loss": 0.5929, + "step": 13204 + }, + { + "epoch": 0.40471374279759714, + "grad_norm": 1.7080720253278485, + "learning_rate": 6.7482614812398405e-06, + "loss": 0.6575, + "step": 13205 + }, + { + "epoch": 0.40474439132033835, + "grad_norm": 1.6946267386091358, + "learning_rate": 6.747796482599621e-06, + "loss": 0.5942, + "step": 13206 + }, + { + "epoch": 0.40477503984307955, + "grad_norm": 1.6633224402478164, + "learning_rate": 6.747331466737914e-06, + "loss": 0.6238, + "step": 13207 + }, + { + "epoch": 0.40480568836582076, + "grad_norm": 1.9141086501776934, + "learning_rate": 6.7468664336593044e-06, + "loss": 0.5826, + "step": 13208 + }, + { + "epoch": 0.40483633688856197, + "grad_norm": 1.5322386515913176, + "learning_rate": 6.746401383368372e-06, + "loss": 0.6598, + "step": 13209 + }, + { + "epoch": 0.40486698541130317, + "grad_norm": 1.8555807665405595, + "learning_rate": 6.7459363158697e-06, + "loss": 0.6768, + "step": 13210 + }, + { + "epoch": 0.4048976339340444, + "grad_norm": 1.774881247581876, + "learning_rate": 6.745471231167871e-06, + "loss": 0.6185, + "step": 13211 + }, + { + "epoch": 0.4049282824567856, + "grad_norm": 1.736072411390974, + "learning_rate": 6.745006129267467e-06, + "loss": 0.6988, + "step": 13212 + }, + { + "epoch": 0.4049589309795268, + "grad_norm": 0.830401998513927, + "learning_rate": 6.7445410101730716e-06, + "loss": 0.4602, + "step": 13213 + }, + { + "epoch": 0.404989579502268, + "grad_norm": 1.7776702801393338, + "learning_rate": 
6.744075873889266e-06, + "loss": 0.7298, + "step": 13214 + }, + { + "epoch": 0.4050202280250092, + "grad_norm": 1.8470321807894774, + "learning_rate": 6.743610720420637e-06, + "loss": 0.7337, + "step": 13215 + }, + { + "epoch": 0.4050508765477504, + "grad_norm": 0.8237139623372283, + "learning_rate": 6.743145549771764e-06, + "loss": 0.4525, + "step": 13216 + }, + { + "epoch": 0.4050815250704916, + "grad_norm": 1.716135344914798, + "learning_rate": 6.742680361947231e-06, + "loss": 0.5915, + "step": 13217 + }, + { + "epoch": 0.4051121735932328, + "grad_norm": 1.5927954734056855, + "learning_rate": 6.742215156951624e-06, + "loss": 0.5721, + "step": 13218 + }, + { + "epoch": 0.405142822115974, + "grad_norm": 1.636897527119504, + "learning_rate": 6.741749934789526e-06, + "loss": 0.6862, + "step": 13219 + }, + { + "epoch": 0.4051734706387152, + "grad_norm": 1.8074627555401357, + "learning_rate": 6.741284695465518e-06, + "loss": 0.6819, + "step": 13220 + }, + { + "epoch": 0.40520411916145643, + "grad_norm": 1.5494396101077286, + "learning_rate": 6.740819438984187e-06, + "loss": 0.6368, + "step": 13221 + }, + { + "epoch": 0.40523476768419764, + "grad_norm": 1.5472731766736207, + "learning_rate": 6.740354165350117e-06, + "loss": 0.696, + "step": 13222 + }, + { + "epoch": 0.40526541620693884, + "grad_norm": 1.8802356318542894, + "learning_rate": 6.739888874567893e-06, + "loss": 0.6271, + "step": 13223 + }, + { + "epoch": 0.40529606472968005, + "grad_norm": 1.7150214307183482, + "learning_rate": 6.739423566642098e-06, + "loss": 0.6865, + "step": 13224 + }, + { + "epoch": 0.40532671325242126, + "grad_norm": 1.6289742546283776, + "learning_rate": 6.738958241577317e-06, + "loss": 0.6508, + "step": 13225 + }, + { + "epoch": 0.40535736177516246, + "grad_norm": 1.7653505874219306, + "learning_rate": 6.738492899378136e-06, + "loss": 0.739, + "step": 13226 + }, + { + "epoch": 0.40538801029790367, + "grad_norm": 1.7177106831214344, + "learning_rate": 6.73802754004914e-06, + "loss": 0.6256, + "step": 13227 + }, + { + "epoch": 0.4054186588206448, + "grad_norm": 1.6971362424666112, + "learning_rate": 6.737562163594914e-06, + "loss": 0.6256, + "step": 13228 + }, + { + "epoch": 0.405449307343386, + "grad_norm": 1.6571681374943599, + "learning_rate": 6.737096770020042e-06, + "loss": 0.5756, + "step": 13229 + }, + { + "epoch": 0.40547995586612723, + "grad_norm": 2.038373669216938, + "learning_rate": 6.736631359329112e-06, + "loss": 0.656, + "step": 13230 + }, + { + "epoch": 0.40551060438886843, + "grad_norm": 1.7044837670079231, + "learning_rate": 6.736165931526711e-06, + "loss": 0.7307, + "step": 13231 + }, + { + "epoch": 0.40554125291160964, + "grad_norm": 0.994354832580736, + "learning_rate": 6.73570048661742e-06, + "loss": 0.456, + "step": 13232 + }, + { + "epoch": 0.40557190143435085, + "grad_norm": 0.9427874947383883, + "learning_rate": 6.735235024605829e-06, + "loss": 0.4503, + "step": 13233 + }, + { + "epoch": 0.40560254995709205, + "grad_norm": 1.6326378956048944, + "learning_rate": 6.734769545496523e-06, + "loss": 0.6331, + "step": 13234 + }, + { + "epoch": 0.40563319847983326, + "grad_norm": 1.8453447531332592, + "learning_rate": 6.734304049294089e-06, + "loss": 0.6986, + "step": 13235 + }, + { + "epoch": 0.40566384700257446, + "grad_norm": 1.6597864081505733, + "learning_rate": 6.7338385360031135e-06, + "loss": 0.6491, + "step": 13236 + }, + { + "epoch": 0.40569449552531567, + "grad_norm": 1.551932637850828, + "learning_rate": 6.7333730056281825e-06, + "loss": 0.5729, + "step": 13237 + }, + { + "epoch": 
0.4057251440480569, + "grad_norm": 1.5436735959277748, + "learning_rate": 6.732907458173885e-06, + "loss": 0.5961, + "step": 13238 + }, + { + "epoch": 0.4057557925707981, + "grad_norm": 1.8305925911435372, + "learning_rate": 6.732441893644807e-06, + "loss": 0.6785, + "step": 13239 + }, + { + "epoch": 0.4057864410935393, + "grad_norm": 1.88844857485183, + "learning_rate": 6.731976312045534e-06, + "loss": 0.7326, + "step": 13240 + }, + { + "epoch": 0.4058170896162805, + "grad_norm": 1.853968074218349, + "learning_rate": 6.731510713380657e-06, + "loss": 0.6694, + "step": 13241 + }, + { + "epoch": 0.4058477381390217, + "grad_norm": 1.7415013553351997, + "learning_rate": 6.7310450976547616e-06, + "loss": 0.7114, + "step": 13242 + }, + { + "epoch": 0.4058783866617629, + "grad_norm": 1.9991512378098288, + "learning_rate": 6.730579464872435e-06, + "loss": 0.6876, + "step": 13243 + }, + { + "epoch": 0.4059090351845041, + "grad_norm": 1.6975590111502834, + "learning_rate": 6.730113815038266e-06, + "loss": 0.6899, + "step": 13244 + }, + { + "epoch": 0.4059396837072453, + "grad_norm": 1.3492046558045119, + "learning_rate": 6.729648148156844e-06, + "loss": 0.534, + "step": 13245 + }, + { + "epoch": 0.4059703322299865, + "grad_norm": 1.5097359607744514, + "learning_rate": 6.729182464232758e-06, + "loss": 0.5781, + "step": 13246 + }, + { + "epoch": 0.4060009807527277, + "grad_norm": 1.3553420887476095, + "learning_rate": 6.728716763270592e-06, + "loss": 0.4732, + "step": 13247 + }, + { + "epoch": 0.40603162927546893, + "grad_norm": 1.8152170366583527, + "learning_rate": 6.728251045274937e-06, + "loss": 0.6905, + "step": 13248 + }, + { + "epoch": 0.40606227779821014, + "grad_norm": 1.0103834238422302, + "learning_rate": 6.727785310250384e-06, + "loss": 0.4488, + "step": 13249 + }, + { + "epoch": 0.40609292632095134, + "grad_norm": 1.5608268889916268, + "learning_rate": 6.72731955820152e-06, + "loss": 0.6514, + "step": 13250 + }, + { + "epoch": 0.40612357484369255, + "grad_norm": 1.6795972782383173, + "learning_rate": 6.726853789132933e-06, + "loss": 0.6513, + "step": 13251 + }, + { + "epoch": 0.40615422336643375, + "grad_norm": 1.7275362279879782, + "learning_rate": 6.7263880030492155e-06, + "loss": 0.6885, + "step": 13252 + }, + { + "epoch": 0.40618487188917496, + "grad_norm": 1.665918261825264, + "learning_rate": 6.725922199954955e-06, + "loss": 0.5594, + "step": 13253 + }, + { + "epoch": 0.40621552041191616, + "grad_norm": 1.9120571405685935, + "learning_rate": 6.725456379854742e-06, + "loss": 0.7326, + "step": 13254 + }, + { + "epoch": 0.40624616893465737, + "grad_norm": 1.7632051144024008, + "learning_rate": 6.724990542753164e-06, + "loss": 0.6606, + "step": 13255 + }, + { + "epoch": 0.4062768174573986, + "grad_norm": 1.725045431740236, + "learning_rate": 6.724524688654814e-06, + "loss": 0.6387, + "step": 13256 + }, + { + "epoch": 0.4063074659801398, + "grad_norm": 1.4129013698146562, + "learning_rate": 6.72405881756428e-06, + "loss": 0.4719, + "step": 13257 + }, + { + "epoch": 0.406338114502881, + "grad_norm": 1.2184057838643823, + "learning_rate": 6.723592929486156e-06, + "loss": 0.463, + "step": 13258 + }, + { + "epoch": 0.4063687630256222, + "grad_norm": 1.7792447988990798, + "learning_rate": 6.7231270244250266e-06, + "loss": 0.6691, + "step": 13259 + }, + { + "epoch": 0.40639941154836334, + "grad_norm": 1.6574595011336155, + "learning_rate": 6.722661102385488e-06, + "loss": 0.7098, + "step": 13260 + }, + { + "epoch": 0.40643006007110455, + "grad_norm": 0.7785400297523631, + "learning_rate": 
6.722195163372128e-06, + "loss": 0.4517, + "step": 13261 + }, + { + "epoch": 0.40646070859384575, + "grad_norm": 1.503989456924655, + "learning_rate": 6.721729207389538e-06, + "loss": 0.6441, + "step": 13262 + }, + { + "epoch": 0.40649135711658696, + "grad_norm": 1.730674627979536, + "learning_rate": 6.72126323444231e-06, + "loss": 0.6062, + "step": 13263 + }, + { + "epoch": 0.40652200563932817, + "grad_norm": 1.8076549010874021, + "learning_rate": 6.720797244535036e-06, + "loss": 0.627, + "step": 13264 + }, + { + "epoch": 0.40655265416206937, + "grad_norm": 1.5826335144465857, + "learning_rate": 6.720331237672305e-06, + "loss": 0.6578, + "step": 13265 + }, + { + "epoch": 0.4065833026848106, + "grad_norm": 1.7971890615222825, + "learning_rate": 6.71986521385871e-06, + "loss": 0.7266, + "step": 13266 + }, + { + "epoch": 0.4066139512075518, + "grad_norm": 2.7683575574969095, + "learning_rate": 6.7193991730988435e-06, + "loss": 0.7347, + "step": 13267 + }, + { + "epoch": 0.406644599730293, + "grad_norm": 1.923298189849223, + "learning_rate": 6.718933115397296e-06, + "loss": 0.6968, + "step": 13268 + }, + { + "epoch": 0.4066752482530342, + "grad_norm": 1.6544954307951867, + "learning_rate": 6.718467040758663e-06, + "loss": 0.6111, + "step": 13269 + }, + { + "epoch": 0.4067058967757754, + "grad_norm": 1.6869069538489052, + "learning_rate": 6.718000949187533e-06, + "loss": 0.6744, + "step": 13270 + }, + { + "epoch": 0.4067365452985166, + "grad_norm": 1.6823221251174425, + "learning_rate": 6.7175348406884995e-06, + "loss": 0.7072, + "step": 13271 + }, + { + "epoch": 0.4067671938212578, + "grad_norm": 1.8011176177314978, + "learning_rate": 6.717068715266157e-06, + "loss": 0.6736, + "step": 13272 + }, + { + "epoch": 0.406797842343999, + "grad_norm": 1.6517294576428507, + "learning_rate": 6.716602572925099e-06, + "loss": 0.635, + "step": 13273 + }, + { + "epoch": 0.4068284908667402, + "grad_norm": 1.5683946182990909, + "learning_rate": 6.716136413669912e-06, + "loss": 0.7026, + "step": 13274 + }, + { + "epoch": 0.40685913938948143, + "grad_norm": 1.6665598637559491, + "learning_rate": 6.715670237505198e-06, + "loss": 0.6461, + "step": 13275 + }, + { + "epoch": 0.40688978791222263, + "grad_norm": 1.582627102556214, + "learning_rate": 6.715204044435543e-06, + "loss": 0.6724, + "step": 13276 + }, + { + "epoch": 0.40692043643496384, + "grad_norm": 1.8179601738261164, + "learning_rate": 6.7147378344655455e-06, + "loss": 0.6879, + "step": 13277 + }, + { + "epoch": 0.40695108495770504, + "grad_norm": 1.781269672237062, + "learning_rate": 6.714271607599797e-06, + "loss": 0.7231, + "step": 13278 + }, + { + "epoch": 0.40698173348044625, + "grad_norm": 1.760709623789008, + "learning_rate": 6.713805363842893e-06, + "loss": 0.64, + "step": 13279 + }, + { + "epoch": 0.40701238200318746, + "grad_norm": 1.8732699407350653, + "learning_rate": 6.7133391031994236e-06, + "loss": 0.7258, + "step": 13280 + }, + { + "epoch": 0.40704303052592866, + "grad_norm": 1.8451849137302134, + "learning_rate": 6.712872825673987e-06, + "loss": 0.7222, + "step": 13281 + }, + { + "epoch": 0.40707367904866987, + "grad_norm": 1.55621959146622, + "learning_rate": 6.712406531271176e-06, + "loss": 0.5977, + "step": 13282 + }, + { + "epoch": 0.4071043275714111, + "grad_norm": 1.7484437945758584, + "learning_rate": 6.711940219995585e-06, + "loss": 0.6692, + "step": 13283 + }, + { + "epoch": 0.4071349760941523, + "grad_norm": 1.4921399405065698, + "learning_rate": 6.711473891851812e-06, + "loss": 0.4624, + "step": 13284 + }, + { + "epoch": 
0.4071656246168935, + "grad_norm": 1.7792139198610648, + "learning_rate": 6.711007546844444e-06, + "loss": 0.6894, + "step": 13285 + }, + { + "epoch": 0.4071962731396347, + "grad_norm": 1.4689017664701007, + "learning_rate": 6.710541184978084e-06, + "loss": 0.5964, + "step": 13286 + }, + { + "epoch": 0.4072269216623759, + "grad_norm": 1.9057443931632077, + "learning_rate": 6.7100748062573225e-06, + "loss": 0.6113, + "step": 13287 + }, + { + "epoch": 0.4072575701851171, + "grad_norm": 2.075697691948813, + "learning_rate": 6.709608410686759e-06, + "loss": 0.7004, + "step": 13288 + }, + { + "epoch": 0.4072882187078583, + "grad_norm": 1.6155958023286499, + "learning_rate": 6.7091419982709836e-06, + "loss": 0.5908, + "step": 13289 + }, + { + "epoch": 0.4073188672305995, + "grad_norm": 1.8015312826913084, + "learning_rate": 6.7086755690145965e-06, + "loss": 0.5726, + "step": 13290 + }, + { + "epoch": 0.40734951575334066, + "grad_norm": 1.4944458253149349, + "learning_rate": 6.7082091229221904e-06, + "loss": 0.6149, + "step": 13291 + }, + { + "epoch": 0.40738016427608187, + "grad_norm": 1.7695670064103626, + "learning_rate": 6.707742659998364e-06, + "loss": 0.7767, + "step": 13292 + }, + { + "epoch": 0.4074108127988231, + "grad_norm": 1.7510662665325325, + "learning_rate": 6.707276180247712e-06, + "loss": 0.6399, + "step": 13293 + }, + { + "epoch": 0.4074414613215643, + "grad_norm": 1.6029192856903507, + "learning_rate": 6.706809683674829e-06, + "loss": 0.6909, + "step": 13294 + }, + { + "epoch": 0.4074721098443055, + "grad_norm": 0.9373124174270224, + "learning_rate": 6.706343170284315e-06, + "loss": 0.449, + "step": 13295 + }, + { + "epoch": 0.4075027583670467, + "grad_norm": 1.628812811804074, + "learning_rate": 6.705876640080766e-06, + "loss": 0.7653, + "step": 13296 + }, + { + "epoch": 0.4075334068897879, + "grad_norm": 1.4715687196462586, + "learning_rate": 6.7054100930687785e-06, + "loss": 0.6396, + "step": 13297 + }, + { + "epoch": 0.4075640554125291, + "grad_norm": 1.9167074603516827, + "learning_rate": 6.704943529252947e-06, + "loss": 0.6834, + "step": 13298 + }, + { + "epoch": 0.4075947039352703, + "grad_norm": 1.730822283211162, + "learning_rate": 6.7044769486378715e-06, + "loss": 0.7104, + "step": 13299 + }, + { + "epoch": 0.4076253524580115, + "grad_norm": 1.64433090491783, + "learning_rate": 6.704010351228149e-06, + "loss": 0.671, + "step": 13300 + }, + { + "epoch": 0.4076560009807527, + "grad_norm": 1.60309506977877, + "learning_rate": 6.703543737028375e-06, + "loss": 0.7081, + "step": 13301 + }, + { + "epoch": 0.4076866495034939, + "grad_norm": 0.822272349268506, + "learning_rate": 6.7030771060431495e-06, + "loss": 0.4489, + "step": 13302 + }, + { + "epoch": 0.40771729802623513, + "grad_norm": 2.19526180366706, + "learning_rate": 6.70261045827707e-06, + "loss": 0.6416, + "step": 13303 + }, + { + "epoch": 0.40774794654897634, + "grad_norm": 2.013769511179118, + "learning_rate": 6.702143793734735e-06, + "loss": 0.7032, + "step": 13304 + }, + { + "epoch": 0.40777859507171754, + "grad_norm": 1.5756063003135987, + "learning_rate": 6.7016771124207404e-06, + "loss": 0.6859, + "step": 13305 + }, + { + "epoch": 0.40780924359445875, + "grad_norm": 1.7443663582032305, + "learning_rate": 6.701210414339685e-06, + "loss": 0.5983, + "step": 13306 + }, + { + "epoch": 0.40783989211719995, + "grad_norm": 1.7673265695410711, + "learning_rate": 6.7007436994961685e-06, + "loss": 0.5807, + "step": 13307 + }, + { + "epoch": 0.40787054063994116, + "grad_norm": 1.6976696379286855, + "learning_rate": 
6.7002769678947895e-06, + "loss": 0.667, + "step": 13308 + }, + { + "epoch": 0.40790118916268236, + "grad_norm": 1.650074543781794, + "learning_rate": 6.699810219540146e-06, + "loss": 0.5925, + "step": 13309 + }, + { + "epoch": 0.40793183768542357, + "grad_norm": 2.0100216354819977, + "learning_rate": 6.699343454436839e-06, + "loss": 0.6394, + "step": 13310 + }, + { + "epoch": 0.4079624862081648, + "grad_norm": 1.5934403512736859, + "learning_rate": 6.698876672589465e-06, + "loss": 0.6566, + "step": 13311 + }, + { + "epoch": 0.407993134730906, + "grad_norm": 1.6710360906043842, + "learning_rate": 6.698409874002626e-06, + "loss": 0.5786, + "step": 13312 + }, + { + "epoch": 0.4080237832536472, + "grad_norm": 1.5752933026190201, + "learning_rate": 6.697943058680918e-06, + "loss": 0.7309, + "step": 13313 + }, + { + "epoch": 0.4080544317763884, + "grad_norm": 1.6947110100635148, + "learning_rate": 6.697476226628943e-06, + "loss": 0.6471, + "step": 13314 + }, + { + "epoch": 0.4080850802991296, + "grad_norm": 1.9619992308922232, + "learning_rate": 6.697009377851301e-06, + "loss": 0.6981, + "step": 13315 + }, + { + "epoch": 0.4081157288218708, + "grad_norm": 2.027486457169507, + "learning_rate": 6.696542512352592e-06, + "loss": 0.6672, + "step": 13316 + }, + { + "epoch": 0.408146377344612, + "grad_norm": 1.667689243042061, + "learning_rate": 6.696075630137413e-06, + "loss": 0.6841, + "step": 13317 + }, + { + "epoch": 0.4081770258673532, + "grad_norm": 1.8061329473791545, + "learning_rate": 6.6956087312103694e-06, + "loss": 0.7027, + "step": 13318 + }, + { + "epoch": 0.4082076743900944, + "grad_norm": 0.8309860065904496, + "learning_rate": 6.695141815576058e-06, + "loss": 0.4262, + "step": 13319 + }, + { + "epoch": 0.4082383229128356, + "grad_norm": 1.758966873646382, + "learning_rate": 6.694674883239081e-06, + "loss": 0.6096, + "step": 13320 + }, + { + "epoch": 0.40826897143557683, + "grad_norm": 1.7456764970475454, + "learning_rate": 6.694207934204038e-06, + "loss": 0.726, + "step": 13321 + }, + { + "epoch": 0.408299619958318, + "grad_norm": 1.696284603126578, + "learning_rate": 6.693740968475531e-06, + "loss": 0.6243, + "step": 13322 + }, + { + "epoch": 0.4083302684810592, + "grad_norm": 0.7894710217422082, + "learning_rate": 6.693273986058162e-06, + "loss": 0.4786, + "step": 13323 + }, + { + "epoch": 0.4083609170038004, + "grad_norm": 1.5249485076939058, + "learning_rate": 6.69280698695653e-06, + "loss": 0.6593, + "step": 13324 + }, + { + "epoch": 0.4083915655265416, + "grad_norm": 1.7115849451697, + "learning_rate": 6.692339971175239e-06, + "loss": 0.6241, + "step": 13325 + }, + { + "epoch": 0.4084222140492828, + "grad_norm": 1.8275553522382562, + "learning_rate": 6.691872938718887e-06, + "loss": 0.7329, + "step": 13326 + }, + { + "epoch": 0.408452862572024, + "grad_norm": 1.6098835834812313, + "learning_rate": 6.691405889592081e-06, + "loss": 0.6101, + "step": 13327 + }, + { + "epoch": 0.4084835110947652, + "grad_norm": 1.6870754113297426, + "learning_rate": 6.6909388237994175e-06, + "loss": 0.5609, + "step": 13328 + }, + { + "epoch": 0.4085141596175064, + "grad_norm": 0.8773814461176866, + "learning_rate": 6.690471741345503e-06, + "loss": 0.4792, + "step": 13329 + }, + { + "epoch": 0.40854480814024763, + "grad_norm": 1.7926737240834723, + "learning_rate": 6.690004642234935e-06, + "loss": 0.6992, + "step": 13330 + }, + { + "epoch": 0.40857545666298883, + "grad_norm": 1.9241451556945757, + "learning_rate": 6.6895375264723225e-06, + "loss": 0.684, + "step": 13331 + }, + { + "epoch": 
0.40860610518573004, + "grad_norm": 1.9788875036854545, + "learning_rate": 6.689070394062261e-06, + "loss": 0.7076, + "step": 13332 + }, + { + "epoch": 0.40863675370847125, + "grad_norm": 1.4539385125596525, + "learning_rate": 6.688603245009359e-06, + "loss": 0.7004, + "step": 13333 + }, + { + "epoch": 0.40866740223121245, + "grad_norm": 1.8103859607084056, + "learning_rate": 6.6881360793182155e-06, + "loss": 0.6653, + "step": 13334 + }, + { + "epoch": 0.40869805075395366, + "grad_norm": 1.4053329640786594, + "learning_rate": 6.687668896993438e-06, + "loss": 0.6347, + "step": 13335 + }, + { + "epoch": 0.40872869927669486, + "grad_norm": 2.044777296588277, + "learning_rate": 6.687201698039625e-06, + "loss": 0.6857, + "step": 13336 + }, + { + "epoch": 0.40875934779943607, + "grad_norm": 1.844516788771478, + "learning_rate": 6.686734482461381e-06, + "loss": 0.7119, + "step": 13337 + }, + { + "epoch": 0.4087899963221773, + "grad_norm": 1.7986880235499356, + "learning_rate": 6.686267250263314e-06, + "loss": 0.6662, + "step": 13338 + }, + { + "epoch": 0.4088206448449185, + "grad_norm": 1.700335602588464, + "learning_rate": 6.685800001450023e-06, + "loss": 0.7794, + "step": 13339 + }, + { + "epoch": 0.4088512933676597, + "grad_norm": 1.6921481338250997, + "learning_rate": 6.685332736026111e-06, + "loss": 0.6491, + "step": 13340 + }, + { + "epoch": 0.4088819418904009, + "grad_norm": 1.8138188487089064, + "learning_rate": 6.684865453996185e-06, + "loss": 0.663, + "step": 13341 + }, + { + "epoch": 0.4089125904131421, + "grad_norm": 1.7996049186511618, + "learning_rate": 6.684398155364852e-06, + "loss": 0.7315, + "step": 13342 + }, + { + "epoch": 0.4089432389358833, + "grad_norm": 1.8357805139592713, + "learning_rate": 6.68393084013671e-06, + "loss": 0.7606, + "step": 13343 + }, + { + "epoch": 0.4089738874586245, + "grad_norm": 1.5631638210427206, + "learning_rate": 6.683463508316367e-06, + "loss": 0.6207, + "step": 13344 + }, + { + "epoch": 0.4090045359813657, + "grad_norm": 0.8426826871305929, + "learning_rate": 6.682996159908426e-06, + "loss": 0.4641, + "step": 13345 + }, + { + "epoch": 0.4090351845041069, + "grad_norm": 1.825513115712902, + "learning_rate": 6.682528794917495e-06, + "loss": 0.7071, + "step": 13346 + }, + { + "epoch": 0.4090658330268481, + "grad_norm": 2.1095096900571964, + "learning_rate": 6.682061413348178e-06, + "loss": 0.6539, + "step": 13347 + }, + { + "epoch": 0.40909648154958933, + "grad_norm": 0.7708103597735084, + "learning_rate": 6.681594015205078e-06, + "loss": 0.4628, + "step": 13348 + }, + { + "epoch": 0.40912713007233054, + "grad_norm": 0.7901504484862296, + "learning_rate": 6.681126600492802e-06, + "loss": 0.4547, + "step": 13349 + }, + { + "epoch": 0.40915777859507174, + "grad_norm": 1.4758345428020958, + "learning_rate": 6.680659169215956e-06, + "loss": 0.7003, + "step": 13350 + }, + { + "epoch": 0.40918842711781295, + "grad_norm": 1.9879805127751315, + "learning_rate": 6.6801917213791454e-06, + "loss": 0.6561, + "step": 13351 + }, + { + "epoch": 0.40921907564055415, + "grad_norm": 1.6665615959865119, + "learning_rate": 6.679724256986974e-06, + "loss": 0.6594, + "step": 13352 + }, + { + "epoch": 0.4092497241632953, + "grad_norm": 1.8923301695901982, + "learning_rate": 6.679256776044052e-06, + "loss": 0.7121, + "step": 13353 + }, + { + "epoch": 0.4092803726860365, + "grad_norm": 1.7067941403338576, + "learning_rate": 6.6787892785549825e-06, + "loss": 0.6733, + "step": 13354 + }, + { + "epoch": 0.4093110212087777, + "grad_norm": 2.131595563217012, + "learning_rate": 
6.678321764524373e-06, + "loss": 0.6586, + "step": 13355 + }, + { + "epoch": 0.4093416697315189, + "grad_norm": 1.8246155434391846, + "learning_rate": 6.67785423395683e-06, + "loss": 0.6914, + "step": 13356 + }, + { + "epoch": 0.4093723182542601, + "grad_norm": 1.594058384499629, + "learning_rate": 6.677386686856959e-06, + "loss": 0.6993, + "step": 13357 + }, + { + "epoch": 0.40940296677700133, + "grad_norm": 1.5376525813637631, + "learning_rate": 6.6769191232293685e-06, + "loss": 0.7034, + "step": 13358 + }, + { + "epoch": 0.40943361529974254, + "grad_norm": 0.9298853102962887, + "learning_rate": 6.676451543078664e-06, + "loss": 0.4572, + "step": 13359 + }, + { + "epoch": 0.40946426382248374, + "grad_norm": 1.7491305338673624, + "learning_rate": 6.675983946409454e-06, + "loss": 0.583, + "step": 13360 + }, + { + "epoch": 0.40949491234522495, + "grad_norm": 0.8379613989389715, + "learning_rate": 6.675516333226346e-06, + "loss": 0.4831, + "step": 13361 + }, + { + "epoch": 0.40952556086796615, + "grad_norm": 1.9031471239169735, + "learning_rate": 6.6750487035339465e-06, + "loss": 0.6883, + "step": 13362 + }, + { + "epoch": 0.40955620939070736, + "grad_norm": 1.708728903314733, + "learning_rate": 6.674581057336862e-06, + "loss": 0.6154, + "step": 13363 + }, + { + "epoch": 0.40958685791344857, + "grad_norm": 1.5998969787089865, + "learning_rate": 6.674113394639704e-06, + "loss": 0.6259, + "step": 13364 + }, + { + "epoch": 0.40961750643618977, + "grad_norm": 1.738458817392415, + "learning_rate": 6.673645715447078e-06, + "loss": 0.6943, + "step": 13365 + }, + { + "epoch": 0.409648154958931, + "grad_norm": 1.915288820947147, + "learning_rate": 6.673178019763592e-06, + "loss": 0.641, + "step": 13366 + }, + { + "epoch": 0.4096788034816722, + "grad_norm": 1.6505411949678883, + "learning_rate": 6.672710307593855e-06, + "loss": 0.7164, + "step": 13367 + }, + { + "epoch": 0.4097094520044134, + "grad_norm": 0.8795412875206908, + "learning_rate": 6.672242578942475e-06, + "loss": 0.4467, + "step": 13368 + }, + { + "epoch": 0.4097401005271546, + "grad_norm": 1.841885795589861, + "learning_rate": 6.671774833814062e-06, + "loss": 0.7056, + "step": 13369 + }, + { + "epoch": 0.4097707490498958, + "grad_norm": 1.6027012128685254, + "learning_rate": 6.671307072213223e-06, + "loss": 0.6842, + "step": 13370 + }, + { + "epoch": 0.409801397572637, + "grad_norm": 1.8704900340210207, + "learning_rate": 6.6708392941445675e-06, + "loss": 0.7387, + "step": 13371 + }, + { + "epoch": 0.4098320460953782, + "grad_norm": 1.7418008745233973, + "learning_rate": 6.670371499612705e-06, + "loss": 0.6737, + "step": 13372 + }, + { + "epoch": 0.4098626946181194, + "grad_norm": 1.9201834628347867, + "learning_rate": 6.669903688622246e-06, + "loss": 0.6535, + "step": 13373 + }, + { + "epoch": 0.4098933431408606, + "grad_norm": 1.6831328946391373, + "learning_rate": 6.669435861177798e-06, + "loss": 0.6995, + "step": 13374 + }, + { + "epoch": 0.4099239916636018, + "grad_norm": 1.489958867198239, + "learning_rate": 6.668968017283971e-06, + "loss": 0.5454, + "step": 13375 + }, + { + "epoch": 0.40995464018634303, + "grad_norm": 1.733012567799383, + "learning_rate": 6.668500156945376e-06, + "loss": 0.7053, + "step": 13376 + }, + { + "epoch": 0.40998528870908424, + "grad_norm": 1.789281730227265, + "learning_rate": 6.668032280166621e-06, + "loss": 0.6241, + "step": 13377 + }, + { + "epoch": 0.41001593723182544, + "grad_norm": 1.5552170931351954, + "learning_rate": 6.667564386952316e-06, + "loss": 0.679, + "step": 13378 + }, + { + "epoch": 
0.41004658575456665, + "grad_norm": 1.714826477830339, + "learning_rate": 6.667096477307075e-06, + "loss": 0.7085, + "step": 13379 + }, + { + "epoch": 0.41007723427730786, + "grad_norm": 1.7040453731667404, + "learning_rate": 6.666628551235504e-06, + "loss": 0.6907, + "step": 13380 + }, + { + "epoch": 0.41010788280004906, + "grad_norm": 1.7658136226645111, + "learning_rate": 6.666160608742217e-06, + "loss": 0.6945, + "step": 13381 + }, + { + "epoch": 0.41013853132279027, + "grad_norm": 1.4854580107637396, + "learning_rate": 6.665692649831822e-06, + "loss": 0.6331, + "step": 13382 + }, + { + "epoch": 0.4101691798455315, + "grad_norm": 1.657977878443558, + "learning_rate": 6.665224674508932e-06, + "loss": 0.6206, + "step": 13383 + }, + { + "epoch": 0.4101998283682726, + "grad_norm": 1.599719639492126, + "learning_rate": 6.664756682778156e-06, + "loss": 0.6479, + "step": 13384 + }, + { + "epoch": 0.41023047689101383, + "grad_norm": 1.6298775133147383, + "learning_rate": 6.6642886746441085e-06, + "loss": 0.6337, + "step": 13385 + }, + { + "epoch": 0.41026112541375503, + "grad_norm": 1.9781994330703512, + "learning_rate": 6.6638206501113965e-06, + "loss": 0.6171, + "step": 13386 + }, + { + "epoch": 0.41029177393649624, + "grad_norm": 1.7859517777530607, + "learning_rate": 6.663352609184635e-06, + "loss": 0.6829, + "step": 13387 + }, + { + "epoch": 0.41032242245923745, + "grad_norm": 1.8527014832491386, + "learning_rate": 6.662884551868436e-06, + "loss": 0.6803, + "step": 13388 + }, + { + "epoch": 0.41035307098197865, + "grad_norm": 2.0314568813716045, + "learning_rate": 6.662416478167407e-06, + "loss": 0.7022, + "step": 13389 + }, + { + "epoch": 0.41038371950471986, + "grad_norm": 1.7043638086409314, + "learning_rate": 6.661948388086166e-06, + "loss": 0.6414, + "step": 13390 + }, + { + "epoch": 0.41041436802746106, + "grad_norm": 1.718984090807318, + "learning_rate": 6.66148028162932e-06, + "loss": 0.6324, + "step": 13391 + }, + { + "epoch": 0.41044501655020227, + "grad_norm": 0.8681878542549326, + "learning_rate": 6.661012158801487e-06, + "loss": 0.4684, + "step": 13392 + }, + { + "epoch": 0.4104756650729435, + "grad_norm": 1.653361113215478, + "learning_rate": 6.660544019607272e-06, + "loss": 0.6276, + "step": 13393 + }, + { + "epoch": 0.4105063135956847, + "grad_norm": 1.6881539403033918, + "learning_rate": 6.660075864051294e-06, + "loss": 0.6286, + "step": 13394 + }, + { + "epoch": 0.4105369621184259, + "grad_norm": 1.6448492549552072, + "learning_rate": 6.659607692138164e-06, + "loss": 0.5802, + "step": 13395 + }, + { + "epoch": 0.4105676106411671, + "grad_norm": 1.7652925344171948, + "learning_rate": 6.659139503872496e-06, + "loss": 0.6821, + "step": 13396 + }, + { + "epoch": 0.4105982591639083, + "grad_norm": 1.5147178267051413, + "learning_rate": 6.658671299258899e-06, + "loss": 0.6768, + "step": 13397 + }, + { + "epoch": 0.4106289076866495, + "grad_norm": 1.5688645706745499, + "learning_rate": 6.658203078301991e-06, + "loss": 0.7001, + "step": 13398 + }, + { + "epoch": 0.4106595562093907, + "grad_norm": 0.764777907610378, + "learning_rate": 6.657734841006383e-06, + "loss": 0.439, + "step": 13399 + }, + { + "epoch": 0.4106902047321319, + "grad_norm": 1.5511906795973083, + "learning_rate": 6.6572665873766914e-06, + "loss": 0.6406, + "step": 13400 + }, + { + "epoch": 0.4107208532548731, + "grad_norm": 1.5398366456832941, + "learning_rate": 6.6567983174175255e-06, + "loss": 0.6286, + "step": 13401 + }, + { + "epoch": 0.4107515017776143, + "grad_norm": 1.7089652081101607, + "learning_rate": 
6.656330031133503e-06, + "loss": 0.627, + "step": 13402 + }, + { + "epoch": 0.41078215030035553, + "grad_norm": 1.784539682147725, + "learning_rate": 6.655861728529237e-06, + "loss": 0.6995, + "step": 13403 + }, + { + "epoch": 0.41081279882309674, + "grad_norm": 1.7289231876608366, + "learning_rate": 6.655393409609342e-06, + "loss": 0.7222, + "step": 13404 + }, + { + "epoch": 0.41084344734583794, + "grad_norm": 1.6232780439303525, + "learning_rate": 6.654925074378432e-06, + "loss": 0.6864, + "step": 13405 + }, + { + "epoch": 0.41087409586857915, + "grad_norm": 1.5693924336843283, + "learning_rate": 6.6544567228411206e-06, + "loss": 0.7091, + "step": 13406 + }, + { + "epoch": 0.41090474439132035, + "grad_norm": 1.6671995660333454, + "learning_rate": 6.653988355002026e-06, + "loss": 0.6505, + "step": 13407 + }, + { + "epoch": 0.41093539291406156, + "grad_norm": 1.5202360594930329, + "learning_rate": 6.653519970865759e-06, + "loss": 0.6054, + "step": 13408 + }, + { + "epoch": 0.41096604143680276, + "grad_norm": 1.7852482296500332, + "learning_rate": 6.653051570436938e-06, + "loss": 0.7702, + "step": 13409 + }, + { + "epoch": 0.41099668995954397, + "grad_norm": 1.84822013457629, + "learning_rate": 6.652583153720176e-06, + "loss": 0.6262, + "step": 13410 + }, + { + "epoch": 0.4110273384822852, + "grad_norm": 1.6915441145649046, + "learning_rate": 6.65211472072009e-06, + "loss": 0.7262, + "step": 13411 + }, + { + "epoch": 0.4110579870050264, + "grad_norm": 1.5962942253030836, + "learning_rate": 6.651646271441295e-06, + "loss": 0.645, + "step": 13412 + }, + { + "epoch": 0.4110886355277676, + "grad_norm": 1.4744439325023908, + "learning_rate": 6.651177805888407e-06, + "loss": 0.5856, + "step": 13413 + }, + { + "epoch": 0.4111192840505088, + "grad_norm": 1.6529308949690342, + "learning_rate": 6.650709324066041e-06, + "loss": 0.6057, + "step": 13414 + }, + { + "epoch": 0.41114993257324994, + "grad_norm": 0.7733073576484201, + "learning_rate": 6.650240825978813e-06, + "loss": 0.4477, + "step": 13415 + }, + { + "epoch": 0.41118058109599115, + "grad_norm": 1.8379210818248846, + "learning_rate": 6.6497723116313405e-06, + "loss": 0.7026, + "step": 13416 + }, + { + "epoch": 0.41121122961873235, + "grad_norm": 1.839627802713918, + "learning_rate": 6.649303781028239e-06, + "loss": 0.6628, + "step": 13417 + }, + { + "epoch": 0.41124187814147356, + "grad_norm": 1.8431127330273562, + "learning_rate": 6.648835234174126e-06, + "loss": 0.6542, + "step": 13418 + }, + { + "epoch": 0.41127252666421477, + "grad_norm": 1.6768358092960065, + "learning_rate": 6.648366671073617e-06, + "loss": 0.732, + "step": 13419 + }, + { + "epoch": 0.41130317518695597, + "grad_norm": 1.7791006675613301, + "learning_rate": 6.647898091731331e-06, + "loss": 0.689, + "step": 13420 + }, + { + "epoch": 0.4113338237096972, + "grad_norm": 1.6876656918951178, + "learning_rate": 6.64742949615188e-06, + "loss": 0.6863, + "step": 13421 + }, + { + "epoch": 0.4113644722324384, + "grad_norm": 1.592646473779818, + "learning_rate": 6.646960884339888e-06, + "loss": 0.6919, + "step": 13422 + }, + { + "epoch": 0.4113951207551796, + "grad_norm": 1.705453035959536, + "learning_rate": 6.646492256299968e-06, + "loss": 0.6615, + "step": 13423 + }, + { + "epoch": 0.4114257692779208, + "grad_norm": 1.8662159469630692, + "learning_rate": 6.6460236120367384e-06, + "loss": 0.7121, + "step": 13424 + }, + { + "epoch": 0.411456417800662, + "grad_norm": 1.8368841307268378, + "learning_rate": 6.645554951554817e-06, + "loss": 0.7355, + "step": 13425 + }, + { + "epoch": 
0.4114870663234032, + "grad_norm": 1.8610252062032533, + "learning_rate": 6.645086274858822e-06, + "loss": 0.7862, + "step": 13426 + }, + { + "epoch": 0.4115177148461444, + "grad_norm": 1.6615310070307492, + "learning_rate": 6.644617581953371e-06, + "loss": 0.6653, + "step": 13427 + }, + { + "epoch": 0.4115483633688856, + "grad_norm": 1.801961255499414, + "learning_rate": 6.644148872843081e-06, + "loss": 0.6635, + "step": 13428 + }, + { + "epoch": 0.4115790118916268, + "grad_norm": 2.2224977315655843, + "learning_rate": 6.643680147532572e-06, + "loss": 0.693, + "step": 13429 + }, + { + "epoch": 0.41160966041436803, + "grad_norm": 1.5449684057139765, + "learning_rate": 6.643211406026463e-06, + "loss": 0.6007, + "step": 13430 + }, + { + "epoch": 0.41164030893710923, + "grad_norm": 1.4826382454785498, + "learning_rate": 6.642742648329371e-06, + "loss": 0.6254, + "step": 13431 + }, + { + "epoch": 0.41167095745985044, + "grad_norm": 1.6404738180490772, + "learning_rate": 6.642273874445914e-06, + "loss": 0.6807, + "step": 13432 + }, + { + "epoch": 0.41170160598259165, + "grad_norm": 1.6597369153960488, + "learning_rate": 6.641805084380715e-06, + "loss": 0.7131, + "step": 13433 + }, + { + "epoch": 0.41173225450533285, + "grad_norm": 1.703342262993007, + "learning_rate": 6.641336278138387e-06, + "loss": 0.6349, + "step": 13434 + }, + { + "epoch": 0.41176290302807406, + "grad_norm": 1.7781121237555708, + "learning_rate": 6.640867455723556e-06, + "loss": 0.6112, + "step": 13435 + }, + { + "epoch": 0.41179355155081526, + "grad_norm": 1.6878424044083278, + "learning_rate": 6.6403986171408365e-06, + "loss": 0.6923, + "step": 13436 + }, + { + "epoch": 0.41182420007355647, + "grad_norm": 1.8369451185832126, + "learning_rate": 6.63992976239485e-06, + "loss": 0.6068, + "step": 13437 + }, + { + "epoch": 0.4118548485962977, + "grad_norm": 1.8841371979975174, + "learning_rate": 6.639460891490217e-06, + "loss": 0.6418, + "step": 13438 + }, + { + "epoch": 0.4118854971190389, + "grad_norm": 1.7669740298452128, + "learning_rate": 6.6389920044315545e-06, + "loss": 0.6418, + "step": 13439 + }, + { + "epoch": 0.4119161456417801, + "grad_norm": 1.7148366638603725, + "learning_rate": 6.638523101223485e-06, + "loss": 0.6507, + "step": 13440 + }, + { + "epoch": 0.4119467941645213, + "grad_norm": 1.8193823327726903, + "learning_rate": 6.638054181870629e-06, + "loss": 0.7127, + "step": 13441 + }, + { + "epoch": 0.4119774426872625, + "grad_norm": 1.5821542741847365, + "learning_rate": 6.637585246377605e-06, + "loss": 0.7603, + "step": 13442 + }, + { + "epoch": 0.4120080912100037, + "grad_norm": 1.7970170972930137, + "learning_rate": 6.637116294749035e-06, + "loss": 0.7232, + "step": 13443 + }, + { + "epoch": 0.4120387397327449, + "grad_norm": 1.7354312994475913, + "learning_rate": 6.6366473269895395e-06, + "loss": 0.6135, + "step": 13444 + }, + { + "epoch": 0.4120693882554861, + "grad_norm": 1.6128213512395186, + "learning_rate": 6.636178343103739e-06, + "loss": 0.5915, + "step": 13445 + }, + { + "epoch": 0.41210003677822726, + "grad_norm": 1.7089757630217361, + "learning_rate": 6.635709343096255e-06, + "loss": 0.6967, + "step": 13446 + }, + { + "epoch": 0.41213068530096847, + "grad_norm": 1.6945386729843248, + "learning_rate": 6.635240326971707e-06, + "loss": 0.6971, + "step": 13447 + }, + { + "epoch": 0.4121613338237097, + "grad_norm": 0.8337705138506429, + "learning_rate": 6.634771294734719e-06, + "loss": 0.4604, + "step": 13448 + }, + { + "epoch": 0.4121919823464509, + "grad_norm": 1.5577772312532054, + 
"learning_rate": 6.63430224638991e-06, + "loss": 0.5998, + "step": 13449 + }, + { + "epoch": 0.4122226308691921, + "grad_norm": 1.7857769367564842, + "learning_rate": 6.633833181941905e-06, + "loss": 0.6813, + "step": 13450 + }, + { + "epoch": 0.4122532793919333, + "grad_norm": 0.774583443546709, + "learning_rate": 6.633364101395321e-06, + "loss": 0.4348, + "step": 13451 + }, + { + "epoch": 0.4122839279146745, + "grad_norm": 1.7925069063513002, + "learning_rate": 6.632895004754785e-06, + "loss": 0.7085, + "step": 13452 + }, + { + "epoch": 0.4123145764374157, + "grad_norm": 1.6245337621040188, + "learning_rate": 6.632425892024914e-06, + "loss": 0.6472, + "step": 13453 + }, + { + "epoch": 0.4123452249601569, + "grad_norm": 0.7910828948049808, + "learning_rate": 6.631956763210335e-06, + "loss": 0.4557, + "step": 13454 + }, + { + "epoch": 0.4123758734828981, + "grad_norm": 1.71660078692795, + "learning_rate": 6.6314876183156686e-06, + "loss": 0.627, + "step": 13455 + }, + { + "epoch": 0.4124065220056393, + "grad_norm": 1.6549634517505543, + "learning_rate": 6.631018457345536e-06, + "loss": 0.6612, + "step": 13456 + }, + { + "epoch": 0.4124371705283805, + "grad_norm": 1.4768105887353855, + "learning_rate": 6.630549280304561e-06, + "loss": 0.6245, + "step": 13457 + }, + { + "epoch": 0.41246781905112173, + "grad_norm": 1.5751822183204283, + "learning_rate": 6.630080087197368e-06, + "loss": 0.5961, + "step": 13458 + }, + { + "epoch": 0.41249846757386294, + "grad_norm": 0.7954091218694648, + "learning_rate": 6.629610878028579e-06, + "loss": 0.4623, + "step": 13459 + }, + { + "epoch": 0.41252911609660414, + "grad_norm": 0.7766364557113604, + "learning_rate": 6.629141652802815e-06, + "loss": 0.4447, + "step": 13460 + }, + { + "epoch": 0.41255976461934535, + "grad_norm": 1.559088062947135, + "learning_rate": 6.628672411524704e-06, + "loss": 0.5983, + "step": 13461 + }, + { + "epoch": 0.41259041314208655, + "grad_norm": 2.0445792548388373, + "learning_rate": 6.628203154198865e-06, + "loss": 0.6862, + "step": 13462 + }, + { + "epoch": 0.41262106166482776, + "grad_norm": 1.630933593739183, + "learning_rate": 6.627733880829926e-06, + "loss": 0.6155, + "step": 13463 + }, + { + "epoch": 0.41265171018756897, + "grad_norm": 1.569345863774532, + "learning_rate": 6.627264591422507e-06, + "loss": 0.6549, + "step": 13464 + }, + { + "epoch": 0.41268235871031017, + "grad_norm": 1.8651453985485977, + "learning_rate": 6.626795285981235e-06, + "loss": 0.7299, + "step": 13465 + }, + { + "epoch": 0.4127130072330514, + "grad_norm": 1.6877306447952236, + "learning_rate": 6.6263259645107305e-06, + "loss": 0.6626, + "step": 13466 + }, + { + "epoch": 0.4127436557557926, + "grad_norm": 1.6448593642046956, + "learning_rate": 6.625856627015621e-06, + "loss": 0.6663, + "step": 13467 + }, + { + "epoch": 0.4127743042785338, + "grad_norm": 0.8510746707767077, + "learning_rate": 6.6253872735005296e-06, + "loss": 0.4658, + "step": 13468 + }, + { + "epoch": 0.412804952801275, + "grad_norm": 1.5522479622691867, + "learning_rate": 6.624917903970084e-06, + "loss": 0.6423, + "step": 13469 + }, + { + "epoch": 0.4128356013240162, + "grad_norm": 1.7094016820336086, + "learning_rate": 6.624448518428905e-06, + "loss": 0.6905, + "step": 13470 + }, + { + "epoch": 0.4128662498467574, + "grad_norm": 1.7623498553549544, + "learning_rate": 6.6239791168816195e-06, + "loss": 0.619, + "step": 13471 + }, + { + "epoch": 0.4128968983694986, + "grad_norm": 1.5538230541916616, + "learning_rate": 6.623509699332851e-06, + "loss": 0.7077, + "step": 13472 + }, 
+ { + "epoch": 0.4129275468922398, + "grad_norm": 0.773285247400521, + "learning_rate": 6.623040265787227e-06, + "loss": 0.4474, + "step": 13473 + }, + { + "epoch": 0.412958195414981, + "grad_norm": 1.841030645871962, + "learning_rate": 6.6225708162493715e-06, + "loss": 0.7236, + "step": 13474 + }, + { + "epoch": 0.4129888439377222, + "grad_norm": 1.6209255180852737, + "learning_rate": 6.6221013507239105e-06, + "loss": 0.6429, + "step": 13475 + }, + { + "epoch": 0.41301949246046343, + "grad_norm": 1.861693684304544, + "learning_rate": 6.621631869215471e-06, + "loss": 0.6404, + "step": 13476 + }, + { + "epoch": 0.4130501409832046, + "grad_norm": 1.6604488872223315, + "learning_rate": 6.621162371728678e-06, + "loss": 0.6902, + "step": 13477 + }, + { + "epoch": 0.4130807895059458, + "grad_norm": 1.7868505488454258, + "learning_rate": 6.620692858268156e-06, + "loss": 0.5576, + "step": 13478 + }, + { + "epoch": 0.413111438028687, + "grad_norm": 1.5487806551127319, + "learning_rate": 6.6202233288385335e-06, + "loss": 0.5979, + "step": 13479 + }, + { + "epoch": 0.4131420865514282, + "grad_norm": 1.8475741791190508, + "learning_rate": 6.619753783444435e-06, + "loss": 0.6617, + "step": 13480 + }, + { + "epoch": 0.4131727350741694, + "grad_norm": 1.7874717267985858, + "learning_rate": 6.6192842220904886e-06, + "loss": 0.6409, + "step": 13481 + }, + { + "epoch": 0.4132033835969106, + "grad_norm": 1.7773126720399095, + "learning_rate": 6.61881464478132e-06, + "loss": 0.6851, + "step": 13482 + }, + { + "epoch": 0.4132340321196518, + "grad_norm": 1.581219271122007, + "learning_rate": 6.618345051521558e-06, + "loss": 0.5948, + "step": 13483 + }, + { + "epoch": 0.413264680642393, + "grad_norm": 1.8456727809364177, + "learning_rate": 6.617875442315827e-06, + "loss": 0.6877, + "step": 13484 + }, + { + "epoch": 0.41329532916513423, + "grad_norm": 1.792082646661338, + "learning_rate": 6.617405817168755e-06, + "loss": 0.6545, + "step": 13485 + }, + { + "epoch": 0.41332597768787543, + "grad_norm": 1.6891813135045897, + "learning_rate": 6.616936176084969e-06, + "loss": 0.7073, + "step": 13486 + }, + { + "epoch": 0.41335662621061664, + "grad_norm": 1.560636466406356, + "learning_rate": 6.616466519069099e-06, + "loss": 0.5357, + "step": 13487 + }, + { + "epoch": 0.41338727473335785, + "grad_norm": 1.6705833010110929, + "learning_rate": 6.61599684612577e-06, + "loss": 0.657, + "step": 13488 + }, + { + "epoch": 0.41341792325609905, + "grad_norm": 1.5517034826066176, + "learning_rate": 6.615527157259611e-06, + "loss": 0.7284, + "step": 13489 + }, + { + "epoch": 0.41344857177884026, + "grad_norm": 1.7467992804137902, + "learning_rate": 6.615057452475249e-06, + "loss": 0.68, + "step": 13490 + }, + { + "epoch": 0.41347922030158146, + "grad_norm": 1.8940050585109918, + "learning_rate": 6.6145877317773135e-06, + "loss": 0.7171, + "step": 13491 + }, + { + "epoch": 0.41350986882432267, + "grad_norm": 0.8741730330933971, + "learning_rate": 6.614117995170431e-06, + "loss": 0.4499, + "step": 13492 + }, + { + "epoch": 0.4135405173470639, + "grad_norm": 1.6489740806853317, + "learning_rate": 6.613648242659232e-06, + "loss": 0.702, + "step": 13493 + }, + { + "epoch": 0.4135711658698051, + "grad_norm": 1.87982346065486, + "learning_rate": 6.613178474248342e-06, + "loss": 0.7156, + "step": 13494 + }, + { + "epoch": 0.4136018143925463, + "grad_norm": 1.646477469671486, + "learning_rate": 6.6127086899423935e-06, + "loss": 0.6336, + "step": 13495 + }, + { + "epoch": 0.4136324629152875, + "grad_norm": 1.8348464631294286, + 
"learning_rate": 6.612238889746013e-06, + "loss": 0.6915, + "step": 13496 + }, + { + "epoch": 0.4136631114380287, + "grad_norm": 1.6246946091904215, + "learning_rate": 6.611769073663831e-06, + "loss": 0.6101, + "step": 13497 + }, + { + "epoch": 0.4136937599607699, + "grad_norm": 1.7850099465848948, + "learning_rate": 6.611299241700474e-06, + "loss": 0.7362, + "step": 13498 + }, + { + "epoch": 0.4137244084835111, + "grad_norm": 1.517641211883032, + "learning_rate": 6.610829393860575e-06, + "loss": 0.5717, + "step": 13499 + }, + { + "epoch": 0.4137550570062523, + "grad_norm": 1.7424245952062294, + "learning_rate": 6.6103595301487625e-06, + "loss": 0.6603, + "step": 13500 + }, + { + "epoch": 0.4137857055289935, + "grad_norm": 1.7787380094127248, + "learning_rate": 6.609889650569663e-06, + "loss": 0.6745, + "step": 13501 + }, + { + "epoch": 0.4138163540517347, + "grad_norm": 1.6973766123148308, + "learning_rate": 6.609419755127911e-06, + "loss": 0.6213, + "step": 13502 + }, + { + "epoch": 0.41384700257447593, + "grad_norm": 1.7166788425185044, + "learning_rate": 6.608949843828132e-06, + "loss": 0.7457, + "step": 13503 + }, + { + "epoch": 0.41387765109721714, + "grad_norm": 1.6225547058498049, + "learning_rate": 6.6084799166749615e-06, + "loss": 0.6497, + "step": 13504 + }, + { + "epoch": 0.41390829961995834, + "grad_norm": 1.6840163928732261, + "learning_rate": 6.608009973673025e-06, + "loss": 0.643, + "step": 13505 + }, + { + "epoch": 0.41393894814269955, + "grad_norm": 2.7596636420811884, + "learning_rate": 6.607540014826956e-06, + "loss": 0.6445, + "step": 13506 + }, + { + "epoch": 0.41396959666544075, + "grad_norm": 0.7974257891480825, + "learning_rate": 6.607070040141382e-06, + "loss": 0.4556, + "step": 13507 + }, + { + "epoch": 0.4140002451881819, + "grad_norm": 1.5165930239976992, + "learning_rate": 6.606600049620938e-06, + "loss": 0.5951, + "step": 13508 + }, + { + "epoch": 0.4140308937109231, + "grad_norm": 1.8750971183603837, + "learning_rate": 6.606130043270251e-06, + "loss": 0.7833, + "step": 13509 + }, + { + "epoch": 0.4140615422336643, + "grad_norm": 1.7658998602919407, + "learning_rate": 6.6056600210939544e-06, + "loss": 0.7556, + "step": 13510 + }, + { + "epoch": 0.4140921907564055, + "grad_norm": 1.5603580915455646, + "learning_rate": 6.605189983096678e-06, + "loss": 0.7308, + "step": 13511 + }, + { + "epoch": 0.4141228392791467, + "grad_norm": 1.558783233307359, + "learning_rate": 6.604719929283056e-06, + "loss": 0.6531, + "step": 13512 + }, + { + "epoch": 0.41415348780188793, + "grad_norm": 1.6920411439637308, + "learning_rate": 6.604249859657717e-06, + "loss": 0.6275, + "step": 13513 + }, + { + "epoch": 0.41418413632462914, + "grad_norm": 1.419713733932443, + "learning_rate": 6.603779774225292e-06, + "loss": 0.6418, + "step": 13514 + }, + { + "epoch": 0.41421478484737034, + "grad_norm": 1.8225025949412603, + "learning_rate": 6.6033096729904164e-06, + "loss": 0.6599, + "step": 13515 + }, + { + "epoch": 0.41424543337011155, + "grad_norm": 1.6663813625024726, + "learning_rate": 6.60283955595772e-06, + "loss": 0.7854, + "step": 13516 + }, + { + "epoch": 0.41427608189285275, + "grad_norm": 1.68382422738499, + "learning_rate": 6.602369423131836e-06, + "loss": 0.6817, + "step": 13517 + }, + { + "epoch": 0.41430673041559396, + "grad_norm": 1.6024140740694626, + "learning_rate": 6.601899274517394e-06, + "loss": 0.6221, + "step": 13518 + }, + { + "epoch": 0.41433737893833517, + "grad_norm": 1.6147118037243724, + "learning_rate": 6.601429110119031e-06, + "loss": 0.6679, + "step": 
13519 + }, + { + "epoch": 0.41436802746107637, + "grad_norm": 1.8573685070435908, + "learning_rate": 6.600958929941376e-06, + "loss": 0.6287, + "step": 13520 + }, + { + "epoch": 0.4143986759838176, + "grad_norm": 1.666854301180219, + "learning_rate": 6.600488733989064e-06, + "loss": 0.6294, + "step": 13521 + }, + { + "epoch": 0.4144293245065588, + "grad_norm": 1.4592106348334044, + "learning_rate": 6.600018522266724e-06, + "loss": 0.6264, + "step": 13522 + }, + { + "epoch": 0.4144599730293, + "grad_norm": 1.936888355854581, + "learning_rate": 6.599548294778996e-06, + "loss": 0.6087, + "step": 13523 + }, + { + "epoch": 0.4144906215520412, + "grad_norm": 1.8925551084741774, + "learning_rate": 6.599078051530506e-06, + "loss": 0.6899, + "step": 13524 + }, + { + "epoch": 0.4145212700747824, + "grad_norm": 1.807592872893147, + "learning_rate": 6.598607792525893e-06, + "loss": 0.6446, + "step": 13525 + }, + { + "epoch": 0.4145519185975236, + "grad_norm": 0.8592100873664613, + "learning_rate": 6.598137517769787e-06, + "loss": 0.4453, + "step": 13526 + }, + { + "epoch": 0.4145825671202648, + "grad_norm": 0.8037077256755613, + "learning_rate": 6.597667227266825e-06, + "loss": 0.4548, + "step": 13527 + }, + { + "epoch": 0.414613215643006, + "grad_norm": 1.5991086227505038, + "learning_rate": 6.597196921021638e-06, + "loss": 0.6917, + "step": 13528 + }, + { + "epoch": 0.4146438641657472, + "grad_norm": 1.5730660712633, + "learning_rate": 6.5967265990388605e-06, + "loss": 0.7219, + "step": 13529 + }, + { + "epoch": 0.41467451268848843, + "grad_norm": 1.6660601738814607, + "learning_rate": 6.596256261323128e-06, + "loss": 0.6334, + "step": 13530 + }, + { + "epoch": 0.41470516121122963, + "grad_norm": 1.5965776805010743, + "learning_rate": 6.595785907879074e-06, + "loss": 0.648, + "step": 13531 + }, + { + "epoch": 0.41473580973397084, + "grad_norm": 1.779700619513507, + "learning_rate": 6.595315538711334e-06, + "loss": 0.786, + "step": 13532 + }, + { + "epoch": 0.41476645825671205, + "grad_norm": 0.7723426320848962, + "learning_rate": 6.5948451538245406e-06, + "loss": 0.4589, + "step": 13533 + }, + { + "epoch": 0.41479710677945325, + "grad_norm": 1.6008946227175689, + "learning_rate": 6.5943747532233305e-06, + "loss": 0.5903, + "step": 13534 + }, + { + "epoch": 0.41482775530219446, + "grad_norm": 1.707328598516572, + "learning_rate": 6.593904336912338e-06, + "loss": 0.6495, + "step": 13535 + }, + { + "epoch": 0.41485840382493566, + "grad_norm": 0.81679037295798, + "learning_rate": 6.5934339048961986e-06, + "loss": 0.463, + "step": 13536 + }, + { + "epoch": 0.41488905234767687, + "grad_norm": 1.6495046252652925, + "learning_rate": 6.592963457179546e-06, + "loss": 0.6463, + "step": 13537 + }, + { + "epoch": 0.4149197008704181, + "grad_norm": 1.7456616164430647, + "learning_rate": 6.592492993767017e-06, + "loss": 0.5728, + "step": 13538 + }, + { + "epoch": 0.4149503493931592, + "grad_norm": 1.7318416066291569, + "learning_rate": 6.592022514663248e-06, + "loss": 0.666, + "step": 13539 + }, + { + "epoch": 0.41498099791590043, + "grad_norm": 0.8764550345718448, + "learning_rate": 6.591552019872872e-06, + "loss": 0.4783, + "step": 13540 + }, + { + "epoch": 0.41501164643864163, + "grad_norm": 1.516136619442212, + "learning_rate": 6.591081509400529e-06, + "loss": 0.6847, + "step": 13541 + }, + { + "epoch": 0.41504229496138284, + "grad_norm": 1.7255661287716424, + "learning_rate": 6.590610983250853e-06, + "loss": 0.7134, + "step": 13542 + }, + { + "epoch": 0.41507294348412405, + "grad_norm": 1.6809494029678764, + 
"learning_rate": 6.590140441428479e-06, + "loss": 0.6271, + "step": 13543 + }, + { + "epoch": 0.41510359200686525, + "grad_norm": 1.670112775605455, + "learning_rate": 6.589669883938043e-06, + "loss": 0.6832, + "step": 13544 + }, + { + "epoch": 0.41513424052960646, + "grad_norm": 1.750636059246388, + "learning_rate": 6.5891993107841846e-06, + "loss": 0.6593, + "step": 13545 + }, + { + "epoch": 0.41516488905234766, + "grad_norm": 1.5983821483308849, + "learning_rate": 6.588728721971538e-06, + "loss": 0.6703, + "step": 13546 + }, + { + "epoch": 0.41519553757508887, + "grad_norm": 1.6961066700639011, + "learning_rate": 6.588258117504742e-06, + "loss": 0.6527, + "step": 13547 + }, + { + "epoch": 0.4152261860978301, + "grad_norm": 1.7173159940637668, + "learning_rate": 6.587787497388431e-06, + "loss": 0.6742, + "step": 13548 + }, + { + "epoch": 0.4152568346205713, + "grad_norm": 1.6168289385676735, + "learning_rate": 6.5873168616272445e-06, + "loss": 0.6215, + "step": 13549 + }, + { + "epoch": 0.4152874831433125, + "grad_norm": 1.8367100750497194, + "learning_rate": 6.586846210225819e-06, + "loss": 0.7522, + "step": 13550 + }, + { + "epoch": 0.4153181316660537, + "grad_norm": 1.710675672177606, + "learning_rate": 6.586375543188791e-06, + "loss": 0.5972, + "step": 13551 + }, + { + "epoch": 0.4153487801887949, + "grad_norm": 1.6433697135972805, + "learning_rate": 6.585904860520798e-06, + "loss": 0.6774, + "step": 13552 + }, + { + "epoch": 0.4153794287115361, + "grad_norm": 0.8518244170254038, + "learning_rate": 6.58543416222648e-06, + "loss": 0.4583, + "step": 13553 + }, + { + "epoch": 0.4154100772342773, + "grad_norm": 1.6297694370331446, + "learning_rate": 6.584963448310474e-06, + "loss": 0.6143, + "step": 13554 + }, + { + "epoch": 0.4154407257570185, + "grad_norm": 1.8212814020465928, + "learning_rate": 6.5844927187774164e-06, + "loss": 0.6802, + "step": 13555 + }, + { + "epoch": 0.4154713742797597, + "grad_norm": 0.8058396337613833, + "learning_rate": 6.5840219736319475e-06, + "loss": 0.4616, + "step": 13556 + }, + { + "epoch": 0.4155020228025009, + "grad_norm": 1.830311937581392, + "learning_rate": 6.583551212878704e-06, + "loss": 0.7078, + "step": 13557 + }, + { + "epoch": 0.41553267132524213, + "grad_norm": 1.6540535660879114, + "learning_rate": 6.5830804365223266e-06, + "loss": 0.5681, + "step": 13558 + }, + { + "epoch": 0.41556331984798334, + "grad_norm": 1.9069540655529205, + "learning_rate": 6.58260964456745e-06, + "loss": 0.7228, + "step": 13559 + }, + { + "epoch": 0.41559396837072454, + "grad_norm": 1.7978151489817726, + "learning_rate": 6.582138837018719e-06, + "loss": 0.6936, + "step": 13560 + }, + { + "epoch": 0.41562461689346575, + "grad_norm": 1.7826578499552475, + "learning_rate": 6.581668013880767e-06, + "loss": 0.6768, + "step": 13561 + }, + { + "epoch": 0.41565526541620695, + "grad_norm": 1.5970513994318856, + "learning_rate": 6.581197175158236e-06, + "loss": 0.6813, + "step": 13562 + }, + { + "epoch": 0.41568591393894816, + "grad_norm": 1.805756643108612, + "learning_rate": 6.580726320855765e-06, + "loss": 0.5886, + "step": 13563 + }, + { + "epoch": 0.41571656246168937, + "grad_norm": 0.8201627902798323, + "learning_rate": 6.580255450977992e-06, + "loss": 0.4502, + "step": 13564 + }, + { + "epoch": 0.41574721098443057, + "grad_norm": 1.6883173588372369, + "learning_rate": 6.579784565529558e-06, + "loss": 0.7652, + "step": 13565 + }, + { + "epoch": 0.4157778595071718, + "grad_norm": 1.62466895619112, + "learning_rate": 6.579313664515103e-06, + "loss": 0.5982, + "step": 13566 
+ }, + { + "epoch": 0.415808508029913, + "grad_norm": 1.90507479694742, + "learning_rate": 6.578842747939267e-06, + "loss": 0.6626, + "step": 13567 + }, + { + "epoch": 0.4158391565526542, + "grad_norm": 1.4796062473434173, + "learning_rate": 6.578371815806689e-06, + "loss": 0.6296, + "step": 13568 + }, + { + "epoch": 0.4158698050753954, + "grad_norm": 1.736880328396366, + "learning_rate": 6.5779008681220095e-06, + "loss": 0.7509, + "step": 13569 + }, + { + "epoch": 0.41590045359813654, + "grad_norm": 0.7723623578484957, + "learning_rate": 6.577429904889868e-06, + "loss": 0.4426, + "step": 13570 + }, + { + "epoch": 0.41593110212087775, + "grad_norm": 0.8259547493489307, + "learning_rate": 6.576958926114907e-06, + "loss": 0.4674, + "step": 13571 + }, + { + "epoch": 0.41596175064361895, + "grad_norm": 0.802127259561182, + "learning_rate": 6.576487931801766e-06, + "loss": 0.4505, + "step": 13572 + }, + { + "epoch": 0.41599239916636016, + "grad_norm": 1.8907230266158161, + "learning_rate": 6.576016921955087e-06, + "loss": 0.6396, + "step": 13573 + }, + { + "epoch": 0.41602304768910137, + "grad_norm": 1.5426586613443003, + "learning_rate": 6.575545896579509e-06, + "loss": 0.6211, + "step": 13574 + }, + { + "epoch": 0.41605369621184257, + "grad_norm": 1.690064934689959, + "learning_rate": 6.575074855679675e-06, + "loss": 0.5416, + "step": 13575 + }, + { + "epoch": 0.4160843447345838, + "grad_norm": 1.6201078510240954, + "learning_rate": 6.574603799260224e-06, + "loss": 0.6335, + "step": 13576 + }, + { + "epoch": 0.416114993257325, + "grad_norm": 1.6620201886661583, + "learning_rate": 6.574132727325801e-06, + "loss": 0.6866, + "step": 13577 + }, + { + "epoch": 0.4161456417800662, + "grad_norm": 1.693784766763086, + "learning_rate": 6.5736616398810436e-06, + "loss": 0.738, + "step": 13578 + }, + { + "epoch": 0.4161762903028074, + "grad_norm": 1.8580756655801687, + "learning_rate": 6.573190536930596e-06, + "loss": 0.6558, + "step": 13579 + }, + { + "epoch": 0.4162069388255486, + "grad_norm": 1.8721618410436112, + "learning_rate": 6.5727194184790985e-06, + "loss": 0.6637, + "step": 13580 + }, + { + "epoch": 0.4162375873482898, + "grad_norm": 1.6845044035231376, + "learning_rate": 6.572248284531196e-06, + "loss": 0.6886, + "step": 13581 + }, + { + "epoch": 0.416268235871031, + "grad_norm": 1.7656841031214245, + "learning_rate": 6.571777135091528e-06, + "loss": 0.6215, + "step": 13582 + }, + { + "epoch": 0.4162988843937722, + "grad_norm": 1.7013525604002617, + "learning_rate": 6.571305970164737e-06, + "loss": 0.5755, + "step": 13583 + }, + { + "epoch": 0.4163295329165134, + "grad_norm": 1.5649430469331027, + "learning_rate": 6.570834789755468e-06, + "loss": 0.5976, + "step": 13584 + }, + { + "epoch": 0.41636018143925463, + "grad_norm": 1.512215126725242, + "learning_rate": 6.570363593868361e-06, + "loss": 0.6535, + "step": 13585 + }, + { + "epoch": 0.41639082996199583, + "grad_norm": 1.7197849274118053, + "learning_rate": 6.569892382508061e-06, + "loss": 0.7353, + "step": 13586 + }, + { + "epoch": 0.41642147848473704, + "grad_norm": 3.0860196971286364, + "learning_rate": 6.569421155679207e-06, + "loss": 0.7077, + "step": 13587 + }, + { + "epoch": 0.41645212700747825, + "grad_norm": 1.5242592143571956, + "learning_rate": 6.568949913386446e-06, + "loss": 0.568, + "step": 13588 + }, + { + "epoch": 0.41648277553021945, + "grad_norm": 1.591274703480377, + "learning_rate": 6.56847865563442e-06, + "loss": 0.6242, + "step": 13589 + }, + { + "epoch": 0.41651342405296066, + "grad_norm": 1.704702684783561, + 
"learning_rate": 6.568007382427773e-06, + "loss": 0.715, + "step": 13590 + }, + { + "epoch": 0.41654407257570186, + "grad_norm": 1.945386863824227, + "learning_rate": 6.567536093771147e-06, + "loss": 0.5985, + "step": 13591 + }, + { + "epoch": 0.41657472109844307, + "grad_norm": 1.5415557567289846, + "learning_rate": 6.5670647896691885e-06, + "loss": 0.6508, + "step": 13592 + }, + { + "epoch": 0.4166053696211843, + "grad_norm": 1.6845619132337908, + "learning_rate": 6.5665934701265384e-06, + "loss": 0.6799, + "step": 13593 + }, + { + "epoch": 0.4166360181439255, + "grad_norm": 1.8166012168153645, + "learning_rate": 6.566122135147843e-06, + "loss": 0.7268, + "step": 13594 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 1.8746151598722605, + "learning_rate": 6.565650784737745e-06, + "loss": 0.5876, + "step": 13595 + }, + { + "epoch": 0.4166973151894079, + "grad_norm": 1.8365557223164415, + "learning_rate": 6.565179418900889e-06, + "loss": 0.7211, + "step": 13596 + }, + { + "epoch": 0.4167279637121491, + "grad_norm": 1.6400464009334481, + "learning_rate": 6.56470803764192e-06, + "loss": 0.6221, + "step": 13597 + }, + { + "epoch": 0.4167586122348903, + "grad_norm": 1.5174858970262908, + "learning_rate": 6.5642366409654826e-06, + "loss": 0.6104, + "step": 13598 + }, + { + "epoch": 0.4167892607576315, + "grad_norm": 1.8777552448388555, + "learning_rate": 6.56376522887622e-06, + "loss": 0.6673, + "step": 13599 + }, + { + "epoch": 0.4168199092803727, + "grad_norm": 1.8398113301915626, + "learning_rate": 6.563293801378781e-06, + "loss": 0.7284, + "step": 13600 + }, + { + "epoch": 0.41685055780311386, + "grad_norm": 1.0650257600683168, + "learning_rate": 6.562822358477806e-06, + "loss": 0.4451, + "step": 13601 + }, + { + "epoch": 0.41688120632585507, + "grad_norm": 1.6087709704814512, + "learning_rate": 6.562350900177943e-06, + "loss": 0.62, + "step": 13602 + }, + { + "epoch": 0.4169118548485963, + "grad_norm": 2.6515654058621183, + "learning_rate": 6.5618794264838374e-06, + "loss": 0.6035, + "step": 13603 + }, + { + "epoch": 0.4169425033713375, + "grad_norm": 2.103327810103587, + "learning_rate": 6.561407937400132e-06, + "loss": 0.7481, + "step": 13604 + }, + { + "epoch": 0.4169731518940787, + "grad_norm": 1.5852297768547892, + "learning_rate": 6.560936432931477e-06, + "loss": 0.6013, + "step": 13605 + }, + { + "epoch": 0.4170038004168199, + "grad_norm": 1.5288738600421836, + "learning_rate": 6.560464913082515e-06, + "loss": 0.5976, + "step": 13606 + }, + { + "epoch": 0.4170344489395611, + "grad_norm": 1.606343457591157, + "learning_rate": 6.559993377857894e-06, + "loss": 0.7469, + "step": 13607 + }, + { + "epoch": 0.4170650974623023, + "grad_norm": 1.9403320332003535, + "learning_rate": 6.5595218272622585e-06, + "loss": 0.7281, + "step": 13608 + }, + { + "epoch": 0.4170957459850435, + "grad_norm": 1.6981469251735943, + "learning_rate": 6.559050261300255e-06, + "loss": 0.7237, + "step": 13609 + }, + { + "epoch": 0.4171263945077847, + "grad_norm": 0.8449345161057402, + "learning_rate": 6.55857867997653e-06, + "loss": 0.4456, + "step": 13610 + }, + { + "epoch": 0.4171570430305259, + "grad_norm": 1.8615362099984636, + "learning_rate": 6.558107083295731e-06, + "loss": 0.7442, + "step": 13611 + }, + { + "epoch": 0.4171876915532671, + "grad_norm": 1.6401517326155486, + "learning_rate": 6.557635471262506e-06, + "loss": 0.6981, + "step": 13612 + }, + { + "epoch": 0.41721834007600833, + "grad_norm": 1.682153314463918, + "learning_rate": 6.557163843881498e-06, + "loss": 0.6896, + "step": 13613 + }, + 
{ + "epoch": 0.41724898859874954, + "grad_norm": 1.7286854533199656, + "learning_rate": 6.556692201157356e-06, + "loss": 0.636, + "step": 13614 + }, + { + "epoch": 0.41727963712149074, + "grad_norm": 0.7594793521772811, + "learning_rate": 6.556220543094728e-06, + "loss": 0.4543, + "step": 13615 + }, + { + "epoch": 0.41731028564423195, + "grad_norm": 1.6760330384335231, + "learning_rate": 6.555748869698262e-06, + "loss": 0.6058, + "step": 13616 + }, + { + "epoch": 0.41734093416697315, + "grad_norm": 1.5189261730297803, + "learning_rate": 6.5552771809726034e-06, + "loss": 0.6772, + "step": 13617 + }, + { + "epoch": 0.41737158268971436, + "grad_norm": 1.702234571099347, + "learning_rate": 6.554805476922401e-06, + "loss": 0.6591, + "step": 13618 + }, + { + "epoch": 0.41740223121245557, + "grad_norm": 1.6412868482228316, + "learning_rate": 6.554333757552302e-06, + "loss": 0.7227, + "step": 13619 + }, + { + "epoch": 0.41743287973519677, + "grad_norm": 1.5351558813865374, + "learning_rate": 6.553862022866956e-06, + "loss": 0.5892, + "step": 13620 + }, + { + "epoch": 0.417463528257938, + "grad_norm": 1.8009826211120183, + "learning_rate": 6.5533902728710075e-06, + "loss": 0.6077, + "step": 13621 + }, + { + "epoch": 0.4174941767806792, + "grad_norm": 1.771691862777727, + "learning_rate": 6.5529185075691095e-06, + "loss": 0.5739, + "step": 13622 + }, + { + "epoch": 0.4175248253034204, + "grad_norm": 1.9800089319236316, + "learning_rate": 6.552446726965907e-06, + "loss": 0.7074, + "step": 13623 + }, + { + "epoch": 0.4175554738261616, + "grad_norm": 1.7089925437370226, + "learning_rate": 6.55197493106605e-06, + "loss": 0.5893, + "step": 13624 + }, + { + "epoch": 0.4175861223489028, + "grad_norm": 1.8637043514714697, + "learning_rate": 6.551503119874186e-06, + "loss": 0.7428, + "step": 13625 + }, + { + "epoch": 0.417616770871644, + "grad_norm": 1.7107547925007458, + "learning_rate": 6.551031293394965e-06, + "loss": 0.6694, + "step": 13626 + }, + { + "epoch": 0.4176474193943852, + "grad_norm": 1.682607290210034, + "learning_rate": 6.5505594516330385e-06, + "loss": 0.7732, + "step": 13627 + }, + { + "epoch": 0.4176780679171264, + "grad_norm": 1.7623947443873624, + "learning_rate": 6.55008759459305e-06, + "loss": 0.742, + "step": 13628 + }, + { + "epoch": 0.4177087164398676, + "grad_norm": 1.7168515761564596, + "learning_rate": 6.549615722279652e-06, + "loss": 0.6865, + "step": 13629 + }, + { + "epoch": 0.41773936496260883, + "grad_norm": 2.100675683440678, + "learning_rate": 6.5491438346974945e-06, + "loss": 0.7041, + "step": 13630 + }, + { + "epoch": 0.41777001348535003, + "grad_norm": 0.8730678386309223, + "learning_rate": 6.548671931851227e-06, + "loss": 0.4401, + "step": 13631 + }, + { + "epoch": 0.4178006620080912, + "grad_norm": 1.699350533050347, + "learning_rate": 6.5482000137454985e-06, + "loss": 0.6293, + "step": 13632 + }, + { + "epoch": 0.4178313105308324, + "grad_norm": 1.9647743019170234, + "learning_rate": 6.547728080384959e-06, + "loss": 0.6439, + "step": 13633 + }, + { + "epoch": 0.4178619590535736, + "grad_norm": 1.700963527084569, + "learning_rate": 6.547256131774258e-06, + "loss": 0.6832, + "step": 13634 + }, + { + "epoch": 0.4178926075763148, + "grad_norm": 1.7493871623302546, + "learning_rate": 6.546784167918047e-06, + "loss": 0.6228, + "step": 13635 + }, + { + "epoch": 0.417923256099056, + "grad_norm": 1.8013765461131812, + "learning_rate": 6.546312188820976e-06, + "loss": 0.6869, + "step": 13636 + }, + { + "epoch": 0.4179539046217972, + "grad_norm": 1.6191244346177358, + 
"learning_rate": 6.545840194487694e-06, + "loss": 0.6562, + "step": 13637 + }, + { + "epoch": 0.4179845531445384, + "grad_norm": 1.8390070711434472, + "learning_rate": 6.545368184922855e-06, + "loss": 0.7129, + "step": 13638 + }, + { + "epoch": 0.4180152016672796, + "grad_norm": 0.7884827175883327, + "learning_rate": 6.5448961601311066e-06, + "loss": 0.4468, + "step": 13639 + }, + { + "epoch": 0.41804585019002083, + "grad_norm": 0.8027628084513113, + "learning_rate": 6.544424120117103e-06, + "loss": 0.4432, + "step": 13640 + }, + { + "epoch": 0.41807649871276203, + "grad_norm": 2.009588112771523, + "learning_rate": 6.54395206488549e-06, + "loss": 0.6929, + "step": 13641 + }, + { + "epoch": 0.41810714723550324, + "grad_norm": 1.756943960101164, + "learning_rate": 6.543479994440926e-06, + "loss": 0.7305, + "step": 13642 + }, + { + "epoch": 0.41813779575824445, + "grad_norm": 1.7751270176918537, + "learning_rate": 6.543007908788057e-06, + "loss": 0.7147, + "step": 13643 + }, + { + "epoch": 0.41816844428098565, + "grad_norm": 0.7973242097790706, + "learning_rate": 6.542535807931536e-06, + "loss": 0.4461, + "step": 13644 + }, + { + "epoch": 0.41819909280372686, + "grad_norm": 0.781664721790983, + "learning_rate": 6.542063691876015e-06, + "loss": 0.4661, + "step": 13645 + }, + { + "epoch": 0.41822974132646806, + "grad_norm": 1.6823089822810477, + "learning_rate": 6.541591560626147e-06, + "loss": 0.5982, + "step": 13646 + }, + { + "epoch": 0.41826038984920927, + "grad_norm": 1.6089791213995837, + "learning_rate": 6.5411194141865804e-06, + "loss": 0.7425, + "step": 13647 + }, + { + "epoch": 0.4182910383719505, + "grad_norm": 1.7329020979999707, + "learning_rate": 6.540647252561972e-06, + "loss": 0.6369, + "step": 13648 + }, + { + "epoch": 0.4183216868946917, + "grad_norm": 1.765282083746735, + "learning_rate": 6.540175075756971e-06, + "loss": 0.6875, + "step": 13649 + }, + { + "epoch": 0.4183523354174329, + "grad_norm": 1.577404133008093, + "learning_rate": 6.539702883776232e-06, + "loss": 0.6541, + "step": 13650 + }, + { + "epoch": 0.4183829839401741, + "grad_norm": 1.8055399376652472, + "learning_rate": 6.539230676624406e-06, + "loss": 0.6374, + "step": 13651 + }, + { + "epoch": 0.4184136324629153, + "grad_norm": 1.889777597994, + "learning_rate": 6.538758454306147e-06, + "loss": 0.6729, + "step": 13652 + }, + { + "epoch": 0.4184442809856565, + "grad_norm": 1.7248496188291038, + "learning_rate": 6.538286216826107e-06, + "loss": 0.6889, + "step": 13653 + }, + { + "epoch": 0.4184749295083977, + "grad_norm": 1.8991572937335006, + "learning_rate": 6.537813964188938e-06, + "loss": 0.7375, + "step": 13654 + }, + { + "epoch": 0.4185055780311389, + "grad_norm": 1.6659709204463995, + "learning_rate": 6.537341696399295e-06, + "loss": 0.5932, + "step": 13655 + }, + { + "epoch": 0.4185362265538801, + "grad_norm": 1.671219520131011, + "learning_rate": 6.536869413461832e-06, + "loss": 0.6682, + "step": 13656 + }, + { + "epoch": 0.4185668750766213, + "grad_norm": 1.8680434932385055, + "learning_rate": 6.5363971153812e-06, + "loss": 0.7107, + "step": 13657 + }, + { + "epoch": 0.41859752359936253, + "grad_norm": 1.7076291235647536, + "learning_rate": 6.5359248021620556e-06, + "loss": 0.6419, + "step": 13658 + }, + { + "epoch": 0.41862817212210374, + "grad_norm": 0.8370737348432876, + "learning_rate": 6.5354524738090505e-06, + "loss": 0.4432, + "step": 13659 + }, + { + "epoch": 0.41865882064484494, + "grad_norm": 1.949862953725367, + "learning_rate": 6.534980130326839e-06, + "loss": 0.8174, + "step": 13660 + }, + 
{ + "epoch": 0.41868946916758615, + "grad_norm": 1.694694243785603, + "learning_rate": 6.534507771720076e-06, + "loss": 0.678, + "step": 13661 + }, + { + "epoch": 0.41872011769032735, + "grad_norm": 1.5949691420864651, + "learning_rate": 6.534035397993415e-06, + "loss": 0.6011, + "step": 13662 + }, + { + "epoch": 0.4187507662130685, + "grad_norm": 1.7242431043498223, + "learning_rate": 6.53356300915151e-06, + "loss": 0.7, + "step": 13663 + }, + { + "epoch": 0.4187814147358097, + "grad_norm": 1.5973153518032992, + "learning_rate": 6.533090605199017e-06, + "loss": 0.6699, + "step": 13664 + }, + { + "epoch": 0.4188120632585509, + "grad_norm": 1.724406996092783, + "learning_rate": 6.532618186140591e-06, + "loss": 0.6727, + "step": 13665 + }, + { + "epoch": 0.4188427117812921, + "grad_norm": 1.7548498595781084, + "learning_rate": 6.5321457519808855e-06, + "loss": 0.6237, + "step": 13666 + }, + { + "epoch": 0.4188733603040333, + "grad_norm": 1.7751102398178016, + "learning_rate": 6.531673302724555e-06, + "loss": 0.7576, + "step": 13667 + }, + { + "epoch": 0.41890400882677453, + "grad_norm": 1.644075823797452, + "learning_rate": 6.531200838376255e-06, + "loss": 0.642, + "step": 13668 + }, + { + "epoch": 0.41893465734951574, + "grad_norm": 1.7375822029523136, + "learning_rate": 6.5307283589406425e-06, + "loss": 0.684, + "step": 13669 + }, + { + "epoch": 0.41896530587225694, + "grad_norm": 1.7290116496490116, + "learning_rate": 6.530255864422372e-06, + "loss": 0.6521, + "step": 13670 + }, + { + "epoch": 0.41899595439499815, + "grad_norm": 1.8180493580963921, + "learning_rate": 6.529783354826098e-06, + "loss": 0.6642, + "step": 13671 + }, + { + "epoch": 0.41902660291773935, + "grad_norm": 1.5959528854994616, + "learning_rate": 6.529310830156479e-06, + "loss": 0.6268, + "step": 13672 + }, + { + "epoch": 0.41905725144048056, + "grad_norm": 1.8154730502347642, + "learning_rate": 6.5288382904181665e-06, + "loss": 0.634, + "step": 13673 + }, + { + "epoch": 0.41908789996322177, + "grad_norm": 0.815378469177394, + "learning_rate": 6.528365735615822e-06, + "loss": 0.4507, + "step": 13674 + }, + { + "epoch": 0.41911854848596297, + "grad_norm": 1.626527471180943, + "learning_rate": 6.527893165754097e-06, + "loss": 0.6875, + "step": 13675 + }, + { + "epoch": 0.4191491970087042, + "grad_norm": 1.6933653828830677, + "learning_rate": 6.5274205808376504e-06, + "loss": 0.7047, + "step": 13676 + }, + { + "epoch": 0.4191798455314454, + "grad_norm": 1.729967405425221, + "learning_rate": 6.526947980871137e-06, + "loss": 0.7055, + "step": 13677 + }, + { + "epoch": 0.4192104940541866, + "grad_norm": 1.7624537185958804, + "learning_rate": 6.526475365859215e-06, + "loss": 0.6851, + "step": 13678 + }, + { + "epoch": 0.4192411425769278, + "grad_norm": 0.7995116502164784, + "learning_rate": 6.526002735806541e-06, + "loss": 0.4746, + "step": 13679 + }, + { + "epoch": 0.419271791099669, + "grad_norm": 1.8523761677746495, + "learning_rate": 6.525530090717771e-06, + "loss": 0.6205, + "step": 13680 + }, + { + "epoch": 0.4193024396224102, + "grad_norm": 1.682777354541186, + "learning_rate": 6.5250574305975635e-06, + "loss": 0.7121, + "step": 13681 + }, + { + "epoch": 0.4193330881451514, + "grad_norm": 1.6773225123628168, + "learning_rate": 6.524584755450573e-06, + "loss": 0.5807, + "step": 13682 + }, + { + "epoch": 0.4193637366678926, + "grad_norm": 0.8075253567597176, + "learning_rate": 6.524112065281461e-06, + "loss": 0.4816, + "step": 13683 + }, + { + "epoch": 0.4193943851906338, + "grad_norm": 1.650733004873314, + 
"learning_rate": 6.523639360094882e-06, + "loss": 0.6607, + "step": 13684 + }, + { + "epoch": 0.41942503371337503, + "grad_norm": 1.7414973041312656, + "learning_rate": 6.523166639895496e-06, + "loss": 0.6093, + "step": 13685 + }, + { + "epoch": 0.41945568223611623, + "grad_norm": 1.730512544563599, + "learning_rate": 6.522693904687958e-06, + "loss": 0.7046, + "step": 13686 + }, + { + "epoch": 0.41948633075885744, + "grad_norm": 0.8183446163862447, + "learning_rate": 6.522221154476927e-06, + "loss": 0.4702, + "step": 13687 + }, + { + "epoch": 0.41951697928159865, + "grad_norm": 1.7007647178934482, + "learning_rate": 6.521748389267062e-06, + "loss": 0.6206, + "step": 13688 + }, + { + "epoch": 0.41954762780433985, + "grad_norm": 1.859065719131993, + "learning_rate": 6.521275609063021e-06, + "loss": 0.7212, + "step": 13689 + }, + { + "epoch": 0.41957827632708106, + "grad_norm": 1.8514649626248905, + "learning_rate": 6.520802813869463e-06, + "loss": 0.6382, + "step": 13690 + }, + { + "epoch": 0.41960892484982226, + "grad_norm": 1.8147294180937479, + "learning_rate": 6.520330003691045e-06, + "loss": 0.6631, + "step": 13691 + }, + { + "epoch": 0.41963957337256347, + "grad_norm": 1.6648723910451952, + "learning_rate": 6.519857178532424e-06, + "loss": 0.615, + "step": 13692 + }, + { + "epoch": 0.4196702218953047, + "grad_norm": 1.9300666676670737, + "learning_rate": 6.519384338398263e-06, + "loss": 0.6594, + "step": 13693 + }, + { + "epoch": 0.4197008704180458, + "grad_norm": 1.6001684915510024, + "learning_rate": 6.518911483293221e-06, + "loss": 0.6457, + "step": 13694 + }, + { + "epoch": 0.41973151894078703, + "grad_norm": 1.5972234210877863, + "learning_rate": 6.5184386132219535e-06, + "loss": 0.6295, + "step": 13695 + }, + { + "epoch": 0.41976216746352824, + "grad_norm": 2.072261865636767, + "learning_rate": 6.517965728189124e-06, + "loss": 0.7405, + "step": 13696 + }, + { + "epoch": 0.41979281598626944, + "grad_norm": 1.954836321630001, + "learning_rate": 6.517492828199388e-06, + "loss": 0.6319, + "step": 13697 + }, + { + "epoch": 0.41982346450901065, + "grad_norm": 1.768647121382391, + "learning_rate": 6.5170199132574075e-06, + "loss": 0.6843, + "step": 13698 + }, + { + "epoch": 0.41985411303175185, + "grad_norm": 1.5505738165892426, + "learning_rate": 6.516546983367841e-06, + "loss": 0.6535, + "step": 13699 + }, + { + "epoch": 0.41988476155449306, + "grad_norm": 1.7272241277673832, + "learning_rate": 6.516074038535351e-06, + "loss": 0.6893, + "step": 13700 + }, + { + "epoch": 0.41991541007723426, + "grad_norm": 1.5906099246345218, + "learning_rate": 6.515601078764593e-06, + "loss": 0.6014, + "step": 13701 + }, + { + "epoch": 0.41994605859997547, + "grad_norm": 1.6046915279803478, + "learning_rate": 6.5151281040602325e-06, + "loss": 0.6349, + "step": 13702 + }, + { + "epoch": 0.4199767071227167, + "grad_norm": 1.6144031050700636, + "learning_rate": 6.514655114426924e-06, + "loss": 0.6765, + "step": 13703 + }, + { + "epoch": 0.4200073556454579, + "grad_norm": 1.7486618646871965, + "learning_rate": 6.514182109869333e-06, + "loss": 0.6636, + "step": 13704 + }, + { + "epoch": 0.4200380041681991, + "grad_norm": 0.8883067489209445, + "learning_rate": 6.513709090392118e-06, + "loss": 0.4646, + "step": 13705 + }, + { + "epoch": 0.4200686526909403, + "grad_norm": 1.8193509053700192, + "learning_rate": 6.51323605599994e-06, + "loss": 0.6416, + "step": 13706 + }, + { + "epoch": 0.4200993012136815, + "grad_norm": 1.8462309788127174, + "learning_rate": 6.51276300669746e-06, + "loss": 0.6118, + "step": 
13707 + }, + { + "epoch": 0.4201299497364227, + "grad_norm": 1.930822945958704, + "learning_rate": 6.512289942489339e-06, + "loss": 0.7498, + "step": 13708 + }, + { + "epoch": 0.4201605982591639, + "grad_norm": 1.6381600313611024, + "learning_rate": 6.511816863380239e-06, + "loss": 0.7134, + "step": 13709 + }, + { + "epoch": 0.4201912467819051, + "grad_norm": 1.7215669169756969, + "learning_rate": 6.511343769374819e-06, + "loss": 0.6517, + "step": 13710 + }, + { + "epoch": 0.4202218953046463, + "grad_norm": 1.6761933456043319, + "learning_rate": 6.510870660477744e-06, + "loss": 0.7458, + "step": 13711 + }, + { + "epoch": 0.4202525438273875, + "grad_norm": 1.6587240282633393, + "learning_rate": 6.510397536693673e-06, + "loss": 0.6489, + "step": 13712 + }, + { + "epoch": 0.42028319235012873, + "grad_norm": 0.8539151134066323, + "learning_rate": 6.5099243980272684e-06, + "loss": 0.4659, + "step": 13713 + }, + { + "epoch": 0.42031384087286994, + "grad_norm": 1.8985543891173617, + "learning_rate": 6.5094512444831915e-06, + "loss": 0.7167, + "step": 13714 + }, + { + "epoch": 0.42034448939561114, + "grad_norm": 1.7792998132711377, + "learning_rate": 6.508978076066107e-06, + "loss": 0.636, + "step": 13715 + }, + { + "epoch": 0.42037513791835235, + "grad_norm": 0.7956407788778016, + "learning_rate": 6.508504892780675e-06, + "loss": 0.4425, + "step": 13716 + }, + { + "epoch": 0.42040578644109355, + "grad_norm": 1.6700730039761762, + "learning_rate": 6.508031694631558e-06, + "loss": 0.6809, + "step": 13717 + }, + { + "epoch": 0.42043643496383476, + "grad_norm": 1.6327707884797, + "learning_rate": 6.507558481623419e-06, + "loss": 0.57, + "step": 13718 + }, + { + "epoch": 0.42046708348657597, + "grad_norm": 1.4968871308105307, + "learning_rate": 6.50708525376092e-06, + "loss": 0.5891, + "step": 13719 + }, + { + "epoch": 0.42049773200931717, + "grad_norm": 1.6062551770528473, + "learning_rate": 6.506612011048725e-06, + "loss": 0.7698, + "step": 13720 + }, + { + "epoch": 0.4205283805320584, + "grad_norm": 0.8066712016273876, + "learning_rate": 6.506138753491496e-06, + "loss": 0.4839, + "step": 13721 + }, + { + "epoch": 0.4205590290547996, + "grad_norm": 1.5962426853600553, + "learning_rate": 6.505665481093897e-06, + "loss": 0.6727, + "step": 13722 + }, + { + "epoch": 0.4205896775775408, + "grad_norm": 1.569045871321629, + "learning_rate": 6.50519219386059e-06, + "loss": 0.5644, + "step": 13723 + }, + { + "epoch": 0.420620326100282, + "grad_norm": 0.7972364278130297, + "learning_rate": 6.50471889179624e-06, + "loss": 0.4521, + "step": 13724 + }, + { + "epoch": 0.42065097462302314, + "grad_norm": 1.779613015580328, + "learning_rate": 6.5042455749055086e-06, + "loss": 0.7181, + "step": 13725 + }, + { + "epoch": 0.42068162314576435, + "grad_norm": 0.7869099512308763, + "learning_rate": 6.503772243193061e-06, + "loss": 0.4735, + "step": 13726 + }, + { + "epoch": 0.42071227166850556, + "grad_norm": 1.8261077953239135, + "learning_rate": 6.5032988966635625e-06, + "loss": 0.635, + "step": 13727 + }, + { + "epoch": 0.42074292019124676, + "grad_norm": 1.7515455839557352, + "learning_rate": 6.502825535321674e-06, + "loss": 0.6592, + "step": 13728 + }, + { + "epoch": 0.42077356871398797, + "grad_norm": 1.57455457438908, + "learning_rate": 6.502352159172061e-06, + "loss": 0.6531, + "step": 13729 + }, + { + "epoch": 0.42080421723672917, + "grad_norm": 1.7468324329577536, + "learning_rate": 6.501878768219387e-06, + "loss": 0.6486, + "step": 13730 + }, + { + "epoch": 0.4208348657594704, + "grad_norm": 
1.7027666481147516, + "learning_rate": 6.501405362468319e-06, + "loss": 0.5709, + "step": 13731 + }, + { + "epoch": 0.4208655142822116, + "grad_norm": 1.7326811205925885, + "learning_rate": 6.500931941923519e-06, + "loss": 0.7278, + "step": 13732 + }, + { + "epoch": 0.4208961628049528, + "grad_norm": 0.8159009817720685, + "learning_rate": 6.500458506589652e-06, + "loss": 0.4365, + "step": 13733 + }, + { + "epoch": 0.420926811327694, + "grad_norm": 1.8161476146738817, + "learning_rate": 6.499985056471384e-06, + "loss": 0.7266, + "step": 13734 + }, + { + "epoch": 0.4209574598504352, + "grad_norm": 1.84545288518129, + "learning_rate": 6.49951159157338e-06, + "loss": 0.6648, + "step": 13735 + }, + { + "epoch": 0.4209881083731764, + "grad_norm": 1.863410006439299, + "learning_rate": 6.499038111900302e-06, + "loss": 0.6374, + "step": 13736 + }, + { + "epoch": 0.4210187568959176, + "grad_norm": 0.8676759512065111, + "learning_rate": 6.498564617456821e-06, + "loss": 0.4555, + "step": 13737 + }, + { + "epoch": 0.4210494054186588, + "grad_norm": 2.680275294347155, + "learning_rate": 6.498091108247597e-06, + "loss": 0.6913, + "step": 13738 + }, + { + "epoch": 0.4210800539414, + "grad_norm": 1.5107796544142738, + "learning_rate": 6.4976175842773005e-06, + "loss": 0.5836, + "step": 13739 + }, + { + "epoch": 0.42111070246414123, + "grad_norm": 1.7592719786392006, + "learning_rate": 6.497144045550593e-06, + "loss": 0.6289, + "step": 13740 + }, + { + "epoch": 0.42114135098688243, + "grad_norm": 1.73450358620179, + "learning_rate": 6.496670492072144e-06, + "loss": 0.6352, + "step": 13741 + }, + { + "epoch": 0.42117199950962364, + "grad_norm": 0.7945244870741766, + "learning_rate": 6.496196923846615e-06, + "loss": 0.4516, + "step": 13742 + }, + { + "epoch": 0.42120264803236485, + "grad_norm": 1.8230099978019254, + "learning_rate": 6.495723340878677e-06, + "loss": 0.6591, + "step": 13743 + }, + { + "epoch": 0.42123329655510605, + "grad_norm": 2.6921315724920083, + "learning_rate": 6.4952497431729936e-06, + "loss": 0.6477, + "step": 13744 + }, + { + "epoch": 0.42126394507784726, + "grad_norm": 1.8982784568695708, + "learning_rate": 6.4947761307342315e-06, + "loss": 0.7073, + "step": 13745 + }, + { + "epoch": 0.42129459360058846, + "grad_norm": 1.6873394474007959, + "learning_rate": 6.494302503567057e-06, + "loss": 0.6637, + "step": 13746 + }, + { + "epoch": 0.42132524212332967, + "grad_norm": 1.724624759833715, + "learning_rate": 6.493828861676139e-06, + "loss": 0.6982, + "step": 13747 + }, + { + "epoch": 0.4213558906460709, + "grad_norm": 1.5317618067147647, + "learning_rate": 6.493355205066143e-06, + "loss": 0.6763, + "step": 13748 + }, + { + "epoch": 0.4213865391688121, + "grad_norm": 1.4427482307119968, + "learning_rate": 6.492881533741735e-06, + "loss": 0.5851, + "step": 13749 + }, + { + "epoch": 0.4214171876915533, + "grad_norm": 1.6705095520551352, + "learning_rate": 6.492407847707584e-06, + "loss": 0.6502, + "step": 13750 + }, + { + "epoch": 0.4214478362142945, + "grad_norm": 1.6945094563539769, + "learning_rate": 6.491934146968357e-06, + "loss": 0.708, + "step": 13751 + }, + { + "epoch": 0.4214784847370357, + "grad_norm": 1.577688854098726, + "learning_rate": 6.491460431528721e-06, + "loss": 0.5587, + "step": 13752 + }, + { + "epoch": 0.4215091332597769, + "grad_norm": 1.5154223373129823, + "learning_rate": 6.490986701393343e-06, + "loss": 0.677, + "step": 13753 + }, + { + "epoch": 0.4215397817825181, + "grad_norm": 1.562715971949809, + "learning_rate": 6.490512956566894e-06, + "loss": 0.6138, + 
"step": 13754 + }, + { + "epoch": 0.4215704303052593, + "grad_norm": 1.7085850106169296, + "learning_rate": 6.490039197054037e-06, + "loss": 0.7039, + "step": 13755 + }, + { + "epoch": 0.42160107882800046, + "grad_norm": 1.5591487428840523, + "learning_rate": 6.489565422859443e-06, + "loss": 0.6142, + "step": 13756 + }, + { + "epoch": 0.42163172735074167, + "grad_norm": 1.4788049401461394, + "learning_rate": 6.489091633987778e-06, + "loss": 0.6931, + "step": 13757 + }, + { + "epoch": 0.4216623758734829, + "grad_norm": 1.9410133144039983, + "learning_rate": 6.488617830443715e-06, + "loss": 0.6336, + "step": 13758 + }, + { + "epoch": 0.4216930243962241, + "grad_norm": 1.6396733001053712, + "learning_rate": 6.488144012231918e-06, + "loss": 0.6195, + "step": 13759 + }, + { + "epoch": 0.4217236729189653, + "grad_norm": 0.8444429718603774, + "learning_rate": 6.487670179357058e-06, + "loss": 0.459, + "step": 13760 + }, + { + "epoch": 0.4217543214417065, + "grad_norm": 1.606085579901667, + "learning_rate": 6.487196331823803e-06, + "loss": 0.5113, + "step": 13761 + }, + { + "epoch": 0.4217849699644477, + "grad_norm": 1.6645498619065664, + "learning_rate": 6.486722469636822e-06, + "loss": 0.6703, + "step": 13762 + }, + { + "epoch": 0.4218156184871889, + "grad_norm": 0.8679162036989643, + "learning_rate": 6.486248592800785e-06, + "loss": 0.4409, + "step": 13763 + }, + { + "epoch": 0.4218462670099301, + "grad_norm": 1.6780953300042212, + "learning_rate": 6.485774701320358e-06, + "loss": 0.63, + "step": 13764 + }, + { + "epoch": 0.4218769155326713, + "grad_norm": 1.8558128803319367, + "learning_rate": 6.485300795200215e-06, + "loss": 0.6693, + "step": 13765 + }, + { + "epoch": 0.4219075640554125, + "grad_norm": 1.8066414586016406, + "learning_rate": 6.484826874445023e-06, + "loss": 0.6456, + "step": 13766 + }, + { + "epoch": 0.4219382125781537, + "grad_norm": 1.763549018474962, + "learning_rate": 6.4843529390594505e-06, + "loss": 0.7072, + "step": 13767 + }, + { + "epoch": 0.42196886110089493, + "grad_norm": 1.8424110051747418, + "learning_rate": 6.483878989048169e-06, + "loss": 0.7201, + "step": 13768 + }, + { + "epoch": 0.42199950962363614, + "grad_norm": 1.7848091258085914, + "learning_rate": 6.48340502441585e-06, + "loss": 0.6396, + "step": 13769 + }, + { + "epoch": 0.42203015814637734, + "grad_norm": 0.8140787409800967, + "learning_rate": 6.48293104516716e-06, + "loss": 0.4666, + "step": 13770 + }, + { + "epoch": 0.42206080666911855, + "grad_norm": 1.5621018836060463, + "learning_rate": 6.482457051306772e-06, + "loss": 0.6105, + "step": 13771 + }, + { + "epoch": 0.42209145519185975, + "grad_norm": 0.759279891262684, + "learning_rate": 6.481983042839354e-06, + "loss": 0.4498, + "step": 13772 + }, + { + "epoch": 0.42212210371460096, + "grad_norm": 1.844132672699512, + "learning_rate": 6.481509019769579e-06, + "loss": 0.6754, + "step": 13773 + }, + { + "epoch": 0.42215275223734217, + "grad_norm": 1.499447081427152, + "learning_rate": 6.481034982102116e-06, + "loss": 0.7072, + "step": 13774 + }, + { + "epoch": 0.42218340076008337, + "grad_norm": 1.648687871708743, + "learning_rate": 6.480560929841636e-06, + "loss": 0.6738, + "step": 13775 + }, + { + "epoch": 0.4222140492828246, + "grad_norm": 0.8022505411046005, + "learning_rate": 6.4800868629928116e-06, + "loss": 0.4549, + "step": 13776 + }, + { + "epoch": 0.4222446978055658, + "grad_norm": 1.7904847154507362, + "learning_rate": 6.479612781560312e-06, + "loss": 0.6624, + "step": 13777 + }, + { + "epoch": 0.422275346328307, + "grad_norm": 
1.746181845832458, + "learning_rate": 6.4791386855488096e-06, + "loss": 0.6016, + "step": 13778 + }, + { + "epoch": 0.4223059948510482, + "grad_norm": 1.7036286422338074, + "learning_rate": 6.478664574962974e-06, + "loss": 0.629, + "step": 13779 + }, + { + "epoch": 0.4223366433737894, + "grad_norm": 1.898696051274358, + "learning_rate": 6.478190449807479e-06, + "loss": 0.6973, + "step": 13780 + }, + { + "epoch": 0.4223672918965306, + "grad_norm": 1.7614213011579867, + "learning_rate": 6.4777163100869944e-06, + "loss": 0.7108, + "step": 13781 + }, + { + "epoch": 0.4223979404192718, + "grad_norm": 1.858213493090659, + "learning_rate": 6.477242155806195e-06, + "loss": 0.7679, + "step": 13782 + }, + { + "epoch": 0.422428588942013, + "grad_norm": 1.8533059803659901, + "learning_rate": 6.476767986969748e-06, + "loss": 0.717, + "step": 13783 + }, + { + "epoch": 0.4224592374647542, + "grad_norm": 1.5936095230429674, + "learning_rate": 6.47629380358233e-06, + "loss": 0.7274, + "step": 13784 + }, + { + "epoch": 0.42248988598749543, + "grad_norm": 1.5573975499352277, + "learning_rate": 6.475819605648611e-06, + "loss": 0.5854, + "step": 13785 + }, + { + "epoch": 0.42252053451023663, + "grad_norm": 1.9824063245483317, + "learning_rate": 6.4753453931732634e-06, + "loss": 0.7641, + "step": 13786 + }, + { + "epoch": 0.4225511830329778, + "grad_norm": 1.7860209603204706, + "learning_rate": 6.474871166160959e-06, + "loss": 0.6209, + "step": 13787 + }, + { + "epoch": 0.422581831555719, + "grad_norm": 0.8257261390815677, + "learning_rate": 6.474396924616374e-06, + "loss": 0.4233, + "step": 13788 + }, + { + "epoch": 0.4226124800784602, + "grad_norm": 1.8889333953713583, + "learning_rate": 6.473922668544179e-06, + "loss": 0.6283, + "step": 13789 + }, + { + "epoch": 0.4226431286012014, + "grad_norm": 0.7964455660172272, + "learning_rate": 6.473448397949045e-06, + "loss": 0.4656, + "step": 13790 + }, + { + "epoch": 0.4226737771239426, + "grad_norm": 1.8661662940344774, + "learning_rate": 6.472974112835647e-06, + "loss": 0.7206, + "step": 13791 + }, + { + "epoch": 0.4227044256466838, + "grad_norm": 1.6927645763925698, + "learning_rate": 6.472499813208659e-06, + "loss": 0.7021, + "step": 13792 + }, + { + "epoch": 0.422735074169425, + "grad_norm": 1.7530494740380198, + "learning_rate": 6.472025499072754e-06, + "loss": 0.7486, + "step": 13793 + }, + { + "epoch": 0.4227657226921662, + "grad_norm": 2.0036235215082248, + "learning_rate": 6.471551170432604e-06, + "loss": 0.6789, + "step": 13794 + }, + { + "epoch": 0.42279637121490743, + "grad_norm": 0.818435689954247, + "learning_rate": 6.471076827292885e-06, + "loss": 0.4719, + "step": 13795 + }, + { + "epoch": 0.42282701973764864, + "grad_norm": 1.6149729836055788, + "learning_rate": 6.470602469658268e-06, + "loss": 0.5557, + "step": 13796 + }, + { + "epoch": 0.42285766826038984, + "grad_norm": 1.626895650038314, + "learning_rate": 6.4701280975334316e-06, + "loss": 0.64, + "step": 13797 + }, + { + "epoch": 0.42288831678313105, + "grad_norm": 1.5965903596093356, + "learning_rate": 6.469653710923044e-06, + "loss": 0.5795, + "step": 13798 + }, + { + "epoch": 0.42291896530587225, + "grad_norm": 1.797338834726794, + "learning_rate": 6.469179309831783e-06, + "loss": 0.6717, + "step": 13799 + }, + { + "epoch": 0.42294961382861346, + "grad_norm": 1.7130565726618538, + "learning_rate": 6.468704894264324e-06, + "loss": 0.6304, + "step": 13800 + }, + { + "epoch": 0.42298026235135466, + "grad_norm": 1.8388056670207138, + "learning_rate": 6.468230464225337e-06, + "loss": 0.6716, + 
"step": 13801 + }, + { + "epoch": 0.42301091087409587, + "grad_norm": 0.8043286793740912, + "learning_rate": 6.467756019719501e-06, + "loss": 0.4639, + "step": 13802 + }, + { + "epoch": 0.4230415593968371, + "grad_norm": 1.5782707124727253, + "learning_rate": 6.467281560751489e-06, + "loss": 0.6104, + "step": 13803 + }, + { + "epoch": 0.4230722079195783, + "grad_norm": 1.6509610986693661, + "learning_rate": 6.466807087325978e-06, + "loss": 0.7785, + "step": 13804 + }, + { + "epoch": 0.4231028564423195, + "grad_norm": 1.9283249889363603, + "learning_rate": 6.46633259944764e-06, + "loss": 0.6963, + "step": 13805 + }, + { + "epoch": 0.4231335049650607, + "grad_norm": 1.6557580732983053, + "learning_rate": 6.465858097121151e-06, + "loss": 0.7226, + "step": 13806 + }, + { + "epoch": 0.4231641534878019, + "grad_norm": 1.777047225072308, + "learning_rate": 6.4653835803511884e-06, + "loss": 0.6634, + "step": 13807 + }, + { + "epoch": 0.4231948020105431, + "grad_norm": 0.8084997359814785, + "learning_rate": 6.464909049142427e-06, + "loss": 0.4478, + "step": 13808 + }, + { + "epoch": 0.4232254505332843, + "grad_norm": 1.7576261938275006, + "learning_rate": 6.464434503499542e-06, + "loss": 0.719, + "step": 13809 + }, + { + "epoch": 0.4232560990560255, + "grad_norm": 1.6557638533865782, + "learning_rate": 6.463959943427207e-06, + "loss": 0.7216, + "step": 13810 + }, + { + "epoch": 0.4232867475787667, + "grad_norm": 0.8444375309515799, + "learning_rate": 6.463485368930102e-06, + "loss": 0.4602, + "step": 13811 + }, + { + "epoch": 0.4233173961015079, + "grad_norm": 3.320109583641664, + "learning_rate": 6.4630107800129015e-06, + "loss": 0.5779, + "step": 13812 + }, + { + "epoch": 0.42334804462424913, + "grad_norm": 1.5761159247508842, + "learning_rate": 6.46253617668028e-06, + "loss": 0.6386, + "step": 13813 + }, + { + "epoch": 0.42337869314699034, + "grad_norm": 1.767741860953523, + "learning_rate": 6.462061558936916e-06, + "loss": 0.6465, + "step": 13814 + }, + { + "epoch": 0.42340934166973154, + "grad_norm": 0.7726674822290897, + "learning_rate": 6.461586926787484e-06, + "loss": 0.4461, + "step": 13815 + }, + { + "epoch": 0.42343999019247275, + "grad_norm": 2.012855141225537, + "learning_rate": 6.461112280236663e-06, + "loss": 0.6524, + "step": 13816 + }, + { + "epoch": 0.42347063871521395, + "grad_norm": 0.8027823231939757, + "learning_rate": 6.460637619289129e-06, + "loss": 0.439, + "step": 13817 + }, + { + "epoch": 0.4235012872379551, + "grad_norm": 1.6120884882010833, + "learning_rate": 6.46016294394956e-06, + "loss": 0.5632, + "step": 13818 + }, + { + "epoch": 0.4235319357606963, + "grad_norm": 1.6039694855593454, + "learning_rate": 6.45968825422263e-06, + "loss": 0.6418, + "step": 13819 + }, + { + "epoch": 0.4235625842834375, + "grad_norm": 1.7963951428090488, + "learning_rate": 6.459213550113019e-06, + "loss": 0.6742, + "step": 13820 + }, + { + "epoch": 0.4235932328061787, + "grad_norm": 1.4973380524389863, + "learning_rate": 6.4587388316254055e-06, + "loss": 0.68, + "step": 13821 + }, + { + "epoch": 0.4236238813289199, + "grad_norm": 1.708987875767616, + "learning_rate": 6.458264098764462e-06, + "loss": 0.6495, + "step": 13822 + }, + { + "epoch": 0.42365452985166113, + "grad_norm": 1.751921501278778, + "learning_rate": 6.457789351534871e-06, + "loss": 0.6003, + "step": 13823 + }, + { + "epoch": 0.42368517837440234, + "grad_norm": 1.694155041407784, + "learning_rate": 6.457314589941308e-06, + "loss": 0.6959, + "step": 13824 + }, + { + "epoch": 0.42371582689714354, + "grad_norm": 
1.4685077378537867, + "learning_rate": 6.456839813988451e-06, + "loss": 0.5282, + "step": 13825 + }, + { + "epoch": 0.42374647541988475, + "grad_norm": 1.7491453001435329, + "learning_rate": 6.4563650236809785e-06, + "loss": 0.564, + "step": 13826 + }, + { + "epoch": 0.42377712394262596, + "grad_norm": 1.654780388253273, + "learning_rate": 6.455890219023569e-06, + "loss": 0.5969, + "step": 13827 + }, + { + "epoch": 0.42380777246536716, + "grad_norm": 1.4746922687744404, + "learning_rate": 6.455415400020901e-06, + "loss": 0.5897, + "step": 13828 + }, + { + "epoch": 0.42383842098810837, + "grad_norm": 1.6476716641722535, + "learning_rate": 6.454940566677652e-06, + "loss": 0.6841, + "step": 13829 + }, + { + "epoch": 0.42386906951084957, + "grad_norm": 1.596587876429569, + "learning_rate": 6.454465718998503e-06, + "loss": 0.6431, + "step": 13830 + }, + { + "epoch": 0.4238997180335908, + "grad_norm": 1.6980327097277295, + "learning_rate": 6.45399085698813e-06, + "loss": 0.7051, + "step": 13831 + }, + { + "epoch": 0.423930366556332, + "grad_norm": 0.9635966626958912, + "learning_rate": 6.453515980651213e-06, + "loss": 0.459, + "step": 13832 + }, + { + "epoch": 0.4239610150790732, + "grad_norm": 1.6537523995241796, + "learning_rate": 6.453041089992431e-06, + "loss": 0.6533, + "step": 13833 + }, + { + "epoch": 0.4239916636018144, + "grad_norm": 1.586978616546987, + "learning_rate": 6.452566185016464e-06, + "loss": 0.6545, + "step": 13834 + }, + { + "epoch": 0.4240223121245556, + "grad_norm": 1.6877363904444926, + "learning_rate": 6.452091265727991e-06, + "loss": 0.6154, + "step": 13835 + }, + { + "epoch": 0.4240529606472968, + "grad_norm": 1.7313148851785998, + "learning_rate": 6.4516163321316905e-06, + "loss": 0.6202, + "step": 13836 + }, + { + "epoch": 0.424083609170038, + "grad_norm": 0.8439630081399626, + "learning_rate": 6.451141384232242e-06, + "loss": 0.4711, + "step": 13837 + }, + { + "epoch": 0.4241142576927792, + "grad_norm": 2.5749014331327262, + "learning_rate": 6.450666422034327e-06, + "loss": 0.5988, + "step": 13838 + }, + { + "epoch": 0.4241449062155204, + "grad_norm": 1.4144039333894962, + "learning_rate": 6.450191445542625e-06, + "loss": 0.5157, + "step": 13839 + }, + { + "epoch": 0.42417555473826163, + "grad_norm": 1.7191430451344853, + "learning_rate": 6.449716454761816e-06, + "loss": 0.6801, + "step": 13840 + }, + { + "epoch": 0.42420620326100283, + "grad_norm": 1.698060176319871, + "learning_rate": 6.449241449696579e-06, + "loss": 0.6897, + "step": 13841 + }, + { + "epoch": 0.42423685178374404, + "grad_norm": 1.6278657177389333, + "learning_rate": 6.448766430351595e-06, + "loss": 0.6408, + "step": 13842 + }, + { + "epoch": 0.42426750030648525, + "grad_norm": 0.8900557826603221, + "learning_rate": 6.448291396731545e-06, + "loss": 0.4576, + "step": 13843 + }, + { + "epoch": 0.42429814882922645, + "grad_norm": 1.7947505235001278, + "learning_rate": 6.4478163488411096e-06, + "loss": 0.6682, + "step": 13844 + }, + { + "epoch": 0.42432879735196766, + "grad_norm": 1.7072956257824787, + "learning_rate": 6.447341286684969e-06, + "loss": 0.5943, + "step": 13845 + }, + { + "epoch": 0.42435944587470886, + "grad_norm": 1.6516315381547042, + "learning_rate": 6.446866210267804e-06, + "loss": 0.6364, + "step": 13846 + }, + { + "epoch": 0.42439009439745007, + "grad_norm": 1.4255296348489894, + "learning_rate": 6.446391119594297e-06, + "loss": 0.5501, + "step": 13847 + }, + { + "epoch": 0.4244207429201913, + "grad_norm": 1.801579930172086, + "learning_rate": 6.445916014669127e-06, + "loss": 
0.6888, + "step": 13848 + }, + { + "epoch": 0.4244513914429324, + "grad_norm": 1.649275746189209, + "learning_rate": 6.445440895496977e-06, + "loss": 0.5741, + "step": 13849 + }, + { + "epoch": 0.42448203996567363, + "grad_norm": 1.6231187420703626, + "learning_rate": 6.4449657620825275e-06, + "loss": 0.6758, + "step": 13850 + }, + { + "epoch": 0.42451268848841484, + "grad_norm": 1.6162916036489323, + "learning_rate": 6.444490614430463e-06, + "loss": 0.6091, + "step": 13851 + }, + { + "epoch": 0.42454333701115604, + "grad_norm": 1.6681743263292859, + "learning_rate": 6.44401545254546e-06, + "loss": 0.6528, + "step": 13852 + }, + { + "epoch": 0.42457398553389725, + "grad_norm": 1.7222502185587978, + "learning_rate": 6.4435402764322056e-06, + "loss": 0.6408, + "step": 13853 + }, + { + "epoch": 0.42460463405663845, + "grad_norm": 1.5731803407131455, + "learning_rate": 6.443065086095379e-06, + "loss": 0.6817, + "step": 13854 + }, + { + "epoch": 0.42463528257937966, + "grad_norm": 1.7279103697235143, + "learning_rate": 6.442589881539662e-06, + "loss": 0.8022, + "step": 13855 + }, + { + "epoch": 0.42466593110212086, + "grad_norm": 1.781455699836348, + "learning_rate": 6.4421146627697375e-06, + "loss": 0.6964, + "step": 13856 + }, + { + "epoch": 0.42469657962486207, + "grad_norm": 0.8317934595291959, + "learning_rate": 6.4416394297902894e-06, + "loss": 0.4549, + "step": 13857 + }, + { + "epoch": 0.4247272281476033, + "grad_norm": 1.7704372261887604, + "learning_rate": 6.441164182605999e-06, + "loss": 0.6543, + "step": 13858 + }, + { + "epoch": 0.4247578766703445, + "grad_norm": 1.7505864127246589, + "learning_rate": 6.440688921221547e-06, + "loss": 0.6001, + "step": 13859 + }, + { + "epoch": 0.4247885251930857, + "grad_norm": 1.6897181577409834, + "learning_rate": 6.440213645641621e-06, + "loss": 0.6752, + "step": 13860 + }, + { + "epoch": 0.4248191737158269, + "grad_norm": 1.4587347701080806, + "learning_rate": 6.4397383558709005e-06, + "loss": 0.6027, + "step": 13861 + }, + { + "epoch": 0.4248498222385681, + "grad_norm": 1.5932428161700138, + "learning_rate": 6.439263051914071e-06, + "loss": 0.6753, + "step": 13862 + }, + { + "epoch": 0.4248804707613093, + "grad_norm": 0.8095450016435826, + "learning_rate": 6.438787733775812e-06, + "loss": 0.4481, + "step": 13863 + }, + { + "epoch": 0.4249111192840505, + "grad_norm": 1.6250912076854063, + "learning_rate": 6.438312401460812e-06, + "loss": 0.5714, + "step": 13864 + }, + { + "epoch": 0.4249417678067917, + "grad_norm": 1.9378566630793999, + "learning_rate": 6.437837054973748e-06, + "loss": 0.6309, + "step": 13865 + }, + { + "epoch": 0.4249724163295329, + "grad_norm": 1.6256426893725129, + "learning_rate": 6.437361694319312e-06, + "loss": 0.5925, + "step": 13866 + }, + { + "epoch": 0.4250030648522741, + "grad_norm": 0.7552544069099429, + "learning_rate": 6.436886319502181e-06, + "loss": 0.4407, + "step": 13867 + }, + { + "epoch": 0.42503371337501533, + "grad_norm": 1.9319084298733493, + "learning_rate": 6.436410930527042e-06, + "loss": 0.7955, + "step": 13868 + }, + { + "epoch": 0.42506436189775654, + "grad_norm": 1.7717268652717408, + "learning_rate": 6.435935527398578e-06, + "loss": 0.6726, + "step": 13869 + }, + { + "epoch": 0.42509501042049774, + "grad_norm": 1.6433765559483853, + "learning_rate": 6.435460110121474e-06, + "loss": 0.7622, + "step": 13870 + }, + { + "epoch": 0.42512565894323895, + "grad_norm": 1.675889095382075, + "learning_rate": 6.434984678700416e-06, + "loss": 0.6794, + "step": 13871 + }, + { + "epoch": 0.42515630746598015, + 
"grad_norm": 0.8562266113328677, + "learning_rate": 6.434509233140084e-06, + "loss": 0.4557, + "step": 13872 + }, + { + "epoch": 0.42518695598872136, + "grad_norm": 1.7955264169702654, + "learning_rate": 6.434033773445168e-06, + "loss": 0.6206, + "step": 13873 + }, + { + "epoch": 0.42521760451146257, + "grad_norm": 1.7684822696697466, + "learning_rate": 6.4335582996203484e-06, + "loss": 0.7241, + "step": 13874 + }, + { + "epoch": 0.42524825303420377, + "grad_norm": 1.7464361459686968, + "learning_rate": 6.433082811670314e-06, + "loss": 0.7348, + "step": 13875 + }, + { + "epoch": 0.425278901556945, + "grad_norm": 1.7544841755490626, + "learning_rate": 6.432607309599745e-06, + "loss": 0.6765, + "step": 13876 + }, + { + "epoch": 0.4253095500796862, + "grad_norm": 1.617292012242652, + "learning_rate": 6.432131793413333e-06, + "loss": 0.6431, + "step": 13877 + }, + { + "epoch": 0.4253401986024274, + "grad_norm": 1.5675000739336602, + "learning_rate": 6.431656263115757e-06, + "loss": 0.6194, + "step": 13878 + }, + { + "epoch": 0.4253708471251686, + "grad_norm": 1.764200972927779, + "learning_rate": 6.4311807187117085e-06, + "loss": 0.6533, + "step": 13879 + }, + { + "epoch": 0.42540149564790974, + "grad_norm": 1.6780455914664194, + "learning_rate": 6.430705160205868e-06, + "loss": 0.7074, + "step": 13880 + }, + { + "epoch": 0.42543214417065095, + "grad_norm": 1.7110029657978494, + "learning_rate": 6.4302295876029245e-06, + "loss": 0.6286, + "step": 13881 + }, + { + "epoch": 0.42546279269339216, + "grad_norm": 0.7805337670795213, + "learning_rate": 6.4297540009075634e-06, + "loss": 0.4521, + "step": 13882 + }, + { + "epoch": 0.42549344121613336, + "grad_norm": 1.7624048856072876, + "learning_rate": 6.429278400124469e-06, + "loss": 0.6453, + "step": 13883 + }, + { + "epoch": 0.42552408973887457, + "grad_norm": 1.648936528583531, + "learning_rate": 6.428802785258329e-06, + "loss": 0.6649, + "step": 13884 + }, + { + "epoch": 0.4255547382616158, + "grad_norm": 1.4840888251790512, + "learning_rate": 6.4283271563138305e-06, + "loss": 0.6736, + "step": 13885 + }, + { + "epoch": 0.425585386784357, + "grad_norm": 1.6520301039190217, + "learning_rate": 6.427851513295659e-06, + "loss": 0.6884, + "step": 13886 + }, + { + "epoch": 0.4256160353070982, + "grad_norm": 1.604610396581829, + "learning_rate": 6.4273758562085e-06, + "loss": 0.6963, + "step": 13887 + }, + { + "epoch": 0.4256466838298394, + "grad_norm": 0.7914516495419193, + "learning_rate": 6.426900185057042e-06, + "loss": 0.4554, + "step": 13888 + }, + { + "epoch": 0.4256773323525806, + "grad_norm": 1.77447733527944, + "learning_rate": 6.4264244998459725e-06, + "loss": 0.6444, + "step": 13889 + }, + { + "epoch": 0.4257079808753218, + "grad_norm": 0.7576635212295693, + "learning_rate": 6.425948800579977e-06, + "loss": 0.4405, + "step": 13890 + }, + { + "epoch": 0.425738629398063, + "grad_norm": 1.766028270967824, + "learning_rate": 6.4254730872637415e-06, + "loss": 0.7019, + "step": 13891 + }, + { + "epoch": 0.4257692779208042, + "grad_norm": 1.7591312909747268, + "learning_rate": 6.424997359901957e-06, + "loss": 0.5819, + "step": 13892 + }, + { + "epoch": 0.4257999264435454, + "grad_norm": 1.5833452850239191, + "learning_rate": 6.4245216184993085e-06, + "loss": 0.6318, + "step": 13893 + }, + { + "epoch": 0.4258305749662866, + "grad_norm": 0.8382021501464183, + "learning_rate": 6.424045863060484e-06, + "loss": 0.4744, + "step": 13894 + }, + { + "epoch": 0.42586122348902783, + "grad_norm": 1.8209636556995128, + "learning_rate": 6.42357009359017e-06, + 
"loss": 0.6824, + "step": 13895 + }, + { + "epoch": 0.42589187201176903, + "grad_norm": 1.8763895161135593, + "learning_rate": 6.423094310093056e-06, + "loss": 0.5974, + "step": 13896 + }, + { + "epoch": 0.42592252053451024, + "grad_norm": 1.8556903830052367, + "learning_rate": 6.4226185125738305e-06, + "loss": 0.7105, + "step": 13897 + }, + { + "epoch": 0.42595316905725145, + "grad_norm": 1.4998231587239863, + "learning_rate": 6.422142701037179e-06, + "loss": 0.65, + "step": 13898 + }, + { + "epoch": 0.42598381757999265, + "grad_norm": 1.4456093864624955, + "learning_rate": 6.4216668754877945e-06, + "loss": 0.5642, + "step": 13899 + }, + { + "epoch": 0.42601446610273386, + "grad_norm": 1.9426222521407357, + "learning_rate": 6.42119103593036e-06, + "loss": 0.7057, + "step": 13900 + }, + { + "epoch": 0.42604511462547506, + "grad_norm": 0.7420995945098979, + "learning_rate": 6.420715182369569e-06, + "loss": 0.4361, + "step": 13901 + }, + { + "epoch": 0.42607576314821627, + "grad_norm": 1.6616693887011385, + "learning_rate": 6.420239314810106e-06, + "loss": 0.7363, + "step": 13902 + }, + { + "epoch": 0.4261064116709575, + "grad_norm": 1.650328293674452, + "learning_rate": 6.419763433256663e-06, + "loss": 0.6172, + "step": 13903 + }, + { + "epoch": 0.4261370601936987, + "grad_norm": 1.4474367060951934, + "learning_rate": 6.4192875377139265e-06, + "loss": 0.5637, + "step": 13904 + }, + { + "epoch": 0.4261677087164399, + "grad_norm": 1.5048547615288745, + "learning_rate": 6.4188116281865875e-06, + "loss": 0.598, + "step": 13905 + }, + { + "epoch": 0.4261983572391811, + "grad_norm": 1.5139008004138184, + "learning_rate": 6.418335704679332e-06, + "loss": 0.6443, + "step": 13906 + }, + { + "epoch": 0.4262290057619223, + "grad_norm": 1.8036814652585844, + "learning_rate": 6.417859767196855e-06, + "loss": 0.6938, + "step": 13907 + }, + { + "epoch": 0.4262596542846635, + "grad_norm": 0.8085983765037149, + "learning_rate": 6.4173838157438415e-06, + "loss": 0.4479, + "step": 13908 + }, + { + "epoch": 0.4262903028074047, + "grad_norm": 1.7377556228940856, + "learning_rate": 6.4169078503249835e-06, + "loss": 0.5491, + "step": 13909 + }, + { + "epoch": 0.4263209513301459, + "grad_norm": 1.666256813599271, + "learning_rate": 6.416431870944969e-06, + "loss": 0.6092, + "step": 13910 + }, + { + "epoch": 0.42635159985288706, + "grad_norm": 1.7834201029267087, + "learning_rate": 6.41595587760849e-06, + "loss": 0.7058, + "step": 13911 + }, + { + "epoch": 0.42638224837562827, + "grad_norm": 0.8092144126082, + "learning_rate": 6.415479870320233e-06, + "loss": 0.4607, + "step": 13912 + }, + { + "epoch": 0.4264128968983695, + "grad_norm": 1.901026857646267, + "learning_rate": 6.415003849084893e-06, + "loss": 0.6683, + "step": 13913 + }, + { + "epoch": 0.4264435454211107, + "grad_norm": 1.7142584858221634, + "learning_rate": 6.414527813907158e-06, + "loss": 0.5606, + "step": 13914 + }, + { + "epoch": 0.4264741939438519, + "grad_norm": 1.8092829806631665, + "learning_rate": 6.414051764791717e-06, + "loss": 0.6963, + "step": 13915 + }, + { + "epoch": 0.4265048424665931, + "grad_norm": 1.6278850364470194, + "learning_rate": 6.413575701743264e-06, + "loss": 0.7041, + "step": 13916 + }, + { + "epoch": 0.4265354909893343, + "grad_norm": 1.7250470021271644, + "learning_rate": 6.413099624766487e-06, + "loss": 0.6795, + "step": 13917 + }, + { + "epoch": 0.4265661395120755, + "grad_norm": 1.6409726902046071, + "learning_rate": 6.4126235338660784e-06, + "loss": 0.6534, + "step": 13918 + }, + { + "epoch": 0.4265967880348167, + 
"grad_norm": 1.7590593127783725, + "learning_rate": 6.4121474290467266e-06, + "loss": 0.6638, + "step": 13919 + }, + { + "epoch": 0.4266274365575579, + "grad_norm": 1.7876778030299936, + "learning_rate": 6.411671310313128e-06, + "loss": 0.6986, + "step": 13920 + }, + { + "epoch": 0.4266580850802991, + "grad_norm": 1.631389965204475, + "learning_rate": 6.411195177669968e-06, + "loss": 0.6043, + "step": 13921 + }, + { + "epoch": 0.4266887336030403, + "grad_norm": 1.5335578904648672, + "learning_rate": 6.410719031121943e-06, + "loss": 0.5806, + "step": 13922 + }, + { + "epoch": 0.42671938212578153, + "grad_norm": 1.711073649636695, + "learning_rate": 6.410242870673739e-06, + "loss": 0.5977, + "step": 13923 + }, + { + "epoch": 0.42675003064852274, + "grad_norm": 1.8656930634084263, + "learning_rate": 6.409766696330055e-06, + "loss": 0.7096, + "step": 13924 + }, + { + "epoch": 0.42678067917126394, + "grad_norm": 1.7870955118509966, + "learning_rate": 6.409290508095578e-06, + "loss": 0.6155, + "step": 13925 + }, + { + "epoch": 0.42681132769400515, + "grad_norm": 1.4507391664115892, + "learning_rate": 6.408814305974999e-06, + "loss": 0.6044, + "step": 13926 + }, + { + "epoch": 0.42684197621674635, + "grad_norm": 1.6270608016179762, + "learning_rate": 6.408338089973015e-06, + "loss": 0.6209, + "step": 13927 + }, + { + "epoch": 0.42687262473948756, + "grad_norm": 2.034605773700067, + "learning_rate": 6.407861860094314e-06, + "loss": 0.6257, + "step": 13928 + }, + { + "epoch": 0.42690327326222877, + "grad_norm": 1.7096451210200876, + "learning_rate": 6.407385616343591e-06, + "loss": 0.6574, + "step": 13929 + }, + { + "epoch": 0.42693392178496997, + "grad_norm": 1.9615690467196516, + "learning_rate": 6.406909358725536e-06, + "loss": 0.6114, + "step": 13930 + }, + { + "epoch": 0.4269645703077112, + "grad_norm": 1.738189556013694, + "learning_rate": 6.4064330872448455e-06, + "loss": 0.6865, + "step": 13931 + }, + { + "epoch": 0.4269952188304524, + "grad_norm": 1.560698260564891, + "learning_rate": 6.405956801906207e-06, + "loss": 0.7087, + "step": 13932 + }, + { + "epoch": 0.4270258673531936, + "grad_norm": 1.6605741634727507, + "learning_rate": 6.405480502714319e-06, + "loss": 0.6678, + "step": 13933 + }, + { + "epoch": 0.4270565158759348, + "grad_norm": 1.4948298554846815, + "learning_rate": 6.405004189673869e-06, + "loss": 0.6298, + "step": 13934 + }, + { + "epoch": 0.427087164398676, + "grad_norm": 1.7372923399293754, + "learning_rate": 6.404527862789556e-06, + "loss": 0.6874, + "step": 13935 + }, + { + "epoch": 0.4271178129214172, + "grad_norm": 1.7351382240216393, + "learning_rate": 6.404051522066068e-06, + "loss": 0.64, + "step": 13936 + }, + { + "epoch": 0.4271484614441584, + "grad_norm": 1.8001419946101904, + "learning_rate": 6.403575167508104e-06, + "loss": 0.6175, + "step": 13937 + }, + { + "epoch": 0.4271791099668996, + "grad_norm": 1.7402404212457212, + "learning_rate": 6.403098799120352e-06, + "loss": 0.7149, + "step": 13938 + }, + { + "epoch": 0.4272097584896408, + "grad_norm": 2.009710918781045, + "learning_rate": 6.402622416907511e-06, + "loss": 0.6634, + "step": 13939 + }, + { + "epoch": 0.42724040701238203, + "grad_norm": 0.8767818620683908, + "learning_rate": 6.4021460208742716e-06, + "loss": 0.4486, + "step": 13940 + }, + { + "epoch": 0.42727105553512323, + "grad_norm": 1.9045753686075273, + "learning_rate": 6.401669611025327e-06, + "loss": 0.6157, + "step": 13941 + }, + { + "epoch": 0.4273017040578644, + "grad_norm": 2.100717322833554, + "learning_rate": 6.401193187365375e-06, + 
"loss": 0.7104, + "step": 13942 + }, + { + "epoch": 0.4273323525806056, + "grad_norm": 1.8692713227729392, + "learning_rate": 6.400716749899108e-06, + "loss": 0.8061, + "step": 13943 + }, + { + "epoch": 0.4273630011033468, + "grad_norm": 1.8903397170426157, + "learning_rate": 6.4002402986312195e-06, + "loss": 0.6508, + "step": 13944 + }, + { + "epoch": 0.427393649626088, + "grad_norm": 2.279636605395078, + "learning_rate": 6.3997638335664055e-06, + "loss": 0.8133, + "step": 13945 + }, + { + "epoch": 0.4274242981488292, + "grad_norm": 1.829747238455254, + "learning_rate": 6.39928735470936e-06, + "loss": 0.6676, + "step": 13946 + }, + { + "epoch": 0.4274549466715704, + "grad_norm": 1.7835307957624282, + "learning_rate": 6.39881086206478e-06, + "loss": 0.6319, + "step": 13947 + }, + { + "epoch": 0.4274855951943116, + "grad_norm": 1.8395356030816508, + "learning_rate": 6.398334355637356e-06, + "loss": 0.5886, + "step": 13948 + }, + { + "epoch": 0.4275162437170528, + "grad_norm": 1.6742522684641699, + "learning_rate": 6.397857835431787e-06, + "loss": 0.6283, + "step": 13949 + }, + { + "epoch": 0.42754689223979403, + "grad_norm": 1.7123935569325055, + "learning_rate": 6.397381301452768e-06, + "loss": 0.6246, + "step": 13950 + }, + { + "epoch": 0.42757754076253524, + "grad_norm": 1.8005617789282502, + "learning_rate": 6.396904753704993e-06, + "loss": 0.7251, + "step": 13951 + }, + { + "epoch": 0.42760818928527644, + "grad_norm": 1.726059279349296, + "learning_rate": 6.396428192193156e-06, + "loss": 0.7069, + "step": 13952 + }, + { + "epoch": 0.42763883780801765, + "grad_norm": 1.744272400811341, + "learning_rate": 6.395951616921957e-06, + "loss": 0.6352, + "step": 13953 + }, + { + "epoch": 0.42766948633075885, + "grad_norm": 1.6073332236700584, + "learning_rate": 6.395475027896089e-06, + "loss": 0.6676, + "step": 13954 + }, + { + "epoch": 0.42770013485350006, + "grad_norm": 1.719909087509381, + "learning_rate": 6.394998425120249e-06, + "loss": 0.6933, + "step": 13955 + }, + { + "epoch": 0.42773078337624126, + "grad_norm": 1.9803631411191411, + "learning_rate": 6.394521808599131e-06, + "loss": 0.626, + "step": 13956 + }, + { + "epoch": 0.42776143189898247, + "grad_norm": 1.821698958040076, + "learning_rate": 6.394045178337434e-06, + "loss": 0.7246, + "step": 13957 + }, + { + "epoch": 0.4277920804217237, + "grad_norm": 1.8426690553557148, + "learning_rate": 6.393568534339854e-06, + "loss": 0.6822, + "step": 13958 + }, + { + "epoch": 0.4278227289444649, + "grad_norm": 1.568289400636583, + "learning_rate": 6.393091876611086e-06, + "loss": 0.5831, + "step": 13959 + }, + { + "epoch": 0.4278533774672061, + "grad_norm": 0.9014304407324474, + "learning_rate": 6.392615205155826e-06, + "loss": 0.4629, + "step": 13960 + }, + { + "epoch": 0.4278840259899473, + "grad_norm": 1.693226371742006, + "learning_rate": 6.3921385199787735e-06, + "loss": 0.656, + "step": 13961 + }, + { + "epoch": 0.4279146745126885, + "grad_norm": 1.687882757739218, + "learning_rate": 6.391661821084624e-06, + "loss": 0.602, + "step": 13962 + }, + { + "epoch": 0.4279453230354297, + "grad_norm": 1.8710296569401392, + "learning_rate": 6.391185108478074e-06, + "loss": 0.6483, + "step": 13963 + }, + { + "epoch": 0.4279759715581709, + "grad_norm": 1.5750255525004888, + "learning_rate": 6.39070838216382e-06, + "loss": 0.6165, + "step": 13964 + }, + { + "epoch": 0.4280066200809121, + "grad_norm": 1.8419405183309578, + "learning_rate": 6.3902316421465626e-06, + "loss": 0.6732, + "step": 13965 + }, + { + "epoch": 0.4280372686036533, + 
"grad_norm": 1.7070893166455428, + "learning_rate": 6.389754888430996e-06, + "loss": 0.6356, + "step": 13966 + }, + { + "epoch": 0.4280679171263945, + "grad_norm": 1.7383313199771626, + "learning_rate": 6.389278121021818e-06, + "loss": 0.6095, + "step": 13967 + }, + { + "epoch": 0.42809856564913573, + "grad_norm": 1.736644649235304, + "learning_rate": 6.388801339923729e-06, + "loss": 0.6519, + "step": 13968 + }, + { + "epoch": 0.42812921417187694, + "grad_norm": 1.5702588268365163, + "learning_rate": 6.388324545141423e-06, + "loss": 0.7169, + "step": 13969 + }, + { + "epoch": 0.42815986269461814, + "grad_norm": 1.8874750712466901, + "learning_rate": 6.387847736679603e-06, + "loss": 0.7227, + "step": 13970 + }, + { + "epoch": 0.42819051121735935, + "grad_norm": 1.6791276645505842, + "learning_rate": 6.387370914542962e-06, + "loss": 0.7401, + "step": 13971 + }, + { + "epoch": 0.42822115974010055, + "grad_norm": 1.6544722723043794, + "learning_rate": 6.386894078736201e-06, + "loss": 0.6951, + "step": 13972 + }, + { + "epoch": 0.4282518082628417, + "grad_norm": 1.8154043107316442, + "learning_rate": 6.386417229264017e-06, + "loss": 0.7395, + "step": 13973 + }, + { + "epoch": 0.4282824567855829, + "grad_norm": 0.8111263860088643, + "learning_rate": 6.385940366131112e-06, + "loss": 0.4322, + "step": 13974 + }, + { + "epoch": 0.4283131053083241, + "grad_norm": 2.0935528371134993, + "learning_rate": 6.385463489342179e-06, + "loss": 0.6937, + "step": 13975 + }, + { + "epoch": 0.4283437538310653, + "grad_norm": 1.7537477632812628, + "learning_rate": 6.384986598901921e-06, + "loss": 0.7511, + "step": 13976 + }, + { + "epoch": 0.4283744023538065, + "grad_norm": 1.8228029655634486, + "learning_rate": 6.384509694815036e-06, + "loss": 0.7335, + "step": 13977 + }, + { + "epoch": 0.42840505087654773, + "grad_norm": 1.6829376322726917, + "learning_rate": 6.384032777086222e-06, + "loss": 0.5733, + "step": 13978 + }, + { + "epoch": 0.42843569939928894, + "grad_norm": 2.0200778069040024, + "learning_rate": 6.38355584572018e-06, + "loss": 0.7062, + "step": 13979 + }, + { + "epoch": 0.42846634792203014, + "grad_norm": 1.6488267289751009, + "learning_rate": 6.383078900721607e-06, + "loss": 0.6838, + "step": 13980 + }, + { + "epoch": 0.42849699644477135, + "grad_norm": 1.7928969130267691, + "learning_rate": 6.382601942095203e-06, + "loss": 0.6811, + "step": 13981 + }, + { + "epoch": 0.42852764496751256, + "grad_norm": 1.6989097528775992, + "learning_rate": 6.38212496984567e-06, + "loss": 0.632, + "step": 13982 + }, + { + "epoch": 0.42855829349025376, + "grad_norm": 0.8664678241039241, + "learning_rate": 6.381647983977706e-06, + "loss": 0.4706, + "step": 13983 + }, + { + "epoch": 0.42858894201299497, + "grad_norm": 1.659202846562776, + "learning_rate": 6.381170984496009e-06, + "loss": 0.6193, + "step": 13984 + }, + { + "epoch": 0.4286195905357362, + "grad_norm": 1.4574898379703114, + "learning_rate": 6.380693971405284e-06, + "loss": 0.5909, + "step": 13985 + }, + { + "epoch": 0.4286502390584774, + "grad_norm": 1.7459004704755123, + "learning_rate": 6.380216944710224e-06, + "loss": 0.7142, + "step": 13986 + }, + { + "epoch": 0.4286808875812186, + "grad_norm": 1.7260848799388397, + "learning_rate": 6.379739904415537e-06, + "loss": 0.6849, + "step": 13987 + }, + { + "epoch": 0.4287115361039598, + "grad_norm": 0.7844543918251954, + "learning_rate": 6.379262850525918e-06, + "loss": 0.4526, + "step": 13988 + }, + { + "epoch": 0.428742184626701, + "grad_norm": 1.6783032628545567, + "learning_rate": 6.3787857830460706e-06, 
+ "loss": 0.6034, + "step": 13989 + }, + { + "epoch": 0.4287728331494422, + "grad_norm": 1.5244960024810237, + "learning_rate": 6.378308701980692e-06, + "loss": 0.663, + "step": 13990 + }, + { + "epoch": 0.4288034816721834, + "grad_norm": 0.7802329602961116, + "learning_rate": 6.377831607334487e-06, + "loss": 0.4461, + "step": 13991 + }, + { + "epoch": 0.4288341301949246, + "grad_norm": 1.8231231180576022, + "learning_rate": 6.377354499112153e-06, + "loss": 0.6423, + "step": 13992 + }, + { + "epoch": 0.4288647787176658, + "grad_norm": 1.7469806098967777, + "learning_rate": 6.376877377318393e-06, + "loss": 0.7092, + "step": 13993 + }, + { + "epoch": 0.428895427240407, + "grad_norm": 1.7437708367627556, + "learning_rate": 6.3764002419579095e-06, + "loss": 0.7157, + "step": 13994 + }, + { + "epoch": 0.42892607576314823, + "grad_norm": 1.7462875268676352, + "learning_rate": 6.3759230930354e-06, + "loss": 0.6048, + "step": 13995 + }, + { + "epoch": 0.42895672428588943, + "grad_norm": 1.5617239166414163, + "learning_rate": 6.375445930555569e-06, + "loss": 0.613, + "step": 13996 + }, + { + "epoch": 0.42898737280863064, + "grad_norm": 1.6178844622766455, + "learning_rate": 6.374968754523119e-06, + "loss": 0.6289, + "step": 13997 + }, + { + "epoch": 0.42901802133137185, + "grad_norm": 1.785821074761163, + "learning_rate": 6.3744915649427485e-06, + "loss": 0.674, + "step": 13998 + }, + { + "epoch": 0.42904866985411305, + "grad_norm": 1.6653949611147252, + "learning_rate": 6.374014361819161e-06, + "loss": 0.6599, + "step": 13999 + }, + { + "epoch": 0.42907931837685426, + "grad_norm": 1.5917302645695057, + "learning_rate": 6.373537145157058e-06, + "loss": 0.7449, + "step": 14000 + }, + { + "epoch": 0.42910996689959546, + "grad_norm": 1.5490823062801045, + "learning_rate": 6.373059914961144e-06, + "loss": 0.6326, + "step": 14001 + }, + { + "epoch": 0.42914061542233667, + "grad_norm": 1.644590912254796, + "learning_rate": 6.372582671236118e-06, + "loss": 0.7541, + "step": 14002 + }, + { + "epoch": 0.4291712639450779, + "grad_norm": 0.8802614969444899, + "learning_rate": 6.372105413986684e-06, + "loss": 0.4721, + "step": 14003 + }, + { + "epoch": 0.429201912467819, + "grad_norm": 1.760962094866583, + "learning_rate": 6.371628143217543e-06, + "loss": 0.7588, + "step": 14004 + }, + { + "epoch": 0.42923256099056023, + "grad_norm": 1.5627549194051416, + "learning_rate": 6.3711508589334e-06, + "loss": 0.6594, + "step": 14005 + }, + { + "epoch": 0.42926320951330144, + "grad_norm": 1.7422768813348712, + "learning_rate": 6.370673561138958e-06, + "loss": 0.7044, + "step": 14006 + }, + { + "epoch": 0.42929385803604264, + "grad_norm": 1.6833854913932884, + "learning_rate": 6.3701962498389165e-06, + "loss": 0.6735, + "step": 14007 + }, + { + "epoch": 0.42932450655878385, + "grad_norm": 1.8101625916054669, + "learning_rate": 6.369718925037982e-06, + "loss": 0.7448, + "step": 14008 + }, + { + "epoch": 0.42935515508152505, + "grad_norm": 1.7892658739298333, + "learning_rate": 6.369241586740856e-06, + "loss": 0.6675, + "step": 14009 + }, + { + "epoch": 0.42938580360426626, + "grad_norm": 1.9405915005583663, + "learning_rate": 6.3687642349522425e-06, + "loss": 0.6438, + "step": 14010 + }, + { + "epoch": 0.42941645212700746, + "grad_norm": 1.5030657987146225, + "learning_rate": 6.368286869676846e-06, + "loss": 0.655, + "step": 14011 + }, + { + "epoch": 0.42944710064974867, + "grad_norm": 1.8646528447718296, + "learning_rate": 6.367809490919368e-06, + "loss": 0.6487, + "step": 14012 + }, + { + "epoch": 0.4294777491724899, 
+ "grad_norm": 1.6253989103548652, + "learning_rate": 6.367332098684512e-06, + "loss": 0.619, + "step": 14013 + }, + { + "epoch": 0.4295083976952311, + "grad_norm": 1.8593795302935292, + "learning_rate": 6.366854692976983e-06, + "loss": 0.7467, + "step": 14014 + }, + { + "epoch": 0.4295390462179723, + "grad_norm": 1.6221993767886302, + "learning_rate": 6.366377273801486e-06, + "loss": 0.6912, + "step": 14015 + }, + { + "epoch": 0.4295696947407135, + "grad_norm": 1.8617533008275282, + "learning_rate": 6.365899841162725e-06, + "loss": 0.71, + "step": 14016 + }, + { + "epoch": 0.4296003432634547, + "grad_norm": 1.616324563778454, + "learning_rate": 6.365422395065403e-06, + "loss": 0.6111, + "step": 14017 + }, + { + "epoch": 0.4296309917861959, + "grad_norm": 1.8657046696118083, + "learning_rate": 6.3649449355142226e-06, + "loss": 0.6325, + "step": 14018 + }, + { + "epoch": 0.4296616403089371, + "grad_norm": 1.680948935094431, + "learning_rate": 6.364467462513892e-06, + "loss": 0.585, + "step": 14019 + }, + { + "epoch": 0.4296922888316783, + "grad_norm": 1.6242465012784257, + "learning_rate": 6.363989976069115e-06, + "loss": 0.5911, + "step": 14020 + }, + { + "epoch": 0.4297229373544195, + "grad_norm": 1.7805714257329626, + "learning_rate": 6.363512476184595e-06, + "loss": 0.5875, + "step": 14021 + }, + { + "epoch": 0.4297535858771607, + "grad_norm": 1.6363141668216925, + "learning_rate": 6.363034962865038e-06, + "loss": 0.6274, + "step": 14022 + }, + { + "epoch": 0.42978423439990193, + "grad_norm": 1.6677335054726705, + "learning_rate": 6.362557436115149e-06, + "loss": 0.6461, + "step": 14023 + }, + { + "epoch": 0.42981488292264314, + "grad_norm": 1.7881200764712348, + "learning_rate": 6.362079895939632e-06, + "loss": 0.6658, + "step": 14024 + }, + { + "epoch": 0.42984553144538434, + "grad_norm": 1.8443098301009795, + "learning_rate": 6.361602342343194e-06, + "loss": 0.8095, + "step": 14025 + }, + { + "epoch": 0.42987617996812555, + "grad_norm": 2.1822838541750706, + "learning_rate": 6.361124775330539e-06, + "loss": 0.6662, + "step": 14026 + }, + { + "epoch": 0.42990682849086675, + "grad_norm": 1.6109726187068938, + "learning_rate": 6.360647194906373e-06, + "loss": 0.6934, + "step": 14027 + }, + { + "epoch": 0.42993747701360796, + "grad_norm": 0.8227439817493735, + "learning_rate": 6.360169601075404e-06, + "loss": 0.4527, + "step": 14028 + }, + { + "epoch": 0.42996812553634917, + "grad_norm": 1.8179347831738295, + "learning_rate": 6.359691993842335e-06, + "loss": 0.66, + "step": 14029 + }, + { + "epoch": 0.42999877405909037, + "grad_norm": 1.6995529722898604, + "learning_rate": 6.359214373211873e-06, + "loss": 0.6593, + "step": 14030 + }, + { + "epoch": 0.4300294225818316, + "grad_norm": 1.4181689438732623, + "learning_rate": 6.358736739188724e-06, + "loss": 0.5588, + "step": 14031 + }, + { + "epoch": 0.4300600711045728, + "grad_norm": 1.6092947581813628, + "learning_rate": 6.3582590917775946e-06, + "loss": 0.6775, + "step": 14032 + }, + { + "epoch": 0.430090719627314, + "grad_norm": 1.8019041334043417, + "learning_rate": 6.357781430983189e-06, + "loss": 0.6127, + "step": 14033 + }, + { + "epoch": 0.4301213681500552, + "grad_norm": 1.6581361810489303, + "learning_rate": 6.357303756810218e-06, + "loss": 0.7053, + "step": 14034 + }, + { + "epoch": 0.43015201667279634, + "grad_norm": 1.4378880525172122, + "learning_rate": 6.356826069263384e-06, + "loss": 0.611, + "step": 14035 + }, + { + "epoch": 0.43018266519553755, + "grad_norm": 1.7568476011880407, + "learning_rate": 6.356348368347396e-06, + 
"loss": 0.6783, + "step": 14036 + }, + { + "epoch": 0.43021331371827876, + "grad_norm": 1.764353827811076, + "learning_rate": 6.355870654066961e-06, + "loss": 0.6435, + "step": 14037 + }, + { + "epoch": 0.43024396224101996, + "grad_norm": 1.692612849794232, + "learning_rate": 6.3553929264267845e-06, + "loss": 0.6633, + "step": 14038 + }, + { + "epoch": 0.43027461076376117, + "grad_norm": 1.6805183579092802, + "learning_rate": 6.354915185431576e-06, + "loss": 0.6641, + "step": 14039 + }, + { + "epoch": 0.4303052592865024, + "grad_norm": 1.686095598141503, + "learning_rate": 6.354437431086041e-06, + "loss": 0.6986, + "step": 14040 + }, + { + "epoch": 0.4303359078092436, + "grad_norm": 1.7252655665123247, + "learning_rate": 6.353959663394887e-06, + "loss": 0.654, + "step": 14041 + }, + { + "epoch": 0.4303665563319848, + "grad_norm": 1.821982733110838, + "learning_rate": 6.353481882362822e-06, + "loss": 0.6159, + "step": 14042 + }, + { + "epoch": 0.430397204854726, + "grad_norm": 0.7841632137065402, + "learning_rate": 6.3530040879945565e-06, + "loss": 0.453, + "step": 14043 + }, + { + "epoch": 0.4304278533774672, + "grad_norm": 1.648695716255823, + "learning_rate": 6.352526280294791e-06, + "loss": 0.6311, + "step": 14044 + }, + { + "epoch": 0.4304585019002084, + "grad_norm": 1.8430745706765277, + "learning_rate": 6.352048459268241e-06, + "loss": 0.6288, + "step": 14045 + }, + { + "epoch": 0.4304891504229496, + "grad_norm": 2.046041976407356, + "learning_rate": 6.35157062491961e-06, + "loss": 0.709, + "step": 14046 + }, + { + "epoch": 0.4305197989456908, + "grad_norm": 1.6310629259134242, + "learning_rate": 6.351092777253609e-06, + "loss": 0.5205, + "step": 14047 + }, + { + "epoch": 0.430550447468432, + "grad_norm": 1.7495392285569493, + "learning_rate": 6.350614916274945e-06, + "loss": 0.633, + "step": 14048 + }, + { + "epoch": 0.4305810959911732, + "grad_norm": 1.8644533331599524, + "learning_rate": 6.350137041988327e-06, + "loss": 0.6807, + "step": 14049 + }, + { + "epoch": 0.43061174451391443, + "grad_norm": 1.6269114364781452, + "learning_rate": 6.349659154398462e-06, + "loss": 0.7111, + "step": 14050 + }, + { + "epoch": 0.43064239303665564, + "grad_norm": 1.710865851793101, + "learning_rate": 6.34918125351006e-06, + "loss": 0.6944, + "step": 14051 + }, + { + "epoch": 0.43067304155939684, + "grad_norm": 1.6003218114654345, + "learning_rate": 6.348703339327832e-06, + "loss": 0.627, + "step": 14052 + }, + { + "epoch": 0.43070369008213805, + "grad_norm": 1.7349087824530698, + "learning_rate": 6.348225411856482e-06, + "loss": 0.6912, + "step": 14053 + }, + { + "epoch": 0.43073433860487925, + "grad_norm": 1.7194668604773393, + "learning_rate": 6.347747471100725e-06, + "loss": 0.7021, + "step": 14054 + }, + { + "epoch": 0.43076498712762046, + "grad_norm": 1.6559085007380003, + "learning_rate": 6.347269517065265e-06, + "loss": 0.6957, + "step": 14055 + }, + { + "epoch": 0.43079563565036166, + "grad_norm": 1.728448094471294, + "learning_rate": 6.346791549754816e-06, + "loss": 0.6683, + "step": 14056 + }, + { + "epoch": 0.43082628417310287, + "grad_norm": 1.7343322828197847, + "learning_rate": 6.346313569174083e-06, + "loss": 0.6507, + "step": 14057 + }, + { + "epoch": 0.4308569326958441, + "grad_norm": 0.8020248176799155, + "learning_rate": 6.34583557532778e-06, + "loss": 0.456, + "step": 14058 + }, + { + "epoch": 0.4308875812185853, + "grad_norm": 0.7568978651149393, + "learning_rate": 6.345357568220613e-06, + "loss": 0.4283, + "step": 14059 + }, + { + "epoch": 0.4309182297413265, + "grad_norm": 
1.7187674716056385, + "learning_rate": 6.344879547857294e-06, + "loss": 0.6294, + "step": 14060 + }, + { + "epoch": 0.4309488782640677, + "grad_norm": 1.6792556646207804, + "learning_rate": 6.3444015142425335e-06, + "loss": 0.5584, + "step": 14061 + }, + { + "epoch": 0.4309795267868089, + "grad_norm": 1.8733490647178863, + "learning_rate": 6.34392346738104e-06, + "loss": 0.6748, + "step": 14062 + }, + { + "epoch": 0.4310101753095501, + "grad_norm": 1.6729415265318701, + "learning_rate": 6.3434454072775255e-06, + "loss": 0.6591, + "step": 14063 + }, + { + "epoch": 0.4310408238322913, + "grad_norm": 1.8523996858984895, + "learning_rate": 6.342967333936698e-06, + "loss": 0.6586, + "step": 14064 + }, + { + "epoch": 0.4310714723550325, + "grad_norm": 0.857121657321418, + "learning_rate": 6.342489247363272e-06, + "loss": 0.46, + "step": 14065 + }, + { + "epoch": 0.43110212087777366, + "grad_norm": 0.8521970682955889, + "learning_rate": 6.342011147561955e-06, + "loss": 0.4447, + "step": 14066 + }, + { + "epoch": 0.43113276940051487, + "grad_norm": 1.5931576827321614, + "learning_rate": 6.341533034537459e-06, + "loss": 0.6368, + "step": 14067 + }, + { + "epoch": 0.4311634179232561, + "grad_norm": 1.6462055891153669, + "learning_rate": 6.3410549082944935e-06, + "loss": 0.7269, + "step": 14068 + }, + { + "epoch": 0.4311940664459973, + "grad_norm": 1.6616233998154264, + "learning_rate": 6.340576768837772e-06, + "loss": 0.6746, + "step": 14069 + }, + { + "epoch": 0.4312247149687385, + "grad_norm": 0.786599288327515, + "learning_rate": 6.340098616172006e-06, + "loss": 0.439, + "step": 14070 + }, + { + "epoch": 0.4312553634914797, + "grad_norm": 1.6722130518611105, + "learning_rate": 6.339620450301903e-06, + "loss": 0.71, + "step": 14071 + }, + { + "epoch": 0.4312860120142209, + "grad_norm": 1.893864089026168, + "learning_rate": 6.339142271232177e-06, + "loss": 0.6264, + "step": 14072 + }, + { + "epoch": 0.4313166605369621, + "grad_norm": 1.7267887942086315, + "learning_rate": 6.3386640789675415e-06, + "loss": 0.6678, + "step": 14073 + }, + { + "epoch": 0.4313473090597033, + "grad_norm": 1.6664004127636556, + "learning_rate": 6.338185873512705e-06, + "loss": 0.6678, + "step": 14074 + }, + { + "epoch": 0.4313779575824445, + "grad_norm": 1.5826910729644668, + "learning_rate": 6.337707654872382e-06, + "loss": 0.5839, + "step": 14075 + }, + { + "epoch": 0.4314086061051857, + "grad_norm": 1.6071197913806345, + "learning_rate": 6.337229423051281e-06, + "loss": 0.6005, + "step": 14076 + }, + { + "epoch": 0.4314392546279269, + "grad_norm": 2.0197352936483854, + "learning_rate": 6.336751178054118e-06, + "loss": 0.7181, + "step": 14077 + }, + { + "epoch": 0.43146990315066813, + "grad_norm": 1.7273769258575362, + "learning_rate": 6.336272919885603e-06, + "loss": 0.6405, + "step": 14078 + }, + { + "epoch": 0.43150055167340934, + "grad_norm": 0.8162962804663377, + "learning_rate": 6.335794648550448e-06, + "loss": 0.4282, + "step": 14079 + }, + { + "epoch": 0.43153120019615054, + "grad_norm": 1.5495985879847436, + "learning_rate": 6.335316364053369e-06, + "loss": 0.504, + "step": 14080 + }, + { + "epoch": 0.43156184871889175, + "grad_norm": 1.8364018501378718, + "learning_rate": 6.334838066399074e-06, + "loss": 0.7098, + "step": 14081 + }, + { + "epoch": 0.43159249724163296, + "grad_norm": 0.7692132353267312, + "learning_rate": 6.33435975559228e-06, + "loss": 0.4251, + "step": 14082 + }, + { + "epoch": 0.43162314576437416, + "grad_norm": 1.8217487652699353, + "learning_rate": 6.333881431637696e-06, + "loss": 0.6568, 
+ "step": 14083 + }, + { + "epoch": 0.43165379428711537, + "grad_norm": 3.0530699115849758, + "learning_rate": 6.333403094540038e-06, + "loss": 0.6936, + "step": 14084 + }, + { + "epoch": 0.43168444280985657, + "grad_norm": 1.7198824561531119, + "learning_rate": 6.332924744304019e-06, + "loss": 0.6937, + "step": 14085 + }, + { + "epoch": 0.4317150913325978, + "grad_norm": 1.7466301838601195, + "learning_rate": 6.332446380934349e-06, + "loss": 0.7371, + "step": 14086 + }, + { + "epoch": 0.431745739855339, + "grad_norm": 0.7662766893174582, + "learning_rate": 6.331968004435746e-06, + "loss": 0.4549, + "step": 14087 + }, + { + "epoch": 0.4317763883780802, + "grad_norm": 1.533782009058267, + "learning_rate": 6.3314896148129205e-06, + "loss": 0.609, + "step": 14088 + }, + { + "epoch": 0.4318070369008214, + "grad_norm": 1.7925551194486187, + "learning_rate": 6.331011212070588e-06, + "loss": 0.6819, + "step": 14089 + }, + { + "epoch": 0.4318376854235626, + "grad_norm": 1.5155593237634095, + "learning_rate": 6.33053279621346e-06, + "loss": 0.6203, + "step": 14090 + }, + { + "epoch": 0.4318683339463038, + "grad_norm": 1.8751355518443538, + "learning_rate": 6.3300543672462536e-06, + "loss": 0.6166, + "step": 14091 + }, + { + "epoch": 0.431898982469045, + "grad_norm": 1.410875089642617, + "learning_rate": 6.329575925173679e-06, + "loss": 0.4223, + "step": 14092 + }, + { + "epoch": 0.4319296309917862, + "grad_norm": 1.6997204894418063, + "learning_rate": 6.329097470000456e-06, + "loss": 0.6148, + "step": 14093 + }, + { + "epoch": 0.4319602795145274, + "grad_norm": 1.8025379366353294, + "learning_rate": 6.328619001731292e-06, + "loss": 0.6568, + "step": 14094 + }, + { + "epoch": 0.43199092803726863, + "grad_norm": 1.853933351860769, + "learning_rate": 6.3281405203709065e-06, + "loss": 0.6586, + "step": 14095 + }, + { + "epoch": 0.43202157656000983, + "grad_norm": 1.8798971831496911, + "learning_rate": 6.327662025924013e-06, + "loss": 0.6648, + "step": 14096 + }, + { + "epoch": 0.432052225082751, + "grad_norm": 1.8003588336358418, + "learning_rate": 6.327183518395327e-06, + "loss": 0.6425, + "step": 14097 + }, + { + "epoch": 0.4320828736054922, + "grad_norm": 1.6597445255642542, + "learning_rate": 6.32670499778956e-06, + "loss": 0.5613, + "step": 14098 + }, + { + "epoch": 0.4321135221282334, + "grad_norm": 1.7840418825946698, + "learning_rate": 6.3262264641114305e-06, + "loss": 0.6242, + "step": 14099 + }, + { + "epoch": 0.4321441706509746, + "grad_norm": 1.7843029905844014, + "learning_rate": 6.325747917365651e-06, + "loss": 0.6537, + "step": 14100 + }, + { + "epoch": 0.4321748191737158, + "grad_norm": 1.6777452146624627, + "learning_rate": 6.32526935755694e-06, + "loss": 0.6934, + "step": 14101 + }, + { + "epoch": 0.432205467696457, + "grad_norm": 0.8755210336775381, + "learning_rate": 6.3247907846900096e-06, + "loss": 0.4608, + "step": 14102 + }, + { + "epoch": 0.4322361162191982, + "grad_norm": 1.9882197605130907, + "learning_rate": 6.324312198769576e-06, + "loss": 0.6376, + "step": 14103 + }, + { + "epoch": 0.4322667647419394, + "grad_norm": 0.8038337726756977, + "learning_rate": 6.323833599800356e-06, + "loss": 0.4554, + "step": 14104 + }, + { + "epoch": 0.43229741326468063, + "grad_norm": 1.4159061416664522, + "learning_rate": 6.323354987787066e-06, + "loss": 0.5525, + "step": 14105 + }, + { + "epoch": 0.43232806178742184, + "grad_norm": 1.6252380840213045, + "learning_rate": 6.32287636273442e-06, + "loss": 0.6139, + "step": 14106 + }, + { + "epoch": 0.43235871031016304, + "grad_norm": 
1.6973731784585417, + "learning_rate": 6.322397724647134e-06, + "loss": 0.6664, + "step": 14107 + }, + { + "epoch": 0.43238935883290425, + "grad_norm": 1.642281942215868, + "learning_rate": 6.3219190735299254e-06, + "loss": 0.6505, + "step": 14108 + }, + { + "epoch": 0.43242000735564545, + "grad_norm": 0.8408773835464276, + "learning_rate": 6.3214404093875105e-06, + "loss": 0.4429, + "step": 14109 + }, + { + "epoch": 0.43245065587838666, + "grad_norm": 1.718131596332515, + "learning_rate": 6.320961732224605e-06, + "loss": 0.647, + "step": 14110 + }, + { + "epoch": 0.43248130440112786, + "grad_norm": 1.81917531429082, + "learning_rate": 6.320483042045924e-06, + "loss": 0.599, + "step": 14111 + }, + { + "epoch": 0.43251195292386907, + "grad_norm": 1.754954440081384, + "learning_rate": 6.320004338856189e-06, + "loss": 0.6707, + "step": 14112 + }, + { + "epoch": 0.4325426014466103, + "grad_norm": 2.053309156500082, + "learning_rate": 6.319525622660111e-06, + "loss": 0.7862, + "step": 14113 + }, + { + "epoch": 0.4325732499693515, + "grad_norm": 1.9585758995119773, + "learning_rate": 6.31904689346241e-06, + "loss": 0.6834, + "step": 14114 + }, + { + "epoch": 0.4326038984920927, + "grad_norm": 1.4105939792392468, + "learning_rate": 6.318568151267801e-06, + "loss": 0.6162, + "step": 14115 + }, + { + "epoch": 0.4326345470148339, + "grad_norm": 1.6300226040678134, + "learning_rate": 6.318089396081004e-06, + "loss": 0.6661, + "step": 14116 + }, + { + "epoch": 0.4326651955375751, + "grad_norm": 1.6700833295596045, + "learning_rate": 6.317610627906736e-06, + "loss": 0.6395, + "step": 14117 + }, + { + "epoch": 0.4326958440603163, + "grad_norm": 1.5655850907389972, + "learning_rate": 6.317131846749711e-06, + "loss": 0.5975, + "step": 14118 + }, + { + "epoch": 0.4327264925830575, + "grad_norm": 1.780892794279201, + "learning_rate": 6.316653052614651e-06, + "loss": 0.6272, + "step": 14119 + }, + { + "epoch": 0.4327571411057987, + "grad_norm": 1.7298193831474582, + "learning_rate": 6.316174245506271e-06, + "loss": 0.6929, + "step": 14120 + }, + { + "epoch": 0.4327877896285399, + "grad_norm": 1.9107759878934023, + "learning_rate": 6.315695425429289e-06, + "loss": 0.5277, + "step": 14121 + }, + { + "epoch": 0.4328184381512811, + "grad_norm": 1.7133660501824162, + "learning_rate": 6.315216592388423e-06, + "loss": 0.6042, + "step": 14122 + }, + { + "epoch": 0.43284908667402233, + "grad_norm": 1.5398388395088238, + "learning_rate": 6.314737746388393e-06, + "loss": 0.6384, + "step": 14123 + }, + { + "epoch": 0.43287973519676354, + "grad_norm": 0.8188099571207864, + "learning_rate": 6.314258887433915e-06, + "loss": 0.4566, + "step": 14124 + }, + { + "epoch": 0.43291038371950474, + "grad_norm": 0.7759885418476685, + "learning_rate": 6.313780015529707e-06, + "loss": 0.4367, + "step": 14125 + }, + { + "epoch": 0.43294103224224595, + "grad_norm": 1.885899646881898, + "learning_rate": 6.313301130680488e-06, + "loss": 0.6608, + "step": 14126 + }, + { + "epoch": 0.43297168076498715, + "grad_norm": 1.7027669520940678, + "learning_rate": 6.312822232890978e-06, + "loss": 0.6262, + "step": 14127 + }, + { + "epoch": 0.4330023292877283, + "grad_norm": 1.6796275948935846, + "learning_rate": 6.312343322165895e-06, + "loss": 0.7063, + "step": 14128 + }, + { + "epoch": 0.4330329778104695, + "grad_norm": 1.7780688177786776, + "learning_rate": 6.311864398509957e-06, + "loss": 0.6975, + "step": 14129 + }, + { + "epoch": 0.4330636263332107, + "grad_norm": 1.6623043462509168, + "learning_rate": 6.311385461927882e-06, + "loss": 0.6756, 
+ "step": 14130 + }, + { + "epoch": 0.4330942748559519, + "grad_norm": 1.6240936643305717, + "learning_rate": 6.310906512424393e-06, + "loss": 0.6319, + "step": 14131 + }, + { + "epoch": 0.4331249233786931, + "grad_norm": 1.769316282490766, + "learning_rate": 6.3104275500042055e-06, + "loss": 0.677, + "step": 14132 + }, + { + "epoch": 0.43315557190143433, + "grad_norm": 1.8462292809173815, + "learning_rate": 6.30994857467204e-06, + "loss": 0.6752, + "step": 14133 + }, + { + "epoch": 0.43318622042417554, + "grad_norm": 1.6450068586060034, + "learning_rate": 6.309469586432616e-06, + "loss": 0.6949, + "step": 14134 + }, + { + "epoch": 0.43321686894691674, + "grad_norm": 2.0217477567493405, + "learning_rate": 6.308990585290653e-06, + "loss": 0.5958, + "step": 14135 + }, + { + "epoch": 0.43324751746965795, + "grad_norm": 1.7254179436408827, + "learning_rate": 6.308511571250871e-06, + "loss": 0.68, + "step": 14136 + }, + { + "epoch": 0.43327816599239916, + "grad_norm": 1.6537546481465832, + "learning_rate": 6.3080325443179905e-06, + "loss": 0.6677, + "step": 14137 + }, + { + "epoch": 0.43330881451514036, + "grad_norm": 1.6444216963780223, + "learning_rate": 6.307553504496729e-06, + "loss": 0.6864, + "step": 14138 + }, + { + "epoch": 0.43333946303788157, + "grad_norm": 1.0471265728156636, + "learning_rate": 6.3070744517918105e-06, + "loss": 0.4306, + "step": 14139 + }, + { + "epoch": 0.4333701115606228, + "grad_norm": 3.6585461561472448, + "learning_rate": 6.306595386207952e-06, + "loss": 0.6449, + "step": 14140 + }, + { + "epoch": 0.433400760083364, + "grad_norm": 0.850995389727527, + "learning_rate": 6.306116307749874e-06, + "loss": 0.4344, + "step": 14141 + }, + { + "epoch": 0.4334314086061052, + "grad_norm": 1.7650028469728936, + "learning_rate": 6.305637216422298e-06, + "loss": 0.7142, + "step": 14142 + }, + { + "epoch": 0.4334620571288464, + "grad_norm": 1.9246712894134546, + "learning_rate": 6.305158112229946e-06, + "loss": 0.663, + "step": 14143 + }, + { + "epoch": 0.4334927056515876, + "grad_norm": 1.6280418185066647, + "learning_rate": 6.304678995177535e-06, + "loss": 0.7019, + "step": 14144 + }, + { + "epoch": 0.4335233541743288, + "grad_norm": 1.6589974959373857, + "learning_rate": 6.304199865269789e-06, + "loss": 0.7258, + "step": 14145 + }, + { + "epoch": 0.43355400269707, + "grad_norm": 1.6329841533683698, + "learning_rate": 6.303720722511428e-06, + "loss": 0.6478, + "step": 14146 + }, + { + "epoch": 0.4335846512198112, + "grad_norm": 1.8602551773427831, + "learning_rate": 6.303241566907173e-06, + "loss": 0.6134, + "step": 14147 + }, + { + "epoch": 0.4336152997425524, + "grad_norm": 1.8558641851844047, + "learning_rate": 6.302762398461746e-06, + "loss": 0.7173, + "step": 14148 + }, + { + "epoch": 0.4336459482652936, + "grad_norm": 1.5069491564132427, + "learning_rate": 6.302283217179868e-06, + "loss": 0.5949, + "step": 14149 + }, + { + "epoch": 0.43367659678803483, + "grad_norm": 1.0795822029770086, + "learning_rate": 6.301804023066258e-06, + "loss": 0.4594, + "step": 14150 + }, + { + "epoch": 0.43370724531077604, + "grad_norm": 1.62584716372244, + "learning_rate": 6.3013248161256425e-06, + "loss": 0.6198, + "step": 14151 + }, + { + "epoch": 0.43373789383351724, + "grad_norm": 1.6736154592255235, + "learning_rate": 6.300845596362739e-06, + "loss": 0.6705, + "step": 14152 + }, + { + "epoch": 0.43376854235625845, + "grad_norm": 1.8277364648436467, + "learning_rate": 6.300366363782272e-06, + "loss": 0.7483, + "step": 14153 + }, + { + "epoch": 0.43379919087899965, + "grad_norm": 
1.539433826472309, + "learning_rate": 6.299887118388962e-06, + "loss": 0.6161, + "step": 14154 + }, + { + "epoch": 0.43382983940174086, + "grad_norm": 1.6755787421445583, + "learning_rate": 6.2994078601875334e-06, + "loss": 0.6639, + "step": 14155 + }, + { + "epoch": 0.43386048792448206, + "grad_norm": 1.591109695326633, + "learning_rate": 6.298928589182704e-06, + "loss": 0.6795, + "step": 14156 + }, + { + "epoch": 0.43389113644722327, + "grad_norm": 1.7594944998931885, + "learning_rate": 6.2984493053792e-06, + "loss": 0.588, + "step": 14157 + }, + { + "epoch": 0.4339217849699645, + "grad_norm": 0.7451339248578059, + "learning_rate": 6.297970008781742e-06, + "loss": 0.4302, + "step": 14158 + }, + { + "epoch": 0.4339524334927056, + "grad_norm": 1.6947131034473997, + "learning_rate": 6.297490699395055e-06, + "loss": 0.688, + "step": 14159 + }, + { + "epoch": 0.43398308201544683, + "grad_norm": 1.746781308933776, + "learning_rate": 6.297011377223859e-06, + "loss": 0.7082, + "step": 14160 + }, + { + "epoch": 0.43401373053818804, + "grad_norm": 1.9610454422442918, + "learning_rate": 6.296532042272878e-06, + "loss": 0.6137, + "step": 14161 + }, + { + "epoch": 0.43404437906092924, + "grad_norm": 0.7990196708553629, + "learning_rate": 6.296052694546837e-06, + "loss": 0.4506, + "step": 14162 + }, + { + "epoch": 0.43407502758367045, + "grad_norm": 1.7544954625164155, + "learning_rate": 6.295573334050455e-06, + "loss": 0.6522, + "step": 14163 + }, + { + "epoch": 0.43410567610641165, + "grad_norm": 1.708188575499701, + "learning_rate": 6.2950939607884574e-06, + "loss": 0.7455, + "step": 14164 + }, + { + "epoch": 0.43413632462915286, + "grad_norm": 0.7979117376486754, + "learning_rate": 6.294614574765567e-06, + "loss": 0.457, + "step": 14165 + }, + { + "epoch": 0.43416697315189406, + "grad_norm": 1.7807945658745985, + "learning_rate": 6.294135175986511e-06, + "loss": 0.6999, + "step": 14166 + }, + { + "epoch": 0.43419762167463527, + "grad_norm": 1.8165248520501718, + "learning_rate": 6.293655764456008e-06, + "loss": 0.6713, + "step": 14167 + }, + { + "epoch": 0.4342282701973765, + "grad_norm": 1.545139198781758, + "learning_rate": 6.2931763401787835e-06, + "loss": 0.6429, + "step": 14168 + }, + { + "epoch": 0.4342589187201177, + "grad_norm": 1.6055262801880612, + "learning_rate": 6.292696903159562e-06, + "loss": 0.6379, + "step": 14169 + }, + { + "epoch": 0.4342895672428589, + "grad_norm": 1.970896175535036, + "learning_rate": 6.292217453403068e-06, + "loss": 0.6741, + "step": 14170 + }, + { + "epoch": 0.4343202157656001, + "grad_norm": 1.6978367909214858, + "learning_rate": 6.291737990914024e-06, + "loss": 0.618, + "step": 14171 + }, + { + "epoch": 0.4343508642883413, + "grad_norm": 1.7476415157436764, + "learning_rate": 6.291258515697155e-06, + "loss": 0.6074, + "step": 14172 + }, + { + "epoch": 0.4343815128110825, + "grad_norm": 1.7137470523742369, + "learning_rate": 6.290779027757186e-06, + "loss": 0.7298, + "step": 14173 + }, + { + "epoch": 0.4344121613338237, + "grad_norm": 1.886100293445284, + "learning_rate": 6.29029952709884e-06, + "loss": 0.6893, + "step": 14174 + }, + { + "epoch": 0.4344428098565649, + "grad_norm": 1.6653361169055592, + "learning_rate": 6.289820013726844e-06, + "loss": 0.6468, + "step": 14175 + }, + { + "epoch": 0.4344734583793061, + "grad_norm": 1.6881034810397428, + "learning_rate": 6.2893404876459195e-06, + "loss": 0.5904, + "step": 14176 + }, + { + "epoch": 0.4345041069020473, + "grad_norm": 1.8330623799580312, + "learning_rate": 6.288860948860794e-06, + "loss": 0.5854, 
+ "step": 14177 + }, + { + "epoch": 0.43453475542478853, + "grad_norm": 0.814425397510059, + "learning_rate": 6.288381397376193e-06, + "loss": 0.4362, + "step": 14178 + }, + { + "epoch": 0.43456540394752974, + "grad_norm": 1.4288016636710095, + "learning_rate": 6.28790183319684e-06, + "loss": 0.5739, + "step": 14179 + }, + { + "epoch": 0.43459605247027094, + "grad_norm": 1.6029945227732791, + "learning_rate": 6.28742225632746e-06, + "loss": 0.607, + "step": 14180 + }, + { + "epoch": 0.43462670099301215, + "grad_norm": 1.6905667686144996, + "learning_rate": 6.28694266677278e-06, + "loss": 0.7723, + "step": 14181 + }, + { + "epoch": 0.43465734951575336, + "grad_norm": 1.6838694336722089, + "learning_rate": 6.286463064537524e-06, + "loss": 0.6959, + "step": 14182 + }, + { + "epoch": 0.43468799803849456, + "grad_norm": 1.679424817449818, + "learning_rate": 6.285983449626418e-06, + "loss": 0.6375, + "step": 14183 + }, + { + "epoch": 0.43471864656123577, + "grad_norm": 1.6138708598995377, + "learning_rate": 6.285503822044188e-06, + "loss": 0.6943, + "step": 14184 + }, + { + "epoch": 0.43474929508397697, + "grad_norm": 1.6863467365873304, + "learning_rate": 6.285024181795561e-06, + "loss": 0.626, + "step": 14185 + }, + { + "epoch": 0.4347799436067182, + "grad_norm": 1.8992929584259053, + "learning_rate": 6.2845445288852615e-06, + "loss": 0.7007, + "step": 14186 + }, + { + "epoch": 0.4348105921294594, + "grad_norm": 1.8572958112985347, + "learning_rate": 6.284064863318016e-06, + "loss": 0.6751, + "step": 14187 + }, + { + "epoch": 0.4348412406522006, + "grad_norm": 1.6214897417927272, + "learning_rate": 6.283585185098551e-06, + "loss": 0.6624, + "step": 14188 + }, + { + "epoch": 0.4348718891749418, + "grad_norm": 1.7776837669255765, + "learning_rate": 6.283105494231591e-06, + "loss": 0.7708, + "step": 14189 + }, + { + "epoch": 0.43490253769768294, + "grad_norm": 1.5778544066144595, + "learning_rate": 6.282625790721867e-06, + "loss": 0.6865, + "step": 14190 + }, + { + "epoch": 0.43493318622042415, + "grad_norm": 1.580621620786881, + "learning_rate": 6.2821460745741e-06, + "loss": 0.6306, + "step": 14191 + }, + { + "epoch": 0.43496383474316536, + "grad_norm": 1.7211368701938201, + "learning_rate": 6.2816663457930225e-06, + "loss": 0.5802, + "step": 14192 + }, + { + "epoch": 0.43499448326590656, + "grad_norm": 1.7421245408753294, + "learning_rate": 6.281186604383358e-06, + "loss": 0.6039, + "step": 14193 + }, + { + "epoch": 0.43502513178864777, + "grad_norm": 1.5450317270428144, + "learning_rate": 6.280706850349834e-06, + "loss": 0.5892, + "step": 14194 + }, + { + "epoch": 0.435055780311389, + "grad_norm": 1.707586477616342, + "learning_rate": 6.2802270836971756e-06, + "loss": 0.5949, + "step": 14195 + }, + { + "epoch": 0.4350864288341302, + "grad_norm": 1.7492175221774429, + "learning_rate": 6.279747304430115e-06, + "loss": 0.6664, + "step": 14196 + }, + { + "epoch": 0.4351170773568714, + "grad_norm": 0.8171116925954721, + "learning_rate": 6.279267512553375e-06, + "loss": 0.4617, + "step": 14197 + }, + { + "epoch": 0.4351477258796126, + "grad_norm": 1.4718102491963299, + "learning_rate": 6.278787708071687e-06, + "loss": 0.6567, + "step": 14198 + }, + { + "epoch": 0.4351783744023538, + "grad_norm": 0.7972227136189469, + "learning_rate": 6.278307890989773e-06, + "loss": 0.4431, + "step": 14199 + }, + { + "epoch": 0.435209022925095, + "grad_norm": 1.5442165225766031, + "learning_rate": 6.277828061312367e-06, + "loss": 0.6548, + "step": 14200 + }, + { + "epoch": 0.4352396714478362, + "grad_norm": 
1.749535181382336, + "learning_rate": 6.277348219044194e-06, + "loss": 0.6985, + "step": 14201 + }, + { + "epoch": 0.4352703199705774, + "grad_norm": 1.6828011170782158, + "learning_rate": 6.276868364189981e-06, + "loss": 0.6535, + "step": 14202 + }, + { + "epoch": 0.4353009684933186, + "grad_norm": 1.496143025398213, + "learning_rate": 6.276388496754458e-06, + "loss": 0.6212, + "step": 14203 + }, + { + "epoch": 0.4353316170160598, + "grad_norm": 1.6689080382276889, + "learning_rate": 6.275908616742351e-06, + "loss": 0.7231, + "step": 14204 + }, + { + "epoch": 0.43536226553880103, + "grad_norm": 1.608620864747018, + "learning_rate": 6.275428724158393e-06, + "loss": 0.5778, + "step": 14205 + }, + { + "epoch": 0.43539291406154224, + "grad_norm": 1.6629570434478735, + "learning_rate": 6.274948819007307e-06, + "loss": 0.6371, + "step": 14206 + }, + { + "epoch": 0.43542356258428344, + "grad_norm": 1.8164861457200954, + "learning_rate": 6.274468901293825e-06, + "loss": 0.7198, + "step": 14207 + }, + { + "epoch": 0.43545421110702465, + "grad_norm": 0.8352268203947424, + "learning_rate": 6.2739889710226745e-06, + "loss": 0.4747, + "step": 14208 + }, + { + "epoch": 0.43548485962976585, + "grad_norm": 2.0174118259483635, + "learning_rate": 6.2735090281985855e-06, + "loss": 0.7103, + "step": 14209 + }, + { + "epoch": 0.43551550815250706, + "grad_norm": 1.5360072145479453, + "learning_rate": 6.273029072826285e-06, + "loss": 0.6848, + "step": 14210 + }, + { + "epoch": 0.43554615667524826, + "grad_norm": 1.5675395900670777, + "learning_rate": 6.272549104910504e-06, + "loss": 0.5956, + "step": 14211 + }, + { + "epoch": 0.43557680519798947, + "grad_norm": 0.7725829823251497, + "learning_rate": 6.272069124455973e-06, + "loss": 0.4659, + "step": 14212 + }, + { + "epoch": 0.4356074537207307, + "grad_norm": 1.4875658909839395, + "learning_rate": 6.271589131467416e-06, + "loss": 0.5802, + "step": 14213 + }, + { + "epoch": 0.4356381022434719, + "grad_norm": 1.716649991924082, + "learning_rate": 6.271109125949568e-06, + "loss": 0.6844, + "step": 14214 + }, + { + "epoch": 0.4356687507662131, + "grad_norm": 1.594385317952467, + "learning_rate": 6.270629107907155e-06, + "loss": 0.5996, + "step": 14215 + }, + { + "epoch": 0.4356993992889543, + "grad_norm": 0.7937585569500697, + "learning_rate": 6.2701490773449105e-06, + "loss": 0.4414, + "step": 14216 + }, + { + "epoch": 0.4357300478116955, + "grad_norm": 1.9285156137873125, + "learning_rate": 6.26966903426756e-06, + "loss": 0.7041, + "step": 14217 + }, + { + "epoch": 0.4357606963344367, + "grad_norm": 1.6376343165815985, + "learning_rate": 6.269188978679837e-06, + "loss": 0.6859, + "step": 14218 + }, + { + "epoch": 0.4357913448571779, + "grad_norm": 1.762760907681, + "learning_rate": 6.26870891058647e-06, + "loss": 0.6134, + "step": 14219 + }, + { + "epoch": 0.4358219933799191, + "grad_norm": 1.522391486237992, + "learning_rate": 6.26822882999219e-06, + "loss": 0.5167, + "step": 14220 + }, + { + "epoch": 0.43585264190266026, + "grad_norm": 1.7556048203712324, + "learning_rate": 6.267748736901726e-06, + "loss": 0.6425, + "step": 14221 + }, + { + "epoch": 0.43588329042540147, + "grad_norm": 1.8822428792306383, + "learning_rate": 6.2672686313198095e-06, + "loss": 0.7477, + "step": 14222 + }, + { + "epoch": 0.4359139389481427, + "grad_norm": 0.7509895093601561, + "learning_rate": 6.26678851325117e-06, + "loss": 0.4469, + "step": 14223 + }, + { + "epoch": 0.4359445874708839, + "grad_norm": 1.7085518695927555, + "learning_rate": 6.266308382700541e-06, + "loss": 0.674, + 
"step": 14224 + }, + { + "epoch": 0.4359752359936251, + "grad_norm": 0.7879114698036147, + "learning_rate": 6.26582823967265e-06, + "loss": 0.4466, + "step": 14225 + }, + { + "epoch": 0.4360058845163663, + "grad_norm": 1.8929471177242565, + "learning_rate": 6.26534808417223e-06, + "loss": 0.6891, + "step": 14226 + }, + { + "epoch": 0.4360365330391075, + "grad_norm": 1.5089166793230142, + "learning_rate": 6.264867916204011e-06, + "loss": 0.5713, + "step": 14227 + }, + { + "epoch": 0.4360671815618487, + "grad_norm": 1.7768470815664617, + "learning_rate": 6.264387735772727e-06, + "loss": 0.6087, + "step": 14228 + }, + { + "epoch": 0.4360978300845899, + "grad_norm": 1.547864269159272, + "learning_rate": 6.2639075428831054e-06, + "loss": 0.6209, + "step": 14229 + }, + { + "epoch": 0.4361284786073311, + "grad_norm": 1.8621579028284396, + "learning_rate": 6.263427337539878e-06, + "loss": 0.6961, + "step": 14230 + }, + { + "epoch": 0.4361591271300723, + "grad_norm": 1.678773195751565, + "learning_rate": 6.2629471197477795e-06, + "loss": 0.6184, + "step": 14231 + }, + { + "epoch": 0.4361897756528135, + "grad_norm": 1.7814466725881084, + "learning_rate": 6.26246688951154e-06, + "loss": 0.6366, + "step": 14232 + }, + { + "epoch": 0.43622042417555473, + "grad_norm": 1.965367633381306, + "learning_rate": 6.261986646835892e-06, + "loss": 0.6099, + "step": 14233 + }, + { + "epoch": 0.43625107269829594, + "grad_norm": 1.7467000766125653, + "learning_rate": 6.261506391725565e-06, + "loss": 0.6837, + "step": 14234 + }, + { + "epoch": 0.43628172122103714, + "grad_norm": 1.5074415590176364, + "learning_rate": 6.2610261241852946e-06, + "loss": 0.6231, + "step": 14235 + }, + { + "epoch": 0.43631236974377835, + "grad_norm": 1.6186677381510821, + "learning_rate": 6.26054584421981e-06, + "loss": 0.606, + "step": 14236 + }, + { + "epoch": 0.43634301826651956, + "grad_norm": 1.8260107926486209, + "learning_rate": 6.260065551833845e-06, + "loss": 0.6332, + "step": 14237 + }, + { + "epoch": 0.43637366678926076, + "grad_norm": 1.6632847730403506, + "learning_rate": 6.259585247032129e-06, + "loss": 0.6005, + "step": 14238 + }, + { + "epoch": 0.43640431531200197, + "grad_norm": 1.6142332323574249, + "learning_rate": 6.2591049298194005e-06, + "loss": 0.6868, + "step": 14239 + }, + { + "epoch": 0.4364349638347432, + "grad_norm": 0.8522176653184373, + "learning_rate": 6.258624600200389e-06, + "loss": 0.4591, + "step": 14240 + }, + { + "epoch": 0.4364656123574844, + "grad_norm": 1.6430755451119397, + "learning_rate": 6.258144258179826e-06, + "loss": 0.5963, + "step": 14241 + }, + { + "epoch": 0.4364962608802256, + "grad_norm": 1.6783336792688204, + "learning_rate": 6.257663903762445e-06, + "loss": 0.7093, + "step": 14242 + }, + { + "epoch": 0.4365269094029668, + "grad_norm": 1.9145709556320858, + "learning_rate": 6.257183536952982e-06, + "loss": 0.6997, + "step": 14243 + }, + { + "epoch": 0.436557557925708, + "grad_norm": 1.6646194058715857, + "learning_rate": 6.2567031577561676e-06, + "loss": 0.6271, + "step": 14244 + }, + { + "epoch": 0.4365882064484492, + "grad_norm": 1.6144349068731763, + "learning_rate": 6.2562227661767336e-06, + "loss": 0.6044, + "step": 14245 + }, + { + "epoch": 0.4366188549711904, + "grad_norm": 1.643269842641451, + "learning_rate": 6.2557423622194165e-06, + "loss": 0.6994, + "step": 14246 + }, + { + "epoch": 0.4366495034939316, + "grad_norm": 1.6298209444397642, + "learning_rate": 6.255261945888949e-06, + "loss": 0.6584, + "step": 14247 + }, + { + "epoch": 0.4366801520166728, + "grad_norm": 
1.5634124402947467, + "learning_rate": 6.254781517190064e-06, + "loss": 0.6019, + "step": 14248 + }, + { + "epoch": 0.436710800539414, + "grad_norm": 1.6409712068244955, + "learning_rate": 6.254301076127495e-06, + "loss": 0.5954, + "step": 14249 + }, + { + "epoch": 0.43674144906215523, + "grad_norm": 1.3452263070932458, + "learning_rate": 6.253820622705977e-06, + "loss": 0.6478, + "step": 14250 + }, + { + "epoch": 0.43677209758489643, + "grad_norm": 1.489764785270914, + "learning_rate": 6.253340156930243e-06, + "loss": 0.6022, + "step": 14251 + }, + { + "epoch": 0.4368027461076376, + "grad_norm": 1.5734400449106225, + "learning_rate": 6.25285967880503e-06, + "loss": 0.717, + "step": 14252 + }, + { + "epoch": 0.4368333946303788, + "grad_norm": 1.5731626863094395, + "learning_rate": 6.252379188335067e-06, + "loss": 0.6517, + "step": 14253 + }, + { + "epoch": 0.43686404315312, + "grad_norm": 1.7209142754978632, + "learning_rate": 6.251898685525093e-06, + "loss": 0.6841, + "step": 14254 + }, + { + "epoch": 0.4368946916758612, + "grad_norm": 1.6097707484393127, + "learning_rate": 6.251418170379841e-06, + "loss": 0.639, + "step": 14255 + }, + { + "epoch": 0.4369253401986024, + "grad_norm": 1.6812914742719869, + "learning_rate": 6.250937642904045e-06, + "loss": 0.7002, + "step": 14256 + }, + { + "epoch": 0.4369559887213436, + "grad_norm": 1.5587731138149832, + "learning_rate": 6.250457103102441e-06, + "loss": 0.5869, + "step": 14257 + }, + { + "epoch": 0.4369866372440848, + "grad_norm": 0.8580315008818065, + "learning_rate": 6.2499765509797615e-06, + "loss": 0.4548, + "step": 14258 + }, + { + "epoch": 0.437017285766826, + "grad_norm": 1.9116688581275407, + "learning_rate": 6.249495986540746e-06, + "loss": 0.6465, + "step": 14259 + }, + { + "epoch": 0.43704793428956723, + "grad_norm": 1.747946109852959, + "learning_rate": 6.249015409790126e-06, + "loss": 0.6642, + "step": 14260 + }, + { + "epoch": 0.43707858281230844, + "grad_norm": 0.7919084979500906, + "learning_rate": 6.248534820732637e-06, + "loss": 0.4533, + "step": 14261 + }, + { + "epoch": 0.43710923133504964, + "grad_norm": 1.4141307111378751, + "learning_rate": 6.248054219373014e-06, + "loss": 0.5801, + "step": 14262 + }, + { + "epoch": 0.43713987985779085, + "grad_norm": 1.695401930124417, + "learning_rate": 6.247573605715996e-06, + "loss": 0.6106, + "step": 14263 + }, + { + "epoch": 0.43717052838053205, + "grad_norm": 1.7164658157517108, + "learning_rate": 6.247092979766314e-06, + "loss": 0.7068, + "step": 14264 + }, + { + "epoch": 0.43720117690327326, + "grad_norm": 1.8032900036700608, + "learning_rate": 6.246612341528706e-06, + "loss": 0.5986, + "step": 14265 + }, + { + "epoch": 0.43723182542601446, + "grad_norm": 1.8366895575816498, + "learning_rate": 6.246131691007908e-06, + "loss": 0.6553, + "step": 14266 + }, + { + "epoch": 0.43726247394875567, + "grad_norm": 1.7746473164952212, + "learning_rate": 6.2456510282086556e-06, + "loss": 0.6549, + "step": 14267 + }, + { + "epoch": 0.4372931224714969, + "grad_norm": 1.7309072607818974, + "learning_rate": 6.245170353135686e-06, + "loss": 0.6485, + "step": 14268 + }, + { + "epoch": 0.4373237709942381, + "grad_norm": 1.502111319303909, + "learning_rate": 6.244689665793733e-06, + "loss": 0.5755, + "step": 14269 + }, + { + "epoch": 0.4373544195169793, + "grad_norm": 1.779233001657589, + "learning_rate": 6.244208966187534e-06, + "loss": 0.7179, + "step": 14270 + }, + { + "epoch": 0.4373850680397205, + "grad_norm": 1.5127904668905758, + "learning_rate": 6.243728254321826e-06, + "loss": 0.6478, + 
"step": 14271 + }, + { + "epoch": 0.4374157165624617, + "grad_norm": 1.9485860009920843, + "learning_rate": 6.243247530201345e-06, + "loss": 0.6625, + "step": 14272 + }, + { + "epoch": 0.4374463650852029, + "grad_norm": 1.6627755331437644, + "learning_rate": 6.242766793830828e-06, + "loss": 0.5866, + "step": 14273 + }, + { + "epoch": 0.4374770136079441, + "grad_norm": 1.6481127065498722, + "learning_rate": 6.242286045215014e-06, + "loss": 0.7472, + "step": 14274 + }, + { + "epoch": 0.4375076621306853, + "grad_norm": 1.8721238443827837, + "learning_rate": 6.241805284358635e-06, + "loss": 0.7381, + "step": 14275 + }, + { + "epoch": 0.4375383106534265, + "grad_norm": 1.752159250963597, + "learning_rate": 6.241324511266432e-06, + "loss": 0.7225, + "step": 14276 + }, + { + "epoch": 0.4375689591761677, + "grad_norm": 1.6502833937926062, + "learning_rate": 6.2408437259431396e-06, + "loss": 0.6294, + "step": 14277 + }, + { + "epoch": 0.43759960769890893, + "grad_norm": 1.422305211182262, + "learning_rate": 6.2403629283935e-06, + "loss": 0.6259, + "step": 14278 + }, + { + "epoch": 0.43763025622165014, + "grad_norm": 1.7619056856914475, + "learning_rate": 6.239882118622244e-06, + "loss": 0.5625, + "step": 14279 + }, + { + "epoch": 0.43766090474439134, + "grad_norm": 5.157418350911879, + "learning_rate": 6.239401296634113e-06, + "loss": 0.766, + "step": 14280 + }, + { + "epoch": 0.43769155326713255, + "grad_norm": 1.7933366197800424, + "learning_rate": 6.238920462433843e-06, + "loss": 0.6948, + "step": 14281 + }, + { + "epoch": 0.43772220178987375, + "grad_norm": 1.707775487880428, + "learning_rate": 6.238439616026174e-06, + "loss": 0.6331, + "step": 14282 + }, + { + "epoch": 0.4377528503126149, + "grad_norm": 1.5863555546498789, + "learning_rate": 6.237958757415843e-06, + "loss": 0.566, + "step": 14283 + }, + { + "epoch": 0.4377834988353561, + "grad_norm": 10.254805357216933, + "learning_rate": 6.237477886607586e-06, + "loss": 0.6359, + "step": 14284 + }, + { + "epoch": 0.4378141473580973, + "grad_norm": 1.8507844924884633, + "learning_rate": 6.2369970036061435e-06, + "loss": 0.6556, + "step": 14285 + }, + { + "epoch": 0.4378447958808385, + "grad_norm": 1.9529548322068477, + "learning_rate": 6.236516108416254e-06, + "loss": 0.7149, + "step": 14286 + }, + { + "epoch": 0.4378754444035797, + "grad_norm": 1.7795167632883526, + "learning_rate": 6.236035201042654e-06, + "loss": 0.6887, + "step": 14287 + }, + { + "epoch": 0.43790609292632093, + "grad_norm": 3.3501849072252883, + "learning_rate": 6.235554281490082e-06, + "loss": 0.6715, + "step": 14288 + }, + { + "epoch": 0.43793674144906214, + "grad_norm": 2.0966227607446637, + "learning_rate": 6.23507334976328e-06, + "loss": 0.7091, + "step": 14289 + }, + { + "epoch": 0.43796738997180334, + "grad_norm": 1.5738882578783, + "learning_rate": 6.234592405866981e-06, + "loss": 0.584, + "step": 14290 + }, + { + "epoch": 0.43799803849454455, + "grad_norm": 1.801203593208695, + "learning_rate": 6.2341114498059295e-06, + "loss": 0.6706, + "step": 14291 + }, + { + "epoch": 0.43802868701728576, + "grad_norm": 0.8391671435734299, + "learning_rate": 6.233630481584862e-06, + "loss": 0.4555, + "step": 14292 + }, + { + "epoch": 0.43805933554002696, + "grad_norm": 1.8426214749194907, + "learning_rate": 6.233149501208518e-06, + "loss": 0.5833, + "step": 14293 + }, + { + "epoch": 0.43808998406276817, + "grad_norm": 1.8738673022205978, + "learning_rate": 6.2326685086816355e-06, + "loss": 0.7071, + "step": 14294 + }, + { + "epoch": 0.4381206325855094, + "grad_norm": 
1.7485665048285737, + "learning_rate": 6.2321875040089555e-06, + "loss": 0.5889, + "step": 14295 + }, + { + "epoch": 0.4381512811082506, + "grad_norm": 1.8998359576190846, + "learning_rate": 6.231706487195215e-06, + "loss": 0.6396, + "step": 14296 + }, + { + "epoch": 0.4381819296309918, + "grad_norm": 1.7085631588064107, + "learning_rate": 6.231225458245157e-06, + "loss": 0.768, + "step": 14297 + }, + { + "epoch": 0.438212578153733, + "grad_norm": 1.632584261932853, + "learning_rate": 6.230744417163519e-06, + "loss": 0.5737, + "step": 14298 + }, + { + "epoch": 0.4382432266764742, + "grad_norm": 1.7488032557545712, + "learning_rate": 6.23026336395504e-06, + "loss": 0.7255, + "step": 14299 + }, + { + "epoch": 0.4382738751992154, + "grad_norm": 1.7749563279532583, + "learning_rate": 6.229782298624464e-06, + "loss": 0.6257, + "step": 14300 + }, + { + "epoch": 0.4383045237219566, + "grad_norm": 1.85980059491364, + "learning_rate": 6.229301221176527e-06, + "loss": 0.6783, + "step": 14301 + }, + { + "epoch": 0.4383351722446978, + "grad_norm": 0.7941886693483069, + "learning_rate": 6.2288201316159715e-06, + "loss": 0.4336, + "step": 14302 + }, + { + "epoch": 0.438365820767439, + "grad_norm": 0.8031242423113579, + "learning_rate": 6.228339029947534e-06, + "loss": 0.4312, + "step": 14303 + }, + { + "epoch": 0.4383964692901802, + "grad_norm": 0.7563316091265978, + "learning_rate": 6.227857916175961e-06, + "loss": 0.4441, + "step": 14304 + }, + { + "epoch": 0.43842711781292143, + "grad_norm": 0.7838348123837521, + "learning_rate": 6.227376790305989e-06, + "loss": 0.4411, + "step": 14305 + }, + { + "epoch": 0.43845776633566264, + "grad_norm": 1.9041920324032262, + "learning_rate": 6.226895652342359e-06, + "loss": 0.7291, + "step": 14306 + }, + { + "epoch": 0.43848841485840384, + "grad_norm": 1.8709811190551713, + "learning_rate": 6.226414502289811e-06, + "loss": 0.7196, + "step": 14307 + }, + { + "epoch": 0.43851906338114505, + "grad_norm": 1.9802248572476502, + "learning_rate": 6.2259333401530896e-06, + "loss": 0.6611, + "step": 14308 + }, + { + "epoch": 0.43854971190388625, + "grad_norm": 0.8120395735808645, + "learning_rate": 6.225452165936932e-06, + "loss": 0.4592, + "step": 14309 + }, + { + "epoch": 0.43858036042662746, + "grad_norm": 1.9000976926905233, + "learning_rate": 6.22497097964608e-06, + "loss": 0.5739, + "step": 14310 + }, + { + "epoch": 0.43861100894936866, + "grad_norm": 1.8718513649755149, + "learning_rate": 6.224489781285277e-06, + "loss": 0.6115, + "step": 14311 + }, + { + "epoch": 0.43864165747210987, + "grad_norm": 1.8094062687493704, + "learning_rate": 6.224008570859262e-06, + "loss": 0.6858, + "step": 14312 + }, + { + "epoch": 0.4386723059948511, + "grad_norm": 1.7066747763591321, + "learning_rate": 6.223527348372778e-06, + "loss": 0.6542, + "step": 14313 + }, + { + "epoch": 0.4387029545175922, + "grad_norm": 1.578508259524756, + "learning_rate": 6.223046113830564e-06, + "loss": 0.6091, + "step": 14314 + }, + { + "epoch": 0.43873360304033343, + "grad_norm": 1.8493748930181906, + "learning_rate": 6.222564867237366e-06, + "loss": 0.727, + "step": 14315 + }, + { + "epoch": 0.43876425156307464, + "grad_norm": 1.76227107552195, + "learning_rate": 6.222083608597923e-06, + "loss": 0.6939, + "step": 14316 + }, + { + "epoch": 0.43879490008581584, + "grad_norm": 1.6719073152458688, + "learning_rate": 6.221602337916978e-06, + "loss": 0.6362, + "step": 14317 + }, + { + "epoch": 0.43882554860855705, + "grad_norm": 1.7729863080458732, + "learning_rate": 6.221121055199271e-06, + "loss": 0.577, 
+ "step": 14318 + }, + { + "epoch": 0.43885619713129825, + "grad_norm": 1.7279538188647354, + "learning_rate": 6.220639760449547e-06, + "loss": 0.7205, + "step": 14319 + }, + { + "epoch": 0.43888684565403946, + "grad_norm": 2.0571863043893934, + "learning_rate": 6.220158453672547e-06, + "loss": 0.7342, + "step": 14320 + }, + { + "epoch": 0.43891749417678066, + "grad_norm": 1.6896822999929257, + "learning_rate": 6.219677134873013e-06, + "loss": 0.6549, + "step": 14321 + }, + { + "epoch": 0.43894814269952187, + "grad_norm": 1.8996339563613363, + "learning_rate": 6.219195804055689e-06, + "loss": 0.7083, + "step": 14322 + }, + { + "epoch": 0.4389787912222631, + "grad_norm": 1.6794567188340794, + "learning_rate": 6.218714461225316e-06, + "loss": 0.6937, + "step": 14323 + }, + { + "epoch": 0.4390094397450043, + "grad_norm": 2.037017641837596, + "learning_rate": 6.218233106386639e-06, + "loss": 0.6733, + "step": 14324 + }, + { + "epoch": 0.4390400882677455, + "grad_norm": 1.6635955043366093, + "learning_rate": 6.217751739544396e-06, + "loss": 0.6709, + "step": 14325 + }, + { + "epoch": 0.4390707367904867, + "grad_norm": 1.5174990619413693, + "learning_rate": 6.217270360703337e-06, + "loss": 0.6023, + "step": 14326 + }, + { + "epoch": 0.4391013853132279, + "grad_norm": 1.6576136782455697, + "learning_rate": 6.216788969868199e-06, + "loss": 0.6725, + "step": 14327 + }, + { + "epoch": 0.4391320338359691, + "grad_norm": 1.8464171079650904, + "learning_rate": 6.2163075670437324e-06, + "loss": 0.7138, + "step": 14328 + }, + { + "epoch": 0.4391626823587103, + "grad_norm": 0.9788623909585832, + "learning_rate": 6.215826152234672e-06, + "loss": 0.4715, + "step": 14329 + }, + { + "epoch": 0.4391933308814515, + "grad_norm": 1.5091850746233204, + "learning_rate": 6.215344725445766e-06, + "loss": 0.6498, + "step": 14330 + }, + { + "epoch": 0.4392239794041927, + "grad_norm": 1.9131058007079376, + "learning_rate": 6.214863286681759e-06, + "loss": 0.7049, + "step": 14331 + }, + { + "epoch": 0.4392546279269339, + "grad_norm": 1.5217536587929597, + "learning_rate": 6.214381835947393e-06, + "loss": 0.6071, + "step": 14332 + }, + { + "epoch": 0.43928527644967513, + "grad_norm": 1.8613092530853301, + "learning_rate": 6.213900373247411e-06, + "loss": 0.799, + "step": 14333 + }, + { + "epoch": 0.43931592497241634, + "grad_norm": 1.8690762236507923, + "learning_rate": 6.213418898586559e-06, + "loss": 0.5722, + "step": 14334 + }, + { + "epoch": 0.43934657349515754, + "grad_norm": 1.7796740816315328, + "learning_rate": 6.212937411969579e-06, + "loss": 0.604, + "step": 14335 + }, + { + "epoch": 0.43937722201789875, + "grad_norm": 1.6434023325508489, + "learning_rate": 6.2124559134012165e-06, + "loss": 0.6385, + "step": 14336 + }, + { + "epoch": 0.43940787054063996, + "grad_norm": 0.8644054774992141, + "learning_rate": 6.211974402886218e-06, + "loss": 0.4682, + "step": 14337 + }, + { + "epoch": 0.43943851906338116, + "grad_norm": 1.6931171520513892, + "learning_rate": 6.211492880429323e-06, + "loss": 0.682, + "step": 14338 + }, + { + "epoch": 0.43946916758612237, + "grad_norm": 2.070300227821399, + "learning_rate": 6.211011346035279e-06, + "loss": 0.7186, + "step": 14339 + }, + { + "epoch": 0.4394998161088636, + "grad_norm": 1.595698372500152, + "learning_rate": 6.210529799708831e-06, + "loss": 0.6963, + "step": 14340 + }, + { + "epoch": 0.4395304646316048, + "grad_norm": 1.7804171255812107, + "learning_rate": 6.210048241454723e-06, + "loss": 0.6416, + "step": 14341 + }, + { + "epoch": 0.439561113154346, + "grad_norm": 
0.7957053947685288, + "learning_rate": 6.2095666712776995e-06, + "loss": 0.4529, + "step": 14342 + }, + { + "epoch": 0.4395917616770872, + "grad_norm": 1.6013100572487022, + "learning_rate": 6.209085089182507e-06, + "loss": 0.5893, + "step": 14343 + }, + { + "epoch": 0.4396224101998284, + "grad_norm": 0.7972979519260305, + "learning_rate": 6.20860349517389e-06, + "loss": 0.4397, + "step": 14344 + }, + { + "epoch": 0.43965305872256955, + "grad_norm": 1.631586369975757, + "learning_rate": 6.208121889256592e-06, + "loss": 0.7657, + "step": 14345 + }, + { + "epoch": 0.43968370724531075, + "grad_norm": 1.6019150104438704, + "learning_rate": 6.20764027143536e-06, + "loss": 0.5878, + "step": 14346 + }, + { + "epoch": 0.43971435576805196, + "grad_norm": 1.6863470047821443, + "learning_rate": 6.207158641714942e-06, + "loss": 0.6113, + "step": 14347 + }, + { + "epoch": 0.43974500429079316, + "grad_norm": 1.4329686883877424, + "learning_rate": 6.2066770001000784e-06, + "loss": 0.5922, + "step": 14348 + }, + { + "epoch": 0.43977565281353437, + "grad_norm": 1.901893977344471, + "learning_rate": 6.206195346595518e-06, + "loss": 0.7023, + "step": 14349 + }, + { + "epoch": 0.4398063013362756, + "grad_norm": 1.5893503040494772, + "learning_rate": 6.2057136812060074e-06, + "loss": 0.696, + "step": 14350 + }, + { + "epoch": 0.4398369498590168, + "grad_norm": 1.5895544853172807, + "learning_rate": 6.20523200393629e-06, + "loss": 0.5638, + "step": 14351 + }, + { + "epoch": 0.439867598381758, + "grad_norm": 1.6884622606737953, + "learning_rate": 6.204750314791115e-06, + "loss": 0.6341, + "step": 14352 + }, + { + "epoch": 0.4398982469044992, + "grad_norm": 1.7878097729079117, + "learning_rate": 6.204268613775225e-06, + "loss": 0.7067, + "step": 14353 + }, + { + "epoch": 0.4399288954272404, + "grad_norm": 1.8340444000902925, + "learning_rate": 6.203786900893369e-06, + "loss": 0.6292, + "step": 14354 + }, + { + "epoch": 0.4399595439499816, + "grad_norm": 1.6337752733960744, + "learning_rate": 6.203305176150293e-06, + "loss": 0.6321, + "step": 14355 + }, + { + "epoch": 0.4399901924727228, + "grad_norm": 1.638928486406952, + "learning_rate": 6.2028234395507435e-06, + "loss": 0.6468, + "step": 14356 + }, + { + "epoch": 0.440020840995464, + "grad_norm": 1.7586173889970822, + "learning_rate": 6.202341691099465e-06, + "loss": 0.6515, + "step": 14357 + }, + { + "epoch": 0.4400514895182052, + "grad_norm": 1.7107992894807025, + "learning_rate": 6.2018599308012085e-06, + "loss": 0.6448, + "step": 14358 + }, + { + "epoch": 0.4400821380409464, + "grad_norm": 1.5779684739976052, + "learning_rate": 6.201378158660718e-06, + "loss": 0.6226, + "step": 14359 + }, + { + "epoch": 0.44011278656368763, + "grad_norm": 1.765053371236269, + "learning_rate": 6.200896374682741e-06, + "loss": 0.6259, + "step": 14360 + }, + { + "epoch": 0.44014343508642884, + "grad_norm": 0.9735216925840341, + "learning_rate": 6.200414578872024e-06, + "loss": 0.4559, + "step": 14361 + }, + { + "epoch": 0.44017408360917004, + "grad_norm": 1.705172686772485, + "learning_rate": 6.199932771233315e-06, + "loss": 0.593, + "step": 14362 + }, + { + "epoch": 0.44020473213191125, + "grad_norm": 1.7225599093935675, + "learning_rate": 6.199450951771363e-06, + "loss": 0.6797, + "step": 14363 + }, + { + "epoch": 0.44023538065465245, + "grad_norm": 1.6924358804227349, + "learning_rate": 6.198969120490913e-06, + "loss": 0.6413, + "step": 14364 + }, + { + "epoch": 0.44026602917739366, + "grad_norm": 1.8737691361983984, + "learning_rate": 6.198487277396712e-06, + "loss": 
0.7215, + "step": 14365 + }, + { + "epoch": 0.44029667770013486, + "grad_norm": 1.683060917437606, + "learning_rate": 6.198005422493511e-06, + "loss": 0.5913, + "step": 14366 + }, + { + "epoch": 0.44032732622287607, + "grad_norm": 1.7039984187148867, + "learning_rate": 6.1975235557860554e-06, + "loss": 0.6793, + "step": 14367 + }, + { + "epoch": 0.4403579747456173, + "grad_norm": 1.6377757794398515, + "learning_rate": 6.197041677279094e-06, + "loss": 0.6659, + "step": 14368 + }, + { + "epoch": 0.4403886232683585, + "grad_norm": 1.635596764390654, + "learning_rate": 6.196559786977374e-06, + "loss": 0.6052, + "step": 14369 + }, + { + "epoch": 0.4404192717910997, + "grad_norm": 1.585217321617142, + "learning_rate": 6.196077884885646e-06, + "loss": 0.6111, + "step": 14370 + }, + { + "epoch": 0.4404499203138409, + "grad_norm": 1.665005340246267, + "learning_rate": 6.195595971008655e-06, + "loss": 0.6736, + "step": 14371 + }, + { + "epoch": 0.4404805688365821, + "grad_norm": 0.7807177550556498, + "learning_rate": 6.195114045351151e-06, + "loss": 0.4112, + "step": 14372 + }, + { + "epoch": 0.4405112173593233, + "grad_norm": 1.641458575350662, + "learning_rate": 6.194632107917884e-06, + "loss": 0.6319, + "step": 14373 + }, + { + "epoch": 0.4405418658820645, + "grad_norm": 1.6267140916148926, + "learning_rate": 6.1941501587136e-06, + "loss": 0.5552, + "step": 14374 + }, + { + "epoch": 0.4405725144048057, + "grad_norm": 1.751949945821059, + "learning_rate": 6.193668197743051e-06, + "loss": 0.623, + "step": 14375 + }, + { + "epoch": 0.44060316292754687, + "grad_norm": 1.6715049094834846, + "learning_rate": 6.19318622501098e-06, + "loss": 0.8081, + "step": 14376 + }, + { + "epoch": 0.44063381145028807, + "grad_norm": 1.853907390005223, + "learning_rate": 6.192704240522142e-06, + "loss": 0.6838, + "step": 14377 + }, + { + "epoch": 0.4406644599730293, + "grad_norm": 0.8378523956851804, + "learning_rate": 6.192222244281284e-06, + "loss": 0.4401, + "step": 14378 + }, + { + "epoch": 0.4406951084957705, + "grad_norm": 1.7051399761999313, + "learning_rate": 6.191740236293154e-06, + "loss": 0.7406, + "step": 14379 + }, + { + "epoch": 0.4407257570185117, + "grad_norm": 1.530135910868418, + "learning_rate": 6.191258216562503e-06, + "loss": 0.6352, + "step": 14380 + }, + { + "epoch": 0.4407564055412529, + "grad_norm": 1.6212247475527441, + "learning_rate": 6.19077618509408e-06, + "loss": 0.6629, + "step": 14381 + }, + { + "epoch": 0.4407870540639941, + "grad_norm": 1.7936365994795624, + "learning_rate": 6.190294141892637e-06, + "loss": 0.5052, + "step": 14382 + }, + { + "epoch": 0.4408177025867353, + "grad_norm": 1.8336912726015577, + "learning_rate": 6.1898120869629185e-06, + "loss": 0.6186, + "step": 14383 + }, + { + "epoch": 0.4408483511094765, + "grad_norm": 1.6792517775250835, + "learning_rate": 6.189330020309678e-06, + "loss": 0.7461, + "step": 14384 + }, + { + "epoch": 0.4408789996322177, + "grad_norm": 0.8130237212288073, + "learning_rate": 6.188847941937664e-06, + "loss": 0.4394, + "step": 14385 + }, + { + "epoch": 0.4409096481549589, + "grad_norm": 1.6265401821319638, + "learning_rate": 6.188365851851629e-06, + "loss": 0.6341, + "step": 14386 + }, + { + "epoch": 0.4409402966777001, + "grad_norm": 1.7031483995347005, + "learning_rate": 6.187883750056319e-06, + "loss": 0.698, + "step": 14387 + }, + { + "epoch": 0.44097094520044133, + "grad_norm": 1.673919792674677, + "learning_rate": 6.187401636556487e-06, + "loss": 0.5579, + "step": 14388 + }, + { + "epoch": 0.44100159372318254, + "grad_norm": 
1.550803412437091, + "learning_rate": 6.186919511356882e-06, + "loss": 0.6546, + "step": 14389 + }, + { + "epoch": 0.44103224224592374, + "grad_norm": 2.0211905869140505, + "learning_rate": 6.186437374462257e-06, + "loss": 0.7933, + "step": 14390 + }, + { + "epoch": 0.44106289076866495, + "grad_norm": 1.8193410726516182, + "learning_rate": 6.18595522587736e-06, + "loss": 0.6408, + "step": 14391 + }, + { + "epoch": 0.44109353929140616, + "grad_norm": 1.6956352529852357, + "learning_rate": 6.185473065606944e-06, + "loss": 0.6416, + "step": 14392 + }, + { + "epoch": 0.44112418781414736, + "grad_norm": 1.7386546625192816, + "learning_rate": 6.184990893655758e-06, + "loss": 0.6213, + "step": 14393 + }, + { + "epoch": 0.44115483633688857, + "grad_norm": 1.7753369681311113, + "learning_rate": 6.184508710028552e-06, + "loss": 0.7292, + "step": 14394 + }, + { + "epoch": 0.4411854848596298, + "grad_norm": 1.6494618763123587, + "learning_rate": 6.18402651473008e-06, + "loss": 0.6667, + "step": 14395 + }, + { + "epoch": 0.441216133382371, + "grad_norm": 1.7527642406227493, + "learning_rate": 6.18354430776509e-06, + "loss": 0.7467, + "step": 14396 + }, + { + "epoch": 0.4412467819051122, + "grad_norm": 1.6514348743225629, + "learning_rate": 6.1830620891383384e-06, + "loss": 0.6576, + "step": 14397 + }, + { + "epoch": 0.4412774304278534, + "grad_norm": 1.8129917910714461, + "learning_rate": 6.182579858854572e-06, + "loss": 0.7401, + "step": 14398 + }, + { + "epoch": 0.4413080789505946, + "grad_norm": 1.664985939000339, + "learning_rate": 6.182097616918543e-06, + "loss": 0.5376, + "step": 14399 + }, + { + "epoch": 0.4413387274733358, + "grad_norm": 1.8213812827691813, + "learning_rate": 6.1816153633350026e-06, + "loss": 0.6748, + "step": 14400 + }, + { + "epoch": 0.441369375996077, + "grad_norm": 1.8275001326612619, + "learning_rate": 6.181133098108707e-06, + "loss": 0.5673, + "step": 14401 + }, + { + "epoch": 0.4414000245188182, + "grad_norm": 1.5654683020501061, + "learning_rate": 6.180650821244403e-06, + "loss": 0.675, + "step": 14402 + }, + { + "epoch": 0.4414306730415594, + "grad_norm": 1.5841557991777604, + "learning_rate": 6.1801685327468445e-06, + "loss": 0.6515, + "step": 14403 + }, + { + "epoch": 0.4414613215643006, + "grad_norm": 1.8216715515834805, + "learning_rate": 6.1796862326207815e-06, + "loss": 0.7205, + "step": 14404 + }, + { + "epoch": 0.44149197008704183, + "grad_norm": 1.7763283302859076, + "learning_rate": 6.179203920870971e-06, + "loss": 0.6942, + "step": 14405 + }, + { + "epoch": 0.44152261860978304, + "grad_norm": 1.5029544040114287, + "learning_rate": 6.178721597502162e-06, + "loss": 0.6249, + "step": 14406 + }, + { + "epoch": 0.4415532671325242, + "grad_norm": 1.6438947555195065, + "learning_rate": 6.178239262519106e-06, + "loss": 0.7614, + "step": 14407 + }, + { + "epoch": 0.4415839156552654, + "grad_norm": 1.7714531932412854, + "learning_rate": 6.177756915926558e-06, + "loss": 0.6858, + "step": 14408 + }, + { + "epoch": 0.4416145641780066, + "grad_norm": 1.786364521994295, + "learning_rate": 6.17727455772927e-06, + "loss": 0.6529, + "step": 14409 + }, + { + "epoch": 0.4416452127007478, + "grad_norm": 1.618567064562419, + "learning_rate": 6.176792187931995e-06, + "loss": 0.574, + "step": 14410 + }, + { + "epoch": 0.441675861223489, + "grad_norm": 0.8344561262447483, + "learning_rate": 6.1763098065394844e-06, + "loss": 0.4746, + "step": 14411 + }, + { + "epoch": 0.4417065097462302, + "grad_norm": 1.6791741917546747, + "learning_rate": 6.1758274135564935e-06, + "loss": 0.6729, + 
"step": 14412 + }, + { + "epoch": 0.4417371582689714, + "grad_norm": 0.8220037453912635, + "learning_rate": 6.175345008987773e-06, + "loss": 0.4597, + "step": 14413 + }, + { + "epoch": 0.4417678067917126, + "grad_norm": 1.6666835448806085, + "learning_rate": 6.174862592838079e-06, + "loss": 0.6778, + "step": 14414 + }, + { + "epoch": 0.44179845531445383, + "grad_norm": 1.8307707233126833, + "learning_rate": 6.174380165112161e-06, + "loss": 0.6246, + "step": 14415 + }, + { + "epoch": 0.44182910383719504, + "grad_norm": 1.8245541156409733, + "learning_rate": 6.173897725814777e-06, + "loss": 0.6967, + "step": 14416 + }, + { + "epoch": 0.44185975235993624, + "grad_norm": 1.7314271139419826, + "learning_rate": 6.173415274950677e-06, + "loss": 0.6772, + "step": 14417 + }, + { + "epoch": 0.44189040088267745, + "grad_norm": 1.7654598741177971, + "learning_rate": 6.1729328125246165e-06, + "loss": 0.7023, + "step": 14418 + }, + { + "epoch": 0.44192104940541865, + "grad_norm": 1.6714146843148139, + "learning_rate": 6.172450338541348e-06, + "loss": 0.7228, + "step": 14419 + }, + { + "epoch": 0.44195169792815986, + "grad_norm": 0.8371486364792434, + "learning_rate": 6.171967853005628e-06, + "loss": 0.4509, + "step": 14420 + }, + { + "epoch": 0.44198234645090106, + "grad_norm": 1.7730105571032584, + "learning_rate": 6.171485355922208e-06, + "loss": 0.5715, + "step": 14421 + }, + { + "epoch": 0.44201299497364227, + "grad_norm": 0.8066771974044377, + "learning_rate": 6.171002847295843e-06, + "loss": 0.4576, + "step": 14422 + }, + { + "epoch": 0.4420436434963835, + "grad_norm": 2.0915087368308862, + "learning_rate": 6.170520327131288e-06, + "loss": 0.7547, + "step": 14423 + }, + { + "epoch": 0.4420742920191247, + "grad_norm": 1.8131936857296505, + "learning_rate": 6.170037795433296e-06, + "loss": 0.6195, + "step": 14424 + }, + { + "epoch": 0.4421049405418659, + "grad_norm": 1.7723709318006795, + "learning_rate": 6.169555252206623e-06, + "loss": 0.5981, + "step": 14425 + }, + { + "epoch": 0.4421355890646071, + "grad_norm": 1.625820192236429, + "learning_rate": 6.169072697456021e-06, + "loss": 0.6828, + "step": 14426 + }, + { + "epoch": 0.4421662375873483, + "grad_norm": 1.6674659343946792, + "learning_rate": 6.168590131186247e-06, + "loss": 0.5942, + "step": 14427 + }, + { + "epoch": 0.4421968861100895, + "grad_norm": 1.8373618236512987, + "learning_rate": 6.168107553402057e-06, + "loss": 0.7868, + "step": 14428 + }, + { + "epoch": 0.4422275346328307, + "grad_norm": 0.8191847701279278, + "learning_rate": 6.167624964108205e-06, + "loss": 0.4521, + "step": 14429 + }, + { + "epoch": 0.4422581831555719, + "grad_norm": 1.7039349231252863, + "learning_rate": 6.1671423633094426e-06, + "loss": 0.5816, + "step": 14430 + }, + { + "epoch": 0.4422888316783131, + "grad_norm": 1.4814081171459867, + "learning_rate": 6.1666597510105294e-06, + "loss": 0.5927, + "step": 14431 + }, + { + "epoch": 0.4423194802010543, + "grad_norm": 1.661414640927167, + "learning_rate": 6.16617712721622e-06, + "loss": 0.6638, + "step": 14432 + }, + { + "epoch": 0.44235012872379553, + "grad_norm": 1.7490810351025778, + "learning_rate": 6.1656944919312675e-06, + "loss": 0.5647, + "step": 14433 + }, + { + "epoch": 0.44238077724653674, + "grad_norm": 0.8162794073223502, + "learning_rate": 6.165211845160429e-06, + "loss": 0.4378, + "step": 14434 + }, + { + "epoch": 0.44241142576927794, + "grad_norm": 1.6831955990395455, + "learning_rate": 6.164729186908462e-06, + "loss": 0.7191, + "step": 14435 + }, + { + "epoch": 0.44244207429201915, + "grad_norm": 
1.586320170349236, + "learning_rate": 6.164246517180119e-06, + "loss": 0.638, + "step": 14436 + }, + { + "epoch": 0.44247272281476036, + "grad_norm": 0.837216141920032, + "learning_rate": 6.163763835980156e-06, + "loss": 0.4626, + "step": 14437 + }, + { + "epoch": 0.4425033713375015, + "grad_norm": 1.6288061220014323, + "learning_rate": 6.1632811433133325e-06, + "loss": 0.709, + "step": 14438 + }, + { + "epoch": 0.4425340198602427, + "grad_norm": 1.6546236075387053, + "learning_rate": 6.1627984391843995e-06, + "loss": 0.6965, + "step": 14439 + }, + { + "epoch": 0.4425646683829839, + "grad_norm": 1.7082770954398758, + "learning_rate": 6.1623157235981194e-06, + "loss": 0.6581, + "step": 14440 + }, + { + "epoch": 0.4425953169057251, + "grad_norm": 1.8495713307353159, + "learning_rate": 6.1618329965592415e-06, + "loss": 0.6609, + "step": 14441 + }, + { + "epoch": 0.44262596542846633, + "grad_norm": 0.8400557766966368, + "learning_rate": 6.161350258072528e-06, + "loss": 0.4468, + "step": 14442 + }, + { + "epoch": 0.44265661395120753, + "grad_norm": 1.6013003528760272, + "learning_rate": 6.160867508142733e-06, + "loss": 0.6478, + "step": 14443 + }, + { + "epoch": 0.44268726247394874, + "grad_norm": 1.5708580823485374, + "learning_rate": 6.160384746774614e-06, + "loss": 0.6639, + "step": 14444 + }, + { + "epoch": 0.44271791099668995, + "grad_norm": 1.7788465886419487, + "learning_rate": 6.159901973972926e-06, + "loss": 0.6544, + "step": 14445 + }, + { + "epoch": 0.44274855951943115, + "grad_norm": 0.8100091902388091, + "learning_rate": 6.159419189742427e-06, + "loss": 0.4385, + "step": 14446 + }, + { + "epoch": 0.44277920804217236, + "grad_norm": 1.6275899998441778, + "learning_rate": 6.1589363940878755e-06, + "loss": 0.6662, + "step": 14447 + }, + { + "epoch": 0.44280985656491356, + "grad_norm": 1.6712678000513308, + "learning_rate": 6.158453587014025e-06, + "loss": 0.6584, + "step": 14448 + }, + { + "epoch": 0.44284050508765477, + "grad_norm": 1.9011852213401312, + "learning_rate": 6.1579707685256365e-06, + "loss": 0.7601, + "step": 14449 + }, + { + "epoch": 0.442871153610396, + "grad_norm": 1.9450132609271755, + "learning_rate": 6.157487938627464e-06, + "loss": 0.7055, + "step": 14450 + }, + { + "epoch": 0.4429018021331372, + "grad_norm": 1.602465556447034, + "learning_rate": 6.15700509732427e-06, + "loss": 0.6177, + "step": 14451 + }, + { + "epoch": 0.4429324506558784, + "grad_norm": 1.6949985761665576, + "learning_rate": 6.156522244620806e-06, + "loss": 0.732, + "step": 14452 + }, + { + "epoch": 0.4429630991786196, + "grad_norm": 1.5330195800570523, + "learning_rate": 6.156039380521833e-06, + "loss": 0.6931, + "step": 14453 + }, + { + "epoch": 0.4429937477013608, + "grad_norm": 1.8228327898293073, + "learning_rate": 6.1555565050321085e-06, + "loss": 0.6526, + "step": 14454 + }, + { + "epoch": 0.443024396224102, + "grad_norm": 2.0119406637039248, + "learning_rate": 6.155073618156391e-06, + "loss": 0.6718, + "step": 14455 + }, + { + "epoch": 0.4430550447468432, + "grad_norm": 0.8914259616233939, + "learning_rate": 6.154590719899436e-06, + "loss": 0.4717, + "step": 14456 + }, + { + "epoch": 0.4430856932695844, + "grad_norm": 0.8372843169295241, + "learning_rate": 6.154107810266004e-06, + "loss": 0.4592, + "step": 14457 + }, + { + "epoch": 0.4431163417923256, + "grad_norm": 1.8193364842962017, + "learning_rate": 6.153624889260852e-06, + "loss": 0.6186, + "step": 14458 + }, + { + "epoch": 0.4431469903150668, + "grad_norm": 1.551244016301106, + "learning_rate": 6.15314195688874e-06, + "loss": 
0.7114, + "step": 14459 + }, + { + "epoch": 0.44317763883780803, + "grad_norm": 1.6002804855333976, + "learning_rate": 6.152659013154424e-06, + "loss": 0.6881, + "step": 14460 + }, + { + "epoch": 0.44320828736054924, + "grad_norm": 1.5378112328025677, + "learning_rate": 6.152176058062665e-06, + "loss": 0.6619, + "step": 14461 + }, + { + "epoch": 0.44323893588329044, + "grad_norm": 1.3810215231751488, + "learning_rate": 6.151693091618218e-06, + "loss": 0.5071, + "step": 14462 + }, + { + "epoch": 0.44326958440603165, + "grad_norm": 1.7052989451912792, + "learning_rate": 6.151210113825846e-06, + "loss": 0.5637, + "step": 14463 + }, + { + "epoch": 0.44330023292877285, + "grad_norm": 1.6180689591243376, + "learning_rate": 6.150727124690306e-06, + "loss": 0.5564, + "step": 14464 + }, + { + "epoch": 0.44333088145151406, + "grad_norm": 1.6966386543266878, + "learning_rate": 6.150244124216358e-06, + "loss": 0.5522, + "step": 14465 + }, + { + "epoch": 0.44336152997425526, + "grad_norm": 0.9106512042062089, + "learning_rate": 6.14976111240876e-06, + "loss": 0.4408, + "step": 14466 + }, + { + "epoch": 0.44339217849699647, + "grad_norm": 1.6983873042082422, + "learning_rate": 6.149278089272271e-06, + "loss": 0.709, + "step": 14467 + }, + { + "epoch": 0.4434228270197377, + "grad_norm": 1.865926844600645, + "learning_rate": 6.148795054811652e-06, + "loss": 0.6581, + "step": 14468 + }, + { + "epoch": 0.4434534755424788, + "grad_norm": 1.8334768867937286, + "learning_rate": 6.1483120090316595e-06, + "loss": 0.7323, + "step": 14469 + }, + { + "epoch": 0.44348412406522003, + "grad_norm": 0.8773904867944416, + "learning_rate": 6.147828951937057e-06, + "loss": 0.4633, + "step": 14470 + }, + { + "epoch": 0.44351477258796124, + "grad_norm": 1.767837801258666, + "learning_rate": 6.147345883532601e-06, + "loss": 0.7788, + "step": 14471 + }, + { + "epoch": 0.44354542111070244, + "grad_norm": 1.8749946950070941, + "learning_rate": 6.146862803823053e-06, + "loss": 0.6745, + "step": 14472 + }, + { + "epoch": 0.44357606963344365, + "grad_norm": 1.4986422719468242, + "learning_rate": 6.1463797128131705e-06, + "loss": 0.5436, + "step": 14473 + }, + { + "epoch": 0.44360671815618485, + "grad_norm": 1.758842386342752, + "learning_rate": 6.1458966105077176e-06, + "loss": 0.693, + "step": 14474 + }, + { + "epoch": 0.44363736667892606, + "grad_norm": 1.5836170076169223, + "learning_rate": 6.145413496911452e-06, + "loss": 0.5809, + "step": 14475 + }, + { + "epoch": 0.44366801520166727, + "grad_norm": 1.6214169452211835, + "learning_rate": 6.144930372029133e-06, + "loss": 0.5872, + "step": 14476 + }, + { + "epoch": 0.44369866372440847, + "grad_norm": 1.8269304280660594, + "learning_rate": 6.144447235865522e-06, + "loss": 0.7197, + "step": 14477 + }, + { + "epoch": 0.4437293122471497, + "grad_norm": 1.7033051514087758, + "learning_rate": 6.143964088425382e-06, + "loss": 0.6126, + "step": 14478 + }, + { + "epoch": 0.4437599607698909, + "grad_norm": 1.6481455729727612, + "learning_rate": 6.143480929713469e-06, + "loss": 0.6307, + "step": 14479 + }, + { + "epoch": 0.4437906092926321, + "grad_norm": 1.0114816258972665, + "learning_rate": 6.142997759734546e-06, + "loss": 0.4322, + "step": 14480 + }, + { + "epoch": 0.4438212578153733, + "grad_norm": 2.1741093306934687, + "learning_rate": 6.142514578493374e-06, + "loss": 0.7516, + "step": 14481 + }, + { + "epoch": 0.4438519063381145, + "grad_norm": 1.6915845826770877, + "learning_rate": 6.142031385994714e-06, + "loss": 0.5599, + "step": 14482 + }, + { + "epoch": 0.4438825548608557, + 
"grad_norm": 1.6415299508527565, + "learning_rate": 6.141548182243326e-06, + "loss": 0.5749, + "step": 14483 + }, + { + "epoch": 0.4439132033835969, + "grad_norm": 1.579984615117357, + "learning_rate": 6.141064967243972e-06, + "loss": 0.6396, + "step": 14484 + }, + { + "epoch": 0.4439438519063381, + "grad_norm": 1.4802198657084469, + "learning_rate": 6.140581741001413e-06, + "loss": 0.6016, + "step": 14485 + }, + { + "epoch": 0.4439745004290793, + "grad_norm": 1.5247199125250759, + "learning_rate": 6.1400985035204095e-06, + "loss": 0.686, + "step": 14486 + }, + { + "epoch": 0.4440051489518205, + "grad_norm": 1.578713635298507, + "learning_rate": 6.139615254805724e-06, + "loss": 0.6447, + "step": 14487 + }, + { + "epoch": 0.44403579747456173, + "grad_norm": 1.9929578070319562, + "learning_rate": 6.139131994862118e-06, + "loss": 0.6792, + "step": 14488 + }, + { + "epoch": 0.44406644599730294, + "grad_norm": 1.783123309255822, + "learning_rate": 6.1386487236943525e-06, + "loss": 0.7229, + "step": 14489 + }, + { + "epoch": 0.44409709452004414, + "grad_norm": 1.5798366123411214, + "learning_rate": 6.138165441307191e-06, + "loss": 0.6398, + "step": 14490 + }, + { + "epoch": 0.44412774304278535, + "grad_norm": 1.5103004289830373, + "learning_rate": 6.137682147705392e-06, + "loss": 0.5907, + "step": 14491 + }, + { + "epoch": 0.44415839156552656, + "grad_norm": 0.8536468819815408, + "learning_rate": 6.1371988428937215e-06, + "loss": 0.4461, + "step": 14492 + }, + { + "epoch": 0.44418904008826776, + "grad_norm": 1.6783754048604813, + "learning_rate": 6.136715526876938e-06, + "loss": 0.6451, + "step": 14493 + }, + { + "epoch": 0.44421968861100897, + "grad_norm": 1.5637264319210464, + "learning_rate": 6.136232199659809e-06, + "loss": 0.6116, + "step": 14494 + }, + { + "epoch": 0.4442503371337502, + "grad_norm": 1.9753921239182035, + "learning_rate": 6.13574886124709e-06, + "loss": 0.665, + "step": 14495 + }, + { + "epoch": 0.4442809856564914, + "grad_norm": 1.6643801598358694, + "learning_rate": 6.13526551164355e-06, + "loss": 0.6322, + "step": 14496 + }, + { + "epoch": 0.4443116341792326, + "grad_norm": 1.7933840993945742, + "learning_rate": 6.134782150853946e-06, + "loss": 0.6893, + "step": 14497 + }, + { + "epoch": 0.4443422827019738, + "grad_norm": 1.6889174509891092, + "learning_rate": 6.134298778883046e-06, + "loss": 0.6595, + "step": 14498 + }, + { + "epoch": 0.444372931224715, + "grad_norm": 1.6436970352261686, + "learning_rate": 6.133815395735606e-06, + "loss": 0.6991, + "step": 14499 + }, + { + "epoch": 0.44440357974745615, + "grad_norm": 1.9428041301982824, + "learning_rate": 6.133332001416394e-06, + "loss": 0.6889, + "step": 14500 + }, + { + "epoch": 0.44443422827019735, + "grad_norm": 0.7852803401500797, + "learning_rate": 6.1328485959301745e-06, + "loss": 0.4377, + "step": 14501 + }, + { + "epoch": 0.44446487679293856, + "grad_norm": 1.685285973836446, + "learning_rate": 6.1323651792817045e-06, + "loss": 0.6204, + "step": 14502 + }, + { + "epoch": 0.44449552531567976, + "grad_norm": 1.8199075721887763, + "learning_rate": 6.131881751475752e-06, + "loss": 0.6618, + "step": 14503 + }, + { + "epoch": 0.44452617383842097, + "grad_norm": 1.6459928295464403, + "learning_rate": 6.131398312517078e-06, + "loss": 0.7, + "step": 14504 + }, + { + "epoch": 0.4445568223611622, + "grad_norm": 1.6255876360880788, + "learning_rate": 6.13091486241045e-06, + "loss": 0.6804, + "step": 14505 + }, + { + "epoch": 0.4445874708839034, + "grad_norm": 1.6267375329074256, + "learning_rate": 6.130431401160626e-06, + 
"loss": 0.609, + "step": 14506 + }, + { + "epoch": 0.4446181194066446, + "grad_norm": 1.8836175111619418, + "learning_rate": 6.129947928772373e-06, + "loss": 0.6921, + "step": 14507 + }, + { + "epoch": 0.4446487679293858, + "grad_norm": 1.6374384399146065, + "learning_rate": 6.129464445250452e-06, + "loss": 0.5371, + "step": 14508 + }, + { + "epoch": 0.444679416452127, + "grad_norm": 1.5510448079042292, + "learning_rate": 6.128980950599632e-06, + "loss": 0.5687, + "step": 14509 + }, + { + "epoch": 0.4447100649748682, + "grad_norm": 1.7277281254206422, + "learning_rate": 6.128497444824672e-06, + "loss": 0.6149, + "step": 14510 + }, + { + "epoch": 0.4447407134976094, + "grad_norm": 1.6477206817815384, + "learning_rate": 6.1280139279303385e-06, + "loss": 0.707, + "step": 14511 + }, + { + "epoch": 0.4447713620203506, + "grad_norm": 1.6907784450533445, + "learning_rate": 6.127530399921393e-06, + "loss": 0.6545, + "step": 14512 + }, + { + "epoch": 0.4448020105430918, + "grad_norm": 1.9368842310936847, + "learning_rate": 6.127046860802605e-06, + "loss": 0.6896, + "step": 14513 + }, + { + "epoch": 0.444832659065833, + "grad_norm": 1.9322110928436425, + "learning_rate": 6.1265633105787344e-06, + "loss": 0.6953, + "step": 14514 + }, + { + "epoch": 0.44486330758857423, + "grad_norm": 1.754203137583388, + "learning_rate": 6.1260797492545484e-06, + "loss": 0.6411, + "step": 14515 + }, + { + "epoch": 0.44489395611131544, + "grad_norm": 1.8143371268494244, + "learning_rate": 6.125596176834809e-06, + "loss": 0.7056, + "step": 14516 + }, + { + "epoch": 0.44492460463405664, + "grad_norm": 1.6158639596708662, + "learning_rate": 6.125112593324283e-06, + "loss": 0.5841, + "step": 14517 + }, + { + "epoch": 0.44495525315679785, + "grad_norm": 1.5473158365210022, + "learning_rate": 6.124628998727735e-06, + "loss": 0.5903, + "step": 14518 + }, + { + "epoch": 0.44498590167953905, + "grad_norm": 1.7038418437429286, + "learning_rate": 6.124145393049929e-06, + "loss": 0.6651, + "step": 14519 + }, + { + "epoch": 0.44501655020228026, + "grad_norm": 1.6384724128917896, + "learning_rate": 6.123661776295632e-06, + "loss": 0.6428, + "step": 14520 + }, + { + "epoch": 0.44504719872502146, + "grad_norm": 1.7584893229715264, + "learning_rate": 6.123178148469609e-06, + "loss": 0.6846, + "step": 14521 + }, + { + "epoch": 0.44507784724776267, + "grad_norm": 1.5456033791752957, + "learning_rate": 6.122694509576622e-06, + "loss": 0.6157, + "step": 14522 + }, + { + "epoch": 0.4451084957705039, + "grad_norm": 1.6577584662675748, + "learning_rate": 6.122210859621439e-06, + "loss": 0.6224, + "step": 14523 + }, + { + "epoch": 0.4451391442932451, + "grad_norm": 1.6350809407236515, + "learning_rate": 6.121727198608827e-06, + "loss": 0.6764, + "step": 14524 + }, + { + "epoch": 0.4451697928159863, + "grad_norm": 1.5817687298345566, + "learning_rate": 6.1212435265435475e-06, + "loss": 0.5663, + "step": 14525 + }, + { + "epoch": 0.4452004413387275, + "grad_norm": 1.7202973149078298, + "learning_rate": 6.120759843430371e-06, + "loss": 0.7061, + "step": 14526 + }, + { + "epoch": 0.4452310898614687, + "grad_norm": 1.61422103929771, + "learning_rate": 6.1202761492740595e-06, + "loss": 0.6648, + "step": 14527 + }, + { + "epoch": 0.4452617383842099, + "grad_norm": 1.536103324886504, + "learning_rate": 6.119792444079381e-06, + "loss": 0.5687, + "step": 14528 + }, + { + "epoch": 0.4452923869069511, + "grad_norm": 0.825277099685421, + "learning_rate": 6.119308727851101e-06, + "loss": 0.4386, + "step": 14529 + }, + { + "epoch": 0.4453230354296923, + 
"grad_norm": 1.8331640019379838, + "learning_rate": 6.118825000593984e-06, + "loss": 0.6788, + "step": 14530 + }, + { + "epoch": 0.44535368395243347, + "grad_norm": 1.7658464759399914, + "learning_rate": 6.1183412623128e-06, + "loss": 0.6161, + "step": 14531 + }, + { + "epoch": 0.44538433247517467, + "grad_norm": 1.5048363032261083, + "learning_rate": 6.117857513012314e-06, + "loss": 0.6293, + "step": 14532 + }, + { + "epoch": 0.4454149809979159, + "grad_norm": 1.777842475221699, + "learning_rate": 6.117373752697291e-06, + "loss": 0.7572, + "step": 14533 + }, + { + "epoch": 0.4454456295206571, + "grad_norm": 1.5272819688310575, + "learning_rate": 6.116889981372498e-06, + "loss": 0.731, + "step": 14534 + }, + { + "epoch": 0.4454762780433983, + "grad_norm": 1.6447034512783953, + "learning_rate": 6.116406199042703e-06, + "loss": 0.6716, + "step": 14535 + }, + { + "epoch": 0.4455069265661395, + "grad_norm": 1.6234098573271887, + "learning_rate": 6.115922405712672e-06, + "loss": 0.6367, + "step": 14536 + }, + { + "epoch": 0.4455375750888807, + "grad_norm": 0.7832752694739943, + "learning_rate": 6.115438601387172e-06, + "loss": 0.4598, + "step": 14537 + }, + { + "epoch": 0.4455682236116219, + "grad_norm": 1.8814520088719617, + "learning_rate": 6.114954786070969e-06, + "loss": 0.6479, + "step": 14538 + }, + { + "epoch": 0.4455988721343631, + "grad_norm": 1.623040926464995, + "learning_rate": 6.114470959768832e-06, + "loss": 0.6244, + "step": 14539 + }, + { + "epoch": 0.4456295206571043, + "grad_norm": 1.6861529984863628, + "learning_rate": 6.1139871224855285e-06, + "loss": 0.6298, + "step": 14540 + }, + { + "epoch": 0.4456601691798455, + "grad_norm": 0.7377790459008731, + "learning_rate": 6.113503274225824e-06, + "loss": 0.4449, + "step": 14541 + }, + { + "epoch": 0.44569081770258673, + "grad_norm": 1.9361896655362496, + "learning_rate": 6.113019414994485e-06, + "loss": 0.7007, + "step": 14542 + }, + { + "epoch": 0.44572146622532793, + "grad_norm": 1.6196164914187714, + "learning_rate": 6.112535544796284e-06, + "loss": 0.668, + "step": 14543 + }, + { + "epoch": 0.44575211474806914, + "grad_norm": 0.7307850900663118, + "learning_rate": 6.112051663635985e-06, + "loss": 0.4413, + "step": 14544 + }, + { + "epoch": 0.44578276327081034, + "grad_norm": 1.8114742964813024, + "learning_rate": 6.111567771518354e-06, + "loss": 0.7309, + "step": 14545 + }, + { + "epoch": 0.44581341179355155, + "grad_norm": 1.868219858097067, + "learning_rate": 6.1110838684481645e-06, + "loss": 0.7115, + "step": 14546 + }, + { + "epoch": 0.44584406031629276, + "grad_norm": 1.787897572851519, + "learning_rate": 6.11059995443018e-06, + "loss": 0.6983, + "step": 14547 + }, + { + "epoch": 0.44587470883903396, + "grad_norm": 1.7090091397411762, + "learning_rate": 6.11011602946917e-06, + "loss": 0.6901, + "step": 14548 + }, + { + "epoch": 0.44590535736177517, + "grad_norm": 1.5889004157725888, + "learning_rate": 6.109632093569902e-06, + "loss": 0.6314, + "step": 14549 + }, + { + "epoch": 0.4459360058845164, + "grad_norm": 1.649155792986373, + "learning_rate": 6.109148146737146e-06, + "loss": 0.7138, + "step": 14550 + }, + { + "epoch": 0.4459666544072576, + "grad_norm": 1.7046097720268467, + "learning_rate": 6.108664188975669e-06, + "loss": 0.6972, + "step": 14551 + }, + { + "epoch": 0.4459973029299988, + "grad_norm": 1.7444938988573957, + "learning_rate": 6.108180220290241e-06, + "loss": 0.652, + "step": 14552 + }, + { + "epoch": 0.44602795145274, + "grad_norm": 1.765926629152845, + "learning_rate": 6.107696240685627e-06, + "loss": 
0.6473, + "step": 14553 + }, + { + "epoch": 0.4460585999754812, + "grad_norm": 1.5999620463745796, + "learning_rate": 6.107212250166602e-06, + "loss": 0.5188, + "step": 14554 + }, + { + "epoch": 0.4460892484982224, + "grad_norm": 1.5158846733005384, + "learning_rate": 6.1067282487379295e-06, + "loss": 0.6949, + "step": 14555 + }, + { + "epoch": 0.4461198970209636, + "grad_norm": 1.8531212902802063, + "learning_rate": 6.10624423640438e-06, + "loss": 0.7519, + "step": 14556 + }, + { + "epoch": 0.4461505455437048, + "grad_norm": 1.8382950638866422, + "learning_rate": 6.105760213170725e-06, + "loss": 0.766, + "step": 14557 + }, + { + "epoch": 0.446181194066446, + "grad_norm": 1.69866215197135, + "learning_rate": 6.1052761790417315e-06, + "loss": 0.6226, + "step": 14558 + }, + { + "epoch": 0.4462118425891872, + "grad_norm": 1.7056327292447901, + "learning_rate": 6.104792134022169e-06, + "loss": 0.7491, + "step": 14559 + }, + { + "epoch": 0.44624249111192843, + "grad_norm": 1.7513218936426114, + "learning_rate": 6.104308078116804e-06, + "loss": 0.6159, + "step": 14560 + }, + { + "epoch": 0.44627313963466964, + "grad_norm": 1.7376260371728052, + "learning_rate": 6.103824011330411e-06, + "loss": 0.6718, + "step": 14561 + }, + { + "epoch": 0.4463037881574108, + "grad_norm": 0.8082021131733731, + "learning_rate": 6.103339933667757e-06, + "loss": 0.4003, + "step": 14562 + }, + { + "epoch": 0.446334436680152, + "grad_norm": 1.8083599106031565, + "learning_rate": 6.102855845133615e-06, + "loss": 0.7785, + "step": 14563 + }, + { + "epoch": 0.4463650852028932, + "grad_norm": 0.7680532112789716, + "learning_rate": 6.102371745732749e-06, + "loss": 0.433, + "step": 14564 + }, + { + "epoch": 0.4463957337256344, + "grad_norm": 1.7278421791257528, + "learning_rate": 6.101887635469933e-06, + "loss": 0.6036, + "step": 14565 + }, + { + "epoch": 0.4464263822483756, + "grad_norm": 1.8250373726284463, + "learning_rate": 6.101403514349936e-06, + "loss": 0.6449, + "step": 14566 + }, + { + "epoch": 0.4464570307711168, + "grad_norm": 2.1326344392730374, + "learning_rate": 6.100919382377531e-06, + "loss": 0.7373, + "step": 14567 + }, + { + "epoch": 0.446487679293858, + "grad_norm": 1.7264645235073546, + "learning_rate": 6.100435239557482e-06, + "loss": 0.7106, + "step": 14568 + }, + { + "epoch": 0.4465183278165992, + "grad_norm": 1.6661521759500235, + "learning_rate": 6.0999510858945646e-06, + "loss": 0.7216, + "step": 14569 + }, + { + "epoch": 0.44654897633934043, + "grad_norm": 0.897045663980705, + "learning_rate": 6.099466921393546e-06, + "loss": 0.4676, + "step": 14570 + }, + { + "epoch": 0.44657962486208164, + "grad_norm": 2.0393475531570298, + "learning_rate": 6.098982746059201e-06, + "loss": 0.7693, + "step": 14571 + }, + { + "epoch": 0.44661027338482284, + "grad_norm": 1.5278554373792057, + "learning_rate": 6.098498559896298e-06, + "loss": 0.6028, + "step": 14572 + }, + { + "epoch": 0.44664092190756405, + "grad_norm": 1.6644099280036042, + "learning_rate": 6.098014362909606e-06, + "loss": 0.681, + "step": 14573 + }, + { + "epoch": 0.44667157043030525, + "grad_norm": 0.7683639499341881, + "learning_rate": 6.097530155103899e-06, + "loss": 0.4442, + "step": 14574 + }, + { + "epoch": 0.44670221895304646, + "grad_norm": 1.5440862254146739, + "learning_rate": 6.097045936483944e-06, + "loss": 0.6631, + "step": 14575 + }, + { + "epoch": 0.44673286747578766, + "grad_norm": 1.725728675314859, + "learning_rate": 6.096561707054517e-06, + "loss": 0.6969, + "step": 14576 + }, + { + "epoch": 0.44676351599852887, + "grad_norm": 
1.8740295494496642, + "learning_rate": 6.096077466820386e-06, + "loss": 0.7355, + "step": 14577 + }, + { + "epoch": 0.4467941645212701, + "grad_norm": 1.6267110363072599, + "learning_rate": 6.095593215786324e-06, + "loss": 0.6026, + "step": 14578 + }, + { + "epoch": 0.4468248130440113, + "grad_norm": 1.5336131207274861, + "learning_rate": 6.095108953957101e-06, + "loss": 0.6104, + "step": 14579 + }, + { + "epoch": 0.4468554615667525, + "grad_norm": 1.79486058814839, + "learning_rate": 6.09462468133749e-06, + "loss": 0.5926, + "step": 14580 + }, + { + "epoch": 0.4468861100894937, + "grad_norm": 1.6372090275544326, + "learning_rate": 6.09414039793226e-06, + "loss": 0.5727, + "step": 14581 + }, + { + "epoch": 0.4469167586122349, + "grad_norm": 1.7824421870036586, + "learning_rate": 6.093656103746187e-06, + "loss": 0.7262, + "step": 14582 + }, + { + "epoch": 0.4469474071349761, + "grad_norm": 1.6237840417493912, + "learning_rate": 6.09317179878404e-06, + "loss": 0.6388, + "step": 14583 + }, + { + "epoch": 0.4469780556577173, + "grad_norm": 1.6493489824672984, + "learning_rate": 6.092687483050592e-06, + "loss": 0.6095, + "step": 14584 + }, + { + "epoch": 0.4470087041804585, + "grad_norm": 1.675862803792404, + "learning_rate": 6.092203156550614e-06, + "loss": 0.7128, + "step": 14585 + }, + { + "epoch": 0.4470393527031997, + "grad_norm": 1.8447791252825219, + "learning_rate": 6.091718819288879e-06, + "loss": 0.6613, + "step": 14586 + }, + { + "epoch": 0.4470700012259409, + "grad_norm": 1.766061755990687, + "learning_rate": 6.091234471270159e-06, + "loss": 0.6265, + "step": 14587 + }, + { + "epoch": 0.44710064974868213, + "grad_norm": 1.758180913445557, + "learning_rate": 6.090750112499226e-06, + "loss": 0.6312, + "step": 14588 + }, + { + "epoch": 0.44713129827142334, + "grad_norm": 1.5899166663829438, + "learning_rate": 6.0902657429808535e-06, + "loss": 0.5927, + "step": 14589 + }, + { + "epoch": 0.44716194679416454, + "grad_norm": 1.7763286931456763, + "learning_rate": 6.089781362719813e-06, + "loss": 0.5657, + "step": 14590 + }, + { + "epoch": 0.44719259531690575, + "grad_norm": 1.7420090482144754, + "learning_rate": 6.08929697172088e-06, + "loss": 0.6068, + "step": 14591 + }, + { + "epoch": 0.44722324383964696, + "grad_norm": 0.9444958749607975, + "learning_rate": 6.088812569988822e-06, + "loss": 0.4585, + "step": 14592 + }, + { + "epoch": 0.4472538923623881, + "grad_norm": 1.5951359182745695, + "learning_rate": 6.088328157528418e-06, + "loss": 0.6362, + "step": 14593 + }, + { + "epoch": 0.4472845408851293, + "grad_norm": 1.6971720231312923, + "learning_rate": 6.0878437343444375e-06, + "loss": 0.6834, + "step": 14594 + }, + { + "epoch": 0.4473151894078705, + "grad_norm": 1.6417924652985338, + "learning_rate": 6.087359300441655e-06, + "loss": 0.629, + "step": 14595 + }, + { + "epoch": 0.4473458379306117, + "grad_norm": 0.7555645342997194, + "learning_rate": 6.086874855824842e-06, + "loss": 0.4554, + "step": 14596 + }, + { + "epoch": 0.44737648645335293, + "grad_norm": 1.6711142169356408, + "learning_rate": 6.086390400498773e-06, + "loss": 0.6875, + "step": 14597 + }, + { + "epoch": 0.44740713497609413, + "grad_norm": 1.6096823174463668, + "learning_rate": 6.085905934468221e-06, + "loss": 0.5623, + "step": 14598 + }, + { + "epoch": 0.44743778349883534, + "grad_norm": 1.69012005699684, + "learning_rate": 6.085421457737961e-06, + "loss": 0.6815, + "step": 14599 + }, + { + "epoch": 0.44746843202157655, + "grad_norm": 1.7907507989386748, + "learning_rate": 6.084936970312764e-06, + "loss": 0.722, + 
"step": 14600 + }, + { + "epoch": 0.44749908054431775, + "grad_norm": 1.5910210930827888, + "learning_rate": 6.084452472197408e-06, + "loss": 0.679, + "step": 14601 + }, + { + "epoch": 0.44752972906705896, + "grad_norm": 2.7744405266765826, + "learning_rate": 6.0839679633966635e-06, + "loss": 0.7575, + "step": 14602 + }, + { + "epoch": 0.44756037758980016, + "grad_norm": 0.8774236400811362, + "learning_rate": 6.0834834439153034e-06, + "loss": 0.4496, + "step": 14603 + }, + { + "epoch": 0.44759102611254137, + "grad_norm": 1.813755486065661, + "learning_rate": 6.082998913758106e-06, + "loss": 0.5903, + "step": 14604 + }, + { + "epoch": 0.4476216746352826, + "grad_norm": 0.8140177909368866, + "learning_rate": 6.082514372929843e-06, + "loss": 0.4675, + "step": 14605 + }, + { + "epoch": 0.4476523231580238, + "grad_norm": 1.866548495080769, + "learning_rate": 6.082029821435288e-06, + "loss": 0.7377, + "step": 14606 + }, + { + "epoch": 0.447682971680765, + "grad_norm": 1.5089490061205846, + "learning_rate": 6.081545259279216e-06, + "loss": 0.6424, + "step": 14607 + }, + { + "epoch": 0.4477136202035062, + "grad_norm": 1.7293792734309363, + "learning_rate": 6.081060686466403e-06, + "loss": 0.5943, + "step": 14608 + }, + { + "epoch": 0.4477442687262474, + "grad_norm": 1.661368754977721, + "learning_rate": 6.080576103001622e-06, + "loss": 0.7155, + "step": 14609 + }, + { + "epoch": 0.4477749172489886, + "grad_norm": 1.7434242827144155, + "learning_rate": 6.080091508889649e-06, + "loss": 0.6868, + "step": 14610 + }, + { + "epoch": 0.4478055657717298, + "grad_norm": 1.579530751142045, + "learning_rate": 6.079606904135256e-06, + "loss": 0.5589, + "step": 14611 + }, + { + "epoch": 0.447836214294471, + "grad_norm": 1.866848929871751, + "learning_rate": 6.079122288743221e-06, + "loss": 0.7838, + "step": 14612 + }, + { + "epoch": 0.4478668628172122, + "grad_norm": 1.7885676142701097, + "learning_rate": 6.078637662718319e-06, + "loss": 0.6741, + "step": 14613 + }, + { + "epoch": 0.4478975113399534, + "grad_norm": 0.8530868595340987, + "learning_rate": 6.078153026065321e-06, + "loss": 0.4454, + "step": 14614 + }, + { + "epoch": 0.44792815986269463, + "grad_norm": 1.6789371993689401, + "learning_rate": 6.0776683787890075e-06, + "loss": 0.5737, + "step": 14615 + }, + { + "epoch": 0.44795880838543584, + "grad_norm": 2.049871079412684, + "learning_rate": 6.077183720894152e-06, + "loss": 0.6043, + "step": 14616 + }, + { + "epoch": 0.44798945690817704, + "grad_norm": 1.8115470014826294, + "learning_rate": 6.076699052385531e-06, + "loss": 0.706, + "step": 14617 + }, + { + "epoch": 0.44802010543091825, + "grad_norm": 1.887534075313164, + "learning_rate": 6.0762143732679156e-06, + "loss": 0.6466, + "step": 14618 + }, + { + "epoch": 0.44805075395365945, + "grad_norm": 1.8622616503055953, + "learning_rate": 6.075729683546087e-06, + "loss": 0.567, + "step": 14619 + }, + { + "epoch": 0.44808140247640066, + "grad_norm": 1.9081001083086384, + "learning_rate": 6.075244983224816e-06, + "loss": 0.6547, + "step": 14620 + }, + { + "epoch": 0.44811205099914186, + "grad_norm": 0.7694232555344831, + "learning_rate": 6.074760272308885e-06, + "loss": 0.4686, + "step": 14621 + }, + { + "epoch": 0.44814269952188307, + "grad_norm": 1.7248122383763458, + "learning_rate": 6.074275550803063e-06, + "loss": 0.7272, + "step": 14622 + }, + { + "epoch": 0.4481733480446243, + "grad_norm": 1.7209723335504699, + "learning_rate": 6.073790818712131e-06, + "loss": 0.6807, + "step": 14623 + }, + { + "epoch": 0.4482039965673654, + "grad_norm": 
1.8675830367757047, + "learning_rate": 6.073306076040861e-06, + "loss": 0.7147, + "step": 14624 + }, + { + "epoch": 0.44823464509010663, + "grad_norm": 1.7264914735557297, + "learning_rate": 6.072821322794034e-06, + "loss": 0.6228, + "step": 14625 + }, + { + "epoch": 0.44826529361284784, + "grad_norm": 1.6288760449433297, + "learning_rate": 6.0723365589764224e-06, + "loss": 0.5969, + "step": 14626 + }, + { + "epoch": 0.44829594213558904, + "grad_norm": 0.7742467753094343, + "learning_rate": 6.0718517845928035e-06, + "loss": 0.4311, + "step": 14627 + }, + { + "epoch": 0.44832659065833025, + "grad_norm": 1.6671511209386505, + "learning_rate": 6.0713669996479584e-06, + "loss": 0.6793, + "step": 14628 + }, + { + "epoch": 0.44835723918107145, + "grad_norm": 1.7608120642442393, + "learning_rate": 6.070882204146656e-06, + "loss": 0.6957, + "step": 14629 + }, + { + "epoch": 0.44838788770381266, + "grad_norm": 1.613483614902363, + "learning_rate": 6.070397398093681e-06, + "loss": 0.6159, + "step": 14630 + }, + { + "epoch": 0.44841853622655387, + "grad_norm": 1.699265235821568, + "learning_rate": 6.069912581493803e-06, + "loss": 0.5888, + "step": 14631 + }, + { + "epoch": 0.44844918474929507, + "grad_norm": 1.6570481984524748, + "learning_rate": 6.069427754351805e-06, + "loss": 0.5741, + "step": 14632 + }, + { + "epoch": 0.4484798332720363, + "grad_norm": 2.2691527065094763, + "learning_rate": 6.068942916672461e-06, + "loss": 0.7901, + "step": 14633 + }, + { + "epoch": 0.4485104817947775, + "grad_norm": 1.586785832973947, + "learning_rate": 6.068458068460549e-06, + "loss": 0.6381, + "step": 14634 + }, + { + "epoch": 0.4485411303175187, + "grad_norm": 1.7243459554815563, + "learning_rate": 6.067973209720845e-06, + "loss": 0.6317, + "step": 14635 + }, + { + "epoch": 0.4485717788402599, + "grad_norm": 1.6149818989637785, + "learning_rate": 6.067488340458131e-06, + "loss": 0.6599, + "step": 14636 + }, + { + "epoch": 0.4486024273630011, + "grad_norm": 1.7748733403984414, + "learning_rate": 6.067003460677177e-06, + "loss": 0.6976, + "step": 14637 + }, + { + "epoch": 0.4486330758857423, + "grad_norm": 1.8388413719426524, + "learning_rate": 6.066518570382768e-06, + "loss": 0.7104, + "step": 14638 + }, + { + "epoch": 0.4486637244084835, + "grad_norm": 1.0439999946534766, + "learning_rate": 6.066033669579677e-06, + "loss": 0.4521, + "step": 14639 + }, + { + "epoch": 0.4486943729312247, + "grad_norm": 1.6077557574132924, + "learning_rate": 6.065548758272684e-06, + "loss": 0.5775, + "step": 14640 + }, + { + "epoch": 0.4487250214539659, + "grad_norm": 1.5102489957502891, + "learning_rate": 6.065063836466567e-06, + "loss": 0.6239, + "step": 14641 + }, + { + "epoch": 0.44875566997670713, + "grad_norm": 1.9324785294096432, + "learning_rate": 6.064578904166103e-06, + "loss": 0.6389, + "step": 14642 + }, + { + "epoch": 0.44878631849944833, + "grad_norm": 1.9529682792129135, + "learning_rate": 6.0640939613760705e-06, + "loss": 0.6653, + "step": 14643 + }, + { + "epoch": 0.44881696702218954, + "grad_norm": 1.6366331504072131, + "learning_rate": 6.063609008101249e-06, + "loss": 0.663, + "step": 14644 + }, + { + "epoch": 0.44884761554493074, + "grad_norm": 1.5859924091000448, + "learning_rate": 6.063124044346415e-06, + "loss": 0.7047, + "step": 14645 + }, + { + "epoch": 0.44887826406767195, + "grad_norm": 1.6369174823167028, + "learning_rate": 6.0626390701163474e-06, + "loss": 0.5828, + "step": 14646 + }, + { + "epoch": 0.44890891259041316, + "grad_norm": 1.4865099329673332, + "learning_rate": 6.062154085415826e-06, + 
"loss": 0.6222, + "step": 14647 + }, + { + "epoch": 0.44893956111315436, + "grad_norm": 0.906301662575853, + "learning_rate": 6.061669090249628e-06, + "loss": 0.45, + "step": 14648 + }, + { + "epoch": 0.44897020963589557, + "grad_norm": 1.89234318227374, + "learning_rate": 6.061184084622534e-06, + "loss": 0.644, + "step": 14649 + }, + { + "epoch": 0.4490008581586368, + "grad_norm": 1.7083727428922468, + "learning_rate": 6.060699068539319e-06, + "loss": 0.626, + "step": 14650 + }, + { + "epoch": 0.449031506681378, + "grad_norm": 1.86245603231633, + "learning_rate": 6.060214042004767e-06, + "loss": 0.6174, + "step": 14651 + }, + { + "epoch": 0.4490621552041192, + "grad_norm": 1.7151796589825647, + "learning_rate": 6.059729005023655e-06, + "loss": 0.6064, + "step": 14652 + }, + { + "epoch": 0.4490928037268604, + "grad_norm": 1.6581706366471476, + "learning_rate": 6.05924395760076e-06, + "loss": 0.5971, + "step": 14653 + }, + { + "epoch": 0.4491234522496016, + "grad_norm": 1.5772199238351656, + "learning_rate": 6.0587588997408646e-06, + "loss": 0.5819, + "step": 14654 + }, + { + "epoch": 0.44915410077234275, + "grad_norm": 1.739379962489197, + "learning_rate": 6.058273831448747e-06, + "loss": 0.6538, + "step": 14655 + }, + { + "epoch": 0.44918474929508395, + "grad_norm": 1.7324865944605563, + "learning_rate": 6.057788752729187e-06, + "loss": 0.6151, + "step": 14656 + }, + { + "epoch": 0.44921539781782516, + "grad_norm": 1.6677623142541917, + "learning_rate": 6.057303663586962e-06, + "loss": 0.5546, + "step": 14657 + }, + { + "epoch": 0.44924604634056636, + "grad_norm": 1.6398196473783015, + "learning_rate": 6.056818564026855e-06, + "loss": 0.7008, + "step": 14658 + }, + { + "epoch": 0.44927669486330757, + "grad_norm": 1.6673702294771493, + "learning_rate": 6.056333454053645e-06, + "loss": 0.6583, + "step": 14659 + }, + { + "epoch": 0.4493073433860488, + "grad_norm": 1.6663596386833706, + "learning_rate": 6.05584833367211e-06, + "loss": 0.6919, + "step": 14660 + }, + { + "epoch": 0.44933799190879, + "grad_norm": 1.7093554681110275, + "learning_rate": 6.0553632028870305e-06, + "loss": 0.703, + "step": 14661 + }, + { + "epoch": 0.4493686404315312, + "grad_norm": 1.7133905993440632, + "learning_rate": 6.0548780617031875e-06, + "loss": 0.5595, + "step": 14662 + }, + { + "epoch": 0.4493992889542724, + "grad_norm": 1.5281565281776397, + "learning_rate": 6.054392910125362e-06, + "loss": 0.6177, + "step": 14663 + }, + { + "epoch": 0.4494299374770136, + "grad_norm": 1.5658486698980405, + "learning_rate": 6.053907748158333e-06, + "loss": 0.5581, + "step": 14664 + }, + { + "epoch": 0.4494605859997548, + "grad_norm": 1.559445758756294, + "learning_rate": 6.053422575806881e-06, + "loss": 0.5876, + "step": 14665 + }, + { + "epoch": 0.449491234522496, + "grad_norm": 1.714850605546664, + "learning_rate": 6.052937393075787e-06, + "loss": 0.7253, + "step": 14666 + }, + { + "epoch": 0.4495218830452372, + "grad_norm": 1.9698140812197125, + "learning_rate": 6.0524521999698315e-06, + "loss": 0.6725, + "step": 14667 + }, + { + "epoch": 0.4495525315679784, + "grad_norm": 0.8602054405443833, + "learning_rate": 6.051966996493795e-06, + "loss": 0.448, + "step": 14668 + }, + { + "epoch": 0.4495831800907196, + "grad_norm": 0.8426548229308302, + "learning_rate": 6.05148178265246e-06, + "loss": 0.4704, + "step": 14669 + }, + { + "epoch": 0.44961382861346083, + "grad_norm": 1.7887515751137621, + "learning_rate": 6.0509965584506035e-06, + "loss": 0.6621, + "step": 14670 + }, + { + "epoch": 0.44964447713620204, + "grad_norm": 
1.7495100727089896, + "learning_rate": 6.050511323893011e-06, + "loss": 0.6571, + "step": 14671 + }, + { + "epoch": 0.44967512565894324, + "grad_norm": 0.7573530652957384, + "learning_rate": 6.05002607898446e-06, + "loss": 0.4526, + "step": 14672 + }, + { + "epoch": 0.44970577418168445, + "grad_norm": 0.7739492693463661, + "learning_rate": 6.049540823729735e-06, + "loss": 0.4468, + "step": 14673 + }, + { + "epoch": 0.44973642270442565, + "grad_norm": 1.9852801488935354, + "learning_rate": 6.049055558133614e-06, + "loss": 0.6892, + "step": 14674 + }, + { + "epoch": 0.44976707122716686, + "grad_norm": 1.731193987277517, + "learning_rate": 6.048570282200883e-06, + "loss": 0.6517, + "step": 14675 + }, + { + "epoch": 0.44979771974990806, + "grad_norm": 1.854063034408709, + "learning_rate": 6.0480849959363175e-06, + "loss": 0.6546, + "step": 14676 + }, + { + "epoch": 0.44982836827264927, + "grad_norm": 0.8041446353391454, + "learning_rate": 6.047599699344704e-06, + "loss": 0.4518, + "step": 14677 + }, + { + "epoch": 0.4498590167953905, + "grad_norm": 1.6875483417507549, + "learning_rate": 6.047114392430823e-06, + "loss": 0.6851, + "step": 14678 + }, + { + "epoch": 0.4498896653181317, + "grad_norm": 1.7920727512951438, + "learning_rate": 6.046629075199456e-06, + "loss": 0.6929, + "step": 14679 + }, + { + "epoch": 0.4499203138408729, + "grad_norm": 1.750391919684459, + "learning_rate": 6.046143747655383e-06, + "loss": 0.6415, + "step": 14680 + }, + { + "epoch": 0.4499509623636141, + "grad_norm": 1.5155376092470239, + "learning_rate": 6.04565840980339e-06, + "loss": 0.6091, + "step": 14681 + }, + { + "epoch": 0.4499816108863553, + "grad_norm": 1.890179303781855, + "learning_rate": 6.045173061648256e-06, + "loss": 0.6107, + "step": 14682 + }, + { + "epoch": 0.4500122594090965, + "grad_norm": 1.720385467611057, + "learning_rate": 6.044687703194765e-06, + "loss": 0.5856, + "step": 14683 + }, + { + "epoch": 0.4500429079318377, + "grad_norm": 1.6625950347792429, + "learning_rate": 6.044202334447698e-06, + "loss": 0.6382, + "step": 14684 + }, + { + "epoch": 0.4500735564545789, + "grad_norm": 1.5433875086192217, + "learning_rate": 6.043716955411839e-06, + "loss": 0.7079, + "step": 14685 + }, + { + "epoch": 0.45010420497732007, + "grad_norm": 1.721998658450264, + "learning_rate": 6.04323156609197e-06, + "loss": 0.6023, + "step": 14686 + }, + { + "epoch": 0.45013485350006127, + "grad_norm": 1.5058011220371768, + "learning_rate": 6.042746166492873e-06, + "loss": 0.651, + "step": 14687 + }, + { + "epoch": 0.4501655020228025, + "grad_norm": 2.0814334350037034, + "learning_rate": 6.042260756619331e-06, + "loss": 0.6688, + "step": 14688 + }, + { + "epoch": 0.4501961505455437, + "grad_norm": 0.8166917527973008, + "learning_rate": 6.041775336476128e-06, + "loss": 0.4361, + "step": 14689 + }, + { + "epoch": 0.4502267990682849, + "grad_norm": 0.8272768700866728, + "learning_rate": 6.041289906068046e-06, + "loss": 0.452, + "step": 14690 + }, + { + "epoch": 0.4502574475910261, + "grad_norm": 1.7945774937981342, + "learning_rate": 6.040804465399867e-06, + "loss": 0.6716, + "step": 14691 + }, + { + "epoch": 0.4502880961137673, + "grad_norm": 1.6636502663406136, + "learning_rate": 6.040319014476376e-06, + "loss": 0.6874, + "step": 14692 + }, + { + "epoch": 0.4503187446365085, + "grad_norm": 2.045757181660934, + "learning_rate": 6.0398335533023546e-06, + "loss": 0.7353, + "step": 14693 + }, + { + "epoch": 0.4503493931592497, + "grad_norm": 1.6690498478565936, + "learning_rate": 6.039348081882589e-06, + "loss": 0.6537, + 
"step": 14694 + }, + { + "epoch": 0.4503800416819909, + "grad_norm": 1.704273561451966, + "learning_rate": 6.03886260022186e-06, + "loss": 0.6142, + "step": 14695 + }, + { + "epoch": 0.4504106902047321, + "grad_norm": 1.9024356771982176, + "learning_rate": 6.038377108324951e-06, + "loss": 0.6848, + "step": 14696 + }, + { + "epoch": 0.45044133872747333, + "grad_norm": 0.859205783801287, + "learning_rate": 6.037891606196648e-06, + "loss": 0.4476, + "step": 14697 + }, + { + "epoch": 0.45047198725021453, + "grad_norm": 2.0999532660719034, + "learning_rate": 6.037406093841732e-06, + "loss": 0.6293, + "step": 14698 + }, + { + "epoch": 0.45050263577295574, + "grad_norm": 1.4769939409417054, + "learning_rate": 6.03692057126499e-06, + "loss": 0.5556, + "step": 14699 + }, + { + "epoch": 0.45053328429569695, + "grad_norm": 0.7857516902891494, + "learning_rate": 6.036435038471203e-06, + "loss": 0.4398, + "step": 14700 + }, + { + "epoch": 0.45056393281843815, + "grad_norm": 1.7240693901388615, + "learning_rate": 6.035949495465157e-06, + "loss": 0.6749, + "step": 14701 + }, + { + "epoch": 0.45059458134117936, + "grad_norm": 1.6215744447512694, + "learning_rate": 6.035463942251636e-06, + "loss": 0.7034, + "step": 14702 + }, + { + "epoch": 0.45062522986392056, + "grad_norm": 1.9569835320753195, + "learning_rate": 6.0349783788354235e-06, + "loss": 0.6798, + "step": 14703 + }, + { + "epoch": 0.45065587838666177, + "grad_norm": 1.6264778113914868, + "learning_rate": 6.034492805221304e-06, + "loss": 0.6551, + "step": 14704 + }, + { + "epoch": 0.450686526909403, + "grad_norm": 1.4834322628454406, + "learning_rate": 6.034007221414064e-06, + "loss": 0.6161, + "step": 14705 + }, + { + "epoch": 0.4507171754321442, + "grad_norm": 1.6407158216795823, + "learning_rate": 6.033521627418483e-06, + "loss": 0.7721, + "step": 14706 + }, + { + "epoch": 0.4507478239548854, + "grad_norm": 1.6330676093954524, + "learning_rate": 6.033036023239352e-06, + "loss": 0.7276, + "step": 14707 + }, + { + "epoch": 0.4507784724776266, + "grad_norm": 1.8245118647537393, + "learning_rate": 6.032550408881449e-06, + "loss": 0.7294, + "step": 14708 + }, + { + "epoch": 0.4508091210003678, + "grad_norm": 1.6862214939877067, + "learning_rate": 6.032064784349566e-06, + "loss": 0.6669, + "step": 14709 + }, + { + "epoch": 0.450839769523109, + "grad_norm": 1.6189659671619758, + "learning_rate": 6.031579149648483e-06, + "loss": 0.6628, + "step": 14710 + }, + { + "epoch": 0.4508704180458502, + "grad_norm": 1.4774699745889681, + "learning_rate": 6.031093504782987e-06, + "loss": 0.7893, + "step": 14711 + }, + { + "epoch": 0.4509010665685914, + "grad_norm": 1.704052520066079, + "learning_rate": 6.0306078497578636e-06, + "loss": 0.6567, + "step": 14712 + }, + { + "epoch": 0.4509317150913326, + "grad_norm": 0.878147091020303, + "learning_rate": 6.030122184577897e-06, + "loss": 0.4648, + "step": 14713 + }, + { + "epoch": 0.4509623636140738, + "grad_norm": 1.6974591325072597, + "learning_rate": 6.029636509247874e-06, + "loss": 0.6808, + "step": 14714 + }, + { + "epoch": 0.45099301213681503, + "grad_norm": 1.5729938831211674, + "learning_rate": 6.029150823772576e-06, + "loss": 0.7331, + "step": 14715 + }, + { + "epoch": 0.45102366065955624, + "grad_norm": 1.787636795200323, + "learning_rate": 6.028665128156794e-06, + "loss": 0.6659, + "step": 14716 + }, + { + "epoch": 0.4510543091822974, + "grad_norm": 1.6844003026991323, + "learning_rate": 6.0281794224053115e-06, + "loss": 0.5946, + "step": 14717 + }, + { + "epoch": 0.4510849577050386, + "grad_norm": 
1.573106514147505, + "learning_rate": 6.027693706522914e-06, + "loss": 0.6685, + "step": 14718 + }, + { + "epoch": 0.4511156062277798, + "grad_norm": 1.7165724172224779, + "learning_rate": 6.0272079805143855e-06, + "loss": 0.6669, + "step": 14719 + }, + { + "epoch": 0.451146254750521, + "grad_norm": 1.899349645165797, + "learning_rate": 6.026722244384515e-06, + "loss": 0.7425, + "step": 14720 + }, + { + "epoch": 0.4511769032732622, + "grad_norm": 1.5736112608381267, + "learning_rate": 6.0262364981380884e-06, + "loss": 0.6688, + "step": 14721 + }, + { + "epoch": 0.4512075517960034, + "grad_norm": 1.7846076567842624, + "learning_rate": 6.02575074177989e-06, + "loss": 0.6561, + "step": 14722 + }, + { + "epoch": 0.4512382003187446, + "grad_norm": 1.5457005305477032, + "learning_rate": 6.025264975314708e-06, + "loss": 0.543, + "step": 14723 + }, + { + "epoch": 0.4512688488414858, + "grad_norm": 1.8172277322198316, + "learning_rate": 6.024779198747327e-06, + "loss": 0.6497, + "step": 14724 + }, + { + "epoch": 0.45129949736422703, + "grad_norm": 1.6922992097254501, + "learning_rate": 6.024293412082534e-06, + "loss": 0.6769, + "step": 14725 + }, + { + "epoch": 0.45133014588696824, + "grad_norm": 1.8579798920545423, + "learning_rate": 6.023807615325117e-06, + "loss": 0.5996, + "step": 14726 + }, + { + "epoch": 0.45136079440970944, + "grad_norm": 1.6307077380300825, + "learning_rate": 6.023321808479862e-06, + "loss": 0.6263, + "step": 14727 + }, + { + "epoch": 0.45139144293245065, + "grad_norm": 1.9578867799358077, + "learning_rate": 6.022835991551555e-06, + "loss": 0.6299, + "step": 14728 + }, + { + "epoch": 0.45142209145519185, + "grad_norm": 0.8523557243756739, + "learning_rate": 6.022350164544982e-06, + "loss": 0.4254, + "step": 14729 + }, + { + "epoch": 0.45145273997793306, + "grad_norm": 1.672102728960697, + "learning_rate": 6.021864327464933e-06, + "loss": 0.7503, + "step": 14730 + }, + { + "epoch": 0.45148338850067427, + "grad_norm": 2.015733861444247, + "learning_rate": 6.021378480316193e-06, + "loss": 0.7268, + "step": 14731 + }, + { + "epoch": 0.45151403702341547, + "grad_norm": 1.700460510448195, + "learning_rate": 6.020892623103548e-06, + "loss": 0.654, + "step": 14732 + }, + { + "epoch": 0.4515446855461567, + "grad_norm": 1.7738192452002095, + "learning_rate": 6.020406755831788e-06, + "loss": 0.6917, + "step": 14733 + }, + { + "epoch": 0.4515753340688979, + "grad_norm": 1.5983629225296867, + "learning_rate": 6.0199208785056985e-06, + "loss": 0.6119, + "step": 14734 + }, + { + "epoch": 0.4516059825916391, + "grad_norm": 1.8117091120699955, + "learning_rate": 6.019434991130069e-06, + "loss": 0.6324, + "step": 14735 + }, + { + "epoch": 0.4516366311143803, + "grad_norm": 1.8076741829924112, + "learning_rate": 6.018949093709684e-06, + "loss": 0.6874, + "step": 14736 + }, + { + "epoch": 0.4516672796371215, + "grad_norm": 1.574837109151993, + "learning_rate": 6.018463186249333e-06, + "loss": 0.5856, + "step": 14737 + }, + { + "epoch": 0.4516979281598627, + "grad_norm": 1.6702382095649126, + "learning_rate": 6.017977268753805e-06, + "loss": 0.7226, + "step": 14738 + }, + { + "epoch": 0.4517285766826039, + "grad_norm": 1.7223155485573454, + "learning_rate": 6.017491341227884e-06, + "loss": 0.5785, + "step": 14739 + }, + { + "epoch": 0.4517592252053451, + "grad_norm": 1.633533783220313, + "learning_rate": 6.017005403676365e-06, + "loss": 0.6422, + "step": 14740 + }, + { + "epoch": 0.4517898737280863, + "grad_norm": 1.8081107992729557, + "learning_rate": 6.016519456104028e-06, + "loss": 0.6716, + 
"step": 14741 + }, + { + "epoch": 0.4518205222508275, + "grad_norm": 1.6519854484306462, + "learning_rate": 6.016033498515665e-06, + "loss": 0.6359, + "step": 14742 + }, + { + "epoch": 0.45185117077356873, + "grad_norm": 1.786285796668657, + "learning_rate": 6.015547530916064e-06, + "loss": 0.5893, + "step": 14743 + }, + { + "epoch": 0.45188181929630994, + "grad_norm": 1.6815226281838245, + "learning_rate": 6.015061553310016e-06, + "loss": 0.6504, + "step": 14744 + }, + { + "epoch": 0.45191246781905114, + "grad_norm": 1.943132966646875, + "learning_rate": 6.014575565702303e-06, + "loss": 0.6853, + "step": 14745 + }, + { + "epoch": 0.45194311634179235, + "grad_norm": 0.8565181159856131, + "learning_rate": 6.01408956809772e-06, + "loss": 0.459, + "step": 14746 + }, + { + "epoch": 0.45197376486453356, + "grad_norm": 1.495397996820354, + "learning_rate": 6.01360356050105e-06, + "loss": 0.6968, + "step": 14747 + }, + { + "epoch": 0.4520044133872747, + "grad_norm": 1.7153484254544373, + "learning_rate": 6.013117542917087e-06, + "loss": 0.6946, + "step": 14748 + }, + { + "epoch": 0.4520350619100159, + "grad_norm": 1.5837672664634321, + "learning_rate": 6.012631515350619e-06, + "loss": 0.5605, + "step": 14749 + }, + { + "epoch": 0.4520657104327571, + "grad_norm": 1.6131697691500413, + "learning_rate": 6.012145477806431e-06, + "loss": 0.6058, + "step": 14750 + }, + { + "epoch": 0.4520963589554983, + "grad_norm": 1.8851598319348881, + "learning_rate": 6.011659430289316e-06, + "loss": 0.7085, + "step": 14751 + }, + { + "epoch": 0.45212700747823953, + "grad_norm": 0.8171723975694849, + "learning_rate": 6.011173372804061e-06, + "loss": 0.4685, + "step": 14752 + }, + { + "epoch": 0.45215765600098073, + "grad_norm": 1.796727615773298, + "learning_rate": 6.010687305355457e-06, + "loss": 0.6827, + "step": 14753 + }, + { + "epoch": 0.45218830452372194, + "grad_norm": 1.6884028507588666, + "learning_rate": 6.0102012279482915e-06, + "loss": 0.6371, + "step": 14754 + }, + { + "epoch": 0.45221895304646315, + "grad_norm": 1.8703985325265786, + "learning_rate": 6.009715140587357e-06, + "loss": 0.72, + "step": 14755 + }, + { + "epoch": 0.45224960156920435, + "grad_norm": 1.6967248826567578, + "learning_rate": 6.0092290432774384e-06, + "loss": 0.6695, + "step": 14756 + }, + { + "epoch": 0.45228025009194556, + "grad_norm": 1.709932725948683, + "learning_rate": 6.008742936023328e-06, + "loss": 0.6426, + "step": 14757 + }, + { + "epoch": 0.45231089861468676, + "grad_norm": 1.7172393565737571, + "learning_rate": 6.008256818829815e-06, + "loss": 0.6425, + "step": 14758 + }, + { + "epoch": 0.45234154713742797, + "grad_norm": 1.8287188599584852, + "learning_rate": 6.007770691701692e-06, + "loss": 0.7332, + "step": 14759 + }, + { + "epoch": 0.4523721956601692, + "grad_norm": 0.7923121894554046, + "learning_rate": 6.007284554643744e-06, + "loss": 0.4327, + "step": 14760 + }, + { + "epoch": 0.4524028441829104, + "grad_norm": 1.6990412462278306, + "learning_rate": 6.006798407660764e-06, + "loss": 0.6541, + "step": 14761 + }, + { + "epoch": 0.4524334927056516, + "grad_norm": 2.2113082868840257, + "learning_rate": 6.006312250757542e-06, + "loss": 0.6978, + "step": 14762 + }, + { + "epoch": 0.4524641412283928, + "grad_norm": 1.594558232219871, + "learning_rate": 6.005826083938868e-06, + "loss": 0.69, + "step": 14763 + }, + { + "epoch": 0.452494789751134, + "grad_norm": 1.6050925896941703, + "learning_rate": 6.005339907209533e-06, + "loss": 0.618, + "step": 14764 + }, + { + "epoch": 0.4525254382738752, + "grad_norm": 
1.8005540649416427, + "learning_rate": 6.004853720574325e-06, + "loss": 0.7259, + "step": 14765 + }, + { + "epoch": 0.4525560867966164, + "grad_norm": 1.6972897319843214, + "learning_rate": 6.0043675240380385e-06, + "loss": 0.6309, + "step": 14766 + }, + { + "epoch": 0.4525867353193576, + "grad_norm": 1.4815370358761464, + "learning_rate": 6.00388131760546e-06, + "loss": 0.5745, + "step": 14767 + }, + { + "epoch": 0.4526173838420988, + "grad_norm": 2.031608368936869, + "learning_rate": 6.0033951012813825e-06, + "loss": 0.6745, + "step": 14768 + }, + { + "epoch": 0.45264803236484, + "grad_norm": 1.6580519140763146, + "learning_rate": 6.002908875070597e-06, + "loss": 0.6083, + "step": 14769 + }, + { + "epoch": 0.45267868088758123, + "grad_norm": 1.8619549304078344, + "learning_rate": 6.002422638977892e-06, + "loss": 0.6274, + "step": 14770 + }, + { + "epoch": 0.45270932941032244, + "grad_norm": 0.8261469759002075, + "learning_rate": 6.001936393008062e-06, + "loss": 0.4398, + "step": 14771 + }, + { + "epoch": 0.45273997793306364, + "grad_norm": 1.662093388580578, + "learning_rate": 6.001450137165896e-06, + "loss": 0.6686, + "step": 14772 + }, + { + "epoch": 0.45277062645580485, + "grad_norm": 1.7022628006734546, + "learning_rate": 6.0009638714561846e-06, + "loss": 0.6305, + "step": 14773 + }, + { + "epoch": 0.45280127497854605, + "grad_norm": 1.7007739953420091, + "learning_rate": 6.000477595883721e-06, + "loss": 0.6573, + "step": 14774 + }, + { + "epoch": 0.45283192350128726, + "grad_norm": 1.6867647826980343, + "learning_rate": 5.999991310453296e-06, + "loss": 0.675, + "step": 14775 + }, + { + "epoch": 0.45286257202402846, + "grad_norm": 1.6907938598701329, + "learning_rate": 5.999505015169701e-06, + "loss": 0.6325, + "step": 14776 + }, + { + "epoch": 0.45289322054676967, + "grad_norm": 1.6956743742842624, + "learning_rate": 5.999018710037725e-06, + "loss": 0.6617, + "step": 14777 + }, + { + "epoch": 0.4529238690695109, + "grad_norm": 1.7443492164003078, + "learning_rate": 5.998532395062165e-06, + "loss": 0.6423, + "step": 14778 + }, + { + "epoch": 0.452954517592252, + "grad_norm": 1.8652070510859209, + "learning_rate": 5.9980460702478084e-06, + "loss": 0.7353, + "step": 14779 + }, + { + "epoch": 0.45298516611499323, + "grad_norm": 1.8494749546597617, + "learning_rate": 5.997559735599448e-06, + "loss": 0.6814, + "step": 14780 + }, + { + "epoch": 0.45301581463773444, + "grad_norm": 1.6321898852736774, + "learning_rate": 5.997073391121876e-06, + "loss": 0.643, + "step": 14781 + }, + { + "epoch": 0.45304646316047564, + "grad_norm": 1.6723099755599216, + "learning_rate": 5.996587036819887e-06, + "loss": 0.6386, + "step": 14782 + }, + { + "epoch": 0.45307711168321685, + "grad_norm": 1.728814719131714, + "learning_rate": 5.996100672698269e-06, + "loss": 0.6904, + "step": 14783 + }, + { + "epoch": 0.45310776020595805, + "grad_norm": 1.8585757942857684, + "learning_rate": 5.995614298761816e-06, + "loss": 0.6884, + "step": 14784 + }, + { + "epoch": 0.45313840872869926, + "grad_norm": 1.536974510134653, + "learning_rate": 5.995127915015322e-06, + "loss": 0.6583, + "step": 14785 + }, + { + "epoch": 0.45316905725144047, + "grad_norm": 2.766802311543049, + "learning_rate": 5.994641521463578e-06, + "loss": 0.6653, + "step": 14786 + }, + { + "epoch": 0.45319970577418167, + "grad_norm": 1.5649428084548564, + "learning_rate": 5.994155118111376e-06, + "loss": 0.6421, + "step": 14787 + }, + { + "epoch": 0.4532303542969229, + "grad_norm": 1.6280009813079919, + "learning_rate": 5.9936687049635075e-06, + "loss": 
0.7129, + "step": 14788 + }, + { + "epoch": 0.4532610028196641, + "grad_norm": 0.8504725995965158, + "learning_rate": 5.99318228202477e-06, + "loss": 0.4333, + "step": 14789 + }, + { + "epoch": 0.4532916513424053, + "grad_norm": 0.797346619242297, + "learning_rate": 5.992695849299952e-06, + "loss": 0.4419, + "step": 14790 + }, + { + "epoch": 0.4533222998651465, + "grad_norm": 2.0235851266573692, + "learning_rate": 5.992209406793847e-06, + "loss": 0.7196, + "step": 14791 + }, + { + "epoch": 0.4533529483878877, + "grad_norm": 1.839844992873075, + "learning_rate": 5.99172295451125e-06, + "loss": 0.6604, + "step": 14792 + }, + { + "epoch": 0.4533835969106289, + "grad_norm": 1.659288766819305, + "learning_rate": 5.991236492456952e-06, + "loss": 0.6146, + "step": 14793 + }, + { + "epoch": 0.4534142454333701, + "grad_norm": 1.5555327936247487, + "learning_rate": 5.99075002063575e-06, + "loss": 0.6644, + "step": 14794 + }, + { + "epoch": 0.4534448939561113, + "grad_norm": 0.8574271039270966, + "learning_rate": 5.990263539052431e-06, + "loss": 0.4491, + "step": 14795 + }, + { + "epoch": 0.4534755424788525, + "grad_norm": 1.8072374616079694, + "learning_rate": 5.989777047711793e-06, + "loss": 0.7137, + "step": 14796 + }, + { + "epoch": 0.45350619100159373, + "grad_norm": 1.7446669391355882, + "learning_rate": 5.9892905466186294e-06, + "loss": 0.5562, + "step": 14797 + }, + { + "epoch": 0.45353683952433493, + "grad_norm": 1.5146233684359491, + "learning_rate": 5.9888040357777334e-06, + "loss": 0.6752, + "step": 14798 + }, + { + "epoch": 0.45356748804707614, + "grad_norm": 1.854820165961491, + "learning_rate": 5.988317515193897e-06, + "loss": 0.6243, + "step": 14799 + }, + { + "epoch": 0.45359813656981735, + "grad_norm": 1.7733045631313311, + "learning_rate": 5.987830984871915e-06, + "loss": 0.6976, + "step": 14800 + }, + { + "epoch": 0.45362878509255855, + "grad_norm": 1.794232030025211, + "learning_rate": 5.987344444816582e-06, + "loss": 0.7241, + "step": 14801 + }, + { + "epoch": 0.45365943361529976, + "grad_norm": 1.8439706636815814, + "learning_rate": 5.9868578950326926e-06, + "loss": 0.7336, + "step": 14802 + }, + { + "epoch": 0.45369008213804096, + "grad_norm": 1.7406234848510713, + "learning_rate": 5.986371335525038e-06, + "loss": 0.6269, + "step": 14803 + }, + { + "epoch": 0.45372073066078217, + "grad_norm": 1.7180713698166947, + "learning_rate": 5.985884766298415e-06, + "loss": 0.6541, + "step": 14804 + }, + { + "epoch": 0.4537513791835234, + "grad_norm": 1.7967029749752679, + "learning_rate": 5.985398187357618e-06, + "loss": 0.6848, + "step": 14805 + }, + { + "epoch": 0.4537820277062646, + "grad_norm": 1.6160389131499828, + "learning_rate": 5.984911598707439e-06, + "loss": 0.5756, + "step": 14806 + }, + { + "epoch": 0.4538126762290058, + "grad_norm": 0.8731088564729937, + "learning_rate": 5.9844250003526764e-06, + "loss": 0.456, + "step": 14807 + }, + { + "epoch": 0.453843324751747, + "grad_norm": 0.8218302496420524, + "learning_rate": 5.98393839229812e-06, + "loss": 0.4596, + "step": 14808 + }, + { + "epoch": 0.4538739732744882, + "grad_norm": 1.6431183961142457, + "learning_rate": 5.983451774548568e-06, + "loss": 0.6424, + "step": 14809 + }, + { + "epoch": 0.45390462179722935, + "grad_norm": 1.807967399830412, + "learning_rate": 5.982965147108813e-06, + "loss": 0.5627, + "step": 14810 + }, + { + "epoch": 0.45393527031997055, + "grad_norm": 0.7575962372675313, + "learning_rate": 5.982478509983652e-06, + "loss": 0.4433, + "step": 14811 + }, + { + "epoch": 0.45396591884271176, + "grad_norm": 
1.5415732275888374, + "learning_rate": 5.981991863177878e-06, + "loss": 0.5878, + "step": 14812 + }, + { + "epoch": 0.45399656736545296, + "grad_norm": 1.6486601471396252, + "learning_rate": 5.98150520669629e-06, + "loss": 0.6425, + "step": 14813 + }, + { + "epoch": 0.45402721588819417, + "grad_norm": 1.6176017098421032, + "learning_rate": 5.981018540543676e-06, + "loss": 0.6849, + "step": 14814 + }, + { + "epoch": 0.4540578644109354, + "grad_norm": 1.7937812893237537, + "learning_rate": 5.9805318647248376e-06, + "loss": 0.6374, + "step": 14815 + }, + { + "epoch": 0.4540885129336766, + "grad_norm": 1.9358105204095704, + "learning_rate": 5.9800451792445655e-06, + "loss": 0.6479, + "step": 14816 + }, + { + "epoch": 0.4541191614564178, + "grad_norm": 2.0718274103811, + "learning_rate": 5.9795584841076605e-06, + "loss": 0.607, + "step": 14817 + }, + { + "epoch": 0.454149809979159, + "grad_norm": 2.021017858314288, + "learning_rate": 5.979071779318913e-06, + "loss": 0.6968, + "step": 14818 + }, + { + "epoch": 0.4541804585019002, + "grad_norm": 1.6465841756795732, + "learning_rate": 5.9785850648831215e-06, + "loss": 0.6518, + "step": 14819 + }, + { + "epoch": 0.4542111070246414, + "grad_norm": 1.6584220999097967, + "learning_rate": 5.978098340805081e-06, + "loss": 0.6546, + "step": 14820 + }, + { + "epoch": 0.4542417555473826, + "grad_norm": 1.7938716581801664, + "learning_rate": 5.977611607089588e-06, + "loss": 0.6953, + "step": 14821 + }, + { + "epoch": 0.4542724040701238, + "grad_norm": 1.77336279860094, + "learning_rate": 5.977124863741437e-06, + "loss": 0.6989, + "step": 14822 + }, + { + "epoch": 0.454303052592865, + "grad_norm": 1.8217109067454231, + "learning_rate": 5.976638110765424e-06, + "loss": 0.7162, + "step": 14823 + }, + { + "epoch": 0.4543337011156062, + "grad_norm": 1.582733483600518, + "learning_rate": 5.976151348166347e-06, + "loss": 0.6177, + "step": 14824 + }, + { + "epoch": 0.45436434963834743, + "grad_norm": 1.7094112510572939, + "learning_rate": 5.975664575949001e-06, + "loss": 0.7055, + "step": 14825 + }, + { + "epoch": 0.45439499816108864, + "grad_norm": 1.647538927960111, + "learning_rate": 5.975177794118182e-06, + "loss": 0.6646, + "step": 14826 + }, + { + "epoch": 0.45442564668382984, + "grad_norm": 1.479633166062767, + "learning_rate": 5.974691002678685e-06, + "loss": 0.6203, + "step": 14827 + }, + { + "epoch": 0.45445629520657105, + "grad_norm": 1.8651157692836242, + "learning_rate": 5.974204201635311e-06, + "loss": 0.5797, + "step": 14828 + }, + { + "epoch": 0.45448694372931225, + "grad_norm": 1.662024179707531, + "learning_rate": 5.973717390992853e-06, + "loss": 0.7158, + "step": 14829 + }, + { + "epoch": 0.45451759225205346, + "grad_norm": 1.750547125452739, + "learning_rate": 5.973230570756108e-06, + "loss": 0.659, + "step": 14830 + }, + { + "epoch": 0.45454824077479467, + "grad_norm": 0.9515163820409305, + "learning_rate": 5.972743740929871e-06, + "loss": 0.4279, + "step": 14831 + }, + { + "epoch": 0.45457888929753587, + "grad_norm": 1.7544172157050681, + "learning_rate": 5.972256901518944e-06, + "loss": 0.6399, + "step": 14832 + }, + { + "epoch": 0.4546095378202771, + "grad_norm": 1.715341406060487, + "learning_rate": 5.9717700525281195e-06, + "loss": 0.7069, + "step": 14833 + }, + { + "epoch": 0.4546401863430183, + "grad_norm": 1.6311891143041672, + "learning_rate": 5.971283193962197e-06, + "loss": 0.5874, + "step": 14834 + }, + { + "epoch": 0.4546708348657595, + "grad_norm": 1.7681528327645275, + "learning_rate": 5.970796325825971e-06, + "loss": 0.7052, + 
"step": 14835 + }, + { + "epoch": 0.4547014833885007, + "grad_norm": 1.6530926128037908, + "learning_rate": 5.970309448124243e-06, + "loss": 0.618, + "step": 14836 + }, + { + "epoch": 0.4547321319112419, + "grad_norm": 1.636795605953908, + "learning_rate": 5.9698225608618066e-06, + "loss": 0.6176, + "step": 14837 + }, + { + "epoch": 0.4547627804339831, + "grad_norm": 1.769408673958416, + "learning_rate": 5.969335664043458e-06, + "loss": 0.6884, + "step": 14838 + }, + { + "epoch": 0.4547934289567243, + "grad_norm": 1.8222954057424277, + "learning_rate": 5.968848757674e-06, + "loss": 0.6841, + "step": 14839 + }, + { + "epoch": 0.4548240774794655, + "grad_norm": 1.5425391076502057, + "learning_rate": 5.968361841758228e-06, + "loss": 0.6066, + "step": 14840 + }, + { + "epoch": 0.45485472600220667, + "grad_norm": 1.8492432750746537, + "learning_rate": 5.967874916300937e-06, + "loss": 0.7712, + "step": 14841 + }, + { + "epoch": 0.45488537452494787, + "grad_norm": 1.6066391463145353, + "learning_rate": 5.967387981306927e-06, + "loss": 0.6688, + "step": 14842 + }, + { + "epoch": 0.4549160230476891, + "grad_norm": 0.8662757221669604, + "learning_rate": 5.966901036780997e-06, + "loss": 0.4645, + "step": 14843 + }, + { + "epoch": 0.4549466715704303, + "grad_norm": 1.8782184992688573, + "learning_rate": 5.966414082727943e-06, + "loss": 0.6966, + "step": 14844 + }, + { + "epoch": 0.4549773200931715, + "grad_norm": 0.8368241663435584, + "learning_rate": 5.965927119152561e-06, + "loss": 0.4405, + "step": 14845 + }, + { + "epoch": 0.4550079686159127, + "grad_norm": 1.8834768231323882, + "learning_rate": 5.965440146059656e-06, + "loss": 0.7306, + "step": 14846 + }, + { + "epoch": 0.4550386171386539, + "grad_norm": 1.6837856254636079, + "learning_rate": 5.96495316345402e-06, + "loss": 0.695, + "step": 14847 + }, + { + "epoch": 0.4550692656613951, + "grad_norm": 1.6970845283576743, + "learning_rate": 5.964466171340455e-06, + "loss": 0.6841, + "step": 14848 + }, + { + "epoch": 0.4550999141841363, + "grad_norm": 1.6382508574480568, + "learning_rate": 5.963979169723757e-06, + "loss": 0.5971, + "step": 14849 + }, + { + "epoch": 0.4551305627068775, + "grad_norm": 1.6688940185698702, + "learning_rate": 5.963492158608726e-06, + "loss": 0.6467, + "step": 14850 + }, + { + "epoch": 0.4551612112296187, + "grad_norm": 1.7774533766691154, + "learning_rate": 5.963005138000159e-06, + "loss": 0.6614, + "step": 14851 + }, + { + "epoch": 0.45519185975235993, + "grad_norm": 1.8122608603856227, + "learning_rate": 5.962518107902859e-06, + "loss": 0.6542, + "step": 14852 + }, + { + "epoch": 0.45522250827510113, + "grad_norm": 1.7221910258248483, + "learning_rate": 5.962031068321619e-06, + "loss": 0.8053, + "step": 14853 + }, + { + "epoch": 0.45525315679784234, + "grad_norm": 1.5768776491779186, + "learning_rate": 5.961544019261242e-06, + "loss": 0.5658, + "step": 14854 + }, + { + "epoch": 0.45528380532058355, + "grad_norm": 1.63878676737052, + "learning_rate": 5.961056960726525e-06, + "loss": 0.6501, + "step": 14855 + }, + { + "epoch": 0.45531445384332475, + "grad_norm": 1.734567392489296, + "learning_rate": 5.96056989272227e-06, + "loss": 0.6399, + "step": 14856 + }, + { + "epoch": 0.45534510236606596, + "grad_norm": 1.835662610169523, + "learning_rate": 5.96008281525327e-06, + "loss": 0.672, + "step": 14857 + }, + { + "epoch": 0.45537575088880716, + "grad_norm": 1.6851245392030512, + "learning_rate": 5.9595957283243326e-06, + "loss": 0.6831, + "step": 14858 + }, + { + "epoch": 0.45540639941154837, + "grad_norm": 1.578626304834, 
+ "learning_rate": 5.959108631940251e-06, + "loss": 0.6539, + "step": 14859 + }, + { + "epoch": 0.4554370479342896, + "grad_norm": 1.6653673063413856, + "learning_rate": 5.958621526105825e-06, + "loss": 0.6691, + "step": 14860 + }, + { + "epoch": 0.4554676964570308, + "grad_norm": 0.8435489776262055, + "learning_rate": 5.958134410825859e-06, + "loss": 0.4575, + "step": 14861 + }, + { + "epoch": 0.455498344979772, + "grad_norm": 2.2404209641923507, + "learning_rate": 5.9576472861051474e-06, + "loss": 0.6654, + "step": 14862 + }, + { + "epoch": 0.4555289935025132, + "grad_norm": 1.5446530460626566, + "learning_rate": 5.957160151948493e-06, + "loss": 0.6318, + "step": 14863 + }, + { + "epoch": 0.4555596420252544, + "grad_norm": 1.8637330071273936, + "learning_rate": 5.956673008360695e-06, + "loss": 0.715, + "step": 14864 + }, + { + "epoch": 0.4555902905479956, + "grad_norm": 1.61632469240402, + "learning_rate": 5.956185855346552e-06, + "loss": 0.6124, + "step": 14865 + }, + { + "epoch": 0.4556209390707368, + "grad_norm": 1.5683360589715822, + "learning_rate": 5.955698692910865e-06, + "loss": 0.6017, + "step": 14866 + }, + { + "epoch": 0.455651587593478, + "grad_norm": 1.8496266174998368, + "learning_rate": 5.955211521058437e-06, + "loss": 0.6663, + "step": 14867 + }, + { + "epoch": 0.4556822361162192, + "grad_norm": 1.4628314901828325, + "learning_rate": 5.954724339794062e-06, + "loss": 0.5059, + "step": 14868 + }, + { + "epoch": 0.4557128846389604, + "grad_norm": 0.8031159970705307, + "learning_rate": 5.954237149122546e-06, + "loss": 0.4397, + "step": 14869 + }, + { + "epoch": 0.45574353316170163, + "grad_norm": 1.7275315694993607, + "learning_rate": 5.953749949048686e-06, + "loss": 0.6747, + "step": 14870 + }, + { + "epoch": 0.45577418168444284, + "grad_norm": 1.7062352498505495, + "learning_rate": 5.953262739577283e-06, + "loss": 0.6459, + "step": 14871 + }, + { + "epoch": 0.455804830207184, + "grad_norm": 0.7479281688937528, + "learning_rate": 5.952775520713141e-06, + "loss": 0.4048, + "step": 14872 + }, + { + "epoch": 0.4558354787299252, + "grad_norm": 1.927026474348482, + "learning_rate": 5.952288292461057e-06, + "loss": 0.8031, + "step": 14873 + }, + { + "epoch": 0.4558661272526664, + "grad_norm": 1.6712058172600555, + "learning_rate": 5.951801054825831e-06, + "loss": 0.5807, + "step": 14874 + }, + { + "epoch": 0.4558967757754076, + "grad_norm": 1.9402432008940635, + "learning_rate": 5.951313807812268e-06, + "loss": 0.6899, + "step": 14875 + }, + { + "epoch": 0.4559274242981488, + "grad_norm": 1.8379524694699172, + "learning_rate": 5.950826551425165e-06, + "loss": 0.6753, + "step": 14876 + }, + { + "epoch": 0.45595807282089, + "grad_norm": 1.5683890744465918, + "learning_rate": 5.950339285669324e-06, + "loss": 0.6863, + "step": 14877 + }, + { + "epoch": 0.4559887213436312, + "grad_norm": 1.7384115341386714, + "learning_rate": 5.94985201054955e-06, + "loss": 0.7405, + "step": 14878 + }, + { + "epoch": 0.4560193698663724, + "grad_norm": 1.9271041619716174, + "learning_rate": 5.949364726070639e-06, + "loss": 0.6469, + "step": 14879 + }, + { + "epoch": 0.45605001838911363, + "grad_norm": 1.6789075666001374, + "learning_rate": 5.948877432237396e-06, + "loss": 0.7024, + "step": 14880 + }, + { + "epoch": 0.45608066691185484, + "grad_norm": 1.8253112483414469, + "learning_rate": 5.948390129054617e-06, + "loss": 0.7246, + "step": 14881 + }, + { + "epoch": 0.45611131543459604, + "grad_norm": 1.5419325971278315, + "learning_rate": 5.947902816527112e-06, + "loss": 0.6004, + "step": 14882 + }, + { 
+ "epoch": 0.45614196395733725, + "grad_norm": 1.539966519950394, + "learning_rate": 5.947415494659675e-06, + "loss": 0.6236, + "step": 14883 + }, + { + "epoch": 0.45617261248007845, + "grad_norm": 1.7386431257953732, + "learning_rate": 5.946928163457113e-06, + "loss": 0.596, + "step": 14884 + }, + { + "epoch": 0.45620326100281966, + "grad_norm": 1.6839153341054662, + "learning_rate": 5.9464408229242235e-06, + "loss": 0.6753, + "step": 14885 + }, + { + "epoch": 0.45623390952556087, + "grad_norm": 1.7098304141394685, + "learning_rate": 5.945953473065811e-06, + "loss": 0.6872, + "step": 14886 + }, + { + "epoch": 0.45626455804830207, + "grad_norm": 1.8983580846491952, + "learning_rate": 5.945466113886678e-06, + "loss": 0.6525, + "step": 14887 + }, + { + "epoch": 0.4562952065710433, + "grad_norm": 1.5049834347932898, + "learning_rate": 5.944978745391623e-06, + "loss": 0.6383, + "step": 14888 + }, + { + "epoch": 0.4563258550937845, + "grad_norm": 1.6872106629692, + "learning_rate": 5.944491367585453e-06, + "loss": 0.6454, + "step": 14889 + }, + { + "epoch": 0.4563565036165257, + "grad_norm": 1.7865053357744332, + "learning_rate": 5.944003980472968e-06, + "loss": 0.6465, + "step": 14890 + }, + { + "epoch": 0.4563871521392669, + "grad_norm": 2.34854509131332, + "learning_rate": 5.9435165840589695e-06, + "loss": 0.7182, + "step": 14891 + }, + { + "epoch": 0.4564178006620081, + "grad_norm": 1.5701635146729542, + "learning_rate": 5.9430291783482596e-06, + "loss": 0.5657, + "step": 14892 + }, + { + "epoch": 0.4564484491847493, + "grad_norm": 0.8579373492365686, + "learning_rate": 5.942541763345643e-06, + "loss": 0.4572, + "step": 14893 + }, + { + "epoch": 0.4564790977074905, + "grad_norm": 1.570736642253433, + "learning_rate": 5.942054339055922e-06, + "loss": 0.6112, + "step": 14894 + }, + { + "epoch": 0.4565097462302317, + "grad_norm": 1.6337160537794848, + "learning_rate": 5.941566905483898e-06, + "loss": 0.592, + "step": 14895 + }, + { + "epoch": 0.4565403947529729, + "grad_norm": 1.7877783112400074, + "learning_rate": 5.941079462634373e-06, + "loss": 0.7399, + "step": 14896 + }, + { + "epoch": 0.45657104327571413, + "grad_norm": 0.7973738017530309, + "learning_rate": 5.940592010512152e-06, + "loss": 0.4453, + "step": 14897 + }, + { + "epoch": 0.45660169179845533, + "grad_norm": 2.001572490002811, + "learning_rate": 5.940104549122039e-06, + "loss": 0.6066, + "step": 14898 + }, + { + "epoch": 0.45663234032119654, + "grad_norm": 1.5459687443798527, + "learning_rate": 5.939617078468834e-06, + "loss": 0.5836, + "step": 14899 + }, + { + "epoch": 0.45666298884393774, + "grad_norm": 1.538561017094995, + "learning_rate": 5.9391295985573405e-06, + "loss": 0.6651, + "step": 14900 + }, + { + "epoch": 0.45669363736667895, + "grad_norm": 1.5155912438849846, + "learning_rate": 5.938642109392364e-06, + "loss": 0.606, + "step": 14901 + }, + { + "epoch": 0.45672428588942016, + "grad_norm": 1.6234513945646798, + "learning_rate": 5.9381546109787055e-06, + "loss": 0.6938, + "step": 14902 + }, + { + "epoch": 0.4567549344121613, + "grad_norm": 1.6357631394904937, + "learning_rate": 5.937667103321171e-06, + "loss": 0.6909, + "step": 14903 + }, + { + "epoch": 0.4567855829349025, + "grad_norm": 1.8257882933744625, + "learning_rate": 5.937179586424562e-06, + "loss": 0.6412, + "step": 14904 + }, + { + "epoch": 0.4568162314576437, + "grad_norm": 1.8185167893405978, + "learning_rate": 5.936692060293681e-06, + "loss": 0.5487, + "step": 14905 + }, + { + "epoch": 0.4568468799803849, + "grad_norm": 1.7191326682377057, + 
"learning_rate": 5.936204524933338e-06, + "loss": 0.725, + "step": 14906 + }, + { + "epoch": 0.45687752850312613, + "grad_norm": 1.5866025284254988, + "learning_rate": 5.935716980348329e-06, + "loss": 0.5822, + "step": 14907 + }, + { + "epoch": 0.45690817702586733, + "grad_norm": 1.5980269536654603, + "learning_rate": 5.935229426543461e-06, + "loss": 0.6132, + "step": 14908 + }, + { + "epoch": 0.45693882554860854, + "grad_norm": 1.6309821481377889, + "learning_rate": 5.93474186352354e-06, + "loss": 0.6631, + "step": 14909 + }, + { + "epoch": 0.45696947407134975, + "grad_norm": 1.7936572145847973, + "learning_rate": 5.934254291293367e-06, + "loss": 0.6293, + "step": 14910 + }, + { + "epoch": 0.45700012259409095, + "grad_norm": 1.9616684969531928, + "learning_rate": 5.933766709857746e-06, + "loss": 0.8124, + "step": 14911 + }, + { + "epoch": 0.45703077111683216, + "grad_norm": 1.7847535900698859, + "learning_rate": 5.933279119221485e-06, + "loss": 0.6603, + "step": 14912 + }, + { + "epoch": 0.45706141963957336, + "grad_norm": 0.831325719668465, + "learning_rate": 5.932791519389386e-06, + "loss": 0.4294, + "step": 14913 + }, + { + "epoch": 0.45709206816231457, + "grad_norm": 1.7092460033491583, + "learning_rate": 5.932303910366252e-06, + "loss": 0.7765, + "step": 14914 + }, + { + "epoch": 0.4571227166850558, + "grad_norm": 1.6997821396588735, + "learning_rate": 5.93181629215689e-06, + "loss": 0.6653, + "step": 14915 + }, + { + "epoch": 0.457153365207797, + "grad_norm": 1.5771602228664319, + "learning_rate": 5.931328664766102e-06, + "loss": 0.6059, + "step": 14916 + }, + { + "epoch": 0.4571840137305382, + "grad_norm": 1.5463720650718353, + "learning_rate": 5.930841028198698e-06, + "loss": 0.5929, + "step": 14917 + }, + { + "epoch": 0.4572146622532794, + "grad_norm": 1.57350744848473, + "learning_rate": 5.930353382459476e-06, + "loss": 0.597, + "step": 14918 + }, + { + "epoch": 0.4572453107760206, + "grad_norm": 0.8079747689517466, + "learning_rate": 5.929865727553246e-06, + "loss": 0.4657, + "step": 14919 + }, + { + "epoch": 0.4572759592987618, + "grad_norm": 1.8770756350485591, + "learning_rate": 5.9293780634848096e-06, + "loss": 0.6808, + "step": 14920 + }, + { + "epoch": 0.457306607821503, + "grad_norm": 1.7492649026824463, + "learning_rate": 5.928890390258975e-06, + "loss": 0.6314, + "step": 14921 + }, + { + "epoch": 0.4573372563442442, + "grad_norm": 1.5569075942243344, + "learning_rate": 5.928402707880544e-06, + "loss": 0.6098, + "step": 14922 + }, + { + "epoch": 0.4573679048669854, + "grad_norm": 1.633836328419628, + "learning_rate": 5.927915016354324e-06, + "loss": 0.6629, + "step": 14923 + }, + { + "epoch": 0.4573985533897266, + "grad_norm": 0.7734593538577855, + "learning_rate": 5.92742731568512e-06, + "loss": 0.4253, + "step": 14924 + }, + { + "epoch": 0.45742920191246783, + "grad_norm": 0.7849528747607031, + "learning_rate": 5.926939605877738e-06, + "loss": 0.4495, + "step": 14925 + }, + { + "epoch": 0.45745985043520904, + "grad_norm": 1.4264694528988537, + "learning_rate": 5.926451886936983e-06, + "loss": 0.6688, + "step": 14926 + }, + { + "epoch": 0.45749049895795024, + "grad_norm": 1.5385567048534445, + "learning_rate": 5.925964158867659e-06, + "loss": 0.5748, + "step": 14927 + }, + { + "epoch": 0.45752114748069145, + "grad_norm": 1.8229958830492927, + "learning_rate": 5.925476421674574e-06, + "loss": 0.7063, + "step": 14928 + }, + { + "epoch": 0.45755179600343265, + "grad_norm": 1.5782002879696764, + "learning_rate": 5.924988675362534e-06, + "loss": 0.6792, + "step": 14929 + }, 
+ { + "epoch": 0.45758244452617386, + "grad_norm": 1.7700538608468785, + "learning_rate": 5.9245009199363435e-06, + "loss": 0.7008, + "step": 14930 + }, + { + "epoch": 0.45761309304891506, + "grad_norm": 0.7914512172547106, + "learning_rate": 5.9240131554008074e-06, + "loss": 0.4249, + "step": 14931 + }, + { + "epoch": 0.45764374157165627, + "grad_norm": 1.8783163909991976, + "learning_rate": 5.923525381760735e-06, + "loss": 0.6842, + "step": 14932 + }, + { + "epoch": 0.4576743900943975, + "grad_norm": 2.0540292232414448, + "learning_rate": 5.92303759902093e-06, + "loss": 0.7098, + "step": 14933 + }, + { + "epoch": 0.4577050386171386, + "grad_norm": 1.600910530057047, + "learning_rate": 5.9225498071861994e-06, + "loss": 0.6023, + "step": 14934 + }, + { + "epoch": 0.45773568713987983, + "grad_norm": 0.8338434210647303, + "learning_rate": 5.922062006261349e-06, + "loss": 0.446, + "step": 14935 + }, + { + "epoch": 0.45776633566262104, + "grad_norm": 1.611549133840614, + "learning_rate": 5.921574196251188e-06, + "loss": 0.6269, + "step": 14936 + }, + { + "epoch": 0.45779698418536224, + "grad_norm": 1.8758669929707992, + "learning_rate": 5.921086377160519e-06, + "loss": 0.6606, + "step": 14937 + }, + { + "epoch": 0.45782763270810345, + "grad_norm": 1.5556701440449554, + "learning_rate": 5.92059854899415e-06, + "loss": 0.5963, + "step": 14938 + }, + { + "epoch": 0.45785828123084465, + "grad_norm": 1.8505412036705273, + "learning_rate": 5.9201107117568865e-06, + "loss": 0.6778, + "step": 14939 + }, + { + "epoch": 0.45788892975358586, + "grad_norm": 1.8529730908548887, + "learning_rate": 5.919622865453539e-06, + "loss": 0.6361, + "step": 14940 + }, + { + "epoch": 0.45791957827632707, + "grad_norm": 1.6478588950496773, + "learning_rate": 5.9191350100889126e-06, + "loss": 0.5188, + "step": 14941 + }, + { + "epoch": 0.45795022679906827, + "grad_norm": 1.8250586315768718, + "learning_rate": 5.918647145667811e-06, + "loss": 0.6378, + "step": 14942 + }, + { + "epoch": 0.4579808753218095, + "grad_norm": 1.886145885951831, + "learning_rate": 5.918159272195046e-06, + "loss": 0.7087, + "step": 14943 + }, + { + "epoch": 0.4580115238445507, + "grad_norm": 1.6605972301666414, + "learning_rate": 5.917671389675424e-06, + "loss": 0.5957, + "step": 14944 + }, + { + "epoch": 0.4580421723672919, + "grad_norm": 1.6429517652225463, + "learning_rate": 5.917183498113749e-06, + "loss": 0.5791, + "step": 14945 + }, + { + "epoch": 0.4580728208900331, + "grad_norm": 1.7850065949727876, + "learning_rate": 5.91669559751483e-06, + "loss": 0.688, + "step": 14946 + }, + { + "epoch": 0.4581034694127743, + "grad_norm": 1.8875915364843059, + "learning_rate": 5.916207687883476e-06, + "loss": 0.6396, + "step": 14947 + }, + { + "epoch": 0.4581341179355155, + "grad_norm": 1.8577991523934996, + "learning_rate": 5.9157197692244925e-06, + "loss": 0.695, + "step": 14948 + }, + { + "epoch": 0.4581647664582567, + "grad_norm": 1.667248689008499, + "learning_rate": 5.915231841542689e-06, + "loss": 0.6424, + "step": 14949 + }, + { + "epoch": 0.4581954149809979, + "grad_norm": 1.8595911448850422, + "learning_rate": 5.91474390484287e-06, + "loss": 0.6045, + "step": 14950 + }, + { + "epoch": 0.4582260635037391, + "grad_norm": 0.8129100246691185, + "learning_rate": 5.914255959129846e-06, + "loss": 0.4313, + "step": 14951 + }, + { + "epoch": 0.45825671202648033, + "grad_norm": 1.8494223589101468, + "learning_rate": 5.913768004408425e-06, + "loss": 0.6925, + "step": 14952 + }, + { + "epoch": 0.45828736054922153, + "grad_norm": 1.8706485863961744, + 
"learning_rate": 5.913280040683414e-06, + "loss": 0.6438, + "step": 14953 + }, + { + "epoch": 0.45831800907196274, + "grad_norm": 1.870832844259727, + "learning_rate": 5.912792067959619e-06, + "loss": 0.6823, + "step": 14954 + }, + { + "epoch": 0.45834865759470395, + "grad_norm": 1.5913784727667035, + "learning_rate": 5.912304086241853e-06, + "loss": 0.6495, + "step": 14955 + }, + { + "epoch": 0.45837930611744515, + "grad_norm": 1.680167489138406, + "learning_rate": 5.91181609553492e-06, + "loss": 0.6454, + "step": 14956 + }, + { + "epoch": 0.45840995464018636, + "grad_norm": 1.7226038786855888, + "learning_rate": 5.911328095843629e-06, + "loss": 0.6746, + "step": 14957 + }, + { + "epoch": 0.45844060316292756, + "grad_norm": 1.5419094770794013, + "learning_rate": 5.910840087172791e-06, + "loss": 0.6877, + "step": 14958 + }, + { + "epoch": 0.45847125168566877, + "grad_norm": 1.8164238962766286, + "learning_rate": 5.910352069527211e-06, + "loss": 0.7897, + "step": 14959 + }, + { + "epoch": 0.45850190020841, + "grad_norm": 0.821843405118851, + "learning_rate": 5.909864042911702e-06, + "loss": 0.4549, + "step": 14960 + }, + { + "epoch": 0.4585325487311512, + "grad_norm": 0.8363681445780149, + "learning_rate": 5.909376007331066e-06, + "loss": 0.4408, + "step": 14961 + }, + { + "epoch": 0.4585631972538924, + "grad_norm": 1.8504930787256832, + "learning_rate": 5.908887962790117e-06, + "loss": 0.6413, + "step": 14962 + }, + { + "epoch": 0.4585938457766336, + "grad_norm": 1.8027709082351155, + "learning_rate": 5.9083999092936625e-06, + "loss": 0.7491, + "step": 14963 + }, + { + "epoch": 0.4586244942993748, + "grad_norm": 1.9957729531344846, + "learning_rate": 5.907911846846511e-06, + "loss": 0.5988, + "step": 14964 + }, + { + "epoch": 0.45865514282211595, + "grad_norm": 1.918042384345431, + "learning_rate": 5.907423775453472e-06, + "loss": 0.7509, + "step": 14965 + }, + { + "epoch": 0.45868579134485715, + "grad_norm": 1.896183986997033, + "learning_rate": 5.906935695119354e-06, + "loss": 0.6914, + "step": 14966 + }, + { + "epoch": 0.45871643986759836, + "grad_norm": 0.8026333327111969, + "learning_rate": 5.906447605848967e-06, + "loss": 0.4426, + "step": 14967 + }, + { + "epoch": 0.45874708839033956, + "grad_norm": 1.7125262892346136, + "learning_rate": 5.90595950764712e-06, + "loss": 0.6596, + "step": 14968 + }, + { + "epoch": 0.45877773691308077, + "grad_norm": 1.8621618598783611, + "learning_rate": 5.905471400518622e-06, + "loss": 0.7953, + "step": 14969 + }, + { + "epoch": 0.458808385435822, + "grad_norm": 1.7419207776509569, + "learning_rate": 5.904983284468282e-06, + "loss": 0.6448, + "step": 14970 + }, + { + "epoch": 0.4588390339585632, + "grad_norm": 0.7871833943875163, + "learning_rate": 5.9044951595009114e-06, + "loss": 0.4432, + "step": 14971 + }, + { + "epoch": 0.4588696824813044, + "grad_norm": 1.767195287130095, + "learning_rate": 5.9040070256213166e-06, + "loss": 0.6919, + "step": 14972 + }, + { + "epoch": 0.4589003310040456, + "grad_norm": 1.8061387883388065, + "learning_rate": 5.903518882834311e-06, + "loss": 0.6463, + "step": 14973 + }, + { + "epoch": 0.4589309795267868, + "grad_norm": 1.7306467245452823, + "learning_rate": 5.903030731144701e-06, + "loss": 0.6408, + "step": 14974 + }, + { + "epoch": 0.458961628049528, + "grad_norm": 1.6968654399003467, + "learning_rate": 5.902542570557302e-06, + "loss": 0.6775, + "step": 14975 + }, + { + "epoch": 0.4589922765722692, + "grad_norm": 1.6021679495429066, + "learning_rate": 5.9020544010769155e-06, + "loss": 0.6428, + "step": 14976 + }, 
+ { + "epoch": 0.4590229250950104, + "grad_norm": 1.5752807775451672, + "learning_rate": 5.901566222708359e-06, + "loss": 0.721, + "step": 14977 + }, + { + "epoch": 0.4590535736177516, + "grad_norm": 1.9826251031459647, + "learning_rate": 5.901078035456438e-06, + "loss": 0.7592, + "step": 14978 + }, + { + "epoch": 0.4590842221404928, + "grad_norm": 1.7712300838772206, + "learning_rate": 5.900589839325966e-06, + "loss": 0.6671, + "step": 14979 + }, + { + "epoch": 0.45911487066323403, + "grad_norm": 1.3587412782603467, + "learning_rate": 5.900101634321751e-06, + "loss": 0.6102, + "step": 14980 + }, + { + "epoch": 0.45914551918597524, + "grad_norm": 1.8382045608620274, + "learning_rate": 5.899613420448606e-06, + "loss": 0.6555, + "step": 14981 + }, + { + "epoch": 0.45917616770871644, + "grad_norm": 1.9779949474548026, + "learning_rate": 5.899125197711337e-06, + "loss": 0.6465, + "step": 14982 + }, + { + "epoch": 0.45920681623145765, + "grad_norm": 1.7345304633819467, + "learning_rate": 5.898636966114759e-06, + "loss": 0.6199, + "step": 14983 + }, + { + "epoch": 0.45923746475419885, + "grad_norm": 1.8485049614436093, + "learning_rate": 5.898148725663682e-06, + "loss": 0.6502, + "step": 14984 + }, + { + "epoch": 0.45926811327694006, + "grad_norm": 1.66322999575696, + "learning_rate": 5.8976604763629135e-06, + "loss": 0.5901, + "step": 14985 + }, + { + "epoch": 0.45929876179968127, + "grad_norm": 1.882047587612129, + "learning_rate": 5.897172218217269e-06, + "loss": 0.6883, + "step": 14986 + }, + { + "epoch": 0.45932941032242247, + "grad_norm": 0.8505277928268201, + "learning_rate": 5.896683951231554e-06, + "loss": 0.4499, + "step": 14987 + }, + { + "epoch": 0.4593600588451637, + "grad_norm": 1.918683545891323, + "learning_rate": 5.896195675410586e-06, + "loss": 0.6518, + "step": 14988 + }, + { + "epoch": 0.4593907073679049, + "grad_norm": 1.516836689977487, + "learning_rate": 5.89570739075917e-06, + "loss": 0.5881, + "step": 14989 + }, + { + "epoch": 0.4594213558906461, + "grad_norm": 1.801337344439613, + "learning_rate": 5.895219097282123e-06, + "loss": 0.6196, + "step": 14990 + }, + { + "epoch": 0.4594520044133873, + "grad_norm": 2.0213131572182865, + "learning_rate": 5.89473079498425e-06, + "loss": 0.735, + "step": 14991 + }, + { + "epoch": 0.4594826529361285, + "grad_norm": 1.6978443977062783, + "learning_rate": 5.894242483870367e-06, + "loss": 0.6682, + "step": 14992 + }, + { + "epoch": 0.4595133014588697, + "grad_norm": 1.9254368233515444, + "learning_rate": 5.893754163945283e-06, + "loss": 0.7397, + "step": 14993 + }, + { + "epoch": 0.4595439499816109, + "grad_norm": 1.7893900924023807, + "learning_rate": 5.893265835213813e-06, + "loss": 0.6678, + "step": 14994 + }, + { + "epoch": 0.4595745985043521, + "grad_norm": 1.5986176203552995, + "learning_rate": 5.8927774976807635e-06, + "loss": 0.6352, + "step": 14995 + }, + { + "epoch": 0.45960524702709327, + "grad_norm": 1.8605902744729612, + "learning_rate": 5.892289151350951e-06, + "loss": 0.6623, + "step": 14996 + }, + { + "epoch": 0.4596358955498345, + "grad_norm": 1.7104563895473668, + "learning_rate": 5.891800796229183e-06, + "loss": 0.6132, + "step": 14997 + }, + { + "epoch": 0.4596665440725757, + "grad_norm": 1.9207786056498493, + "learning_rate": 5.891312432320275e-06, + "loss": 0.7021, + "step": 14998 + }, + { + "epoch": 0.4596971925953169, + "grad_norm": 1.7311544780327217, + "learning_rate": 5.890824059629038e-06, + "loss": 0.6278, + "step": 14999 + }, + { + "epoch": 0.4597278411180581, + "grad_norm": 1.7221170173400033, + 
"learning_rate": 5.890335678160282e-06, + "loss": 0.6454, + "step": 15000 + }, + { + "epoch": 0.4597584896407993, + "grad_norm": 1.8229187189239893, + "learning_rate": 5.8898472879188216e-06, + "loss": 0.586, + "step": 15001 + }, + { + "epoch": 0.4597891381635405, + "grad_norm": 0.7961952117232435, + "learning_rate": 5.8893588889094684e-06, + "loss": 0.4343, + "step": 15002 + }, + { + "epoch": 0.4598197866862817, + "grad_norm": 1.4402605022804147, + "learning_rate": 5.888870481137036e-06, + "loss": 0.5894, + "step": 15003 + }, + { + "epoch": 0.4598504352090229, + "grad_norm": 1.579172951654577, + "learning_rate": 5.888382064606333e-06, + "loss": 0.638, + "step": 15004 + }, + { + "epoch": 0.4598810837317641, + "grad_norm": 1.631950243934095, + "learning_rate": 5.887893639322174e-06, + "loss": 0.6404, + "step": 15005 + }, + { + "epoch": 0.4599117322545053, + "grad_norm": 1.577199003166251, + "learning_rate": 5.8874052052893734e-06, + "loss": 0.6355, + "step": 15006 + }, + { + "epoch": 0.45994238077724653, + "grad_norm": 1.6623316153972274, + "learning_rate": 5.886916762512742e-06, + "loss": 0.6292, + "step": 15007 + }, + { + "epoch": 0.45997302929998773, + "grad_norm": 1.8235700523234868, + "learning_rate": 5.886428310997092e-06, + "loss": 0.6379, + "step": 15008 + }, + { + "epoch": 0.46000367782272894, + "grad_norm": 1.9793022998539904, + "learning_rate": 5.885939850747237e-06, + "loss": 0.6824, + "step": 15009 + }, + { + "epoch": 0.46003432634547015, + "grad_norm": 1.9170162751498796, + "learning_rate": 5.885451381767991e-06, + "loss": 0.7329, + "step": 15010 + }, + { + "epoch": 0.46006497486821135, + "grad_norm": 1.830744494836067, + "learning_rate": 5.884962904064164e-06, + "loss": 0.5328, + "step": 15011 + }, + { + "epoch": 0.46009562339095256, + "grad_norm": 1.612305725809677, + "learning_rate": 5.884474417640573e-06, + "loss": 0.6688, + "step": 15012 + }, + { + "epoch": 0.46012627191369376, + "grad_norm": 1.8918491843914245, + "learning_rate": 5.883985922502029e-06, + "loss": 0.6487, + "step": 15013 + }, + { + "epoch": 0.46015692043643497, + "grad_norm": 1.5827711740627255, + "learning_rate": 5.8834974186533456e-06, + "loss": 0.6647, + "step": 15014 + }, + { + "epoch": 0.4601875689591762, + "grad_norm": 1.7226744980053104, + "learning_rate": 5.883008906099336e-06, + "loss": 0.7193, + "step": 15015 + }, + { + "epoch": 0.4602182174819174, + "grad_norm": 1.5802106703201177, + "learning_rate": 5.882520384844813e-06, + "loss": 0.6296, + "step": 15016 + }, + { + "epoch": 0.4602488660046586, + "grad_norm": 0.8531902701412596, + "learning_rate": 5.8820318548945925e-06, + "loss": 0.4449, + "step": 15017 + }, + { + "epoch": 0.4602795145273998, + "grad_norm": 0.8479161606510741, + "learning_rate": 5.881543316253485e-06, + "loss": 0.4303, + "step": 15018 + }, + { + "epoch": 0.460310163050141, + "grad_norm": 1.6893082323755098, + "learning_rate": 5.881054768926306e-06, + "loss": 0.621, + "step": 15019 + }, + { + "epoch": 0.4603408115728822, + "grad_norm": 1.6175855610185992, + "learning_rate": 5.880566212917868e-06, + "loss": 0.6484, + "step": 15020 + }, + { + "epoch": 0.4603714600956234, + "grad_norm": 1.649660577038679, + "learning_rate": 5.880077648232987e-06, + "loss": 0.6318, + "step": 15021 + }, + { + "epoch": 0.4604021086183646, + "grad_norm": 0.8173417043117454, + "learning_rate": 5.879589074876476e-06, + "loss": 0.4539, + "step": 15022 + }, + { + "epoch": 0.4604327571411058, + "grad_norm": 1.741547701988968, + "learning_rate": 5.879100492853147e-06, + "loss": 0.5353, + "step": 15023 + }, 
+ { + "epoch": 0.460463405663847, + "grad_norm": 1.9614894151693112, + "learning_rate": 5.878611902167818e-06, + "loss": 0.6924, + "step": 15024 + }, + { + "epoch": 0.46049405418658823, + "grad_norm": 1.7121154229548115, + "learning_rate": 5.8781233028253e-06, + "loss": 0.6416, + "step": 15025 + }, + { + "epoch": 0.46052470270932944, + "grad_norm": 1.5862601412352995, + "learning_rate": 5.8776346948304075e-06, + "loss": 0.6515, + "step": 15026 + }, + { + "epoch": 0.4605553512320706, + "grad_norm": 1.7722470802406642, + "learning_rate": 5.877146078187957e-06, + "loss": 0.6336, + "step": 15027 + }, + { + "epoch": 0.4605859997548118, + "grad_norm": 1.746431215030762, + "learning_rate": 5.876657452902762e-06, + "loss": 0.6575, + "step": 15028 + }, + { + "epoch": 0.460616648277553, + "grad_norm": 0.7832023376532832, + "learning_rate": 5.8761688189796376e-06, + "loss": 0.4228, + "step": 15029 + }, + { + "epoch": 0.4606472968002942, + "grad_norm": 1.642173369034907, + "learning_rate": 5.875680176423396e-06, + "loss": 0.641, + "step": 15030 + }, + { + "epoch": 0.4606779453230354, + "grad_norm": 0.7795519277914658, + "learning_rate": 5.8751915252388546e-06, + "loss": 0.4595, + "step": 15031 + }, + { + "epoch": 0.4607085938457766, + "grad_norm": 1.6620672301747474, + "learning_rate": 5.874702865430826e-06, + "loss": 0.6905, + "step": 15032 + }, + { + "epoch": 0.4607392423685178, + "grad_norm": 0.7992754191455869, + "learning_rate": 5.874214197004128e-06, + "loss": 0.4657, + "step": 15033 + }, + { + "epoch": 0.460769890891259, + "grad_norm": 1.682919873756366, + "learning_rate": 5.873725519963572e-06, + "loss": 0.6039, + "step": 15034 + }, + { + "epoch": 0.46080053941400023, + "grad_norm": 0.771041269333698, + "learning_rate": 5.873236834313976e-06, + "loss": 0.4522, + "step": 15035 + }, + { + "epoch": 0.46083118793674144, + "grad_norm": 1.7839408703324195, + "learning_rate": 5.872748140060152e-06, + "loss": 0.6281, + "step": 15036 + }, + { + "epoch": 0.46086183645948264, + "grad_norm": 1.8841137268198709, + "learning_rate": 5.872259437206921e-06, + "loss": 0.7214, + "step": 15037 + }, + { + "epoch": 0.46089248498222385, + "grad_norm": 1.7108471877612166, + "learning_rate": 5.871770725759093e-06, + "loss": 0.6737, + "step": 15038 + }, + { + "epoch": 0.46092313350496505, + "grad_norm": 1.608345800185223, + "learning_rate": 5.871282005721484e-06, + "loss": 0.6908, + "step": 15039 + }, + { + "epoch": 0.46095378202770626, + "grad_norm": 1.8330613587473092, + "learning_rate": 5.870793277098912e-06, + "loss": 0.6838, + "step": 15040 + }, + { + "epoch": 0.46098443055044747, + "grad_norm": 0.7848478870257379, + "learning_rate": 5.870304539896189e-06, + "loss": 0.4398, + "step": 15041 + }, + { + "epoch": 0.46101507907318867, + "grad_norm": 1.6790392561733303, + "learning_rate": 5.8698157941181344e-06, + "loss": 0.5886, + "step": 15042 + }, + { + "epoch": 0.4610457275959299, + "grad_norm": 0.791807237377808, + "learning_rate": 5.869327039769561e-06, + "loss": 0.4423, + "step": 15043 + }, + { + "epoch": 0.4610763761186711, + "grad_norm": 1.4981713011817597, + "learning_rate": 5.868838276855287e-06, + "loss": 0.6927, + "step": 15044 + }, + { + "epoch": 0.4611070246414123, + "grad_norm": 1.9925268035600578, + "learning_rate": 5.868349505380127e-06, + "loss": 0.6639, + "step": 15045 + }, + { + "epoch": 0.4611376731641535, + "grad_norm": 1.734848794653148, + "learning_rate": 5.867860725348896e-06, + "loss": 0.601, + "step": 15046 + }, + { + "epoch": 0.4611683216868947, + "grad_norm": 1.594121330700238, + 
"learning_rate": 5.867371936766412e-06, + "loss": 0.6993, + "step": 15047 + }, + { + "epoch": 0.4611989702096359, + "grad_norm": 1.7583033965332917, + "learning_rate": 5.866883139637492e-06, + "loss": 0.5901, + "step": 15048 + }, + { + "epoch": 0.4612296187323771, + "grad_norm": 1.7907056583498573, + "learning_rate": 5.866394333966948e-06, + "loss": 0.6043, + "step": 15049 + }, + { + "epoch": 0.4612602672551183, + "grad_norm": 1.66956925923563, + "learning_rate": 5.865905519759601e-06, + "loss": 0.6304, + "step": 15050 + }, + { + "epoch": 0.4612909157778595, + "grad_norm": 0.8539892879998839, + "learning_rate": 5.865416697020263e-06, + "loss": 0.4503, + "step": 15051 + }, + { + "epoch": 0.46132156430060073, + "grad_norm": 1.5624361953177008, + "learning_rate": 5.864927865753754e-06, + "loss": 0.6027, + "step": 15052 + }, + { + "epoch": 0.46135221282334193, + "grad_norm": 1.7199992416675285, + "learning_rate": 5.86443902596489e-06, + "loss": 0.6713, + "step": 15053 + }, + { + "epoch": 0.46138286134608314, + "grad_norm": 0.821093182552182, + "learning_rate": 5.863950177658486e-06, + "loss": 0.4496, + "step": 15054 + }, + { + "epoch": 0.46141350986882435, + "grad_norm": 1.5013377064415026, + "learning_rate": 5.863461320839361e-06, + "loss": 0.6783, + "step": 15055 + }, + { + "epoch": 0.46144415839156555, + "grad_norm": 1.809434928980658, + "learning_rate": 5.862972455512331e-06, + "loss": 0.6157, + "step": 15056 + }, + { + "epoch": 0.46147480691430676, + "grad_norm": 1.7604603950569604, + "learning_rate": 5.862483581682211e-06, + "loss": 0.5984, + "step": 15057 + }, + { + "epoch": 0.4615054554370479, + "grad_norm": 0.7608999233980241, + "learning_rate": 5.86199469935382e-06, + "loss": 0.4546, + "step": 15058 + }, + { + "epoch": 0.4615361039597891, + "grad_norm": 1.6493334510781479, + "learning_rate": 5.861505808531975e-06, + "loss": 0.5991, + "step": 15059 + }, + { + "epoch": 0.4615667524825303, + "grad_norm": 1.5680100390550264, + "learning_rate": 5.861016909221494e-06, + "loss": 0.6137, + "step": 15060 + }, + { + "epoch": 0.4615974010052715, + "grad_norm": 1.638515157825199, + "learning_rate": 5.860528001427193e-06, + "loss": 0.6456, + "step": 15061 + }, + { + "epoch": 0.46162804952801273, + "grad_norm": 2.010428495427904, + "learning_rate": 5.860039085153887e-06, + "loss": 0.6561, + "step": 15062 + }, + { + "epoch": 0.46165869805075394, + "grad_norm": 0.8181209913758593, + "learning_rate": 5.859550160406397e-06, + "loss": 0.4224, + "step": 15063 + }, + { + "epoch": 0.46168934657349514, + "grad_norm": 1.769500448871125, + "learning_rate": 5.859061227189541e-06, + "loss": 0.566, + "step": 15064 + }, + { + "epoch": 0.46171999509623635, + "grad_norm": 1.955594714995619, + "learning_rate": 5.858572285508134e-06, + "loss": 0.6613, + "step": 15065 + }, + { + "epoch": 0.46175064361897755, + "grad_norm": 1.7952929509831166, + "learning_rate": 5.858083335366993e-06, + "loss": 0.7234, + "step": 15066 + }, + { + "epoch": 0.46178129214171876, + "grad_norm": 1.7471454585853152, + "learning_rate": 5.8575943767709384e-06, + "loss": 0.7083, + "step": 15067 + }, + { + "epoch": 0.46181194066445996, + "grad_norm": 1.6322874440755375, + "learning_rate": 5.857105409724788e-06, + "loss": 0.6182, + "step": 15068 + }, + { + "epoch": 0.46184258918720117, + "grad_norm": 1.6719466312791882, + "learning_rate": 5.856616434233358e-06, + "loss": 0.6395, + "step": 15069 + }, + { + "epoch": 0.4618732377099424, + "grad_norm": 1.83801811403753, + "learning_rate": 5.856127450301467e-06, + "loss": 0.6793, + "step": 15070 + }, 
+ { + "epoch": 0.4619038862326836, + "grad_norm": 1.6277917516486284, + "learning_rate": 5.855638457933933e-06, + "loss": 0.6107, + "step": 15071 + }, + { + "epoch": 0.4619345347554248, + "grad_norm": 1.6991288054435942, + "learning_rate": 5.855149457135575e-06, + "loss": 0.6489, + "step": 15072 + }, + { + "epoch": 0.461965183278166, + "grad_norm": 1.8852913250375107, + "learning_rate": 5.854660447911209e-06, + "loss": 0.7353, + "step": 15073 + }, + { + "epoch": 0.4619958318009072, + "grad_norm": 1.8576243699794772, + "learning_rate": 5.854171430265656e-06, + "loss": 0.5248, + "step": 15074 + }, + { + "epoch": 0.4620264803236484, + "grad_norm": 1.6664545777697886, + "learning_rate": 5.853682404203733e-06, + "loss": 0.5776, + "step": 15075 + }, + { + "epoch": 0.4620571288463896, + "grad_norm": 1.5965168879247926, + "learning_rate": 5.85319336973026e-06, + "loss": 0.5667, + "step": 15076 + }, + { + "epoch": 0.4620877773691308, + "grad_norm": 1.8382925546943651, + "learning_rate": 5.852704326850053e-06, + "loss": 0.6172, + "step": 15077 + }, + { + "epoch": 0.462118425891872, + "grad_norm": 1.767896020050351, + "learning_rate": 5.852215275567933e-06, + "loss": 0.6538, + "step": 15078 + }, + { + "epoch": 0.4621490744146132, + "grad_norm": 1.963060631749633, + "learning_rate": 5.851726215888718e-06, + "loss": 0.6073, + "step": 15079 + }, + { + "epoch": 0.46217972293735443, + "grad_norm": 1.6608372750132419, + "learning_rate": 5.851237147817226e-06, + "loss": 0.7378, + "step": 15080 + }, + { + "epoch": 0.46221037146009564, + "grad_norm": 1.6718397686951703, + "learning_rate": 5.8507480713582765e-06, + "loss": 0.664, + "step": 15081 + }, + { + "epoch": 0.46224101998283684, + "grad_norm": 1.7711428998866252, + "learning_rate": 5.850258986516688e-06, + "loss": 0.6646, + "step": 15082 + }, + { + "epoch": 0.46227166850557805, + "grad_norm": 1.5885789824940755, + "learning_rate": 5.8497698932972826e-06, + "loss": 0.6073, + "step": 15083 + }, + { + "epoch": 0.46230231702831925, + "grad_norm": 1.708514457894335, + "learning_rate": 5.849280791704874e-06, + "loss": 0.6915, + "step": 15084 + }, + { + "epoch": 0.46233296555106046, + "grad_norm": 0.8045612293430491, + "learning_rate": 5.848791681744287e-06, + "loss": 0.4436, + "step": 15085 + }, + { + "epoch": 0.46236361407380167, + "grad_norm": 1.6890437007125099, + "learning_rate": 5.848302563420336e-06, + "loss": 0.7105, + "step": 15086 + }, + { + "epoch": 0.46239426259654287, + "grad_norm": 0.7798246625030107, + "learning_rate": 5.8478134367378455e-06, + "loss": 0.4458, + "step": 15087 + }, + { + "epoch": 0.4624249111192841, + "grad_norm": 0.8342014202048715, + "learning_rate": 5.84732430170163e-06, + "loss": 0.443, + "step": 15088 + }, + { + "epoch": 0.4624555596420252, + "grad_norm": 1.9424076986923615, + "learning_rate": 5.846835158316513e-06, + "loss": 0.7067, + "step": 15089 + }, + { + "epoch": 0.46248620816476643, + "grad_norm": 1.6694296534024904, + "learning_rate": 5.846346006587312e-06, + "loss": 0.6761, + "step": 15090 + }, + { + "epoch": 0.46251685668750764, + "grad_norm": 1.7909156912314368, + "learning_rate": 5.845856846518847e-06, + "loss": 0.5753, + "step": 15091 + }, + { + "epoch": 0.46254750521024884, + "grad_norm": 0.8144214288550056, + "learning_rate": 5.8453676781159375e-06, + "loss": 0.4368, + "step": 15092 + }, + { + "epoch": 0.46257815373299005, + "grad_norm": 1.7706979283901367, + "learning_rate": 5.844878501383406e-06, + "loss": 0.7763, + "step": 15093 + }, + { + "epoch": 0.46260880225573126, + "grad_norm": 1.767019300460785, + 
"learning_rate": 5.844389316326069e-06, + "loss": 0.6512, + "step": 15094 + }, + { + "epoch": 0.46263945077847246, + "grad_norm": 1.7380693566825967, + "learning_rate": 5.843900122948747e-06, + "loss": 0.6988, + "step": 15095 + }, + { + "epoch": 0.46267009930121367, + "grad_norm": 1.8601414493546942, + "learning_rate": 5.843410921256262e-06, + "loss": 0.6548, + "step": 15096 + }, + { + "epoch": 0.46270074782395487, + "grad_norm": 1.579948537506221, + "learning_rate": 5.842921711253433e-06, + "loss": 0.6152, + "step": 15097 + }, + { + "epoch": 0.4627313963466961, + "grad_norm": 1.693781298448835, + "learning_rate": 5.842432492945083e-06, + "loss": 0.5816, + "step": 15098 + }, + { + "epoch": 0.4627620448694373, + "grad_norm": 1.6969236255723092, + "learning_rate": 5.841943266336027e-06, + "loss": 0.6227, + "step": 15099 + }, + { + "epoch": 0.4627926933921785, + "grad_norm": 0.8161534440016094, + "learning_rate": 5.841454031431091e-06, + "loss": 0.4305, + "step": 15100 + }, + { + "epoch": 0.4628233419149197, + "grad_norm": 1.8590226367246587, + "learning_rate": 5.840964788235091e-06, + "loss": 0.6492, + "step": 15101 + }, + { + "epoch": 0.4628539904376609, + "grad_norm": 1.883113004174209, + "learning_rate": 5.840475536752852e-06, + "loss": 0.7605, + "step": 15102 + }, + { + "epoch": 0.4628846389604021, + "grad_norm": 1.507584327939436, + "learning_rate": 5.839986276989191e-06, + "loss": 0.6104, + "step": 15103 + }, + { + "epoch": 0.4629152874831433, + "grad_norm": 1.5440337800151038, + "learning_rate": 5.839497008948931e-06, + "loss": 0.6199, + "step": 15104 + }, + { + "epoch": 0.4629459360058845, + "grad_norm": 0.7702414883931997, + "learning_rate": 5.839007732636891e-06, + "loss": 0.4207, + "step": 15105 + }, + { + "epoch": 0.4629765845286257, + "grad_norm": 1.6564767690372757, + "learning_rate": 5.838518448057894e-06, + "loss": 0.6719, + "step": 15106 + }, + { + "epoch": 0.46300723305136693, + "grad_norm": 2.342415847440356, + "learning_rate": 5.83802915521676e-06, + "loss": 0.7512, + "step": 15107 + }, + { + "epoch": 0.46303788157410813, + "grad_norm": 1.8077652147675976, + "learning_rate": 5.8375398541183106e-06, + "loss": 0.7136, + "step": 15108 + }, + { + "epoch": 0.46306853009684934, + "grad_norm": 0.7532110107877398, + "learning_rate": 5.837050544767367e-06, + "loss": 0.4629, + "step": 15109 + }, + { + "epoch": 0.46309917861959055, + "grad_norm": 1.809682957435512, + "learning_rate": 5.83656122716875e-06, + "loss": 0.7109, + "step": 15110 + }, + { + "epoch": 0.46312982714233175, + "grad_norm": 1.8842718019798288, + "learning_rate": 5.836071901327281e-06, + "loss": 0.7334, + "step": 15111 + }, + { + "epoch": 0.46316047566507296, + "grad_norm": 1.705282629114447, + "learning_rate": 5.8355825672477805e-06, + "loss": 0.6069, + "step": 15112 + }, + { + "epoch": 0.46319112418781416, + "grad_norm": 1.7086458750657043, + "learning_rate": 5.835093224935073e-06, + "loss": 0.6483, + "step": 15113 + }, + { + "epoch": 0.46322177271055537, + "grad_norm": 1.7966225068955082, + "learning_rate": 5.834603874393978e-06, + "loss": 0.546, + "step": 15114 + }, + { + "epoch": 0.4632524212332966, + "grad_norm": 1.537315881060872, + "learning_rate": 5.8341145156293175e-06, + "loss": 0.5818, + "step": 15115 + }, + { + "epoch": 0.4632830697560378, + "grad_norm": 2.031096374952793, + "learning_rate": 5.8336251486459114e-06, + "loss": 0.703, + "step": 15116 + }, + { + "epoch": 0.463313718278779, + "grad_norm": 1.5638772009068078, + "learning_rate": 5.833135773448587e-06, + "loss": 0.6124, + "step": 15117 + }, + 
{ + "epoch": 0.4633443668015202, + "grad_norm": 1.5808457964796387, + "learning_rate": 5.832646390042159e-06, + "loss": 0.5594, + "step": 15118 + }, + { + "epoch": 0.4633750153242614, + "grad_norm": 1.7824040304233326, + "learning_rate": 5.832156998431456e-06, + "loss": 0.6168, + "step": 15119 + }, + { + "epoch": 0.46340566384700255, + "grad_norm": 1.5946951551515154, + "learning_rate": 5.831667598621294e-06, + "loss": 0.6578, + "step": 15120 + }, + { + "epoch": 0.46343631236974375, + "grad_norm": 2.0769022741943686, + "learning_rate": 5.831178190616501e-06, + "loss": 0.6522, + "step": 15121 + }, + { + "epoch": 0.46346696089248496, + "grad_norm": 1.7929087983867968, + "learning_rate": 5.830688774421896e-06, + "loss": 0.5962, + "step": 15122 + }, + { + "epoch": 0.46349760941522616, + "grad_norm": 1.6913228394104907, + "learning_rate": 5.8301993500423e-06, + "loss": 0.591, + "step": 15123 + }, + { + "epoch": 0.46352825793796737, + "grad_norm": 1.779081051890894, + "learning_rate": 5.82970991748254e-06, + "loss": 0.6155, + "step": 15124 + }, + { + "epoch": 0.4635589064607086, + "grad_norm": 1.6757769205873785, + "learning_rate": 5.829220476747436e-06, + "loss": 0.5651, + "step": 15125 + }, + { + "epoch": 0.4635895549834498, + "grad_norm": 1.8107510568098963, + "learning_rate": 5.82873102784181e-06, + "loss": 0.6015, + "step": 15126 + }, + { + "epoch": 0.463620203506191, + "grad_norm": 1.8771220869602454, + "learning_rate": 5.828241570770483e-06, + "loss": 0.7058, + "step": 15127 + }, + { + "epoch": 0.4636508520289322, + "grad_norm": 1.536665855132083, + "learning_rate": 5.827752105538282e-06, + "loss": 0.6327, + "step": 15128 + }, + { + "epoch": 0.4636815005516734, + "grad_norm": 1.694357282424998, + "learning_rate": 5.827262632150028e-06, + "loss": 0.6055, + "step": 15129 + }, + { + "epoch": 0.4637121490744146, + "grad_norm": 1.5791257322539929, + "learning_rate": 5.826773150610543e-06, + "loss": 0.6787, + "step": 15130 + }, + { + "epoch": 0.4637427975971558, + "grad_norm": 1.7496885961816813, + "learning_rate": 5.82628366092465e-06, + "loss": 0.6552, + "step": 15131 + }, + { + "epoch": 0.463773446119897, + "grad_norm": 1.7246366448462256, + "learning_rate": 5.825794163097173e-06, + "loss": 0.6451, + "step": 15132 + }, + { + "epoch": 0.4638040946426382, + "grad_norm": 1.792397685078719, + "learning_rate": 5.825304657132935e-06, + "loss": 0.6855, + "step": 15133 + }, + { + "epoch": 0.4638347431653794, + "grad_norm": 0.8302633339744655, + "learning_rate": 5.824815143036758e-06, + "loss": 0.4432, + "step": 15134 + }, + { + "epoch": 0.46386539168812063, + "grad_norm": 1.8026222214101875, + "learning_rate": 5.824325620813468e-06, + "loss": 0.6289, + "step": 15135 + }, + { + "epoch": 0.46389604021086184, + "grad_norm": 1.8815713347381515, + "learning_rate": 5.823836090467887e-06, + "loss": 0.7604, + "step": 15136 + }, + { + "epoch": 0.46392668873360304, + "grad_norm": 1.5220082675168736, + "learning_rate": 5.8233465520048375e-06, + "loss": 0.6348, + "step": 15137 + }, + { + "epoch": 0.46395733725634425, + "grad_norm": 1.6063925956258591, + "learning_rate": 5.822857005429142e-06, + "loss": 0.7585, + "step": 15138 + }, + { + "epoch": 0.46398798577908545, + "grad_norm": 0.822638480373699, + "learning_rate": 5.8223674507456285e-06, + "loss": 0.4685, + "step": 15139 + }, + { + "epoch": 0.46401863430182666, + "grad_norm": 1.5510156398590924, + "learning_rate": 5.8218778879591175e-06, + "loss": 0.6002, + "step": 15140 + }, + { + "epoch": 0.46404928282456787, + "grad_norm": 0.7540027451622787, + 
"learning_rate": 5.821388317074434e-06, + "loss": 0.4193, + "step": 15141 + }, + { + "epoch": 0.46407993134730907, + "grad_norm": 2.03870842057595, + "learning_rate": 5.820898738096399e-06, + "loss": 0.6758, + "step": 15142 + }, + { + "epoch": 0.4641105798700503, + "grad_norm": 1.648684181163835, + "learning_rate": 5.82040915102984e-06, + "loss": 0.642, + "step": 15143 + }, + { + "epoch": 0.4641412283927915, + "grad_norm": 1.6426218926250713, + "learning_rate": 5.819919555879579e-06, + "loss": 0.5624, + "step": 15144 + }, + { + "epoch": 0.4641718769155327, + "grad_norm": 1.9487564103669108, + "learning_rate": 5.8194299526504425e-06, + "loss": 0.6801, + "step": 15145 + }, + { + "epoch": 0.4642025254382739, + "grad_norm": 1.6693678402818608, + "learning_rate": 5.818940341347251e-06, + "loss": 0.5865, + "step": 15146 + }, + { + "epoch": 0.4642331739610151, + "grad_norm": 1.8323503854903989, + "learning_rate": 5.818450721974832e-06, + "loss": 0.6768, + "step": 15147 + }, + { + "epoch": 0.4642638224837563, + "grad_norm": 1.7319258342910873, + "learning_rate": 5.817961094538008e-06, + "loss": 0.6495, + "step": 15148 + }, + { + "epoch": 0.4642944710064975, + "grad_norm": 1.926755597684182, + "learning_rate": 5.817471459041605e-06, + "loss": 0.8043, + "step": 15149 + }, + { + "epoch": 0.4643251195292387, + "grad_norm": 1.8188039859906078, + "learning_rate": 5.816981815490446e-06, + "loss": 0.651, + "step": 15150 + }, + { + "epoch": 0.46435576805197987, + "grad_norm": 1.8354709070642954, + "learning_rate": 5.816492163889355e-06, + "loss": 0.7307, + "step": 15151 + }, + { + "epoch": 0.4643864165747211, + "grad_norm": 1.904729232741421, + "learning_rate": 5.81600250424316e-06, + "loss": 0.7081, + "step": 15152 + }, + { + "epoch": 0.4644170650974623, + "grad_norm": 0.8520463335441937, + "learning_rate": 5.815512836556683e-06, + "loss": 0.444, + "step": 15153 + }, + { + "epoch": 0.4644477136202035, + "grad_norm": 1.8795234087746138, + "learning_rate": 5.815023160834749e-06, + "loss": 0.6349, + "step": 15154 + }, + { + "epoch": 0.4644783621429447, + "grad_norm": 1.6062176562993307, + "learning_rate": 5.814533477082182e-06, + "loss": 0.67, + "step": 15155 + }, + { + "epoch": 0.4645090106656859, + "grad_norm": 0.8699081010862701, + "learning_rate": 5.81404378530381e-06, + "loss": 0.4653, + "step": 15156 + }, + { + "epoch": 0.4645396591884271, + "grad_norm": 1.764698723150222, + "learning_rate": 5.813554085504455e-06, + "loss": 0.591, + "step": 15157 + }, + { + "epoch": 0.4645703077111683, + "grad_norm": 1.8757665067480769, + "learning_rate": 5.813064377688944e-06, + "loss": 0.6214, + "step": 15158 + }, + { + "epoch": 0.4646009562339095, + "grad_norm": 1.540987712293716, + "learning_rate": 5.812574661862101e-06, + "loss": 0.6011, + "step": 15159 + }, + { + "epoch": 0.4646316047566507, + "grad_norm": 1.8243629653777202, + "learning_rate": 5.812084938028753e-06, + "loss": 0.6978, + "step": 15160 + }, + { + "epoch": 0.4646622532793919, + "grad_norm": 1.8022282311104856, + "learning_rate": 5.811595206193725e-06, + "loss": 0.6331, + "step": 15161 + }, + { + "epoch": 0.46469290180213313, + "grad_norm": 1.7035121462235527, + "learning_rate": 5.81110546636184e-06, + "loss": 0.6585, + "step": 15162 + }, + { + "epoch": 0.46472355032487433, + "grad_norm": 1.7607472111222806, + "learning_rate": 5.8106157185379264e-06, + "loss": 0.6374, + "step": 15163 + }, + { + "epoch": 0.46475419884761554, + "grad_norm": 0.8650966899383514, + "learning_rate": 5.810125962726808e-06, + "loss": 0.4431, + "step": 15164 + }, + { + 
"epoch": 0.46478484737035675, + "grad_norm": 1.6672331296660488, + "learning_rate": 5.809636198933313e-06, + "loss": 0.6016, + "step": 15165 + }, + { + "epoch": 0.46481549589309795, + "grad_norm": 2.242048554833845, + "learning_rate": 5.809146427162262e-06, + "loss": 0.6917, + "step": 15166 + }, + { + "epoch": 0.46484614441583916, + "grad_norm": 1.5410934832975047, + "learning_rate": 5.808656647418488e-06, + "loss": 0.5821, + "step": 15167 + }, + { + "epoch": 0.46487679293858036, + "grad_norm": 1.8397114586241656, + "learning_rate": 5.808166859706811e-06, + "loss": 0.5888, + "step": 15168 + }, + { + "epoch": 0.46490744146132157, + "grad_norm": 1.616079858086225, + "learning_rate": 5.80767706403206e-06, + "loss": 0.6291, + "step": 15169 + }, + { + "epoch": 0.4649380899840628, + "grad_norm": 0.787953442771804, + "learning_rate": 5.807187260399058e-06, + "loss": 0.4445, + "step": 15170 + }, + { + "epoch": 0.464968738506804, + "grad_norm": 1.614588008999774, + "learning_rate": 5.806697448812637e-06, + "loss": 0.7049, + "step": 15171 + }, + { + "epoch": 0.4649993870295452, + "grad_norm": 0.7774094285776775, + "learning_rate": 5.806207629277617e-06, + "loss": 0.4466, + "step": 15172 + }, + { + "epoch": 0.4650300355522864, + "grad_norm": 1.9725513341852083, + "learning_rate": 5.805717801798828e-06, + "loss": 0.6216, + "step": 15173 + }, + { + "epoch": 0.4650606840750276, + "grad_norm": 1.8783674019445082, + "learning_rate": 5.805227966381095e-06, + "loss": 0.6925, + "step": 15174 + }, + { + "epoch": 0.4650913325977688, + "grad_norm": 1.7452083863575958, + "learning_rate": 5.8047381230292455e-06, + "loss": 0.5953, + "step": 15175 + }, + { + "epoch": 0.46512198112051, + "grad_norm": 1.6471500763647955, + "learning_rate": 5.804248271748104e-06, + "loss": 0.6833, + "step": 15176 + }, + { + "epoch": 0.4651526296432512, + "grad_norm": 1.8798820536331233, + "learning_rate": 5.8037584125425e-06, + "loss": 0.6446, + "step": 15177 + }, + { + "epoch": 0.4651832781659924, + "grad_norm": 1.8879385352946452, + "learning_rate": 5.8032685454172574e-06, + "loss": 0.7146, + "step": 15178 + }, + { + "epoch": 0.4652139266887336, + "grad_norm": 0.8035801911931518, + "learning_rate": 5.802778670377205e-06, + "loss": 0.4385, + "step": 15179 + }, + { + "epoch": 0.46524457521147483, + "grad_norm": 0.7677985395228235, + "learning_rate": 5.802288787427169e-06, + "loss": 0.4483, + "step": 15180 + }, + { + "epoch": 0.46527522373421604, + "grad_norm": 1.6993085458664796, + "learning_rate": 5.801798896571975e-06, + "loss": 0.7249, + "step": 15181 + }, + { + "epoch": 0.4653058722569572, + "grad_norm": 1.6536988639249992, + "learning_rate": 5.8013089978164535e-06, + "loss": 0.6265, + "step": 15182 + }, + { + "epoch": 0.4653365207796984, + "grad_norm": 1.689963044371769, + "learning_rate": 5.800819091165428e-06, + "loss": 0.639, + "step": 15183 + }, + { + "epoch": 0.4653671693024396, + "grad_norm": 1.8367260896162034, + "learning_rate": 5.800329176623728e-06, + "loss": 0.7199, + "step": 15184 + }, + { + "epoch": 0.4653978178251808, + "grad_norm": 1.655447229421824, + "learning_rate": 5.799839254196179e-06, + "loss": 0.6766, + "step": 15185 + }, + { + "epoch": 0.465428466347922, + "grad_norm": 1.6949730692007947, + "learning_rate": 5.79934932388761e-06, + "loss": 0.7119, + "step": 15186 + }, + { + "epoch": 0.4654591148706632, + "grad_norm": 1.7673307036060073, + "learning_rate": 5.798859385702848e-06, + "loss": 0.6142, + "step": 15187 + }, + { + "epoch": 0.4654897633934044, + "grad_norm": 1.3995552408010346, + "learning_rate": 
5.798369439646718e-06, + "loss": 0.5809, + "step": 15188 + }, + { + "epoch": 0.4655204119161456, + "grad_norm": 1.8775810946124303, + "learning_rate": 5.7978794857240506e-06, + "loss": 0.7196, + "step": 15189 + }, + { + "epoch": 0.46555106043888683, + "grad_norm": 1.755442488086937, + "learning_rate": 5.797389523939674e-06, + "loss": 0.7203, + "step": 15190 + }, + { + "epoch": 0.46558170896162804, + "grad_norm": 1.7186039821870798, + "learning_rate": 5.796899554298413e-06, + "loss": 0.6384, + "step": 15191 + }, + { + "epoch": 0.46561235748436924, + "grad_norm": 1.6590875833038923, + "learning_rate": 5.796409576805096e-06, + "loss": 0.6475, + "step": 15192 + }, + { + "epoch": 0.46564300600711045, + "grad_norm": 2.589111780436161, + "learning_rate": 5.795919591464553e-06, + "loss": 0.7278, + "step": 15193 + }, + { + "epoch": 0.46567365452985165, + "grad_norm": 1.8740268754655593, + "learning_rate": 5.79542959828161e-06, + "loss": 0.5791, + "step": 15194 + }, + { + "epoch": 0.46570430305259286, + "grad_norm": 1.8471531106389385, + "learning_rate": 5.794939597261097e-06, + "loss": 0.6586, + "step": 15195 + }, + { + "epoch": 0.46573495157533407, + "grad_norm": 1.725808865966762, + "learning_rate": 5.794449588407838e-06, + "loss": 0.6381, + "step": 15196 + }, + { + "epoch": 0.46576560009807527, + "grad_norm": 1.6925157346623196, + "learning_rate": 5.793959571726666e-06, + "loss": 0.6854, + "step": 15197 + }, + { + "epoch": 0.4657962486208165, + "grad_norm": 1.6112932217094929, + "learning_rate": 5.7934695472224066e-06, + "loss": 0.5716, + "step": 15198 + }, + { + "epoch": 0.4658268971435577, + "grad_norm": 1.6573761149099813, + "learning_rate": 5.79297951489989e-06, + "loss": 0.657, + "step": 15199 + }, + { + "epoch": 0.4658575456662989, + "grad_norm": 1.6530769825546574, + "learning_rate": 5.792489474763941e-06, + "loss": 0.7364, + "step": 15200 + }, + { + "epoch": 0.4658881941890401, + "grad_norm": 1.683201293279754, + "learning_rate": 5.791999426819393e-06, + "loss": 0.5966, + "step": 15201 + }, + { + "epoch": 0.4659188427117813, + "grad_norm": 1.7489080811575575, + "learning_rate": 5.79150937107107e-06, + "loss": 0.6182, + "step": 15202 + }, + { + "epoch": 0.4659494912345225, + "grad_norm": 1.705181293313091, + "learning_rate": 5.7910193075238034e-06, + "loss": 0.6075, + "step": 15203 + }, + { + "epoch": 0.4659801397572637, + "grad_norm": 1.6356636307806873, + "learning_rate": 5.790529236182421e-06, + "loss": 0.7208, + "step": 15204 + }, + { + "epoch": 0.4660107882800049, + "grad_norm": 1.4784377415223806, + "learning_rate": 5.7900391570517504e-06, + "loss": 0.5945, + "step": 15205 + }, + { + "epoch": 0.4660414368027461, + "grad_norm": 1.5207240549090841, + "learning_rate": 5.789549070136625e-06, + "loss": 0.571, + "step": 15206 + }, + { + "epoch": 0.46607208532548733, + "grad_norm": 1.578774005800256, + "learning_rate": 5.789058975441868e-06, + "loss": 0.7292, + "step": 15207 + }, + { + "epoch": 0.46610273384822853, + "grad_norm": 1.6362615005755354, + "learning_rate": 5.788568872972312e-06, + "loss": 0.6205, + "step": 15208 + }, + { + "epoch": 0.46613338237096974, + "grad_norm": 1.8224295019686167, + "learning_rate": 5.788078762732785e-06, + "loss": 0.5948, + "step": 15209 + }, + { + "epoch": 0.46616403089371095, + "grad_norm": 1.88440575097643, + "learning_rate": 5.787588644728117e-06, + "loss": 0.6705, + "step": 15210 + }, + { + "epoch": 0.46619467941645215, + "grad_norm": 0.9091144402188898, + "learning_rate": 5.787098518963136e-06, + "loss": 0.46, + "step": 15211 + }, + { + "epoch": 
0.46622532793919336, + "grad_norm": 1.6586378455760915, + "learning_rate": 5.786608385442671e-06, + "loss": 0.5739, + "step": 15212 + }, + { + "epoch": 0.4662559764619345, + "grad_norm": 1.659986315322739, + "learning_rate": 5.786118244171552e-06, + "loss": 0.6652, + "step": 15213 + }, + { + "epoch": 0.4662866249846757, + "grad_norm": 1.7134862760586294, + "learning_rate": 5.7856280951546116e-06, + "loss": 0.5618, + "step": 15214 + }, + { + "epoch": 0.4663172735074169, + "grad_norm": 1.585449426477338, + "learning_rate": 5.785137938396674e-06, + "loss": 0.6234, + "step": 15215 + }, + { + "epoch": 0.4663479220301581, + "grad_norm": 1.7679455764979115, + "learning_rate": 5.784647773902574e-06, + "loss": 0.6412, + "step": 15216 + }, + { + "epoch": 0.46637857055289933, + "grad_norm": 1.7071457426293037, + "learning_rate": 5.784157601677136e-06, + "loss": 0.7124, + "step": 15217 + }, + { + "epoch": 0.46640921907564054, + "grad_norm": 1.7416644748648027, + "learning_rate": 5.7836674217251945e-06, + "loss": 0.6381, + "step": 15218 + }, + { + "epoch": 0.46643986759838174, + "grad_norm": 1.7485245879814688, + "learning_rate": 5.783177234051576e-06, + "loss": 0.7031, + "step": 15219 + }, + { + "epoch": 0.46647051612112295, + "grad_norm": 0.7599728505002189, + "learning_rate": 5.782687038661111e-06, + "loss": 0.4527, + "step": 15220 + }, + { + "epoch": 0.46650116464386415, + "grad_norm": 1.6194179779014812, + "learning_rate": 5.782196835558633e-06, + "loss": 0.6381, + "step": 15221 + }, + { + "epoch": 0.46653181316660536, + "grad_norm": 1.6272257637690615, + "learning_rate": 5.781706624748968e-06, + "loss": 0.6274, + "step": 15222 + }, + { + "epoch": 0.46656246168934656, + "grad_norm": 1.7963186450358803, + "learning_rate": 5.781216406236948e-06, + "loss": 0.6873, + "step": 15223 + }, + { + "epoch": 0.46659311021208777, + "grad_norm": 1.6538176511876073, + "learning_rate": 5.780726180027402e-06, + "loss": 0.6736, + "step": 15224 + }, + { + "epoch": 0.466623758734829, + "grad_norm": 0.7926631183922496, + "learning_rate": 5.780235946125163e-06, + "loss": 0.4589, + "step": 15225 + }, + { + "epoch": 0.4666544072575702, + "grad_norm": 1.661348408829997, + "learning_rate": 5.779745704535057e-06, + "loss": 0.6909, + "step": 15226 + }, + { + "epoch": 0.4666850557803114, + "grad_norm": 1.5361157078247152, + "learning_rate": 5.7792554552619184e-06, + "loss": 0.6631, + "step": 15227 + }, + { + "epoch": 0.4667157043030526, + "grad_norm": 1.7884310245903297, + "learning_rate": 5.778765198310576e-06, + "loss": 0.7191, + "step": 15228 + }, + { + "epoch": 0.4667463528257938, + "grad_norm": 1.7170818363382214, + "learning_rate": 5.778274933685863e-06, + "loss": 0.6005, + "step": 15229 + }, + { + "epoch": 0.466777001348535, + "grad_norm": 1.8075327586420697, + "learning_rate": 5.777784661392606e-06, + "loss": 0.676, + "step": 15230 + }, + { + "epoch": 0.4668076498712762, + "grad_norm": 1.648422277686206, + "learning_rate": 5.777294381435636e-06, + "loss": 0.6321, + "step": 15231 + }, + { + "epoch": 0.4668382983940174, + "grad_norm": 1.8720627706649824, + "learning_rate": 5.776804093819789e-06, + "loss": 0.6717, + "step": 15232 + }, + { + "epoch": 0.4668689469167586, + "grad_norm": 1.682479482939991, + "learning_rate": 5.776313798549891e-06, + "loss": 0.7645, + "step": 15233 + }, + { + "epoch": 0.4668995954394998, + "grad_norm": 1.7655793958620059, + "learning_rate": 5.7758234956307745e-06, + "loss": 0.642, + "step": 15234 + }, + { + "epoch": 0.46693024396224103, + "grad_norm": 1.6115935035994078, + "learning_rate": 
5.77533318506727e-06, + "loss": 0.7229, + "step": 15235 + }, + { + "epoch": 0.46696089248498224, + "grad_norm": 1.7266347963969564, + "learning_rate": 5.7748428668642095e-06, + "loss": 0.7064, + "step": 15236 + }, + { + "epoch": 0.46699154100772344, + "grad_norm": 0.8090454850709186, + "learning_rate": 5.7743525410264256e-06, + "loss": 0.4493, + "step": 15237 + }, + { + "epoch": 0.46702218953046465, + "grad_norm": 1.6690364171303056, + "learning_rate": 5.773862207558747e-06, + "loss": 0.682, + "step": 15238 + }, + { + "epoch": 0.46705283805320585, + "grad_norm": 1.7653819193024425, + "learning_rate": 5.773371866466004e-06, + "loss": 0.6612, + "step": 15239 + }, + { + "epoch": 0.46708348657594706, + "grad_norm": 1.7882436340586776, + "learning_rate": 5.772881517753033e-06, + "loss": 0.7097, + "step": 15240 + }, + { + "epoch": 0.46711413509868827, + "grad_norm": 0.77720987928066, + "learning_rate": 5.772391161424662e-06, + "loss": 0.4419, + "step": 15241 + }, + { + "epoch": 0.46714478362142947, + "grad_norm": 1.9695914597526862, + "learning_rate": 5.771900797485723e-06, + "loss": 0.6541, + "step": 15242 + }, + { + "epoch": 0.4671754321441707, + "grad_norm": 1.7204031095758625, + "learning_rate": 5.771410425941047e-06, + "loss": 0.5932, + "step": 15243 + }, + { + "epoch": 0.4672060806669118, + "grad_norm": 1.6320049302295363, + "learning_rate": 5.770920046795468e-06, + "loss": 0.5051, + "step": 15244 + }, + { + "epoch": 0.46723672918965303, + "grad_norm": 1.784324471475959, + "learning_rate": 5.7704296600538165e-06, + "loss": 0.6506, + "step": 15245 + }, + { + "epoch": 0.46726737771239424, + "grad_norm": 1.5499448959694917, + "learning_rate": 5.769939265720923e-06, + "loss": 0.5971, + "step": 15246 + }, + { + "epoch": 0.46729802623513544, + "grad_norm": 1.7668255682538363, + "learning_rate": 5.769448863801622e-06, + "loss": 0.6603, + "step": 15247 + }, + { + "epoch": 0.46732867475787665, + "grad_norm": 1.6952577282866717, + "learning_rate": 5.7689584543007446e-06, + "loss": 0.6392, + "step": 15248 + }, + { + "epoch": 0.46735932328061786, + "grad_norm": 1.6738958276632017, + "learning_rate": 5.768468037223124e-06, + "loss": 0.6496, + "step": 15249 + }, + { + "epoch": 0.46738997180335906, + "grad_norm": 1.6359301335005743, + "learning_rate": 5.767977612573589e-06, + "loss": 0.6726, + "step": 15250 + }, + { + "epoch": 0.46742062032610027, + "grad_norm": 1.6012940959219244, + "learning_rate": 5.767487180356974e-06, + "loss": 0.6506, + "step": 15251 + }, + { + "epoch": 0.4674512688488415, + "grad_norm": 0.8276960990068464, + "learning_rate": 5.766996740578113e-06, + "loss": 0.4583, + "step": 15252 + }, + { + "epoch": 0.4674819173715827, + "grad_norm": 1.8904426315253826, + "learning_rate": 5.766506293241837e-06, + "loss": 0.6407, + "step": 15253 + }, + { + "epoch": 0.4675125658943239, + "grad_norm": 1.6546432825740036, + "learning_rate": 5.766015838352976e-06, + "loss": 0.6, + "step": 15254 + }, + { + "epoch": 0.4675432144170651, + "grad_norm": 1.7566972289049263, + "learning_rate": 5.765525375916368e-06, + "loss": 0.6618, + "step": 15255 + }, + { + "epoch": 0.4675738629398063, + "grad_norm": 1.6908717270565383, + "learning_rate": 5.76503490593684e-06, + "loss": 0.7014, + "step": 15256 + }, + { + "epoch": 0.4676045114625475, + "grad_norm": 1.7066001317657695, + "learning_rate": 5.764544428419229e-06, + "loss": 0.6216, + "step": 15257 + }, + { + "epoch": 0.4676351599852887, + "grad_norm": 1.691815422697069, + "learning_rate": 5.764053943368365e-06, + "loss": 0.6554, + "step": 15258 + }, + { + 
"epoch": 0.4676658085080299, + "grad_norm": 1.7106084201777099, + "learning_rate": 5.7635634507890836e-06, + "loss": 0.7081, + "step": 15259 + }, + { + "epoch": 0.4676964570307711, + "grad_norm": 1.7512705092373402, + "learning_rate": 5.763072950686215e-06, + "loss": 0.6569, + "step": 15260 + }, + { + "epoch": 0.4677271055535123, + "grad_norm": 1.8751642407883347, + "learning_rate": 5.762582443064593e-06, + "loss": 0.6528, + "step": 15261 + }, + { + "epoch": 0.46775775407625353, + "grad_norm": 0.8187095035155024, + "learning_rate": 5.762091927929052e-06, + "loss": 0.4461, + "step": 15262 + }, + { + "epoch": 0.46778840259899473, + "grad_norm": 0.8321825278806166, + "learning_rate": 5.761601405284423e-06, + "loss": 0.4664, + "step": 15263 + }, + { + "epoch": 0.46781905112173594, + "grad_norm": 1.665276559375373, + "learning_rate": 5.761110875135543e-06, + "loss": 0.5815, + "step": 15264 + }, + { + "epoch": 0.46784969964447715, + "grad_norm": 1.620878952773822, + "learning_rate": 5.7606203374872395e-06, + "loss": 0.6284, + "step": 15265 + }, + { + "epoch": 0.46788034816721835, + "grad_norm": 1.641437292830842, + "learning_rate": 5.760129792344351e-06, + "loss": 0.6839, + "step": 15266 + }, + { + "epoch": 0.46791099668995956, + "grad_norm": 2.0404661956567423, + "learning_rate": 5.759639239711709e-06, + "loss": 0.6683, + "step": 15267 + }, + { + "epoch": 0.46794164521270076, + "grad_norm": 1.727179442535462, + "learning_rate": 5.7591486795941484e-06, + "loss": 0.5693, + "step": 15268 + }, + { + "epoch": 0.46797229373544197, + "grad_norm": 1.8295636689100288, + "learning_rate": 5.758658111996499e-06, + "loss": 0.6332, + "step": 15269 + }, + { + "epoch": 0.4680029422581832, + "grad_norm": 1.7564238095479099, + "learning_rate": 5.758167536923599e-06, + "loss": 0.5729, + "step": 15270 + }, + { + "epoch": 0.4680335907809244, + "grad_norm": 0.8688210260909248, + "learning_rate": 5.7576769543802805e-06, + "loss": 0.4561, + "step": 15271 + }, + { + "epoch": 0.4680642393036656, + "grad_norm": 1.7845933611631553, + "learning_rate": 5.7571863643713755e-06, + "loss": 0.6396, + "step": 15272 + }, + { + "epoch": 0.4680948878264068, + "grad_norm": 1.893538276685192, + "learning_rate": 5.756695766901721e-06, + "loss": 0.7206, + "step": 15273 + }, + { + "epoch": 0.468125536349148, + "grad_norm": 1.715128727886111, + "learning_rate": 5.756205161976148e-06, + "loss": 0.6873, + "step": 15274 + }, + { + "epoch": 0.46815618487188915, + "grad_norm": 1.6537227273046302, + "learning_rate": 5.755714549599495e-06, + "loss": 0.6968, + "step": 15275 + }, + { + "epoch": 0.46818683339463035, + "grad_norm": 2.0195505153686866, + "learning_rate": 5.75522392977659e-06, + "loss": 0.6433, + "step": 15276 + }, + { + "epoch": 0.46821748191737156, + "grad_norm": 0.8032114978444873, + "learning_rate": 5.754733302512272e-06, + "loss": 0.4599, + "step": 15277 + }, + { + "epoch": 0.46824813044011276, + "grad_norm": 1.780977391004402, + "learning_rate": 5.754242667811372e-06, + "loss": 0.6536, + "step": 15278 + }, + { + "epoch": 0.46827877896285397, + "grad_norm": 1.7543203890125052, + "learning_rate": 5.753752025678728e-06, + "loss": 0.6618, + "step": 15279 + }, + { + "epoch": 0.4683094274855952, + "grad_norm": 1.8839013300288747, + "learning_rate": 5.753261376119172e-06, + "loss": 0.7546, + "step": 15280 + }, + { + "epoch": 0.4683400760083364, + "grad_norm": 1.6934422508541807, + "learning_rate": 5.752770719137538e-06, + "loss": 0.6663, + "step": 15281 + }, + { + "epoch": 0.4683707245310776, + "grad_norm": 0.7533126181439422, + 
"learning_rate": 5.752280054738662e-06, + "loss": 0.4314, + "step": 15282 + }, + { + "epoch": 0.4684013730538188, + "grad_norm": 0.7782333379450317, + "learning_rate": 5.751789382927379e-06, + "loss": 0.4233, + "step": 15283 + }, + { + "epoch": 0.46843202157656, + "grad_norm": 0.781332004699337, + "learning_rate": 5.751298703708522e-06, + "loss": 0.4678, + "step": 15284 + }, + { + "epoch": 0.4684626700993012, + "grad_norm": 1.6514420968656232, + "learning_rate": 5.750808017086927e-06, + "loss": 0.5925, + "step": 15285 + }, + { + "epoch": 0.4684933186220424, + "grad_norm": 1.8160302695487625, + "learning_rate": 5.750317323067427e-06, + "loss": 0.6502, + "step": 15286 + }, + { + "epoch": 0.4685239671447836, + "grad_norm": 0.7600296019996711, + "learning_rate": 5.74982662165486e-06, + "loss": 0.4215, + "step": 15287 + }, + { + "epoch": 0.4685546156675248, + "grad_norm": 1.6115587706647372, + "learning_rate": 5.749335912854059e-06, + "loss": 0.6439, + "step": 15288 + }, + { + "epoch": 0.468585264190266, + "grad_norm": 1.8348904315070877, + "learning_rate": 5.74884519666986e-06, + "loss": 0.7068, + "step": 15289 + }, + { + "epoch": 0.46861591271300723, + "grad_norm": 1.6749238021847899, + "learning_rate": 5.748354473107097e-06, + "loss": 0.6442, + "step": 15290 + }, + { + "epoch": 0.46864656123574844, + "grad_norm": 1.7239030933669937, + "learning_rate": 5.747863742170607e-06, + "loss": 0.6451, + "step": 15291 + }, + { + "epoch": 0.46867720975848964, + "grad_norm": 1.6776117780305095, + "learning_rate": 5.7473730038652245e-06, + "loss": 0.5747, + "step": 15292 + }, + { + "epoch": 0.46870785828123085, + "grad_norm": 1.6723576508684366, + "learning_rate": 5.746882258195782e-06, + "loss": 0.6134, + "step": 15293 + }, + { + "epoch": 0.46873850680397205, + "grad_norm": 1.6471451588372181, + "learning_rate": 5.746391505167119e-06, + "loss": 0.6051, + "step": 15294 + }, + { + "epoch": 0.46876915532671326, + "grad_norm": 1.8051756200484579, + "learning_rate": 5.74590074478407e-06, + "loss": 0.6748, + "step": 15295 + }, + { + "epoch": 0.46879980384945447, + "grad_norm": 1.4626061286004468, + "learning_rate": 5.74540997705147e-06, + "loss": 0.5415, + "step": 15296 + }, + { + "epoch": 0.46883045237219567, + "grad_norm": 1.8485483340539095, + "learning_rate": 5.744919201974154e-06, + "loss": 0.6638, + "step": 15297 + }, + { + "epoch": 0.4688611008949369, + "grad_norm": 1.8425028118751972, + "learning_rate": 5.744428419556959e-06, + "loss": 0.7281, + "step": 15298 + }, + { + "epoch": 0.4688917494176781, + "grad_norm": 1.739929126303711, + "learning_rate": 5.743937629804721e-06, + "loss": 0.7118, + "step": 15299 + }, + { + "epoch": 0.4689223979404193, + "grad_norm": 1.827010651066358, + "learning_rate": 5.743446832722274e-06, + "loss": 0.6201, + "step": 15300 + }, + { + "epoch": 0.4689530464631605, + "grad_norm": 1.7858739767720306, + "learning_rate": 5.742956028314455e-06, + "loss": 0.6054, + "step": 15301 + }, + { + "epoch": 0.4689836949859017, + "grad_norm": 1.7574725753343534, + "learning_rate": 5.742465216586102e-06, + "loss": 0.6441, + "step": 15302 + }, + { + "epoch": 0.4690143435086429, + "grad_norm": 1.6263178872176105, + "learning_rate": 5.741974397542047e-06, + "loss": 0.6664, + "step": 15303 + }, + { + "epoch": 0.4690449920313841, + "grad_norm": 1.741402072741425, + "learning_rate": 5.741483571187129e-06, + "loss": 0.6926, + "step": 15304 + }, + { + "epoch": 0.4690756405541253, + "grad_norm": 1.5085596576306282, + "learning_rate": 5.7409927375261845e-06, + "loss": 0.6017, + "step": 15305 + }, + { 
+ "epoch": 0.4691062890768665, + "grad_norm": 1.9465180696454096, + "learning_rate": 5.740501896564049e-06, + "loss": 0.6362, + "step": 15306 + }, + { + "epoch": 0.4691369375996077, + "grad_norm": 1.5558928363811628, + "learning_rate": 5.740011048305558e-06, + "loss": 0.629, + "step": 15307 + }, + { + "epoch": 0.4691675861223489, + "grad_norm": 1.6997598144335508, + "learning_rate": 5.7395201927555486e-06, + "loss": 0.6112, + "step": 15308 + }, + { + "epoch": 0.4691982346450901, + "grad_norm": 0.9194280929110819, + "learning_rate": 5.739029329918859e-06, + "loss": 0.4619, + "step": 15309 + }, + { + "epoch": 0.4692288831678313, + "grad_norm": 1.6561347249304739, + "learning_rate": 5.738538459800323e-06, + "loss": 0.5055, + "step": 15310 + }, + { + "epoch": 0.4692595316905725, + "grad_norm": 1.62379374953995, + "learning_rate": 5.73804758240478e-06, + "loss": 0.6251, + "step": 15311 + }, + { + "epoch": 0.4692901802133137, + "grad_norm": 0.7955556072947267, + "learning_rate": 5.737556697737063e-06, + "loss": 0.4362, + "step": 15312 + }, + { + "epoch": 0.4693208287360549, + "grad_norm": 1.7112912547894072, + "learning_rate": 5.737065805802013e-06, + "loss": 0.6373, + "step": 15313 + }, + { + "epoch": 0.4693514772587961, + "grad_norm": 1.5865339068471014, + "learning_rate": 5.736574906604465e-06, + "loss": 0.7287, + "step": 15314 + }, + { + "epoch": 0.4693821257815373, + "grad_norm": 1.8947464282531068, + "learning_rate": 5.736084000149254e-06, + "loss": 0.678, + "step": 15315 + }, + { + "epoch": 0.4694127743042785, + "grad_norm": 1.6680319448615564, + "learning_rate": 5.7355930864412215e-06, + "loss": 0.5682, + "step": 15316 + }, + { + "epoch": 0.46944342282701973, + "grad_norm": 1.882276229501566, + "learning_rate": 5.7351021654852004e-06, + "loss": 0.6142, + "step": 15317 + }, + { + "epoch": 0.46947407134976094, + "grad_norm": 0.813264945202007, + "learning_rate": 5.734611237286032e-06, + "loss": 0.4327, + "step": 15318 + }, + { + "epoch": 0.46950471987250214, + "grad_norm": 0.793306333558807, + "learning_rate": 5.734120301848548e-06, + "loss": 0.4263, + "step": 15319 + }, + { + "epoch": 0.46953536839524335, + "grad_norm": 1.6703689469082317, + "learning_rate": 5.733629359177591e-06, + "loss": 0.5592, + "step": 15320 + }, + { + "epoch": 0.46956601691798455, + "grad_norm": 1.6999425851614094, + "learning_rate": 5.733138409277996e-06, + "loss": 0.7382, + "step": 15321 + }, + { + "epoch": 0.46959666544072576, + "grad_norm": 1.6379901762635702, + "learning_rate": 5.7326474521546e-06, + "loss": 0.683, + "step": 15322 + }, + { + "epoch": 0.46962731396346696, + "grad_norm": 1.7954029879127296, + "learning_rate": 5.732156487812241e-06, + "loss": 0.6511, + "step": 15323 + }, + { + "epoch": 0.46965796248620817, + "grad_norm": 1.7918315317963103, + "learning_rate": 5.731665516255758e-06, + "loss": 0.6411, + "step": 15324 + }, + { + "epoch": 0.4696886110089494, + "grad_norm": 1.806801625005755, + "learning_rate": 5.731174537489986e-06, + "loss": 0.7184, + "step": 15325 + }, + { + "epoch": 0.4697192595316906, + "grad_norm": 1.7572993740858958, + "learning_rate": 5.730683551519764e-06, + "loss": 0.5818, + "step": 15326 + }, + { + "epoch": 0.4697499080544318, + "grad_norm": 1.715496104112758, + "learning_rate": 5.7301925583499314e-06, + "loss": 0.5924, + "step": 15327 + }, + { + "epoch": 0.469780556577173, + "grad_norm": 0.7767813460114579, + "learning_rate": 5.729701557985325e-06, + "loss": 0.4389, + "step": 15328 + }, + { + "epoch": 0.4698112050999142, + "grad_norm": 1.59555523309083, + "learning_rate": 
5.729210550430782e-06, + "loss": 0.5559, + "step": 15329 + }, + { + "epoch": 0.4698418536226554, + "grad_norm": 1.6387605576949797, + "learning_rate": 5.728719535691139e-06, + "loss": 0.6578, + "step": 15330 + }, + { + "epoch": 0.4698725021453966, + "grad_norm": 1.8417867935225791, + "learning_rate": 5.728228513771238e-06, + "loss": 0.6185, + "step": 15331 + }, + { + "epoch": 0.4699031506681378, + "grad_norm": 1.9098281266775572, + "learning_rate": 5.727737484675914e-06, + "loss": 0.7399, + "step": 15332 + }, + { + "epoch": 0.469933799190879, + "grad_norm": 1.59263729345396, + "learning_rate": 5.727246448410008e-06, + "loss": 0.6095, + "step": 15333 + }, + { + "epoch": 0.4699644477136202, + "grad_norm": 1.638216306387881, + "learning_rate": 5.726755404978355e-06, + "loss": 0.6166, + "step": 15334 + }, + { + "epoch": 0.46999509623636143, + "grad_norm": 1.8293710630175708, + "learning_rate": 5.726264354385795e-06, + "loss": 0.6867, + "step": 15335 + }, + { + "epoch": 0.47002574475910264, + "grad_norm": 1.7660102783975913, + "learning_rate": 5.725773296637167e-06, + "loss": 0.5867, + "step": 15336 + }, + { + "epoch": 0.47005639328184384, + "grad_norm": 1.6730914108113797, + "learning_rate": 5.7252822317373105e-06, + "loss": 0.594, + "step": 15337 + }, + { + "epoch": 0.470087041804585, + "grad_norm": 1.7591210649541273, + "learning_rate": 5.724791159691061e-06, + "loss": 0.6299, + "step": 15338 + }, + { + "epoch": 0.4701176903273262, + "grad_norm": 1.7711220583153275, + "learning_rate": 5.7243000805032585e-06, + "loss": 0.6209, + "step": 15339 + }, + { + "epoch": 0.4701483388500674, + "grad_norm": 1.800975221228765, + "learning_rate": 5.723808994178742e-06, + "loss": 0.7112, + "step": 15340 + }, + { + "epoch": 0.4701789873728086, + "grad_norm": 1.647575850077172, + "learning_rate": 5.7233179007223514e-06, + "loss": 0.6108, + "step": 15341 + }, + { + "epoch": 0.4702096358955498, + "grad_norm": 1.6591686715795064, + "learning_rate": 5.722826800138924e-06, + "loss": 0.6684, + "step": 15342 + }, + { + "epoch": 0.470240284418291, + "grad_norm": 1.7051016378000465, + "learning_rate": 5.7223356924332986e-06, + "loss": 0.6324, + "step": 15343 + }, + { + "epoch": 0.4702709329410322, + "grad_norm": 1.8349487604410322, + "learning_rate": 5.721844577610315e-06, + "loss": 0.7291, + "step": 15344 + }, + { + "epoch": 0.47030158146377343, + "grad_norm": 1.8188873007551327, + "learning_rate": 5.721353455674813e-06, + "loss": 0.7062, + "step": 15345 + }, + { + "epoch": 0.47033222998651464, + "grad_norm": 1.9685039771115298, + "learning_rate": 5.7208623266316296e-06, + "loss": 0.7738, + "step": 15346 + }, + { + "epoch": 0.47036287850925584, + "grad_norm": 0.854166814505216, + "learning_rate": 5.720371190485605e-06, + "loss": 0.4235, + "step": 15347 + }, + { + "epoch": 0.47039352703199705, + "grad_norm": 1.9199123176045358, + "learning_rate": 5.719880047241582e-06, + "loss": 0.6169, + "step": 15348 + }, + { + "epoch": 0.47042417555473826, + "grad_norm": 1.513707837066518, + "learning_rate": 5.719388896904393e-06, + "loss": 0.6086, + "step": 15349 + }, + { + "epoch": 0.47045482407747946, + "grad_norm": 1.6537890281004073, + "learning_rate": 5.718897739478883e-06, + "loss": 0.602, + "step": 15350 + }, + { + "epoch": 0.47048547260022067, + "grad_norm": 1.6791768352162801, + "learning_rate": 5.718406574969888e-06, + "loss": 0.6945, + "step": 15351 + }, + { + "epoch": 0.4705161211229619, + "grad_norm": 1.6035804015618733, + "learning_rate": 5.717915403382251e-06, + "loss": 0.6968, + "step": 15352 + }, + { + "epoch": 
0.4705467696457031, + "grad_norm": 1.694381530380286, + "learning_rate": 5.717424224720809e-06, + "loss": 0.7407, + "step": 15353 + }, + { + "epoch": 0.4705774181684443, + "grad_norm": 1.6173432692310228, + "learning_rate": 5.716933038990402e-06, + "loss": 0.5707, + "step": 15354 + }, + { + "epoch": 0.4706080666911855, + "grad_norm": 1.7905930239656171, + "learning_rate": 5.71644184619587e-06, + "loss": 0.6441, + "step": 15355 + }, + { + "epoch": 0.4706387152139267, + "grad_norm": 1.6513959775791671, + "learning_rate": 5.715950646342055e-06, + "loss": 0.6968, + "step": 15356 + }, + { + "epoch": 0.4706693637366679, + "grad_norm": 1.633612886873287, + "learning_rate": 5.715459439433795e-06, + "loss": 0.6822, + "step": 15357 + }, + { + "epoch": 0.4707000122594091, + "grad_norm": 1.6553838827364769, + "learning_rate": 5.714968225475927e-06, + "loss": 0.654, + "step": 15358 + }, + { + "epoch": 0.4707306607821503, + "grad_norm": 1.4520706460233468, + "learning_rate": 5.7144770044732976e-06, + "loss": 0.5455, + "step": 15359 + }, + { + "epoch": 0.4707613093048915, + "grad_norm": 1.8009397231982984, + "learning_rate": 5.7139857764307424e-06, + "loss": 0.581, + "step": 15360 + }, + { + "epoch": 0.4707919578276327, + "grad_norm": 1.644374878189106, + "learning_rate": 5.713494541353103e-06, + "loss": 0.6893, + "step": 15361 + }, + { + "epoch": 0.47082260635037393, + "grad_norm": 1.7121843276249857, + "learning_rate": 5.713003299245219e-06, + "loss": 0.7218, + "step": 15362 + }, + { + "epoch": 0.47085325487311513, + "grad_norm": 1.5809833110198084, + "learning_rate": 5.712512050111931e-06, + "loss": 0.6171, + "step": 15363 + }, + { + "epoch": 0.47088390339585634, + "grad_norm": 1.6734547813068552, + "learning_rate": 5.71202079395808e-06, + "loss": 0.666, + "step": 15364 + }, + { + "epoch": 0.47091455191859755, + "grad_norm": 0.8904781488664311, + "learning_rate": 5.711529530788505e-06, + "loss": 0.4449, + "step": 15365 + }, + { + "epoch": 0.47094520044133875, + "grad_norm": 1.6060624989158292, + "learning_rate": 5.711038260608047e-06, + "loss": 0.5721, + "step": 15366 + }, + { + "epoch": 0.47097584896407996, + "grad_norm": 1.492334107303944, + "learning_rate": 5.7105469834215485e-06, + "loss": 0.6312, + "step": 15367 + }, + { + "epoch": 0.47100649748682116, + "grad_norm": 1.9397429759628484, + "learning_rate": 5.710055699233848e-06, + "loss": 0.6469, + "step": 15368 + }, + { + "epoch": 0.4710371460095623, + "grad_norm": 1.8313182209040848, + "learning_rate": 5.709564408049787e-06, + "loss": 0.6729, + "step": 15369 + }, + { + "epoch": 0.4710677945323035, + "grad_norm": 1.8414256168802179, + "learning_rate": 5.709073109874207e-06, + "loss": 0.6997, + "step": 15370 + }, + { + "epoch": 0.4710984430550447, + "grad_norm": 1.9947735413402874, + "learning_rate": 5.708581804711947e-06, + "loss": 0.6864, + "step": 15371 + }, + { + "epoch": 0.47112909157778593, + "grad_norm": 1.7563913573284242, + "learning_rate": 5.708090492567851e-06, + "loss": 0.7186, + "step": 15372 + }, + { + "epoch": 0.47115974010052714, + "grad_norm": 0.7477822848281048, + "learning_rate": 5.707599173446756e-06, + "loss": 0.429, + "step": 15373 + }, + { + "epoch": 0.47119038862326834, + "grad_norm": 1.8354635530203083, + "learning_rate": 5.707107847353508e-06, + "loss": 0.6334, + "step": 15374 + }, + { + "epoch": 0.47122103714600955, + "grad_norm": 1.788379770093719, + "learning_rate": 5.706616514292944e-06, + "loss": 0.5641, + "step": 15375 + }, + { + "epoch": 0.47125168566875075, + "grad_norm": 0.786486707449813, + "learning_rate": 
5.7061251742699066e-06, + "loss": 0.4509, + "step": 15376 + }, + { + "epoch": 0.47128233419149196, + "grad_norm": 0.7940277048653479, + "learning_rate": 5.705633827289236e-06, + "loss": 0.431, + "step": 15377 + }, + { + "epoch": 0.47131298271423316, + "grad_norm": 1.68842667338076, + "learning_rate": 5.705142473355777e-06, + "loss": 0.62, + "step": 15378 + }, + { + "epoch": 0.47134363123697437, + "grad_norm": 1.8998471970625614, + "learning_rate": 5.7046511124743676e-06, + "loss": 0.6976, + "step": 15379 + }, + { + "epoch": 0.4713742797597156, + "grad_norm": 1.7286014028847168, + "learning_rate": 5.7041597446498505e-06, + "loss": 0.6634, + "step": 15380 + }, + { + "epoch": 0.4714049282824568, + "grad_norm": 1.8574770346974345, + "learning_rate": 5.703668369887068e-06, + "loss": 0.7337, + "step": 15381 + }, + { + "epoch": 0.471435576805198, + "grad_norm": 0.8589766842777545, + "learning_rate": 5.703176988190861e-06, + "loss": 0.4497, + "step": 15382 + }, + { + "epoch": 0.4714662253279392, + "grad_norm": 1.8713308177362231, + "learning_rate": 5.70268559956607e-06, + "loss": 0.6173, + "step": 15383 + }, + { + "epoch": 0.4714968738506804, + "grad_norm": 1.6464431862987428, + "learning_rate": 5.702194204017539e-06, + "loss": 0.6438, + "step": 15384 + }, + { + "epoch": 0.4715275223734216, + "grad_norm": 1.5937044468931707, + "learning_rate": 5.7017028015501075e-06, + "loss": 0.6689, + "step": 15385 + }, + { + "epoch": 0.4715581708961628, + "grad_norm": 1.7506835711766462, + "learning_rate": 5.701211392168619e-06, + "loss": 0.6367, + "step": 15386 + }, + { + "epoch": 0.471588819418904, + "grad_norm": 1.6627046258242368, + "learning_rate": 5.700719975877917e-06, + "loss": 0.6303, + "step": 15387 + }, + { + "epoch": 0.4716194679416452, + "grad_norm": 1.6786133055341086, + "learning_rate": 5.70022855268284e-06, + "loss": 0.7209, + "step": 15388 + }, + { + "epoch": 0.4716501164643864, + "grad_norm": 1.8091631294297499, + "learning_rate": 5.6997371225882335e-06, + "loss": 0.666, + "step": 15389 + }, + { + "epoch": 0.47168076498712763, + "grad_norm": 0.8140956735330429, + "learning_rate": 5.699245685598936e-06, + "loss": 0.4463, + "step": 15390 + }, + { + "epoch": 0.47171141350986884, + "grad_norm": 1.6034552786750857, + "learning_rate": 5.698754241719795e-06, + "loss": 0.7129, + "step": 15391 + }, + { + "epoch": 0.47174206203261004, + "grad_norm": 1.5827359808971335, + "learning_rate": 5.698262790955647e-06, + "loss": 0.667, + "step": 15392 + }, + { + "epoch": 0.47177271055535125, + "grad_norm": 1.7184585401496169, + "learning_rate": 5.697771333311338e-06, + "loss": 0.6025, + "step": 15393 + }, + { + "epoch": 0.47180335907809245, + "grad_norm": 1.8618852401145904, + "learning_rate": 5.697279868791709e-06, + "loss": 0.6871, + "step": 15394 + }, + { + "epoch": 0.47183400760083366, + "grad_norm": 1.5657790523717543, + "learning_rate": 5.696788397401603e-06, + "loss": 0.5838, + "step": 15395 + }, + { + "epoch": 0.47186465612357487, + "grad_norm": 2.1065597177177278, + "learning_rate": 5.696296919145864e-06, + "loss": 0.6491, + "step": 15396 + }, + { + "epoch": 0.47189530464631607, + "grad_norm": 1.9543870387733442, + "learning_rate": 5.695805434029331e-06, + "loss": 0.62, + "step": 15397 + }, + { + "epoch": 0.4719259531690573, + "grad_norm": 1.8344680247422591, + "learning_rate": 5.695313942056852e-06, + "loss": 0.6657, + "step": 15398 + }, + { + "epoch": 0.4719566016917985, + "grad_norm": 1.6846381384744842, + "learning_rate": 5.6948224432332655e-06, + "loss": 0.6544, + "step": 15399 + }, + { + "epoch": 
0.47198725021453963, + "grad_norm": 1.6147268963144097, + "learning_rate": 5.694330937563415e-06, + "loss": 0.6394, + "step": 15400 + }, + { + "epoch": 0.47201789873728084, + "grad_norm": 1.6357211980001114, + "learning_rate": 5.693839425052145e-06, + "loss": 0.6246, + "step": 15401 + }, + { + "epoch": 0.47204854726002204, + "grad_norm": 1.6927399984896738, + "learning_rate": 5.6933479057042975e-06, + "loss": 0.6739, + "step": 15402 + }, + { + "epoch": 0.47207919578276325, + "grad_norm": 0.83356458453688, + "learning_rate": 5.692856379524715e-06, + "loss": 0.4582, + "step": 15403 + }, + { + "epoch": 0.47210984430550446, + "grad_norm": 1.7725002693453424, + "learning_rate": 5.6923648465182415e-06, + "loss": 0.5445, + "step": 15404 + }, + { + "epoch": 0.47214049282824566, + "grad_norm": 1.7163231702131232, + "learning_rate": 5.6918733066897195e-06, + "loss": 0.6787, + "step": 15405 + }, + { + "epoch": 0.47217114135098687, + "grad_norm": 1.9603665874062464, + "learning_rate": 5.691381760043995e-06, + "loss": 0.5801, + "step": 15406 + }, + { + "epoch": 0.4722017898737281, + "grad_norm": 0.7865545860854599, + "learning_rate": 5.6908902065859065e-06, + "loss": 0.4437, + "step": 15407 + }, + { + "epoch": 0.4722324383964693, + "grad_norm": 1.6880776695263866, + "learning_rate": 5.690398646320302e-06, + "loss": 0.7309, + "step": 15408 + }, + { + "epoch": 0.4722630869192105, + "grad_norm": 1.8866379399185822, + "learning_rate": 5.6899070792520206e-06, + "loss": 0.6441, + "step": 15409 + }, + { + "epoch": 0.4722937354419517, + "grad_norm": 1.5729228496846799, + "learning_rate": 5.68941550538591e-06, + "loss": 0.6545, + "step": 15410 + }, + { + "epoch": 0.4723243839646929, + "grad_norm": 1.410986477251837, + "learning_rate": 5.6889239247268125e-06, + "loss": 0.6472, + "step": 15411 + }, + { + "epoch": 0.4723550324874341, + "grad_norm": 1.8685650748162517, + "learning_rate": 5.68843233727957e-06, + "loss": 0.6304, + "step": 15412 + }, + { + "epoch": 0.4723856810101753, + "grad_norm": 1.7371435329218123, + "learning_rate": 5.687940743049028e-06, + "loss": 0.5405, + "step": 15413 + }, + { + "epoch": 0.4724163295329165, + "grad_norm": 2.2713301816430467, + "learning_rate": 5.6874491420400305e-06, + "loss": 0.7014, + "step": 15414 + }, + { + "epoch": 0.4724469780556577, + "grad_norm": 1.752102678649657, + "learning_rate": 5.68695753425742e-06, + "loss": 0.7025, + "step": 15415 + }, + { + "epoch": 0.4724776265783989, + "grad_norm": 0.809975469247889, + "learning_rate": 5.686465919706041e-06, + "loss": 0.4569, + "step": 15416 + }, + { + "epoch": 0.47250827510114013, + "grad_norm": 1.9950550602051478, + "learning_rate": 5.685974298390739e-06, + "loss": 0.6342, + "step": 15417 + }, + { + "epoch": 0.47253892362388134, + "grad_norm": 1.7346463624262958, + "learning_rate": 5.685482670316356e-06, + "loss": 0.6771, + "step": 15418 + }, + { + "epoch": 0.47256957214662254, + "grad_norm": 1.8043902221937422, + "learning_rate": 5.6849910354877355e-06, + "loss": 0.6907, + "step": 15419 + }, + { + "epoch": 0.47260022066936375, + "grad_norm": 1.7292877058918072, + "learning_rate": 5.684499393909724e-06, + "loss": 0.5575, + "step": 15420 + }, + { + "epoch": 0.47263086919210495, + "grad_norm": 0.7891527730656096, + "learning_rate": 5.6840077455871655e-06, + "loss": 0.4429, + "step": 15421 + }, + { + "epoch": 0.47266151771484616, + "grad_norm": 1.662570530558954, + "learning_rate": 5.683516090524903e-06, + "loss": 0.6349, + "step": 15422 + }, + { + "epoch": 0.47269216623758736, + "grad_norm": 1.772029443791544, + 
"learning_rate": 5.6830244287277814e-06, + "loss": 0.5903, + "step": 15423 + }, + { + "epoch": 0.47272281476032857, + "grad_norm": 1.5428238842387811, + "learning_rate": 5.682532760200646e-06, + "loss": 0.5991, + "step": 15424 + }, + { + "epoch": 0.4727534632830698, + "grad_norm": 1.821685473482376, + "learning_rate": 5.68204108494834e-06, + "loss": 0.8071, + "step": 15425 + }, + { + "epoch": 0.472784111805811, + "grad_norm": 2.004035457907489, + "learning_rate": 5.681549402975709e-06, + "loss": 0.7268, + "step": 15426 + }, + { + "epoch": 0.4728147603285522, + "grad_norm": 1.6177425709537347, + "learning_rate": 5.681057714287597e-06, + "loss": 0.5799, + "step": 15427 + }, + { + "epoch": 0.4728454088512934, + "grad_norm": 1.7108024675960278, + "learning_rate": 5.68056601888885e-06, + "loss": 0.6243, + "step": 15428 + }, + { + "epoch": 0.4728760573740346, + "grad_norm": 0.830837692327954, + "learning_rate": 5.680074316784312e-06, + "loss": 0.434, + "step": 15429 + }, + { + "epoch": 0.4729067058967758, + "grad_norm": 1.5312459523455786, + "learning_rate": 5.679582607978826e-06, + "loss": 0.5369, + "step": 15430 + }, + { + "epoch": 0.47293735441951695, + "grad_norm": 1.6323867489038737, + "learning_rate": 5.6790908924772395e-06, + "loss": 0.7124, + "step": 15431 + }, + { + "epoch": 0.47296800294225816, + "grad_norm": 0.754140696131826, + "learning_rate": 5.6785991702843976e-06, + "loss": 0.4355, + "step": 15432 + }, + { + "epoch": 0.47299865146499936, + "grad_norm": 1.67795683944206, + "learning_rate": 5.6781074414051444e-06, + "loss": 0.6746, + "step": 15433 + }, + { + "epoch": 0.47302929998774057, + "grad_norm": 1.8039461810635964, + "learning_rate": 5.677615705844325e-06, + "loss": 0.6682, + "step": 15434 + }, + { + "epoch": 0.4730599485104818, + "grad_norm": 1.7581545899644249, + "learning_rate": 5.677123963606783e-06, + "loss": 0.5576, + "step": 15435 + }, + { + "epoch": 0.473090597033223, + "grad_norm": 1.4378611472575276, + "learning_rate": 5.676632214697367e-06, + "loss": 0.6139, + "step": 15436 + }, + { + "epoch": 0.4731212455559642, + "grad_norm": 1.6772841614754057, + "learning_rate": 5.67614045912092e-06, + "loss": 0.6772, + "step": 15437 + }, + { + "epoch": 0.4731518940787054, + "grad_norm": 1.822537478718402, + "learning_rate": 5.675648696882288e-06, + "loss": 0.6959, + "step": 15438 + }, + { + "epoch": 0.4731825426014466, + "grad_norm": 1.6789924375331204, + "learning_rate": 5.675156927986316e-06, + "loss": 0.6473, + "step": 15439 + }, + { + "epoch": 0.4732131911241878, + "grad_norm": 1.5964482101607675, + "learning_rate": 5.674665152437851e-06, + "loss": 0.6174, + "step": 15440 + }, + { + "epoch": 0.473243839646929, + "grad_norm": 0.816301118786572, + "learning_rate": 5.674173370241738e-06, + "loss": 0.431, + "step": 15441 + }, + { + "epoch": 0.4732744881696702, + "grad_norm": 1.7571656546057848, + "learning_rate": 5.673681581402821e-06, + "loss": 0.6819, + "step": 15442 + }, + { + "epoch": 0.4733051366924114, + "grad_norm": 1.825838093879898, + "learning_rate": 5.6731897859259475e-06, + "loss": 0.713, + "step": 15443 + }, + { + "epoch": 0.4733357852151526, + "grad_norm": 1.5985376943805123, + "learning_rate": 5.672697983815962e-06, + "loss": 0.6254, + "step": 15444 + }, + { + "epoch": 0.47336643373789383, + "grad_norm": 0.9401603708105577, + "learning_rate": 5.672206175077715e-06, + "loss": 0.4269, + "step": 15445 + }, + { + "epoch": 0.47339708226063504, + "grad_norm": 1.7425100453269824, + "learning_rate": 5.671714359716044e-06, + "loss": 0.6589, + "step": 15446 + }, + { + 
"epoch": 0.47342773078337624, + "grad_norm": 1.716044889081237, + "learning_rate": 5.671222537735802e-06, + "loss": 0.7123, + "step": 15447 + }, + { + "epoch": 0.47345837930611745, + "grad_norm": 1.7219126101659372, + "learning_rate": 5.670730709141831e-06, + "loss": 0.6978, + "step": 15448 + }, + { + "epoch": 0.47348902782885866, + "grad_norm": 2.0177365531069578, + "learning_rate": 5.67023887393898e-06, + "loss": 0.7142, + "step": 15449 + }, + { + "epoch": 0.47351967635159986, + "grad_norm": 1.6777377819379096, + "learning_rate": 5.6697470321320936e-06, + "loss": 0.5876, + "step": 15450 + }, + { + "epoch": 0.47355032487434107, + "grad_norm": 1.8650971247186574, + "learning_rate": 5.669255183726019e-06, + "loss": 0.6965, + "step": 15451 + }, + { + "epoch": 0.47358097339708227, + "grad_norm": 1.8170359420279198, + "learning_rate": 5.6687633287256e-06, + "loss": 0.7209, + "step": 15452 + }, + { + "epoch": 0.4736116219198235, + "grad_norm": 0.8764290692905162, + "learning_rate": 5.668271467135686e-06, + "loss": 0.4788, + "step": 15453 + }, + { + "epoch": 0.4736422704425647, + "grad_norm": 1.4910091264024439, + "learning_rate": 5.667779598961122e-06, + "loss": 0.5993, + "step": 15454 + }, + { + "epoch": 0.4736729189653059, + "grad_norm": 2.109227883469829, + "learning_rate": 5.667287724206754e-06, + "loss": 0.7047, + "step": 15455 + }, + { + "epoch": 0.4737035674880471, + "grad_norm": 1.5363143264244963, + "learning_rate": 5.666795842877432e-06, + "loss": 0.5974, + "step": 15456 + }, + { + "epoch": 0.4737342160107883, + "grad_norm": 1.7554539004266738, + "learning_rate": 5.666303954977998e-06, + "loss": 0.6511, + "step": 15457 + }, + { + "epoch": 0.4737648645335295, + "grad_norm": 1.5185403606752326, + "learning_rate": 5.6658120605133004e-06, + "loss": 0.6247, + "step": 15458 + }, + { + "epoch": 0.4737955130562707, + "grad_norm": 1.7001453083798104, + "learning_rate": 5.6653201594881855e-06, + "loss": 0.6823, + "step": 15459 + }, + { + "epoch": 0.4738261615790119, + "grad_norm": 1.61185931503769, + "learning_rate": 5.664828251907504e-06, + "loss": 0.6625, + "step": 15460 + }, + { + "epoch": 0.4738568101017531, + "grad_norm": 1.8282768548847317, + "learning_rate": 5.664336337776097e-06, + "loss": 0.6338, + "step": 15461 + }, + { + "epoch": 0.4738874586244943, + "grad_norm": 1.4511020272977548, + "learning_rate": 5.6638444170988136e-06, + "loss": 0.6113, + "step": 15462 + }, + { + "epoch": 0.4739181071472355, + "grad_norm": 0.7779504020987157, + "learning_rate": 5.6633524898805015e-06, + "loss": 0.4559, + "step": 15463 + }, + { + "epoch": 0.4739487556699767, + "grad_norm": 1.6579556582426402, + "learning_rate": 5.662860556126009e-06, + "loss": 0.6666, + "step": 15464 + }, + { + "epoch": 0.4739794041927179, + "grad_norm": 1.5562531288093215, + "learning_rate": 5.662368615840182e-06, + "loss": 0.4865, + "step": 15465 + }, + { + "epoch": 0.4740100527154591, + "grad_norm": 0.840914634136671, + "learning_rate": 5.661876669027866e-06, + "loss": 0.4345, + "step": 15466 + }, + { + "epoch": 0.4740407012382003, + "grad_norm": 1.644418376057417, + "learning_rate": 5.661384715693911e-06, + "loss": 0.5534, + "step": 15467 + }, + { + "epoch": 0.4740713497609415, + "grad_norm": 1.6902600747260668, + "learning_rate": 5.660892755843162e-06, + "loss": 0.6699, + "step": 15468 + }, + { + "epoch": 0.4741019982836827, + "grad_norm": 0.7729899829324477, + "learning_rate": 5.6604007894804695e-06, + "loss": 0.445, + "step": 15469 + }, + { + "epoch": 0.4741326468064239, + "grad_norm": 1.5820941284600258, + 
"learning_rate": 5.659908816610677e-06, + "loss": 0.6452, + "step": 15470 + }, + { + "epoch": 0.4741632953291651, + "grad_norm": 1.5450605215544297, + "learning_rate": 5.659416837238636e-06, + "loss": 0.63, + "step": 15471 + }, + { + "epoch": 0.47419394385190633, + "grad_norm": 1.8045104819131237, + "learning_rate": 5.658924851369191e-06, + "loss": 0.6514, + "step": 15472 + }, + { + "epoch": 0.47422459237464754, + "grad_norm": 1.7270009988909556, + "learning_rate": 5.658432859007191e-06, + "loss": 0.6934, + "step": 15473 + }, + { + "epoch": 0.47425524089738874, + "grad_norm": 1.9851646169025834, + "learning_rate": 5.657940860157484e-06, + "loss": 0.6637, + "step": 15474 + }, + { + "epoch": 0.47428588942012995, + "grad_norm": 1.7380968835443777, + "learning_rate": 5.657448854824916e-06, + "loss": 0.6057, + "step": 15475 + }, + { + "epoch": 0.47431653794287115, + "grad_norm": 1.8552283282747923, + "learning_rate": 5.656956843014338e-06, + "loss": 0.6655, + "step": 15476 + }, + { + "epoch": 0.47434718646561236, + "grad_norm": 1.787781093211889, + "learning_rate": 5.656464824730595e-06, + "loss": 0.7087, + "step": 15477 + }, + { + "epoch": 0.47437783498835356, + "grad_norm": 1.5545905446797201, + "learning_rate": 5.655972799978536e-06, + "loss": 0.6156, + "step": 15478 + }, + { + "epoch": 0.47440848351109477, + "grad_norm": 0.7944217690621942, + "learning_rate": 5.655480768763011e-06, + "loss": 0.4691, + "step": 15479 + }, + { + "epoch": 0.474439132033836, + "grad_norm": 0.7664186571804441, + "learning_rate": 5.654988731088864e-06, + "loss": 0.43, + "step": 15480 + }, + { + "epoch": 0.4744697805565772, + "grad_norm": 1.7208063986095954, + "learning_rate": 5.654496686960946e-06, + "loss": 0.7042, + "step": 15481 + }, + { + "epoch": 0.4745004290793184, + "grad_norm": 0.7933061595124745, + "learning_rate": 5.654004636384105e-06, + "loss": 0.4583, + "step": 15482 + }, + { + "epoch": 0.4745310776020596, + "grad_norm": 1.754999064195664, + "learning_rate": 5.653512579363189e-06, + "loss": 0.6361, + "step": 15483 + }, + { + "epoch": 0.4745617261248008, + "grad_norm": 1.6849058380689685, + "learning_rate": 5.653020515903047e-06, + "loss": 0.689, + "step": 15484 + }, + { + "epoch": 0.474592374647542, + "grad_norm": 0.7512360486332472, + "learning_rate": 5.652528446008526e-06, + "loss": 0.4156, + "step": 15485 + }, + { + "epoch": 0.4746230231702832, + "grad_norm": 1.686927197400893, + "learning_rate": 5.652036369684476e-06, + "loss": 0.6369, + "step": 15486 + }, + { + "epoch": 0.4746536716930244, + "grad_norm": 1.919688063551495, + "learning_rate": 5.651544286935744e-06, + "loss": 0.616, + "step": 15487 + }, + { + "epoch": 0.4746843202157656, + "grad_norm": 1.7525719029733853, + "learning_rate": 5.65105219776718e-06, + "loss": 0.6828, + "step": 15488 + }, + { + "epoch": 0.4747149687385068, + "grad_norm": 1.6571343324253363, + "learning_rate": 5.650560102183631e-06, + "loss": 0.5963, + "step": 15489 + }, + { + "epoch": 0.47474561726124803, + "grad_norm": 1.9598980322599082, + "learning_rate": 5.6500680001899474e-06, + "loss": 0.7026, + "step": 15490 + }, + { + "epoch": 0.47477626578398924, + "grad_norm": 1.5412733396152707, + "learning_rate": 5.649575891790978e-06, + "loss": 0.6398, + "step": 15491 + }, + { + "epoch": 0.47480691430673044, + "grad_norm": 1.734341110457538, + "learning_rate": 5.649083776991571e-06, + "loss": 0.607, + "step": 15492 + }, + { + "epoch": 0.4748375628294716, + "grad_norm": 0.8189212342904613, + "learning_rate": 5.6485916557965755e-06, + "loss": 0.4426, + "step": 15493 + }, + { 
+ "epoch": 0.4748682113522128, + "grad_norm": 1.8896561893076729, + "learning_rate": 5.648099528210839e-06, + "loss": 0.6364, + "step": 15494 + }, + { + "epoch": 0.474898859874954, + "grad_norm": 1.881748470621892, + "learning_rate": 5.647607394239215e-06, + "loss": 0.7073, + "step": 15495 + }, + { + "epoch": 0.4749295083976952, + "grad_norm": 1.479417889794829, + "learning_rate": 5.647115253886547e-06, + "loss": 0.6813, + "step": 15496 + }, + { + "epoch": 0.4749601569204364, + "grad_norm": 1.5384055221240505, + "learning_rate": 5.646623107157688e-06, + "loss": 0.6629, + "step": 15497 + }, + { + "epoch": 0.4749908054431776, + "grad_norm": 1.718932637304251, + "learning_rate": 5.646130954057485e-06, + "loss": 0.709, + "step": 15498 + }, + { + "epoch": 0.4750214539659188, + "grad_norm": 1.6856376798766473, + "learning_rate": 5.645638794590791e-06, + "loss": 0.6461, + "step": 15499 + }, + { + "epoch": 0.47505210248866003, + "grad_norm": 1.987366670400017, + "learning_rate": 5.6451466287624505e-06, + "loss": 0.6322, + "step": 15500 + }, + { + "epoch": 0.47508275101140124, + "grad_norm": 0.7924886851557382, + "learning_rate": 5.644654456577315e-06, + "loss": 0.4689, + "step": 15501 + }, + { + "epoch": 0.47511339953414244, + "grad_norm": 1.7758566048257967, + "learning_rate": 5.6441622780402365e-06, + "loss": 0.7002, + "step": 15502 + }, + { + "epoch": 0.47514404805688365, + "grad_norm": 1.611598699517901, + "learning_rate": 5.64367009315606e-06, + "loss": 0.7204, + "step": 15503 + }, + { + "epoch": 0.47517469657962486, + "grad_norm": 1.5759132067062087, + "learning_rate": 5.643177901929637e-06, + "loss": 0.6183, + "step": 15504 + }, + { + "epoch": 0.47520534510236606, + "grad_norm": 1.8760579026261686, + "learning_rate": 5.64268570436582e-06, + "loss": 0.6414, + "step": 15505 + }, + { + "epoch": 0.47523599362510727, + "grad_norm": 1.9783614665159077, + "learning_rate": 5.642193500469455e-06, + "loss": 0.6879, + "step": 15506 + }, + { + "epoch": 0.4752666421478485, + "grad_norm": 1.6169990838548705, + "learning_rate": 5.641701290245391e-06, + "loss": 0.6318, + "step": 15507 + }, + { + "epoch": 0.4752972906705897, + "grad_norm": 1.5911096811454053, + "learning_rate": 5.641209073698483e-06, + "loss": 0.5898, + "step": 15508 + }, + { + "epoch": 0.4753279391933309, + "grad_norm": 1.804579813084678, + "learning_rate": 5.640716850833576e-06, + "loss": 0.7345, + "step": 15509 + }, + { + "epoch": 0.4753585877160721, + "grad_norm": 0.7846693613683985, + "learning_rate": 5.640224621655526e-06, + "loss": 0.4496, + "step": 15510 + }, + { + "epoch": 0.4753892362388133, + "grad_norm": 0.7780770846903071, + "learning_rate": 5.6397323861691744e-06, + "loss": 0.4577, + "step": 15511 + }, + { + "epoch": 0.4754198847615545, + "grad_norm": 1.6915694875567588, + "learning_rate": 5.639240144379378e-06, + "loss": 0.655, + "step": 15512 + }, + { + "epoch": 0.4754505332842957, + "grad_norm": 1.6529625347872212, + "learning_rate": 5.638747896290984e-06, + "loss": 0.6766, + "step": 15513 + }, + { + "epoch": 0.4754811818070369, + "grad_norm": 1.728793554861915, + "learning_rate": 5.638255641908844e-06, + "loss": 0.6106, + "step": 15514 + }, + { + "epoch": 0.4755118303297781, + "grad_norm": 0.7875520048017741, + "learning_rate": 5.6377633812378076e-06, + "loss": 0.4418, + "step": 15515 + }, + { + "epoch": 0.4755424788525193, + "grad_norm": 1.5897993629763938, + "learning_rate": 5.6372711142827255e-06, + "loss": 0.6074, + "step": 15516 + }, + { + "epoch": 0.47557312737526053, + "grad_norm": 0.8053703758052364, + 
"learning_rate": 5.636778841048447e-06, + "loss": 0.4596, + "step": 15517 + }, + { + "epoch": 0.47560377589800173, + "grad_norm": 1.7972521464521396, + "learning_rate": 5.6362865615398255e-06, + "loss": 0.6283, + "step": 15518 + }, + { + "epoch": 0.47563442442074294, + "grad_norm": 0.81293461126903, + "learning_rate": 5.635794275761709e-06, + "loss": 0.4479, + "step": 15519 + }, + { + "epoch": 0.47566507294348415, + "grad_norm": 1.8525301062048631, + "learning_rate": 5.635301983718948e-06, + "loss": 0.5749, + "step": 15520 + }, + { + "epoch": 0.47569572146622535, + "grad_norm": 1.5491544902145036, + "learning_rate": 5.634809685416396e-06, + "loss": 0.6513, + "step": 15521 + }, + { + "epoch": 0.47572636998896656, + "grad_norm": 1.8977290142424985, + "learning_rate": 5.6343173808589e-06, + "loss": 0.6559, + "step": 15522 + }, + { + "epoch": 0.47575701851170776, + "grad_norm": 1.6832039994360215, + "learning_rate": 5.633825070051314e-06, + "loss": 0.6645, + "step": 15523 + }, + { + "epoch": 0.4757876670344489, + "grad_norm": 1.8411277663368941, + "learning_rate": 5.633332752998486e-06, + "loss": 0.5749, + "step": 15524 + }, + { + "epoch": 0.4758183155571901, + "grad_norm": 1.60534455963053, + "learning_rate": 5.632840429705269e-06, + "loss": 0.6392, + "step": 15525 + }, + { + "epoch": 0.4758489640799313, + "grad_norm": 1.9409461107712593, + "learning_rate": 5.632348100176513e-06, + "loss": 0.7559, + "step": 15526 + }, + { + "epoch": 0.47587961260267253, + "grad_norm": 0.8654519454960313, + "learning_rate": 5.63185576441707e-06, + "loss": 0.4728, + "step": 15527 + }, + { + "epoch": 0.47591026112541374, + "grad_norm": 1.7425351439795411, + "learning_rate": 5.631363422431789e-06, + "loss": 0.5893, + "step": 15528 + }, + { + "epoch": 0.47594090964815494, + "grad_norm": 1.8034287357966114, + "learning_rate": 5.6308710742255255e-06, + "loss": 0.6269, + "step": 15529 + }, + { + "epoch": 0.47597155817089615, + "grad_norm": 1.7130612385167228, + "learning_rate": 5.630378719803126e-06, + "loss": 0.6683, + "step": 15530 + }, + { + "epoch": 0.47600220669363735, + "grad_norm": 1.8984953115466603, + "learning_rate": 5.629886359169443e-06, + "loss": 0.6587, + "step": 15531 + }, + { + "epoch": 0.47603285521637856, + "grad_norm": 1.5796170250212163, + "learning_rate": 5.62939399232933e-06, + "loss": 0.5905, + "step": 15532 + }, + { + "epoch": 0.47606350373911976, + "grad_norm": 1.5715323829082242, + "learning_rate": 5.628901619287636e-06, + "loss": 0.5574, + "step": 15533 + }, + { + "epoch": 0.47609415226186097, + "grad_norm": 1.8791626368884362, + "learning_rate": 5.628409240049214e-06, + "loss": 0.6717, + "step": 15534 + }, + { + "epoch": 0.4761248007846022, + "grad_norm": 1.5938077708757987, + "learning_rate": 5.627916854618914e-06, + "loss": 0.6407, + "step": 15535 + }, + { + "epoch": 0.4761554493073434, + "grad_norm": 1.717222595805861, + "learning_rate": 5.62742446300159e-06, + "loss": 0.6917, + "step": 15536 + }, + { + "epoch": 0.4761860978300846, + "grad_norm": 1.938558382289328, + "learning_rate": 5.626932065202091e-06, + "loss": 0.6374, + "step": 15537 + }, + { + "epoch": 0.4762167463528258, + "grad_norm": 1.7012256179579708, + "learning_rate": 5.626439661225272e-06, + "loss": 0.5862, + "step": 15538 + }, + { + "epoch": 0.476247394875567, + "grad_norm": 1.8624569502356618, + "learning_rate": 5.625947251075979e-06, + "loss": 0.7282, + "step": 15539 + }, + { + "epoch": 0.4762780433983082, + "grad_norm": 1.8273464500307197, + "learning_rate": 5.6254548347590695e-06, + "loss": 0.6632, + "step": 15540 + 
}, + { + "epoch": 0.4763086919210494, + "grad_norm": 1.8183089520483169, + "learning_rate": 5.624962412279395e-06, + "loss": 0.6648, + "step": 15541 + }, + { + "epoch": 0.4763393404437906, + "grad_norm": 1.8202389720064491, + "learning_rate": 5.624469983641805e-06, + "loss": 0.5173, + "step": 15542 + }, + { + "epoch": 0.4763699889665318, + "grad_norm": 1.5332525816630531, + "learning_rate": 5.623977548851151e-06, + "loss": 0.5359, + "step": 15543 + }, + { + "epoch": 0.476400637489273, + "grad_norm": 1.65670474379285, + "learning_rate": 5.623485107912289e-06, + "loss": 0.601, + "step": 15544 + }, + { + "epoch": 0.47643128601201423, + "grad_norm": 1.7782441141026049, + "learning_rate": 5.622992660830066e-06, + "loss": 0.6449, + "step": 15545 + }, + { + "epoch": 0.47646193453475544, + "grad_norm": 1.6446102061576298, + "learning_rate": 5.622500207609338e-06, + "loss": 0.7251, + "step": 15546 + }, + { + "epoch": 0.47649258305749664, + "grad_norm": 0.7990946382112932, + "learning_rate": 5.622007748254957e-06, + "loss": 0.4362, + "step": 15547 + }, + { + "epoch": 0.47652323158023785, + "grad_norm": 0.8377296073899956, + "learning_rate": 5.6215152827717745e-06, + "loss": 0.4561, + "step": 15548 + }, + { + "epoch": 0.47655388010297905, + "grad_norm": 1.8677725397074028, + "learning_rate": 5.621022811164643e-06, + "loss": 0.6589, + "step": 15549 + }, + { + "epoch": 0.47658452862572026, + "grad_norm": 0.7645586918276411, + "learning_rate": 5.620530333438413e-06, + "loss": 0.4346, + "step": 15550 + }, + { + "epoch": 0.47661517714846147, + "grad_norm": 2.1099795527546323, + "learning_rate": 5.620037849597942e-06, + "loss": 0.7206, + "step": 15551 + }, + { + "epoch": 0.47664582567120267, + "grad_norm": 1.586889589532007, + "learning_rate": 5.619545359648075e-06, + "loss": 0.6323, + "step": 15552 + }, + { + "epoch": 0.4766764741939439, + "grad_norm": 0.7445736434761381, + "learning_rate": 5.6190528635936735e-06, + "loss": 0.4087, + "step": 15553 + }, + { + "epoch": 0.4767071227166851, + "grad_norm": 1.6053893911054078, + "learning_rate": 5.618560361439583e-06, + "loss": 0.5237, + "step": 15554 + }, + { + "epoch": 0.47673777123942623, + "grad_norm": 1.8139360389186776, + "learning_rate": 5.618067853190661e-06, + "loss": 0.6754, + "step": 15555 + }, + { + "epoch": 0.47676841976216744, + "grad_norm": 1.7416192727575008, + "learning_rate": 5.617575338851757e-06, + "loss": 0.6585, + "step": 15556 + }, + { + "epoch": 0.47679906828490864, + "grad_norm": 1.7472715884291887, + "learning_rate": 5.617082818427726e-06, + "loss": 0.6725, + "step": 15557 + }, + { + "epoch": 0.47682971680764985, + "grad_norm": 1.757446125786569, + "learning_rate": 5.6165902919234186e-06, + "loss": 0.6694, + "step": 15558 + }, + { + "epoch": 0.47686036533039106, + "grad_norm": 1.5740038552281257, + "learning_rate": 5.616097759343691e-06, + "loss": 0.5997, + "step": 15559 + }, + { + "epoch": 0.47689101385313226, + "grad_norm": 1.5311214964640032, + "learning_rate": 5.615605220693393e-06, + "loss": 0.5569, + "step": 15560 + }, + { + "epoch": 0.47692166237587347, + "grad_norm": 1.7640035651801176, + "learning_rate": 5.615112675977379e-06, + "loss": 0.701, + "step": 15561 + }, + { + "epoch": 0.4769523108986147, + "grad_norm": 2.1060929913151374, + "learning_rate": 5.614620125200504e-06, + "loss": 0.5983, + "step": 15562 + }, + { + "epoch": 0.4769829594213559, + "grad_norm": 1.6348999800721873, + "learning_rate": 5.614127568367618e-06, + "loss": 0.6781, + "step": 15563 + }, + { + "epoch": 0.4770136079440971, + "grad_norm": 
1.8118640666583852, + "learning_rate": 5.6136350054835785e-06, + "loss": 0.6431, + "step": 15564 + }, + { + "epoch": 0.4770442564668383, + "grad_norm": 1.6097345687178892, + "learning_rate": 5.6131424365532335e-06, + "loss": 0.6724, + "step": 15565 + }, + { + "epoch": 0.4770749049895795, + "grad_norm": 1.7293266491026056, + "learning_rate": 5.6126498615814405e-06, + "loss": 0.6609, + "step": 15566 + }, + { + "epoch": 0.4771055535123207, + "grad_norm": 1.7893939232841394, + "learning_rate": 5.612157280573052e-06, + "loss": 0.699, + "step": 15567 + }, + { + "epoch": 0.4771362020350619, + "grad_norm": 1.4928099674397612, + "learning_rate": 5.6116646935329215e-06, + "loss": 0.6314, + "step": 15568 + }, + { + "epoch": 0.4771668505578031, + "grad_norm": 1.8770323900333565, + "learning_rate": 5.611172100465902e-06, + "loss": 0.7063, + "step": 15569 + }, + { + "epoch": 0.4771974990805443, + "grad_norm": 1.6757565507209966, + "learning_rate": 5.610679501376847e-06, + "loss": 0.5922, + "step": 15570 + }, + { + "epoch": 0.4772281476032855, + "grad_norm": 1.552202591711004, + "learning_rate": 5.610186896270608e-06, + "loss": 0.5207, + "step": 15571 + }, + { + "epoch": 0.47725879612602673, + "grad_norm": 0.8890319779902363, + "learning_rate": 5.609694285152045e-06, + "loss": 0.4296, + "step": 15572 + }, + { + "epoch": 0.47728944464876794, + "grad_norm": 1.6500154741235182, + "learning_rate": 5.6092016680260074e-06, + "loss": 0.5972, + "step": 15573 + }, + { + "epoch": 0.47732009317150914, + "grad_norm": 1.6695740451547878, + "learning_rate": 5.6087090448973505e-06, + "loss": 0.6105, + "step": 15574 + }, + { + "epoch": 0.47735074169425035, + "grad_norm": 1.8476268273582428, + "learning_rate": 5.608216415770926e-06, + "loss": 0.6975, + "step": 15575 + }, + { + "epoch": 0.47738139021699155, + "grad_norm": 1.966366387315669, + "learning_rate": 5.60772378065159e-06, + "loss": 0.6709, + "step": 15576 + }, + { + "epoch": 0.47741203873973276, + "grad_norm": 2.1032519242407957, + "learning_rate": 5.607231139544197e-06, + "loss": 0.6647, + "step": 15577 + }, + { + "epoch": 0.47744268726247396, + "grad_norm": 1.6044138250258386, + "learning_rate": 5.606738492453599e-06, + "loss": 0.6191, + "step": 15578 + }, + { + "epoch": 0.47747333578521517, + "grad_norm": 1.7048485818277592, + "learning_rate": 5.606245839384653e-06, + "loss": 0.6991, + "step": 15579 + }, + { + "epoch": 0.4775039843079564, + "grad_norm": 1.477265903198017, + "learning_rate": 5.60575318034221e-06, + "loss": 0.6449, + "step": 15580 + }, + { + "epoch": 0.4775346328306976, + "grad_norm": 1.819466959528439, + "learning_rate": 5.605260515331128e-06, + "loss": 0.6402, + "step": 15581 + }, + { + "epoch": 0.4775652813534388, + "grad_norm": 0.8116012431780533, + "learning_rate": 5.604767844356256e-06, + "loss": 0.4439, + "step": 15582 + }, + { + "epoch": 0.47759592987618, + "grad_norm": 1.8976645308593283, + "learning_rate": 5.604275167422455e-06, + "loss": 0.6413, + "step": 15583 + }, + { + "epoch": 0.4776265783989212, + "grad_norm": 1.6749811186919041, + "learning_rate": 5.603782484534574e-06, + "loss": 0.6556, + "step": 15584 + }, + { + "epoch": 0.4776572269216624, + "grad_norm": 1.9157857130455715, + "learning_rate": 5.60328979569747e-06, + "loss": 0.6362, + "step": 15585 + }, + { + "epoch": 0.47768787544440355, + "grad_norm": 1.5572251157237975, + "learning_rate": 5.6027971009159975e-06, + "loss": 0.5913, + "step": 15586 + }, + { + "epoch": 0.47771852396714476, + "grad_norm": 1.975077616383793, + "learning_rate": 5.602304400195012e-06, + "loss": 
0.6921, + "step": 15587 + }, + { + "epoch": 0.47774917248988596, + "grad_norm": 1.8083686370438878, + "learning_rate": 5.601811693539366e-06, + "loss": 0.6373, + "step": 15588 + }, + { + "epoch": 0.47777982101262717, + "grad_norm": 0.7698683495515339, + "learning_rate": 5.601318980953916e-06, + "loss": 0.4469, + "step": 15589 + }, + { + "epoch": 0.4778104695353684, + "grad_norm": 1.698929625438872, + "learning_rate": 5.600826262443516e-06, + "loss": 0.5971, + "step": 15590 + }, + { + "epoch": 0.4778411180581096, + "grad_norm": 1.7688677936885404, + "learning_rate": 5.6003335380130205e-06, + "loss": 0.6269, + "step": 15591 + }, + { + "epoch": 0.4778717665808508, + "grad_norm": 1.7773407170855278, + "learning_rate": 5.599840807667286e-06, + "loss": 0.6782, + "step": 15592 + }, + { + "epoch": 0.477902415103592, + "grad_norm": 1.6863076529377763, + "learning_rate": 5.599348071411166e-06, + "loss": 0.603, + "step": 15593 + }, + { + "epoch": 0.4779330636263332, + "grad_norm": 1.7216771627048855, + "learning_rate": 5.598855329249516e-06, + "loss": 0.7271, + "step": 15594 + }, + { + "epoch": 0.4779637121490744, + "grad_norm": 1.7331383230166535, + "learning_rate": 5.598362581187192e-06, + "loss": 0.6266, + "step": 15595 + }, + { + "epoch": 0.4779943606718156, + "grad_norm": 1.786961303316964, + "learning_rate": 5.597869827229048e-06, + "loss": 0.6108, + "step": 15596 + }, + { + "epoch": 0.4780250091945568, + "grad_norm": 1.7247579585714095, + "learning_rate": 5.597377067379939e-06, + "loss": 0.6241, + "step": 15597 + }, + { + "epoch": 0.478055657717298, + "grad_norm": 1.5540757193005086, + "learning_rate": 5.596884301644722e-06, + "loss": 0.5818, + "step": 15598 + }, + { + "epoch": 0.4780863062400392, + "grad_norm": 1.6472451304742606, + "learning_rate": 5.59639153002825e-06, + "loss": 0.5648, + "step": 15599 + }, + { + "epoch": 0.47811695476278043, + "grad_norm": 1.9116180284450104, + "learning_rate": 5.595898752535381e-06, + "loss": 0.6554, + "step": 15600 + }, + { + "epoch": 0.47814760328552164, + "grad_norm": 1.709798007197622, + "learning_rate": 5.5954059691709674e-06, + "loss": 0.6111, + "step": 15601 + }, + { + "epoch": 0.47817825180826284, + "grad_norm": 2.0221135060318973, + "learning_rate": 5.594913179939868e-06, + "loss": 0.6523, + "step": 15602 + }, + { + "epoch": 0.47820890033100405, + "grad_norm": 1.6460440484324923, + "learning_rate": 5.594420384846937e-06, + "loss": 0.5766, + "step": 15603 + }, + { + "epoch": 0.47823954885374526, + "grad_norm": 0.8693781213211843, + "learning_rate": 5.593927583897027e-06, + "loss": 0.4735, + "step": 15604 + }, + { + "epoch": 0.47827019737648646, + "grad_norm": 1.5905833404360576, + "learning_rate": 5.593434777095001e-06, + "loss": 0.5293, + "step": 15605 + }, + { + "epoch": 0.47830084589922767, + "grad_norm": 1.7818658926969055, + "learning_rate": 5.5929419644457075e-06, + "loss": 0.6347, + "step": 15606 + }, + { + "epoch": 0.4783314944219689, + "grad_norm": 0.7762191351084722, + "learning_rate": 5.5924491459540055e-06, + "loss": 0.4432, + "step": 15607 + }, + { + "epoch": 0.4783621429447101, + "grad_norm": 0.7722137631110161, + "learning_rate": 5.59195632162475e-06, + "loss": 0.4327, + "step": 15608 + }, + { + "epoch": 0.4783927914674513, + "grad_norm": 0.7949009098874537, + "learning_rate": 5.591463491462799e-06, + "loss": 0.4656, + "step": 15609 + }, + { + "epoch": 0.4784234399901925, + "grad_norm": 1.6259145592328128, + "learning_rate": 5.5909706554730055e-06, + "loss": 0.629, + "step": 15610 + }, + { + "epoch": 0.4784540885129337, + 
"grad_norm": 1.6526604324454144, + "learning_rate": 5.5904778136602274e-06, + "loss": 0.6461, + "step": 15611 + }, + { + "epoch": 0.4784847370356749, + "grad_norm": 1.7867521897621095, + "learning_rate": 5.589984966029319e-06, + "loss": 0.6528, + "step": 15612 + }, + { + "epoch": 0.4785153855584161, + "grad_norm": 0.7484012172388018, + "learning_rate": 5.5894921125851395e-06, + "loss": 0.4136, + "step": 15613 + }, + { + "epoch": 0.4785460340811573, + "grad_norm": 1.7425635611961117, + "learning_rate": 5.588999253332543e-06, + "loss": 0.5768, + "step": 15614 + }, + { + "epoch": 0.4785766826038985, + "grad_norm": 1.5550781070084223, + "learning_rate": 5.588506388276386e-06, + "loss": 0.6891, + "step": 15615 + }, + { + "epoch": 0.4786073311266397, + "grad_norm": 1.709947174514684, + "learning_rate": 5.5880135174215254e-06, + "loss": 0.7044, + "step": 15616 + }, + { + "epoch": 0.4786379796493809, + "grad_norm": 1.5886867256407573, + "learning_rate": 5.587520640772816e-06, + "loss": 0.5897, + "step": 15617 + }, + { + "epoch": 0.4786686281721221, + "grad_norm": 1.6007577249088158, + "learning_rate": 5.587027758335118e-06, + "loss": 0.6324, + "step": 15618 + }, + { + "epoch": 0.4786992766948633, + "grad_norm": 1.5512604085254218, + "learning_rate": 5.586534870113282e-06, + "loss": 0.6803, + "step": 15619 + }, + { + "epoch": 0.4787299252176045, + "grad_norm": 1.7576724940817925, + "learning_rate": 5.58604197611217e-06, + "loss": 0.6107, + "step": 15620 + }, + { + "epoch": 0.4787605737403457, + "grad_norm": 1.8173678999584204, + "learning_rate": 5.585549076336635e-06, + "loss": 0.6398, + "step": 15621 + }, + { + "epoch": 0.4787912222630869, + "grad_norm": 1.755779921052031, + "learning_rate": 5.585056170791537e-06, + "loss": 0.6511, + "step": 15622 + }, + { + "epoch": 0.4788218707858281, + "grad_norm": 1.6724254968731276, + "learning_rate": 5.584563259481729e-06, + "loss": 0.6086, + "step": 15623 + }, + { + "epoch": 0.4788525193085693, + "grad_norm": 1.5961965210515088, + "learning_rate": 5.584070342412071e-06, + "loss": 0.6309, + "step": 15624 + }, + { + "epoch": 0.4788831678313105, + "grad_norm": 1.7793921711392748, + "learning_rate": 5.583577419587417e-06, + "loss": 0.6291, + "step": 15625 + }, + { + "epoch": 0.4789138163540517, + "grad_norm": 1.9845772304818186, + "learning_rate": 5.583084491012628e-06, + "loss": 0.6597, + "step": 15626 + }, + { + "epoch": 0.47894446487679293, + "grad_norm": 1.4276944027134046, + "learning_rate": 5.582591556692556e-06, + "loss": 0.6777, + "step": 15627 + }, + { + "epoch": 0.47897511339953414, + "grad_norm": 2.0200834673044104, + "learning_rate": 5.58209861663206e-06, + "loss": 0.7198, + "step": 15628 + }, + { + "epoch": 0.47900576192227534, + "grad_norm": 1.8122000519142376, + "learning_rate": 5.5816056708359975e-06, + "loss": 0.7022, + "step": 15629 + }, + { + "epoch": 0.47903641044501655, + "grad_norm": 1.4797547541118696, + "learning_rate": 5.581112719309227e-06, + "loss": 0.6124, + "step": 15630 + }, + { + "epoch": 0.47906705896775775, + "grad_norm": 1.625455800137405, + "learning_rate": 5.580619762056604e-06, + "loss": 0.6117, + "step": 15631 + }, + { + "epoch": 0.47909770749049896, + "grad_norm": 1.8752368364225718, + "learning_rate": 5.580126799082984e-06, + "loss": 0.5393, + "step": 15632 + }, + { + "epoch": 0.47912835601324016, + "grad_norm": 2.14980039168143, + "learning_rate": 5.57963383039323e-06, + "loss": 0.6762, + "step": 15633 + }, + { + "epoch": 0.47915900453598137, + "grad_norm": 1.6939497108376567, + "learning_rate": 5.5791408559921925e-06, + 
"loss": 0.612, + "step": 15634 + }, + { + "epoch": 0.4791896530587226, + "grad_norm": 1.8126544226637673, + "learning_rate": 5.578647875884733e-06, + "loss": 0.6479, + "step": 15635 + }, + { + "epoch": 0.4792203015814638, + "grad_norm": 0.8657916172878326, + "learning_rate": 5.578154890075707e-06, + "loss": 0.4595, + "step": 15636 + }, + { + "epoch": 0.479250950104205, + "grad_norm": 1.5499544341820883, + "learning_rate": 5.577661898569975e-06, + "loss": 0.6866, + "step": 15637 + }, + { + "epoch": 0.4792815986269462, + "grad_norm": 1.7134083646487333, + "learning_rate": 5.57716890137239e-06, + "loss": 0.5714, + "step": 15638 + }, + { + "epoch": 0.4793122471496874, + "grad_norm": 0.7879964076346224, + "learning_rate": 5.576675898487813e-06, + "loss": 0.4422, + "step": 15639 + }, + { + "epoch": 0.4793428956724286, + "grad_norm": 1.5932975257622493, + "learning_rate": 5.5761828899211e-06, + "loss": 0.652, + "step": 15640 + }, + { + "epoch": 0.4793735441951698, + "grad_norm": 0.8220474624561189, + "learning_rate": 5.575689875677112e-06, + "loss": 0.4567, + "step": 15641 + }, + { + "epoch": 0.479404192717911, + "grad_norm": 1.864955029049998, + "learning_rate": 5.575196855760703e-06, + "loss": 0.6573, + "step": 15642 + }, + { + "epoch": 0.4794348412406522, + "grad_norm": 1.7235525630156625, + "learning_rate": 5.574703830176732e-06, + "loss": 0.6282, + "step": 15643 + }, + { + "epoch": 0.4794654897633934, + "grad_norm": 1.8145996836478624, + "learning_rate": 5.574210798930056e-06, + "loss": 0.7177, + "step": 15644 + }, + { + "epoch": 0.47949613828613463, + "grad_norm": 1.814619179015747, + "learning_rate": 5.573717762025537e-06, + "loss": 0.6658, + "step": 15645 + }, + { + "epoch": 0.47952678680887584, + "grad_norm": 1.6881071861989227, + "learning_rate": 5.573224719468028e-06, + "loss": 0.5687, + "step": 15646 + }, + { + "epoch": 0.47955743533161704, + "grad_norm": 0.7786817045453475, + "learning_rate": 5.572731671262389e-06, + "loss": 0.4444, + "step": 15647 + }, + { + "epoch": 0.4795880838543582, + "grad_norm": 1.5216321899671466, + "learning_rate": 5.572238617413479e-06, + "loss": 0.622, + "step": 15648 + }, + { + "epoch": 0.4796187323770994, + "grad_norm": 1.6454105596478699, + "learning_rate": 5.5717455579261555e-06, + "loss": 0.6338, + "step": 15649 + }, + { + "epoch": 0.4796493808998406, + "grad_norm": 1.706489676598938, + "learning_rate": 5.571252492805278e-06, + "loss": 0.6815, + "step": 15650 + }, + { + "epoch": 0.4796800294225818, + "grad_norm": 1.5026634941959698, + "learning_rate": 5.5707594220557005e-06, + "loss": 0.561, + "step": 15651 + }, + { + "epoch": 0.479710677945323, + "grad_norm": 1.62118140099748, + "learning_rate": 5.570266345682287e-06, + "loss": 0.6042, + "step": 15652 + }, + { + "epoch": 0.4797413264680642, + "grad_norm": 1.7241707873035241, + "learning_rate": 5.569773263689893e-06, + "loss": 0.5367, + "step": 15653 + }, + { + "epoch": 0.4797719749908054, + "grad_norm": 1.62143673809291, + "learning_rate": 5.569280176083376e-06, + "loss": 0.5434, + "step": 15654 + }, + { + "epoch": 0.47980262351354663, + "grad_norm": 1.470905245675293, + "learning_rate": 5.568787082867596e-06, + "loss": 0.5718, + "step": 15655 + }, + { + "epoch": 0.47983327203628784, + "grad_norm": 0.8638944008749874, + "learning_rate": 5.5682939840474126e-06, + "loss": 0.425, + "step": 15656 + }, + { + "epoch": 0.47986392055902904, + "grad_norm": 1.8429533036957715, + "learning_rate": 5.567800879627682e-06, + "loss": 0.7527, + "step": 15657 + }, + { + "epoch": 0.47989456908177025, + "grad_norm": 
1.6560135807423926, + "learning_rate": 5.5673077696132635e-06, + "loss": 0.6278, + "step": 15658 + }, + { + "epoch": 0.47992521760451146, + "grad_norm": 1.8138516556811852, + "learning_rate": 5.566814654009017e-06, + "loss": 0.7181, + "step": 15659 + }, + { + "epoch": 0.47995586612725266, + "grad_norm": 1.8449466692250251, + "learning_rate": 5.566321532819802e-06, + "loss": 0.7625, + "step": 15660 + }, + { + "epoch": 0.47998651464999387, + "grad_norm": 1.5664780080537242, + "learning_rate": 5.565828406050476e-06, + "loss": 0.5854, + "step": 15661 + }, + { + "epoch": 0.4800171631727351, + "grad_norm": 1.6815059862684287, + "learning_rate": 5.5653352737058955e-06, + "loss": 0.6507, + "step": 15662 + }, + { + "epoch": 0.4800478116954763, + "grad_norm": 1.576926611889412, + "learning_rate": 5.5648421357909235e-06, + "loss": 0.5487, + "step": 15663 + }, + { + "epoch": 0.4800784602182175, + "grad_norm": 1.810270717134344, + "learning_rate": 5.564348992310417e-06, + "loss": 0.7322, + "step": 15664 + }, + { + "epoch": 0.4801091087409587, + "grad_norm": 1.7783340346842877, + "learning_rate": 5.563855843269237e-06, + "loss": 0.6703, + "step": 15665 + }, + { + "epoch": 0.4801397572636999, + "grad_norm": 1.6031449031547425, + "learning_rate": 5.563362688672238e-06, + "loss": 0.666, + "step": 15666 + }, + { + "epoch": 0.4801704057864411, + "grad_norm": 1.6062615830039904, + "learning_rate": 5.562869528524285e-06, + "loss": 0.6247, + "step": 15667 + }, + { + "epoch": 0.4802010543091823, + "grad_norm": 1.4603545481720022, + "learning_rate": 5.5623763628302345e-06, + "loss": 0.547, + "step": 15668 + }, + { + "epoch": 0.4802317028319235, + "grad_norm": 1.786682262969251, + "learning_rate": 5.561883191594945e-06, + "loss": 0.5992, + "step": 15669 + }, + { + "epoch": 0.4802623513546647, + "grad_norm": 1.9091633636967196, + "learning_rate": 5.561390014823276e-06, + "loss": 0.7749, + "step": 15670 + }, + { + "epoch": 0.4802929998774059, + "grad_norm": 1.658110604609876, + "learning_rate": 5.5608968325200875e-06, + "loss": 0.5899, + "step": 15671 + }, + { + "epoch": 0.48032364840014713, + "grad_norm": 0.8597043913670884, + "learning_rate": 5.56040364469024e-06, + "loss": 0.4564, + "step": 15672 + }, + { + "epoch": 0.48035429692288834, + "grad_norm": 1.70656110939501, + "learning_rate": 5.55991045133859e-06, + "loss": 0.6444, + "step": 15673 + }, + { + "epoch": 0.48038494544562954, + "grad_norm": 0.86753300282286, + "learning_rate": 5.559417252470002e-06, + "loss": 0.4493, + "step": 15674 + }, + { + "epoch": 0.48041559396837075, + "grad_norm": 0.7931415166601012, + "learning_rate": 5.55892404808933e-06, + "loss": 0.4456, + "step": 15675 + }, + { + "epoch": 0.48044624249111195, + "grad_norm": 0.7686561773066666, + "learning_rate": 5.55843083820144e-06, + "loss": 0.4411, + "step": 15676 + }, + { + "epoch": 0.48047689101385316, + "grad_norm": 0.7704106989481533, + "learning_rate": 5.557937622811185e-06, + "loss": 0.454, + "step": 15677 + }, + { + "epoch": 0.48050753953659436, + "grad_norm": 1.9553693928350577, + "learning_rate": 5.5574444019234285e-06, + "loss": 0.6486, + "step": 15678 + }, + { + "epoch": 0.4805381880593355, + "grad_norm": 1.6893632951814324, + "learning_rate": 5.556951175543029e-06, + "loss": 0.6058, + "step": 15679 + }, + { + "epoch": 0.4805688365820767, + "grad_norm": 1.5995744000812728, + "learning_rate": 5.556457943674849e-06, + "loss": 0.544, + "step": 15680 + }, + { + "epoch": 0.4805994851048179, + "grad_norm": 1.748500124338794, + "learning_rate": 5.555964706323746e-06, + "loss": 0.6812, + 
"step": 15681 + }, + { + "epoch": 0.48063013362755913, + "grad_norm": 1.7499480700532453, + "learning_rate": 5.555471463494579e-06, + "loss": 0.5746, + "step": 15682 + }, + { + "epoch": 0.48066078215030034, + "grad_norm": 1.6218779011941686, + "learning_rate": 5.554978215192211e-06, + "loss": 0.6751, + "step": 15683 + }, + { + "epoch": 0.48069143067304154, + "grad_norm": 1.9277919492571702, + "learning_rate": 5.554484961421499e-06, + "loss": 0.6448, + "step": 15684 + }, + { + "epoch": 0.48072207919578275, + "grad_norm": 0.908737019473093, + "learning_rate": 5.553991702187307e-06, + "loss": 0.4323, + "step": 15685 + }, + { + "epoch": 0.48075272771852395, + "grad_norm": 1.606974359449174, + "learning_rate": 5.55349843749449e-06, + "loss": 0.6326, + "step": 15686 + }, + { + "epoch": 0.48078337624126516, + "grad_norm": 1.5664645980077871, + "learning_rate": 5.553005167347914e-06, + "loss": 0.6535, + "step": 15687 + }, + { + "epoch": 0.48081402476400636, + "grad_norm": 1.628066331539518, + "learning_rate": 5.552511891752435e-06, + "loss": 0.585, + "step": 15688 + }, + { + "epoch": 0.48084467328674757, + "grad_norm": 1.736239206831339, + "learning_rate": 5.552018610712917e-06, + "loss": 0.6818, + "step": 15689 + }, + { + "epoch": 0.4808753218094888, + "grad_norm": 1.7010228473774307, + "learning_rate": 5.551525324234216e-06, + "loss": 0.5928, + "step": 15690 + }, + { + "epoch": 0.48090597033223, + "grad_norm": 1.749445759473527, + "learning_rate": 5.5510320323211975e-06, + "loss": 0.7007, + "step": 15691 + }, + { + "epoch": 0.4809366188549712, + "grad_norm": 0.8210066207354734, + "learning_rate": 5.5505387349787175e-06, + "loss": 0.4465, + "step": 15692 + }, + { + "epoch": 0.4809672673777124, + "grad_norm": 1.6506873496371013, + "learning_rate": 5.5500454322116395e-06, + "loss": 0.6628, + "step": 15693 + }, + { + "epoch": 0.4809979159004536, + "grad_norm": 1.66081034638066, + "learning_rate": 5.54955212402482e-06, + "loss": 0.632, + "step": 15694 + }, + { + "epoch": 0.4810285644231948, + "grad_norm": 1.7191272999794986, + "learning_rate": 5.549058810423128e-06, + "loss": 0.6428, + "step": 15695 + }, + { + "epoch": 0.481059212945936, + "grad_norm": 1.8105141102118385, + "learning_rate": 5.548565491411415e-06, + "loss": 0.6739, + "step": 15696 + }, + { + "epoch": 0.4810898614686772, + "grad_norm": 1.732919940804733, + "learning_rate": 5.548072166994548e-06, + "loss": 0.5267, + "step": 15697 + }, + { + "epoch": 0.4811205099914184, + "grad_norm": 1.6129740405100514, + "learning_rate": 5.547578837177384e-06, + "loss": 0.565, + "step": 15698 + }, + { + "epoch": 0.4811511585141596, + "grad_norm": 1.7837162012239036, + "learning_rate": 5.547085501964787e-06, + "loss": 0.7198, + "step": 15699 + }, + { + "epoch": 0.48118180703690083, + "grad_norm": 1.6732227855921689, + "learning_rate": 5.5465921613616155e-06, + "loss": 0.7071, + "step": 15700 + }, + { + "epoch": 0.48121245555964204, + "grad_norm": 1.5797230973453518, + "learning_rate": 5.546098815372732e-06, + "loss": 0.5632, + "step": 15701 + }, + { + "epoch": 0.48124310408238324, + "grad_norm": 1.5625075672476478, + "learning_rate": 5.545605464002998e-06, + "loss": 0.6107, + "step": 15702 + }, + { + "epoch": 0.48127375260512445, + "grad_norm": 1.756252929824732, + "learning_rate": 5.545112107257273e-06, + "loss": 0.7046, + "step": 15703 + }, + { + "epoch": 0.48130440112786566, + "grad_norm": 1.7759183022670042, + "learning_rate": 5.54461874514042e-06, + "loss": 0.5369, + "step": 15704 + }, + { + "epoch": 0.48133504965060686, + "grad_norm": 
1.517983997492629, + "learning_rate": 5.544125377657297e-06, + "loss": 0.5906, + "step": 15705 + }, + { + "epoch": 0.48136569817334807, + "grad_norm": 0.8364172873272396, + "learning_rate": 5.543632004812769e-06, + "loss": 0.4453, + "step": 15706 + }, + { + "epoch": 0.4813963466960893, + "grad_norm": 1.4815223215658022, + "learning_rate": 5.543138626611696e-06, + "loss": 0.5562, + "step": 15707 + }, + { + "epoch": 0.4814269952188305, + "grad_norm": 1.5587935425421886, + "learning_rate": 5.542645243058938e-06, + "loss": 0.5691, + "step": 15708 + }, + { + "epoch": 0.4814576437415717, + "grad_norm": 1.771298611789703, + "learning_rate": 5.5421518541593575e-06, + "loss": 0.592, + "step": 15709 + }, + { + "epoch": 0.48148829226431283, + "grad_norm": 1.7871349608241458, + "learning_rate": 5.541658459917817e-06, + "loss": 0.646, + "step": 15710 + }, + { + "epoch": 0.48151894078705404, + "grad_norm": 1.527110684125726, + "learning_rate": 5.541165060339178e-06, + "loss": 0.5994, + "step": 15711 + }, + { + "epoch": 0.48154958930979525, + "grad_norm": 1.7490862895027737, + "learning_rate": 5.540671655428298e-06, + "loss": 0.713, + "step": 15712 + }, + { + "epoch": 0.48158023783253645, + "grad_norm": 1.8471865143939057, + "learning_rate": 5.540178245190044e-06, + "loss": 0.5353, + "step": 15713 + }, + { + "epoch": 0.48161088635527766, + "grad_norm": 1.8029415031189278, + "learning_rate": 5.539684829629276e-06, + "loss": 0.5805, + "step": 15714 + }, + { + "epoch": 0.48164153487801886, + "grad_norm": 1.823031300165881, + "learning_rate": 5.5391914087508545e-06, + "loss": 0.6395, + "step": 15715 + }, + { + "epoch": 0.48167218340076007, + "grad_norm": 1.6724483159721795, + "learning_rate": 5.538697982559642e-06, + "loss": 0.6313, + "step": 15716 + }, + { + "epoch": 0.4817028319235013, + "grad_norm": 1.6895754410839905, + "learning_rate": 5.538204551060501e-06, + "loss": 0.7391, + "step": 15717 + }, + { + "epoch": 0.4817334804462425, + "grad_norm": 1.5655801170910624, + "learning_rate": 5.537711114258293e-06, + "loss": 0.6269, + "step": 15718 + }, + { + "epoch": 0.4817641289689837, + "grad_norm": 1.588743233978677, + "learning_rate": 5.53721767215788e-06, + "loss": 0.6351, + "step": 15719 + }, + { + "epoch": 0.4817947774917249, + "grad_norm": 1.8121021549651097, + "learning_rate": 5.536724224764122e-06, + "loss": 0.6503, + "step": 15720 + }, + { + "epoch": 0.4818254260144661, + "grad_norm": 1.6655457753382825, + "learning_rate": 5.536230772081884e-06, + "loss": 0.6636, + "step": 15721 + }, + { + "epoch": 0.4818560745372073, + "grad_norm": 1.9145003713671265, + "learning_rate": 5.535737314116027e-06, + "loss": 0.7231, + "step": 15722 + }, + { + "epoch": 0.4818867230599485, + "grad_norm": 1.7486131555829871, + "learning_rate": 5.535243850871414e-06, + "loss": 0.6465, + "step": 15723 + }, + { + "epoch": 0.4819173715826897, + "grad_norm": 1.7407693481532756, + "learning_rate": 5.534750382352905e-06, + "loss": 0.5925, + "step": 15724 + }, + { + "epoch": 0.4819480201054309, + "grad_norm": 1.617721719638258, + "learning_rate": 5.534256908565365e-06, + "loss": 0.6894, + "step": 15725 + }, + { + "epoch": 0.4819786686281721, + "grad_norm": 1.7360017220037387, + "learning_rate": 5.533763429513655e-06, + "loss": 0.6572, + "step": 15726 + }, + { + "epoch": 0.48200931715091333, + "grad_norm": 1.7702227968404223, + "learning_rate": 5.5332699452026354e-06, + "loss": 0.6429, + "step": 15727 + }, + { + "epoch": 0.48203996567365454, + "grad_norm": 2.047048825707372, + "learning_rate": 5.5327764556371725e-06, + "loss": 
0.6396, + "step": 15728 + }, + { + "epoch": 0.48207061419639574, + "grad_norm": 1.5474973923581898, + "learning_rate": 5.5322829608221255e-06, + "loss": 0.4987, + "step": 15729 + }, + { + "epoch": 0.48210126271913695, + "grad_norm": 1.5860342703598005, + "learning_rate": 5.53178946076236e-06, + "loss": 0.5841, + "step": 15730 + }, + { + "epoch": 0.48213191124187815, + "grad_norm": 0.8250639715961207, + "learning_rate": 5.531295955462735e-06, + "loss": 0.4369, + "step": 15731 + }, + { + "epoch": 0.48216255976461936, + "grad_norm": 1.768897879196321, + "learning_rate": 5.5308024449281165e-06, + "loss": 0.7244, + "step": 15732 + }, + { + "epoch": 0.48219320828736056, + "grad_norm": 0.7643307347820756, + "learning_rate": 5.530308929163364e-06, + "loss": 0.4254, + "step": 15733 + }, + { + "epoch": 0.48222385681010177, + "grad_norm": 1.6282836730270842, + "learning_rate": 5.5298154081733436e-06, + "loss": 0.603, + "step": 15734 + }, + { + "epoch": 0.482254505332843, + "grad_norm": 1.7041881762421296, + "learning_rate": 5.529321881962916e-06, + "loss": 0.6942, + "step": 15735 + }, + { + "epoch": 0.4822851538555842, + "grad_norm": 1.7772028585663668, + "learning_rate": 5.528828350536944e-06, + "loss": 0.5818, + "step": 15736 + }, + { + "epoch": 0.4823158023783254, + "grad_norm": 1.9044673609268215, + "learning_rate": 5.528334813900291e-06, + "loss": 0.6331, + "step": 15737 + }, + { + "epoch": 0.4823464509010666, + "grad_norm": 2.0857069491265925, + "learning_rate": 5.52784127205782e-06, + "loss": 0.668, + "step": 15738 + }, + { + "epoch": 0.4823770994238078, + "grad_norm": 1.6348438230984723, + "learning_rate": 5.527347725014395e-06, + "loss": 0.6454, + "step": 15739 + }, + { + "epoch": 0.482407747946549, + "grad_norm": 1.6346792336882154, + "learning_rate": 5.526854172774877e-06, + "loss": 0.6153, + "step": 15740 + }, + { + "epoch": 0.48243839646929015, + "grad_norm": 2.476922618825902, + "learning_rate": 5.526360615344129e-06, + "loss": 0.6716, + "step": 15741 + }, + { + "epoch": 0.48246904499203136, + "grad_norm": 1.6094738699581448, + "learning_rate": 5.525867052727016e-06, + "loss": 0.6179, + "step": 15742 + }, + { + "epoch": 0.48249969351477257, + "grad_norm": 0.9884607034878936, + "learning_rate": 5.525373484928401e-06, + "loss": 0.446, + "step": 15743 + }, + { + "epoch": 0.48253034203751377, + "grad_norm": 1.8675965971991688, + "learning_rate": 5.524879911953146e-06, + "loss": 0.6517, + "step": 15744 + }, + { + "epoch": 0.482560990560255, + "grad_norm": 1.8234499165274192, + "learning_rate": 5.5243863338061165e-06, + "loss": 0.6543, + "step": 15745 + }, + { + "epoch": 0.4825916390829962, + "grad_norm": 1.8032267488638418, + "learning_rate": 5.523892750492171e-06, + "loss": 0.585, + "step": 15746 + }, + { + "epoch": 0.4826222876057374, + "grad_norm": 1.7406267087408411, + "learning_rate": 5.523399162016179e-06, + "loss": 0.6785, + "step": 15747 + }, + { + "epoch": 0.4826529361284786, + "grad_norm": 1.9789592756408731, + "learning_rate": 5.5229055683829995e-06, + "loss": 0.7249, + "step": 15748 + }, + { + "epoch": 0.4826835846512198, + "grad_norm": 0.7865988370369688, + "learning_rate": 5.5224119695975e-06, + "loss": 0.4294, + "step": 15749 + }, + { + "epoch": 0.482714233173961, + "grad_norm": 1.888914294901852, + "learning_rate": 5.521918365664539e-06, + "loss": 0.64, + "step": 15750 + }, + { + "epoch": 0.4827448816967022, + "grad_norm": 1.502687095260885, + "learning_rate": 5.521424756588984e-06, + "loss": 0.5596, + "step": 15751 + }, + { + "epoch": 0.4827755302194434, + "grad_norm": 
0.8058231273231562, + "learning_rate": 5.520931142375697e-06, + "loss": 0.4482, + "step": 15752 + }, + { + "epoch": 0.4828061787421846, + "grad_norm": 1.856301406609348, + "learning_rate": 5.520437523029542e-06, + "loss": 0.6213, + "step": 15753 + }, + { + "epoch": 0.4828368272649258, + "grad_norm": 1.5644298241925338, + "learning_rate": 5.519943898555384e-06, + "loss": 0.6286, + "step": 15754 + }, + { + "epoch": 0.48286747578766703, + "grad_norm": 1.7776665576972581, + "learning_rate": 5.519450268958084e-06, + "loss": 0.5402, + "step": 15755 + }, + { + "epoch": 0.48289812431040824, + "grad_norm": 1.7113964397099866, + "learning_rate": 5.518956634242509e-06, + "loss": 0.7002, + "step": 15756 + }, + { + "epoch": 0.48292877283314944, + "grad_norm": 1.8380171679280373, + "learning_rate": 5.518462994413522e-06, + "loss": 0.7081, + "step": 15757 + }, + { + "epoch": 0.48295942135589065, + "grad_norm": 1.6171859209968722, + "learning_rate": 5.517969349475987e-06, + "loss": 0.6644, + "step": 15758 + }, + { + "epoch": 0.48299006987863186, + "grad_norm": 1.664054165748235, + "learning_rate": 5.517475699434764e-06, + "loss": 0.6756, + "step": 15759 + }, + { + "epoch": 0.48302071840137306, + "grad_norm": 1.8112640251777914, + "learning_rate": 5.5169820442947255e-06, + "loss": 0.6961, + "step": 15760 + }, + { + "epoch": 0.48305136692411427, + "grad_norm": 1.754345311987408, + "learning_rate": 5.516488384060726e-06, + "loss": 0.6547, + "step": 15761 + }, + { + "epoch": 0.4830820154468555, + "grad_norm": 1.7921746968847914, + "learning_rate": 5.515994718737637e-06, + "loss": 0.6658, + "step": 15762 + }, + { + "epoch": 0.4831126639695967, + "grad_norm": 1.730679944317756, + "learning_rate": 5.515501048330319e-06, + "loss": 0.7058, + "step": 15763 + }, + { + "epoch": 0.4831433124923379, + "grad_norm": 1.783756215660874, + "learning_rate": 5.515007372843637e-06, + "loss": 0.629, + "step": 15764 + }, + { + "epoch": 0.4831739610150791, + "grad_norm": 1.7707686087997623, + "learning_rate": 5.514513692282457e-06, + "loss": 0.7019, + "step": 15765 + }, + { + "epoch": 0.4832046095378203, + "grad_norm": 1.874678090895228, + "learning_rate": 5.514020006651641e-06, + "loss": 0.5898, + "step": 15766 + }, + { + "epoch": 0.4832352580605615, + "grad_norm": 1.7764049444892451, + "learning_rate": 5.513526315956053e-06, + "loss": 0.5777, + "step": 15767 + }, + { + "epoch": 0.4832659065833027, + "grad_norm": 2.1810427054640433, + "learning_rate": 5.513032620200561e-06, + "loss": 0.7454, + "step": 15768 + }, + { + "epoch": 0.4832965551060439, + "grad_norm": 2.0823551486465814, + "learning_rate": 5.512538919390027e-06, + "loss": 0.6817, + "step": 15769 + }, + { + "epoch": 0.4833272036287851, + "grad_norm": 1.7061741872987906, + "learning_rate": 5.512045213529315e-06, + "loss": 0.615, + "step": 15770 + }, + { + "epoch": 0.4833578521515263, + "grad_norm": 1.6347063841806138, + "learning_rate": 5.51155150262329e-06, + "loss": 0.6552, + "step": 15771 + }, + { + "epoch": 0.4833885006742675, + "grad_norm": 1.5225217572139853, + "learning_rate": 5.511057786676819e-06, + "loss": 0.6168, + "step": 15772 + }, + { + "epoch": 0.4834191491970087, + "grad_norm": 1.7035696872442374, + "learning_rate": 5.510564065694764e-06, + "loss": 0.5762, + "step": 15773 + }, + { + "epoch": 0.4834497977197499, + "grad_norm": 1.7138656204805665, + "learning_rate": 5.5100703396819895e-06, + "loss": 0.678, + "step": 15774 + }, + { + "epoch": 0.4834804462424911, + "grad_norm": 1.7000833794993866, + "learning_rate": 5.5095766086433635e-06, + "loss": 0.5524, + 
"step": 15775 + }, + { + "epoch": 0.4835110947652323, + "grad_norm": 1.8168034254578596, + "learning_rate": 5.509082872583747e-06, + "loss": 0.6699, + "step": 15776 + }, + { + "epoch": 0.4835417432879735, + "grad_norm": 1.7893173836878606, + "learning_rate": 5.508589131508009e-06, + "loss": 0.8067, + "step": 15777 + }, + { + "epoch": 0.4835723918107147, + "grad_norm": 1.5199823166085844, + "learning_rate": 5.50809538542101e-06, + "loss": 0.5743, + "step": 15778 + }, + { + "epoch": 0.4836030403334559, + "grad_norm": 1.842548848890921, + "learning_rate": 5.507601634327617e-06, + "loss": 0.5957, + "step": 15779 + }, + { + "epoch": 0.4836336888561971, + "grad_norm": 1.5806339286510611, + "learning_rate": 5.507107878232697e-06, + "loss": 0.6016, + "step": 15780 + }, + { + "epoch": 0.4836643373789383, + "grad_norm": 1.749140025393735, + "learning_rate": 5.506614117141112e-06, + "loss": 0.7345, + "step": 15781 + }, + { + "epoch": 0.48369498590167953, + "grad_norm": 1.7843826303378618, + "learning_rate": 5.506120351057729e-06, + "loss": 0.6311, + "step": 15782 + }, + { + "epoch": 0.48372563442442074, + "grad_norm": 1.7463151495531235, + "learning_rate": 5.505626579987411e-06, + "loss": 0.6703, + "step": 15783 + }, + { + "epoch": 0.48375628294716194, + "grad_norm": 1.662253820850021, + "learning_rate": 5.505132803935028e-06, + "loss": 0.6183, + "step": 15784 + }, + { + "epoch": 0.48378693146990315, + "grad_norm": 1.7450983500904893, + "learning_rate": 5.50463902290544e-06, + "loss": 0.6858, + "step": 15785 + }, + { + "epoch": 0.48381757999264435, + "grad_norm": 1.9569918876747279, + "learning_rate": 5.504145236903515e-06, + "loss": 0.5883, + "step": 15786 + }, + { + "epoch": 0.48384822851538556, + "grad_norm": 1.6392844719930482, + "learning_rate": 5.503651445934119e-06, + "loss": 0.626, + "step": 15787 + }, + { + "epoch": 0.48387887703812676, + "grad_norm": 1.621231502563741, + "learning_rate": 5.5031576500021155e-06, + "loss": 0.5376, + "step": 15788 + }, + { + "epoch": 0.48390952556086797, + "grad_norm": 2.521543868676282, + "learning_rate": 5.502663849112371e-06, + "loss": 0.6764, + "step": 15789 + }, + { + "epoch": 0.4839401740836092, + "grad_norm": 1.6234751497911686, + "learning_rate": 5.5021700432697515e-06, + "loss": 0.667, + "step": 15790 + }, + { + "epoch": 0.4839708226063504, + "grad_norm": 1.5914305778134634, + "learning_rate": 5.501676232479122e-06, + "loss": 0.5526, + "step": 15791 + }, + { + "epoch": 0.4840014711290916, + "grad_norm": 1.6984275651400977, + "learning_rate": 5.501182416745347e-06, + "loss": 0.622, + "step": 15792 + }, + { + "epoch": 0.4840321196518328, + "grad_norm": 1.532457132253177, + "learning_rate": 5.500688596073295e-06, + "loss": 0.5531, + "step": 15793 + }, + { + "epoch": 0.484062768174574, + "grad_norm": 1.8374947837572344, + "learning_rate": 5.50019477046783e-06, + "loss": 0.5437, + "step": 15794 + }, + { + "epoch": 0.4840934166973152, + "grad_norm": 1.7555205167729906, + "learning_rate": 5.4997009399338176e-06, + "loss": 0.6915, + "step": 15795 + }, + { + "epoch": 0.4841240652200564, + "grad_norm": 1.8513561165146322, + "learning_rate": 5.499207104476123e-06, + "loss": 0.6869, + "step": 15796 + }, + { + "epoch": 0.4841547137427976, + "grad_norm": 0.8537889153312361, + "learning_rate": 5.498713264099615e-06, + "loss": 0.4143, + "step": 15797 + }, + { + "epoch": 0.4841853622655388, + "grad_norm": 0.8645193143232978, + "learning_rate": 5.4982194188091545e-06, + "loss": 0.4437, + "step": 15798 + }, + { + "epoch": 0.48421601078828, + "grad_norm": 
1.9108915761601621, + "learning_rate": 5.497725568609614e-06, + "loss": 0.6652, + "step": 15799 + }, + { + "epoch": 0.48424665931102123, + "grad_norm": 1.840607247028583, + "learning_rate": 5.497231713505854e-06, + "loss": 0.6192, + "step": 15800 + }, + { + "epoch": 0.48427730783376244, + "grad_norm": 1.928926318303481, + "learning_rate": 5.496737853502744e-06, + "loss": 0.6463, + "step": 15801 + }, + { + "epoch": 0.48430795635650364, + "grad_norm": 1.6096526966862361, + "learning_rate": 5.496243988605147e-06, + "loss": 0.6197, + "step": 15802 + }, + { + "epoch": 0.4843386048792448, + "grad_norm": 1.644744795846705, + "learning_rate": 5.4957501188179345e-06, + "loss": 0.609, + "step": 15803 + }, + { + "epoch": 0.484369253401986, + "grad_norm": 1.6806800793393761, + "learning_rate": 5.495256244145966e-06, + "loss": 0.6219, + "step": 15804 + }, + { + "epoch": 0.4843999019247272, + "grad_norm": 1.7257901513143548, + "learning_rate": 5.494762364594112e-06, + "loss": 0.61, + "step": 15805 + }, + { + "epoch": 0.4844305504474684, + "grad_norm": 1.6905190816814086, + "learning_rate": 5.494268480167237e-06, + "loss": 0.6924, + "step": 15806 + }, + { + "epoch": 0.4844611989702096, + "grad_norm": 1.7445337622854593, + "learning_rate": 5.493774590870209e-06, + "loss": 0.7126, + "step": 15807 + }, + { + "epoch": 0.4844918474929508, + "grad_norm": 1.6415179192024376, + "learning_rate": 5.493280696707894e-06, + "loss": 0.6783, + "step": 15808 + }, + { + "epoch": 0.48452249601569203, + "grad_norm": 1.8832467787910394, + "learning_rate": 5.492786797685157e-06, + "loss": 0.6558, + "step": 15809 + }, + { + "epoch": 0.48455314453843323, + "grad_norm": 1.8403822814738715, + "learning_rate": 5.492292893806866e-06, + "loss": 0.7642, + "step": 15810 + }, + { + "epoch": 0.48458379306117444, + "grad_norm": 1.5837516355339354, + "learning_rate": 5.491798985077889e-06, + "loss": 0.578, + "step": 15811 + }, + { + "epoch": 0.48461444158391564, + "grad_norm": 1.8462837351626893, + "learning_rate": 5.491305071503089e-06, + "loss": 0.6394, + "step": 15812 + }, + { + "epoch": 0.48464509010665685, + "grad_norm": 1.7126741499041715, + "learning_rate": 5.490811153087334e-06, + "loss": 0.5951, + "step": 15813 + }, + { + "epoch": 0.48467573862939806, + "grad_norm": 1.7357205727207865, + "learning_rate": 5.490317229835493e-06, + "loss": 0.5249, + "step": 15814 + }, + { + "epoch": 0.48470638715213926, + "grad_norm": 0.9837731954658124, + "learning_rate": 5.48982330175243e-06, + "loss": 0.4266, + "step": 15815 + }, + { + "epoch": 0.48473703567488047, + "grad_norm": 1.8194366823797812, + "learning_rate": 5.489329368843012e-06, + "loss": 0.6241, + "step": 15816 + }, + { + "epoch": 0.4847676841976217, + "grad_norm": 1.8133548985680086, + "learning_rate": 5.488835431112106e-06, + "loss": 0.7067, + "step": 15817 + }, + { + "epoch": 0.4847983327203629, + "grad_norm": 1.874752511921787, + "learning_rate": 5.488341488564582e-06, + "loss": 0.7419, + "step": 15818 + }, + { + "epoch": 0.4848289812431041, + "grad_norm": 1.6175655834001557, + "learning_rate": 5.487847541205302e-06, + "loss": 0.5054, + "step": 15819 + }, + { + "epoch": 0.4848596297658453, + "grad_norm": 1.7208762587744784, + "learning_rate": 5.487353589039136e-06, + "loss": 0.6402, + "step": 15820 + }, + { + "epoch": 0.4848902782885865, + "grad_norm": 1.7248384389452769, + "learning_rate": 5.48685963207095e-06, + "loss": 0.6433, + "step": 15821 + }, + { + "epoch": 0.4849209268113277, + "grad_norm": 1.862604383078436, + "learning_rate": 5.486365670305612e-06, + "loss": 0.6722, + 
"step": 15822 + }, + { + "epoch": 0.4849515753340689, + "grad_norm": 1.7971028712538837, + "learning_rate": 5.485871703747989e-06, + "loss": 0.68, + "step": 15823 + }, + { + "epoch": 0.4849822238568101, + "grad_norm": 1.7258512992093968, + "learning_rate": 5.4853777324029464e-06, + "loss": 0.6096, + "step": 15824 + }, + { + "epoch": 0.4850128723795513, + "grad_norm": 1.5036005444821274, + "learning_rate": 5.484883756275354e-06, + "loss": 0.5654, + "step": 15825 + }, + { + "epoch": 0.4850435209022925, + "grad_norm": 0.8145530183506257, + "learning_rate": 5.484389775370078e-06, + "loss": 0.4136, + "step": 15826 + }, + { + "epoch": 0.48507416942503373, + "grad_norm": 1.6689942486093186, + "learning_rate": 5.483895789691985e-06, + "loss": 0.6382, + "step": 15827 + }, + { + "epoch": 0.48510481794777494, + "grad_norm": 1.7553717860897073, + "learning_rate": 5.483401799245943e-06, + "loss": 0.7121, + "step": 15828 + }, + { + "epoch": 0.48513546647051614, + "grad_norm": 2.0523394401440083, + "learning_rate": 5.48290780403682e-06, + "loss": 0.6997, + "step": 15829 + }, + { + "epoch": 0.48516611499325735, + "grad_norm": 1.6497751592462793, + "learning_rate": 5.482413804069483e-06, + "loss": 0.7412, + "step": 15830 + }, + { + "epoch": 0.48519676351599855, + "grad_norm": 1.7298323945642273, + "learning_rate": 5.481919799348799e-06, + "loss": 0.6127, + "step": 15831 + }, + { + "epoch": 0.48522741203873976, + "grad_norm": 1.75170141082604, + "learning_rate": 5.481425789879635e-06, + "loss": 0.6741, + "step": 15832 + }, + { + "epoch": 0.48525806056148096, + "grad_norm": 1.735024214032782, + "learning_rate": 5.48093177566686e-06, + "loss": 0.6045, + "step": 15833 + }, + { + "epoch": 0.4852887090842221, + "grad_norm": 1.749643099513447, + "learning_rate": 5.4804377567153424e-06, + "loss": 0.5504, + "step": 15834 + }, + { + "epoch": 0.4853193576069633, + "grad_norm": 1.6155719929494723, + "learning_rate": 5.479943733029947e-06, + "loss": 0.7198, + "step": 15835 + }, + { + "epoch": 0.4853500061297045, + "grad_norm": 1.9215128438976798, + "learning_rate": 5.479449704615543e-06, + "loss": 0.6572, + "step": 15836 + }, + { + "epoch": 0.48538065465244573, + "grad_norm": 1.69127731750518, + "learning_rate": 5.478955671477e-06, + "loss": 0.6227, + "step": 15837 + }, + { + "epoch": 0.48541130317518694, + "grad_norm": 1.6627628478381191, + "learning_rate": 5.478461633619185e-06, + "loss": 0.6585, + "step": 15838 + }, + { + "epoch": 0.48544195169792814, + "grad_norm": 0.9276451612854842, + "learning_rate": 5.477967591046962e-06, + "loss": 0.4522, + "step": 15839 + }, + { + "epoch": 0.48547260022066935, + "grad_norm": 1.8563994043554366, + "learning_rate": 5.4774735437652036e-06, + "loss": 0.666, + "step": 15840 + }, + { + "epoch": 0.48550324874341055, + "grad_norm": 1.6466433392335036, + "learning_rate": 5.476979491778777e-06, + "loss": 0.6353, + "step": 15841 + }, + { + "epoch": 0.48553389726615176, + "grad_norm": 1.9481663083505238, + "learning_rate": 5.476485435092549e-06, + "loss": 0.6126, + "step": 15842 + }, + { + "epoch": 0.48556454578889297, + "grad_norm": 1.7298376869564505, + "learning_rate": 5.475991373711387e-06, + "loss": 0.7016, + "step": 15843 + }, + { + "epoch": 0.48559519431163417, + "grad_norm": 1.7773665807867653, + "learning_rate": 5.47549730764016e-06, + "loss": 0.7614, + "step": 15844 + }, + { + "epoch": 0.4856258428343754, + "grad_norm": 0.8278728440350146, + "learning_rate": 5.475003236883738e-06, + "loss": 0.4459, + "step": 15845 + }, + { + "epoch": 0.4856564913571166, + "grad_norm": 
1.6376403580956016, + "learning_rate": 5.474509161446987e-06, + "loss": 0.635, + "step": 15846 + }, + { + "epoch": 0.4856871398798578, + "grad_norm": 1.4568916293594392, + "learning_rate": 5.474015081334776e-06, + "loss": 0.5738, + "step": 15847 + }, + { + "epoch": 0.485717788402599, + "grad_norm": 0.802305018686493, + "learning_rate": 5.473520996551972e-06, + "loss": 0.4524, + "step": 15848 + }, + { + "epoch": 0.4857484369253402, + "grad_norm": 1.9021007151698157, + "learning_rate": 5.473026907103446e-06, + "loss": 0.7241, + "step": 15849 + }, + { + "epoch": 0.4857790854480814, + "grad_norm": 1.6378518327930318, + "learning_rate": 5.472532812994063e-06, + "loss": 0.5859, + "step": 15850 + }, + { + "epoch": 0.4858097339708226, + "grad_norm": 0.8214033596139939, + "learning_rate": 5.472038714228695e-06, + "loss": 0.46, + "step": 15851 + }, + { + "epoch": 0.4858403824935638, + "grad_norm": 1.662657490461652, + "learning_rate": 5.471544610812207e-06, + "loss": 0.6031, + "step": 15852 + }, + { + "epoch": 0.485871031016305, + "grad_norm": 1.8472131932896148, + "learning_rate": 5.471050502749472e-06, + "loss": 0.6514, + "step": 15853 + }, + { + "epoch": 0.4859016795390462, + "grad_norm": 1.8653922752800227, + "learning_rate": 5.470556390045354e-06, + "loss": 0.6863, + "step": 15854 + }, + { + "epoch": 0.48593232806178743, + "grad_norm": 1.7763660935728718, + "learning_rate": 5.470062272704724e-06, + "loss": 0.6898, + "step": 15855 + }, + { + "epoch": 0.48596297658452864, + "grad_norm": 1.5287597976613845, + "learning_rate": 5.46956815073245e-06, + "loss": 0.6279, + "step": 15856 + }, + { + "epoch": 0.48599362510726984, + "grad_norm": 1.6045676775871989, + "learning_rate": 5.469074024133401e-06, + "loss": 0.6017, + "step": 15857 + }, + { + "epoch": 0.48602427363001105, + "grad_norm": 1.9129988712133343, + "learning_rate": 5.468579892912446e-06, + "loss": 0.6482, + "step": 15858 + }, + { + "epoch": 0.48605492215275226, + "grad_norm": 2.1725399213942125, + "learning_rate": 5.468085757074453e-06, + "loss": 0.7017, + "step": 15859 + }, + { + "epoch": 0.48608557067549346, + "grad_norm": 2.439728230545245, + "learning_rate": 5.4675916166242904e-06, + "loss": 0.7275, + "step": 15860 + }, + { + "epoch": 0.48611621919823467, + "grad_norm": 1.8745688060613153, + "learning_rate": 5.467097471566829e-06, + "loss": 0.6086, + "step": 15861 + }, + { + "epoch": 0.4861468677209759, + "grad_norm": 1.7084149287661865, + "learning_rate": 5.466603321906937e-06, + "loss": 0.5571, + "step": 15862 + }, + { + "epoch": 0.4861775162437171, + "grad_norm": 1.6491435345104652, + "learning_rate": 5.466109167649483e-06, + "loss": 0.6173, + "step": 15863 + }, + { + "epoch": 0.4862081647664583, + "grad_norm": 0.8256161821620547, + "learning_rate": 5.465615008799336e-06, + "loss": 0.4493, + "step": 15864 + }, + { + "epoch": 0.48623881328919943, + "grad_norm": 1.567837625292595, + "learning_rate": 5.4651208453613634e-06, + "loss": 0.5505, + "step": 15865 + }, + { + "epoch": 0.48626946181194064, + "grad_norm": 1.7622252653707438, + "learning_rate": 5.464626677340438e-06, + "loss": 0.5646, + "step": 15866 + }, + { + "epoch": 0.48630011033468185, + "grad_norm": 1.5173831755645715, + "learning_rate": 5.464132504741426e-06, + "loss": 0.5452, + "step": 15867 + }, + { + "epoch": 0.48633075885742305, + "grad_norm": 1.6963516904794318, + "learning_rate": 5.4636383275692e-06, + "loss": 0.605, + "step": 15868 + }, + { + "epoch": 0.48636140738016426, + "grad_norm": 1.7444697549313877, + "learning_rate": 5.463144145828624e-06, + "loss": 0.6637, 
+ "step": 15869 + }, + { + "epoch": 0.48639205590290546, + "grad_norm": 1.80190504252318, + "learning_rate": 5.462649959524572e-06, + "loss": 0.6954, + "step": 15870 + }, + { + "epoch": 0.48642270442564667, + "grad_norm": 1.5610954647613677, + "learning_rate": 5.46215576866191e-06, + "loss": 0.5908, + "step": 15871 + }, + { + "epoch": 0.4864533529483879, + "grad_norm": 0.7627889710219808, + "learning_rate": 5.461661573245512e-06, + "loss": 0.4294, + "step": 15872 + }, + { + "epoch": 0.4864840014711291, + "grad_norm": 1.744480816016109, + "learning_rate": 5.4611673732802405e-06, + "loss": 0.6744, + "step": 15873 + }, + { + "epoch": 0.4865146499938703, + "grad_norm": 1.562278503320459, + "learning_rate": 5.460673168770971e-06, + "loss": 0.6708, + "step": 15874 + }, + { + "epoch": 0.4865452985166115, + "grad_norm": 1.6252648919238706, + "learning_rate": 5.460178959722571e-06, + "loss": 0.6723, + "step": 15875 + }, + { + "epoch": 0.4865759470393527, + "grad_norm": 1.7570307379881436, + "learning_rate": 5.4596847461399095e-06, + "loss": 0.6438, + "step": 15876 + }, + { + "epoch": 0.4866065955620939, + "grad_norm": 0.7500224078376408, + "learning_rate": 5.459190528027857e-06, + "loss": 0.4167, + "step": 15877 + }, + { + "epoch": 0.4866372440848351, + "grad_norm": 1.9758614921422475, + "learning_rate": 5.458696305391281e-06, + "loss": 0.567, + "step": 15878 + }, + { + "epoch": 0.4866678926075763, + "grad_norm": 0.8010232619056361, + "learning_rate": 5.458202078235056e-06, + "loss": 0.4461, + "step": 15879 + }, + { + "epoch": 0.4866985411303175, + "grad_norm": 1.8436355131526034, + "learning_rate": 5.457707846564046e-06, + "loss": 0.6554, + "step": 15880 + }, + { + "epoch": 0.4867291896530587, + "grad_norm": 1.7870749051404426, + "learning_rate": 5.457213610383125e-06, + "loss": 0.7122, + "step": 15881 + }, + { + "epoch": 0.48675983817579993, + "grad_norm": 0.7569330563704816, + "learning_rate": 5.456719369697161e-06, + "loss": 0.4317, + "step": 15882 + }, + { + "epoch": 0.48679048669854114, + "grad_norm": 0.7497853383306794, + "learning_rate": 5.456225124511024e-06, + "loss": 0.4491, + "step": 15883 + }, + { + "epoch": 0.48682113522128234, + "grad_norm": 1.5209422706516091, + "learning_rate": 5.455730874829584e-06, + "loss": 0.674, + "step": 15884 + }, + { + "epoch": 0.48685178374402355, + "grad_norm": 1.6182220812855779, + "learning_rate": 5.455236620657712e-06, + "loss": 0.6608, + "step": 15885 + }, + { + "epoch": 0.48688243226676475, + "grad_norm": 1.7770233790383998, + "learning_rate": 5.454742362000276e-06, + "loss": 0.6623, + "step": 15886 + }, + { + "epoch": 0.48691308078950596, + "grad_norm": 1.834756556532752, + "learning_rate": 5.454248098862147e-06, + "loss": 0.645, + "step": 15887 + }, + { + "epoch": 0.48694372931224716, + "grad_norm": 1.7684385592241725, + "learning_rate": 5.453753831248196e-06, + "loss": 0.6358, + "step": 15888 + }, + { + "epoch": 0.48697437783498837, + "grad_norm": 1.8001070118342257, + "learning_rate": 5.453259559163293e-06, + "loss": 0.6844, + "step": 15889 + }, + { + "epoch": 0.4870050263577296, + "grad_norm": 1.809110666177827, + "learning_rate": 5.4527652826123055e-06, + "loss": 0.572, + "step": 15890 + }, + { + "epoch": 0.4870356748804708, + "grad_norm": 0.7981752931626935, + "learning_rate": 5.452271001600108e-06, + "loss": 0.4421, + "step": 15891 + }, + { + "epoch": 0.487066323403212, + "grad_norm": 1.6254815845557773, + "learning_rate": 5.451776716131569e-06, + "loss": 0.5988, + "step": 15892 + }, + { + "epoch": 0.4870969719259532, + "grad_norm": 
1.7096008144654025, + "learning_rate": 5.451282426211555e-06, + "loss": 0.6446, + "step": 15893 + }, + { + "epoch": 0.4871276204486944, + "grad_norm": 1.5979126863835176, + "learning_rate": 5.450788131844943e-06, + "loss": 0.6913, + "step": 15894 + }, + { + "epoch": 0.4871582689714356, + "grad_norm": 1.6495768303640006, + "learning_rate": 5.4502938330365996e-06, + "loss": 0.5461, + "step": 15895 + }, + { + "epoch": 0.48718891749417675, + "grad_norm": 1.838392952020897, + "learning_rate": 5.449799529791395e-06, + "loss": 0.5897, + "step": 15896 + }, + { + "epoch": 0.48721956601691796, + "grad_norm": 1.5682457615367986, + "learning_rate": 5.4493052221142005e-06, + "loss": 0.6257, + "step": 15897 + }, + { + "epoch": 0.48725021453965917, + "grad_norm": 1.7447493088649655, + "learning_rate": 5.448810910009888e-06, + "loss": 0.6219, + "step": 15898 + }, + { + "epoch": 0.48728086306240037, + "grad_norm": 1.7136590030758523, + "learning_rate": 5.448316593483325e-06, + "loss": 0.5777, + "step": 15899 + }, + { + "epoch": 0.4873115115851416, + "grad_norm": 0.7748411029111291, + "learning_rate": 5.4478222725393856e-06, + "loss": 0.4303, + "step": 15900 + }, + { + "epoch": 0.4873421601078828, + "grad_norm": 0.7934004542186871, + "learning_rate": 5.4473279471829364e-06, + "loss": 0.4462, + "step": 15901 + }, + { + "epoch": 0.487372808630624, + "grad_norm": 1.726836941117933, + "learning_rate": 5.446833617418853e-06, + "loss": 0.72, + "step": 15902 + }, + { + "epoch": 0.4874034571533652, + "grad_norm": 1.8382959157912835, + "learning_rate": 5.4463392832520035e-06, + "loss": 0.6679, + "step": 15903 + }, + { + "epoch": 0.4874341056761064, + "grad_norm": 1.6866892650450573, + "learning_rate": 5.445844944687256e-06, + "loss": 0.5487, + "step": 15904 + }, + { + "epoch": 0.4874647541988476, + "grad_norm": 2.0474613286016496, + "learning_rate": 5.445350601729488e-06, + "loss": 0.6213, + "step": 15905 + }, + { + "epoch": 0.4874954027215888, + "grad_norm": 1.5947120747748555, + "learning_rate": 5.444856254383564e-06, + "loss": 0.6086, + "step": 15906 + }, + { + "epoch": 0.48752605124433, + "grad_norm": 1.5499811347610248, + "learning_rate": 5.444361902654359e-06, + "loss": 0.5746, + "step": 15907 + }, + { + "epoch": 0.4875566997670712, + "grad_norm": 0.7834321235357506, + "learning_rate": 5.443867546546741e-06, + "loss": 0.4313, + "step": 15908 + }, + { + "epoch": 0.48758734828981243, + "grad_norm": 1.903829478499705, + "learning_rate": 5.443373186065583e-06, + "loss": 0.6287, + "step": 15909 + }, + { + "epoch": 0.48761799681255363, + "grad_norm": 1.6927144232039082, + "learning_rate": 5.4428788212157555e-06, + "loss": 0.6306, + "step": 15910 + }, + { + "epoch": 0.48764864533529484, + "grad_norm": 1.8477806607768306, + "learning_rate": 5.442384452002132e-06, + "loss": 0.6887, + "step": 15911 + }, + { + "epoch": 0.48767929385803604, + "grad_norm": 1.694305015917879, + "learning_rate": 5.441890078429578e-06, + "loss": 0.686, + "step": 15912 + }, + { + "epoch": 0.48770994238077725, + "grad_norm": 1.6732771072353212, + "learning_rate": 5.441395700502969e-06, + "loss": 0.5992, + "step": 15913 + }, + { + "epoch": 0.48774059090351846, + "grad_norm": 1.743028585735475, + "learning_rate": 5.4409013182271766e-06, + "loss": 0.6603, + "step": 15914 + }, + { + "epoch": 0.48777123942625966, + "grad_norm": 1.8305633632497957, + "learning_rate": 5.44040693160707e-06, + "loss": 0.63, + "step": 15915 + }, + { + "epoch": 0.48780188794900087, + "grad_norm": 1.5608319213197446, + "learning_rate": 5.43991254064752e-06, + "loss": 
0.6611, + "step": 15916 + }, + { + "epoch": 0.4878325364717421, + "grad_norm": 1.641818834990967, + "learning_rate": 5.4394181453534e-06, + "loss": 0.5847, + "step": 15917 + }, + { + "epoch": 0.4878631849944833, + "grad_norm": 1.6987923332251307, + "learning_rate": 5.438923745729581e-06, + "loss": 0.6784, + "step": 15918 + }, + { + "epoch": 0.4878938335172245, + "grad_norm": 1.7238635369279272, + "learning_rate": 5.438429341780932e-06, + "loss": 0.6796, + "step": 15919 + }, + { + "epoch": 0.4879244820399657, + "grad_norm": 2.0299021696425457, + "learning_rate": 5.437934933512329e-06, + "loss": 0.6821, + "step": 15920 + }, + { + "epoch": 0.4879551305627069, + "grad_norm": 1.848852220255245, + "learning_rate": 5.43744052092864e-06, + "loss": 0.6682, + "step": 15921 + }, + { + "epoch": 0.4879857790854481, + "grad_norm": 1.8455683687178643, + "learning_rate": 5.4369461040347385e-06, + "loss": 0.6736, + "step": 15922 + }, + { + "epoch": 0.4880164276081893, + "grad_norm": 1.7930856294457118, + "learning_rate": 5.436451682835494e-06, + "loss": 0.6888, + "step": 15923 + }, + { + "epoch": 0.4880470761309305, + "grad_norm": 0.8170567640925451, + "learning_rate": 5.43595725733578e-06, + "loss": 0.4299, + "step": 15924 + }, + { + "epoch": 0.4880777246536717, + "grad_norm": 1.6707729143075412, + "learning_rate": 5.435462827540466e-06, + "loss": 0.6848, + "step": 15925 + }, + { + "epoch": 0.4881083731764129, + "grad_norm": 1.5427232280913783, + "learning_rate": 5.4349683934544294e-06, + "loss": 0.559, + "step": 15926 + }, + { + "epoch": 0.4881390216991541, + "grad_norm": 1.8026631929036199, + "learning_rate": 5.434473955082534e-06, + "loss": 0.678, + "step": 15927 + }, + { + "epoch": 0.4881696702218953, + "grad_norm": 1.9606710053803056, + "learning_rate": 5.433979512429658e-06, + "loss": 0.7419, + "step": 15928 + }, + { + "epoch": 0.4882003187446365, + "grad_norm": 1.6079437085374444, + "learning_rate": 5.4334850655006686e-06, + "loss": 0.605, + "step": 15929 + }, + { + "epoch": 0.4882309672673777, + "grad_norm": 1.778705201191175, + "learning_rate": 5.432990614300442e-06, + "loss": 0.6654, + "step": 15930 + }, + { + "epoch": 0.4882616157901189, + "grad_norm": 0.8038164856513702, + "learning_rate": 5.432496158833846e-06, + "loss": 0.4428, + "step": 15931 + }, + { + "epoch": 0.4882922643128601, + "grad_norm": 1.6777293318578073, + "learning_rate": 5.432001699105756e-06, + "loss": 0.6611, + "step": 15932 + }, + { + "epoch": 0.4883229128356013, + "grad_norm": 1.6696499051236282, + "learning_rate": 5.431507235121043e-06, + "loss": 0.5527, + "step": 15933 + }, + { + "epoch": 0.4883535613583425, + "grad_norm": 1.7582977099858415, + "learning_rate": 5.4310127668845795e-06, + "loss": 0.5486, + "step": 15934 + }, + { + "epoch": 0.4883842098810837, + "grad_norm": 0.7762509320473416, + "learning_rate": 5.430518294401236e-06, + "loss": 0.4401, + "step": 15935 + }, + { + "epoch": 0.4884148584038249, + "grad_norm": 1.6843520801730472, + "learning_rate": 5.430023817675883e-06, + "loss": 0.7351, + "step": 15936 + }, + { + "epoch": 0.48844550692656613, + "grad_norm": 1.616965368936947, + "learning_rate": 5.429529336713399e-06, + "loss": 0.5215, + "step": 15937 + }, + { + "epoch": 0.48847615544930734, + "grad_norm": 1.7400478045822831, + "learning_rate": 5.429034851518652e-06, + "loss": 0.6101, + "step": 15938 + }, + { + "epoch": 0.48850680397204854, + "grad_norm": 1.8176519939433256, + "learning_rate": 5.428540362096514e-06, + "loss": 0.5578, + "step": 15939 + }, + { + "epoch": 0.48853745249478975, + "grad_norm": 
0.7609319065509222, + "learning_rate": 5.428045868451858e-06, + "loss": 0.432, + "step": 15940 + }, + { + "epoch": 0.48856810101753095, + "grad_norm": 1.6146928116864079, + "learning_rate": 5.427551370589558e-06, + "loss": 0.6435, + "step": 15941 + }, + { + "epoch": 0.48859874954027216, + "grad_norm": 1.7160173715957534, + "learning_rate": 5.4270568685144835e-06, + "loss": 0.7386, + "step": 15942 + }, + { + "epoch": 0.48862939806301336, + "grad_norm": 1.7324266275561582, + "learning_rate": 5.426562362231509e-06, + "loss": 0.7779, + "step": 15943 + }, + { + "epoch": 0.48866004658575457, + "grad_norm": 1.7602577176126, + "learning_rate": 5.426067851745504e-06, + "loss": 0.6693, + "step": 15944 + }, + { + "epoch": 0.4886906951084958, + "grad_norm": 1.8604500442230678, + "learning_rate": 5.425573337061346e-06, + "loss": 0.691, + "step": 15945 + }, + { + "epoch": 0.488721343631237, + "grad_norm": 1.8610814165948866, + "learning_rate": 5.425078818183905e-06, + "loss": 0.734, + "step": 15946 + }, + { + "epoch": 0.4887519921539782, + "grad_norm": 1.7312926239517163, + "learning_rate": 5.424584295118053e-06, + "loss": 0.5341, + "step": 15947 + }, + { + "epoch": 0.4887826406767194, + "grad_norm": 1.8319683981250958, + "learning_rate": 5.424089767868663e-06, + "loss": 0.6464, + "step": 15948 + }, + { + "epoch": 0.4888132891994606, + "grad_norm": 1.5849428002785775, + "learning_rate": 5.42359523644061e-06, + "loss": 0.6217, + "step": 15949 + }, + { + "epoch": 0.4888439377222018, + "grad_norm": 0.7853999893322102, + "learning_rate": 5.423100700838763e-06, + "loss": 0.441, + "step": 15950 + }, + { + "epoch": 0.488874586244943, + "grad_norm": 1.6845846357912346, + "learning_rate": 5.422606161067996e-06, + "loss": 0.5684, + "step": 15951 + }, + { + "epoch": 0.4889052347676842, + "grad_norm": 0.815694623432629, + "learning_rate": 5.4221116171331835e-06, + "loss": 0.4558, + "step": 15952 + }, + { + "epoch": 0.4889358832904254, + "grad_norm": 1.8434628612526822, + "learning_rate": 5.421617069039198e-06, + "loss": 0.6748, + "step": 15953 + }, + { + "epoch": 0.4889665318131666, + "grad_norm": 1.9106277803258338, + "learning_rate": 5.42112251679091e-06, + "loss": 0.688, + "step": 15954 + }, + { + "epoch": 0.48899718033590783, + "grad_norm": 1.5903342041517, + "learning_rate": 5.420627960393194e-06, + "loss": 0.6405, + "step": 15955 + }, + { + "epoch": 0.48902782885864904, + "grad_norm": 1.8035425334761825, + "learning_rate": 5.420133399850924e-06, + "loss": 0.6113, + "step": 15956 + }, + { + "epoch": 0.48905847738139024, + "grad_norm": 1.752982958347368, + "learning_rate": 5.419638835168972e-06, + "loss": 0.7399, + "step": 15957 + }, + { + "epoch": 0.4890891259041314, + "grad_norm": 1.6772626637376242, + "learning_rate": 5.419144266352211e-06, + "loss": 0.6448, + "step": 15958 + }, + { + "epoch": 0.4891197744268726, + "grad_norm": 1.645041657870058, + "learning_rate": 5.418649693405514e-06, + "loss": 0.6998, + "step": 15959 + }, + { + "epoch": 0.4891504229496138, + "grad_norm": 1.7897498912352388, + "learning_rate": 5.418155116333755e-06, + "loss": 0.7522, + "step": 15960 + }, + { + "epoch": 0.489181071472355, + "grad_norm": 1.8377067137372993, + "learning_rate": 5.417660535141806e-06, + "loss": 0.6649, + "step": 15961 + }, + { + "epoch": 0.4892117199950962, + "grad_norm": 1.5898289559549548, + "learning_rate": 5.417165949834542e-06, + "loss": 0.6629, + "step": 15962 + }, + { + "epoch": 0.4892423685178374, + "grad_norm": 1.8255926225626322, + "learning_rate": 5.416671360416834e-06, + "loss": 0.6841, + "step": 
15963 + }, + { + "epoch": 0.48927301704057863, + "grad_norm": 1.6704032130925537, + "learning_rate": 5.416176766893556e-06, + "loss": 0.6073, + "step": 15964 + }, + { + "epoch": 0.48930366556331983, + "grad_norm": 0.8935215806942955, + "learning_rate": 5.415682169269585e-06, + "loss": 0.4455, + "step": 15965 + }, + { + "epoch": 0.48933431408606104, + "grad_norm": 1.6834448072126187, + "learning_rate": 5.415187567549788e-06, + "loss": 0.6152, + "step": 15966 + }, + { + "epoch": 0.48936496260880225, + "grad_norm": 1.6789270620668142, + "learning_rate": 5.414692961739043e-06, + "loss": 0.5241, + "step": 15967 + }, + { + "epoch": 0.48939561113154345, + "grad_norm": 1.7203283863201497, + "learning_rate": 5.414198351842223e-06, + "loss": 0.6706, + "step": 15968 + }, + { + "epoch": 0.48942625965428466, + "grad_norm": 1.7136495636500557, + "learning_rate": 5.413703737864199e-06, + "loss": 0.4707, + "step": 15969 + }, + { + "epoch": 0.48945690817702586, + "grad_norm": 1.557638669657221, + "learning_rate": 5.4132091198098455e-06, + "loss": 0.6225, + "step": 15970 + }, + { + "epoch": 0.48948755669976707, + "grad_norm": 0.7831794614898636, + "learning_rate": 5.412714497684039e-06, + "loss": 0.4403, + "step": 15971 + }, + { + "epoch": 0.4895182052225083, + "grad_norm": 1.4378596789324445, + "learning_rate": 5.4122198714916495e-06, + "loss": 0.642, + "step": 15972 + }, + { + "epoch": 0.4895488537452495, + "grad_norm": 1.6653277922835494, + "learning_rate": 5.411725241237552e-06, + "loss": 0.7114, + "step": 15973 + }, + { + "epoch": 0.4895795022679907, + "grad_norm": 2.107600928378901, + "learning_rate": 5.411230606926622e-06, + "loss": 0.5767, + "step": 15974 + }, + { + "epoch": 0.4896101507907319, + "grad_norm": 1.652311136317443, + "learning_rate": 5.41073596856373e-06, + "loss": 0.6579, + "step": 15975 + }, + { + "epoch": 0.4896407993134731, + "grad_norm": 0.8280815574880565, + "learning_rate": 5.410241326153753e-06, + "loss": 0.4501, + "step": 15976 + }, + { + "epoch": 0.4896714478362143, + "grad_norm": 0.8053330229678843, + "learning_rate": 5.4097466797015615e-06, + "loss": 0.4537, + "step": 15977 + }, + { + "epoch": 0.4897020963589555, + "grad_norm": 1.4106326851417166, + "learning_rate": 5.409252029212032e-06, + "loss": 0.5589, + "step": 15978 + }, + { + "epoch": 0.4897327448816967, + "grad_norm": 1.775716224132647, + "learning_rate": 5.408757374690037e-06, + "loss": 0.6485, + "step": 15979 + }, + { + "epoch": 0.4897633934044379, + "grad_norm": 1.6957070674070704, + "learning_rate": 5.408262716140452e-06, + "loss": 0.5961, + "step": 15980 + }, + { + "epoch": 0.4897940419271791, + "grad_norm": 0.7466283966918306, + "learning_rate": 5.407768053568148e-06, + "loss": 0.4354, + "step": 15981 + }, + { + "epoch": 0.48982469044992033, + "grad_norm": 1.900793852858367, + "learning_rate": 5.407273386978003e-06, + "loss": 0.6067, + "step": 15982 + }, + { + "epoch": 0.48985533897266154, + "grad_norm": 1.7482828629950364, + "learning_rate": 5.406778716374888e-06, + "loss": 0.6481, + "step": 15983 + }, + { + "epoch": 0.48988598749540274, + "grad_norm": 1.7846122483409768, + "learning_rate": 5.40628404176368e-06, + "loss": 0.7174, + "step": 15984 + }, + { + "epoch": 0.48991663601814395, + "grad_norm": 0.7698328751693427, + "learning_rate": 5.405789363149251e-06, + "loss": 0.4276, + "step": 15985 + }, + { + "epoch": 0.48994728454088515, + "grad_norm": 1.7471986472868044, + "learning_rate": 5.405294680536475e-06, + "loss": 0.7246, + "step": 15986 + }, + { + "epoch": 0.48997793306362636, + "grad_norm": 
1.8703240311843046, + "learning_rate": 5.404799993930226e-06, + "loss": 0.6271, + "step": 15987 + }, + { + "epoch": 0.49000858158636756, + "grad_norm": 1.4754579734686835, + "learning_rate": 5.404305303335379e-06, + "loss": 0.5178, + "step": 15988 + }, + { + "epoch": 0.4900392301091087, + "grad_norm": 1.728683137167847, + "learning_rate": 5.40381060875681e-06, + "loss": 0.6787, + "step": 15989 + }, + { + "epoch": 0.4900698786318499, + "grad_norm": 1.5216025736532788, + "learning_rate": 5.403315910199389e-06, + "loss": 0.6292, + "step": 15990 + }, + { + "epoch": 0.4901005271545911, + "grad_norm": 1.6564766423189548, + "learning_rate": 5.402821207667998e-06, + "loss": 0.6573, + "step": 15991 + }, + { + "epoch": 0.49013117567733233, + "grad_norm": 0.7680996193859738, + "learning_rate": 5.402326501167502e-06, + "loss": 0.4246, + "step": 15992 + }, + { + "epoch": 0.49016182420007354, + "grad_norm": 1.7315093730732536, + "learning_rate": 5.4018317907027816e-06, + "loss": 0.6393, + "step": 15993 + }, + { + "epoch": 0.49019247272281474, + "grad_norm": 1.6291323254620302, + "learning_rate": 5.401337076278709e-06, + "loss": 0.6642, + "step": 15994 + }, + { + "epoch": 0.49022312124555595, + "grad_norm": 0.7887975702927075, + "learning_rate": 5.400842357900161e-06, + "loss": 0.4582, + "step": 15995 + }, + { + "epoch": 0.49025376976829715, + "grad_norm": 1.8345960695237402, + "learning_rate": 5.40034763557201e-06, + "loss": 0.7408, + "step": 15996 + }, + { + "epoch": 0.49028441829103836, + "grad_norm": 1.794952758133336, + "learning_rate": 5.399852909299131e-06, + "loss": 0.5652, + "step": 15997 + }, + { + "epoch": 0.49031506681377957, + "grad_norm": 1.9740920944230156, + "learning_rate": 5.399358179086399e-06, + "loss": 0.7445, + "step": 15998 + }, + { + "epoch": 0.49034571533652077, + "grad_norm": 0.7579875559224953, + "learning_rate": 5.398863444938689e-06, + "loss": 0.4265, + "step": 15999 + }, + { + "epoch": 0.490376363859262, + "grad_norm": 1.8352396626227823, + "learning_rate": 5.398368706860876e-06, + "loss": 0.6542, + "step": 16000 + }, + { + "epoch": 0.4904070123820032, + "grad_norm": 1.6371739466022601, + "learning_rate": 5.397873964857833e-06, + "loss": 0.6618, + "step": 16001 + }, + { + "epoch": 0.4904376609047444, + "grad_norm": 1.5605585907247619, + "learning_rate": 5.3973792189344366e-06, + "loss": 0.6159, + "step": 16002 + }, + { + "epoch": 0.4904683094274856, + "grad_norm": 1.4936440723665492, + "learning_rate": 5.396884469095562e-06, + "loss": 0.5975, + "step": 16003 + }, + { + "epoch": 0.4904989579502268, + "grad_norm": 1.7544472661210855, + "learning_rate": 5.396389715346082e-06, + "loss": 0.6936, + "step": 16004 + }, + { + "epoch": 0.490529606472968, + "grad_norm": 1.8474032799500357, + "learning_rate": 5.395894957690871e-06, + "loss": 0.6535, + "step": 16005 + }, + { + "epoch": 0.4905602549957092, + "grad_norm": 1.6407636042769167, + "learning_rate": 5.395400196134809e-06, + "loss": 0.6112, + "step": 16006 + }, + { + "epoch": 0.4905909035184504, + "grad_norm": 1.6700211152501354, + "learning_rate": 5.394905430682766e-06, + "loss": 0.6364, + "step": 16007 + }, + { + "epoch": 0.4906215520411916, + "grad_norm": 1.6214377827981339, + "learning_rate": 5.3944106613396196e-06, + "loss": 0.6287, + "step": 16008 + }, + { + "epoch": 0.4906522005639328, + "grad_norm": 1.6134488800145381, + "learning_rate": 5.393915888110242e-06, + "loss": 0.6411, + "step": 16009 + }, + { + "epoch": 0.49068284908667403, + "grad_norm": 1.6236974189278595, + "learning_rate": 5.393421110999513e-06, + "loss": 
0.4927, + "step": 16010 + }, + { + "epoch": 0.49071349760941524, + "grad_norm": 1.6128371606605174, + "learning_rate": 5.392926330012305e-06, + "loss": 0.6431, + "step": 16011 + }, + { + "epoch": 0.49074414613215644, + "grad_norm": 0.8091348662292467, + "learning_rate": 5.3924315451534915e-06, + "loss": 0.4242, + "step": 16012 + }, + { + "epoch": 0.49077479465489765, + "grad_norm": 1.770134107906551, + "learning_rate": 5.39193675642795e-06, + "loss": 0.6531, + "step": 16013 + }, + { + "epoch": 0.49080544317763886, + "grad_norm": 1.8579220141748045, + "learning_rate": 5.391441963840556e-06, + "loss": 0.5375, + "step": 16014 + }, + { + "epoch": 0.49083609170038006, + "grad_norm": 1.7990255141905103, + "learning_rate": 5.3909471673961844e-06, + "loss": 0.6976, + "step": 16015 + }, + { + "epoch": 0.49086674022312127, + "grad_norm": 1.6333552340274575, + "learning_rate": 5.3904523670997085e-06, + "loss": 0.6664, + "step": 16016 + }, + { + "epoch": 0.4908973887458625, + "grad_norm": 0.7546947411598194, + "learning_rate": 5.389957562956007e-06, + "loss": 0.4285, + "step": 16017 + }, + { + "epoch": 0.4909280372686037, + "grad_norm": 1.816489287804029, + "learning_rate": 5.389462754969955e-06, + "loss": 0.6273, + "step": 16018 + }, + { + "epoch": 0.4909586857913449, + "grad_norm": 0.8130943191924145, + "learning_rate": 5.388967943146426e-06, + "loss": 0.4368, + "step": 16019 + }, + { + "epoch": 0.49098933431408603, + "grad_norm": 1.9569358679172675, + "learning_rate": 5.388473127490295e-06, + "loss": 0.6866, + "step": 16020 + }, + { + "epoch": 0.49101998283682724, + "grad_norm": 1.672319848170703, + "learning_rate": 5.3879783080064396e-06, + "loss": 0.6097, + "step": 16021 + }, + { + "epoch": 0.49105063135956845, + "grad_norm": 1.9590703849362394, + "learning_rate": 5.387483484699736e-06, + "loss": 0.7043, + "step": 16022 + }, + { + "epoch": 0.49108127988230965, + "grad_norm": 0.7889595999560701, + "learning_rate": 5.3869886575750575e-06, + "loss": 0.427, + "step": 16023 + }, + { + "epoch": 0.49111192840505086, + "grad_norm": 1.9511339172896986, + "learning_rate": 5.386493826637279e-06, + "loss": 0.6915, + "step": 16024 + }, + { + "epoch": 0.49114257692779206, + "grad_norm": 0.8112900322252339, + "learning_rate": 5.38599899189128e-06, + "loss": 0.4275, + "step": 16025 + }, + { + "epoch": 0.49117322545053327, + "grad_norm": 0.7430861606981914, + "learning_rate": 5.385504153341934e-06, + "loss": 0.4236, + "step": 16026 + }, + { + "epoch": 0.4912038739732745, + "grad_norm": 1.681345020823609, + "learning_rate": 5.385009310994116e-06, + "loss": 0.539, + "step": 16027 + }, + { + "epoch": 0.4912345224960157, + "grad_norm": 1.5994445816060003, + "learning_rate": 5.384514464852704e-06, + "loss": 0.6116, + "step": 16028 + }, + { + "epoch": 0.4912651710187569, + "grad_norm": 1.611921414361951, + "learning_rate": 5.384019614922572e-06, + "loss": 0.6542, + "step": 16029 + }, + { + "epoch": 0.4912958195414981, + "grad_norm": 1.8466687271414737, + "learning_rate": 5.383524761208597e-06, + "loss": 0.7131, + "step": 16030 + }, + { + "epoch": 0.4913264680642393, + "grad_norm": 2.0646195971475527, + "learning_rate": 5.383029903715653e-06, + "loss": 0.6577, + "step": 16031 + }, + { + "epoch": 0.4913571165869805, + "grad_norm": 1.8656741721691141, + "learning_rate": 5.382535042448619e-06, + "loss": 0.5957, + "step": 16032 + }, + { + "epoch": 0.4913877651097217, + "grad_norm": 1.6717273227547487, + "learning_rate": 5.382040177412368e-06, + "loss": 0.686, + "step": 16033 + }, + { + "epoch": 0.4914184136324629, + 
"grad_norm": 2.003326509187147, + "learning_rate": 5.3815453086117785e-06, + "loss": 0.6871, + "step": 16034 + }, + { + "epoch": 0.4914490621552041, + "grad_norm": 0.8165762897805785, + "learning_rate": 5.381050436051724e-06, + "loss": 0.4259, + "step": 16035 + }, + { + "epoch": 0.4914797106779453, + "grad_norm": 1.7072064151189057, + "learning_rate": 5.380555559737084e-06, + "loss": 0.6925, + "step": 16036 + }, + { + "epoch": 0.49151035920068653, + "grad_norm": 0.8206073643743199, + "learning_rate": 5.38006067967273e-06, + "loss": 0.4243, + "step": 16037 + }, + { + "epoch": 0.49154100772342774, + "grad_norm": 1.6809350083066834, + "learning_rate": 5.379565795863545e-06, + "loss": 0.6601, + "step": 16038 + }, + { + "epoch": 0.49157165624616894, + "grad_norm": 1.5803265686626526, + "learning_rate": 5.379070908314398e-06, + "loss": 0.6334, + "step": 16039 + }, + { + "epoch": 0.49160230476891015, + "grad_norm": 0.7658406531084696, + "learning_rate": 5.378576017030168e-06, + "loss": 0.4269, + "step": 16040 + }, + { + "epoch": 0.49163295329165135, + "grad_norm": 0.7526138500017153, + "learning_rate": 5.378081122015733e-06, + "loss": 0.4155, + "step": 16041 + }, + { + "epoch": 0.49166360181439256, + "grad_norm": 0.7953775991945798, + "learning_rate": 5.377586223275968e-06, + "loss": 0.4303, + "step": 16042 + }, + { + "epoch": 0.49169425033713376, + "grad_norm": 1.7600185999560458, + "learning_rate": 5.377091320815748e-06, + "loss": 0.6338, + "step": 16043 + }, + { + "epoch": 0.49172489885987497, + "grad_norm": 1.703220533195655, + "learning_rate": 5.376596414639952e-06, + "loss": 0.6554, + "step": 16044 + }, + { + "epoch": 0.4917555473826162, + "grad_norm": 0.7663600909153662, + "learning_rate": 5.376101504753456e-06, + "loss": 0.4391, + "step": 16045 + }, + { + "epoch": 0.4917861959053574, + "grad_norm": 0.7782005332987554, + "learning_rate": 5.375606591161133e-06, + "loss": 0.4213, + "step": 16046 + }, + { + "epoch": 0.4918168444280986, + "grad_norm": 1.6825657099136764, + "learning_rate": 5.375111673867865e-06, + "loss": 0.6656, + "step": 16047 + }, + { + "epoch": 0.4918474929508398, + "grad_norm": 1.6593616059323208, + "learning_rate": 5.374616752878523e-06, + "loss": 0.6304, + "step": 16048 + }, + { + "epoch": 0.491878141473581, + "grad_norm": 0.7747663472179278, + "learning_rate": 5.374121828197989e-06, + "loss": 0.4474, + "step": 16049 + }, + { + "epoch": 0.4919087899963222, + "grad_norm": 0.8119737204774655, + "learning_rate": 5.373626899831135e-06, + "loss": 0.4354, + "step": 16050 + }, + { + "epoch": 0.49193943851906335, + "grad_norm": 0.7543659433467145, + "learning_rate": 5.37313196778284e-06, + "loss": 0.46, + "step": 16051 + }, + { + "epoch": 0.49197008704180456, + "grad_norm": 1.7954686254910708, + "learning_rate": 5.37263703205798e-06, + "loss": 0.6194, + "step": 16052 + }, + { + "epoch": 0.49200073556454577, + "grad_norm": 2.0014178131917046, + "learning_rate": 5.372142092661432e-06, + "loss": 0.7568, + "step": 16053 + }, + { + "epoch": 0.49203138408728697, + "grad_norm": 1.603748694529132, + "learning_rate": 5.371647149598074e-06, + "loss": 0.6062, + "step": 16054 + }, + { + "epoch": 0.4920620326100282, + "grad_norm": 0.7785704815564803, + "learning_rate": 5.371152202872781e-06, + "loss": 0.4445, + "step": 16055 + }, + { + "epoch": 0.4920926811327694, + "grad_norm": 1.7549128789447235, + "learning_rate": 5.370657252490429e-06, + "loss": 0.6274, + "step": 16056 + }, + { + "epoch": 0.4921233296555106, + "grad_norm": 1.8435648008064067, + "learning_rate": 5.370162298455898e-06, + 
"loss": 0.6508, + "step": 16057 + }, + { + "epoch": 0.4921539781782518, + "grad_norm": 1.56616426488381, + "learning_rate": 5.369667340774062e-06, + "loss": 0.6143, + "step": 16058 + }, + { + "epoch": 0.492184626700993, + "grad_norm": 1.6458535727519454, + "learning_rate": 5.369172379449798e-06, + "loss": 0.7135, + "step": 16059 + }, + { + "epoch": 0.4922152752237342, + "grad_norm": 1.6776617768159565, + "learning_rate": 5.368677414487987e-06, + "loss": 0.6985, + "step": 16060 + }, + { + "epoch": 0.4922459237464754, + "grad_norm": 1.8117728413725709, + "learning_rate": 5.3681824458935015e-06, + "loss": 0.6852, + "step": 16061 + }, + { + "epoch": 0.4922765722692166, + "grad_norm": 1.643628148292832, + "learning_rate": 5.367687473671221e-06, + "loss": 0.6272, + "step": 16062 + }, + { + "epoch": 0.4923072207919578, + "grad_norm": 1.5670415253227081, + "learning_rate": 5.36719249782602e-06, + "loss": 0.6483, + "step": 16063 + }, + { + "epoch": 0.49233786931469903, + "grad_norm": 1.4940893680460785, + "learning_rate": 5.366697518362779e-06, + "loss": 0.6272, + "step": 16064 + }, + { + "epoch": 0.49236851783744023, + "grad_norm": 1.7421343919491972, + "learning_rate": 5.366202535286373e-06, + "loss": 0.6653, + "step": 16065 + }, + { + "epoch": 0.49239916636018144, + "grad_norm": 2.3406061955133732, + "learning_rate": 5.3657075486016805e-06, + "loss": 0.5123, + "step": 16066 + }, + { + "epoch": 0.49242981488292265, + "grad_norm": 0.7637094793062287, + "learning_rate": 5.365212558313576e-06, + "loss": 0.4205, + "step": 16067 + }, + { + "epoch": 0.49246046340566385, + "grad_norm": 1.9845261150718128, + "learning_rate": 5.3647175644269404e-06, + "loss": 0.6694, + "step": 16068 + }, + { + "epoch": 0.49249111192840506, + "grad_norm": 1.7550682064986203, + "learning_rate": 5.364222566946649e-06, + "loss": 0.6801, + "step": 16069 + }, + { + "epoch": 0.49252176045114626, + "grad_norm": 1.6974308664777373, + "learning_rate": 5.363727565877579e-06, + "loss": 0.6752, + "step": 16070 + }, + { + "epoch": 0.49255240897388747, + "grad_norm": 0.7477107699947779, + "learning_rate": 5.363232561224608e-06, + "loss": 0.4299, + "step": 16071 + }, + { + "epoch": 0.4925830574966287, + "grad_norm": 1.7714686130595305, + "learning_rate": 5.362737552992615e-06, + "loss": 0.5888, + "step": 16072 + }, + { + "epoch": 0.4926137060193699, + "grad_norm": 1.7385271470463806, + "learning_rate": 5.362242541186475e-06, + "loss": 0.6124, + "step": 16073 + }, + { + "epoch": 0.4926443545421111, + "grad_norm": 1.6574013866080144, + "learning_rate": 5.361747525811066e-06, + "loss": 0.6037, + "step": 16074 + }, + { + "epoch": 0.4926750030648523, + "grad_norm": 1.577270620231314, + "learning_rate": 5.3612525068712675e-06, + "loss": 0.542, + "step": 16075 + }, + { + "epoch": 0.4927056515875935, + "grad_norm": 1.7492528795476323, + "learning_rate": 5.360757484371956e-06, + "loss": 0.7102, + "step": 16076 + }, + { + "epoch": 0.4927363001103347, + "grad_norm": 1.8141515128698509, + "learning_rate": 5.360262458318008e-06, + "loss": 0.6261, + "step": 16077 + }, + { + "epoch": 0.4927669486330759, + "grad_norm": 1.805449629825261, + "learning_rate": 5.359767428714299e-06, + "loss": 0.7024, + "step": 16078 + }, + { + "epoch": 0.4927975971558171, + "grad_norm": 1.8396061700180693, + "learning_rate": 5.359272395565713e-06, + "loss": 0.7657, + "step": 16079 + }, + { + "epoch": 0.4928282456785583, + "grad_norm": 1.6338698605059552, + "learning_rate": 5.358777358877124e-06, + "loss": 0.6725, + "step": 16080 + }, + { + "epoch": 0.4928588942012995, + 
"grad_norm": 1.6037737943619195, + "learning_rate": 5.358282318653409e-06, + "loss": 0.5275, + "step": 16081 + }, + { + "epoch": 0.4928895427240407, + "grad_norm": 1.847724701612787, + "learning_rate": 5.3577872748994465e-06, + "loss": 0.5967, + "step": 16082 + }, + { + "epoch": 0.4929201912467819, + "grad_norm": 1.5709591406458567, + "learning_rate": 5.357292227620115e-06, + "loss": 0.5635, + "step": 16083 + }, + { + "epoch": 0.4929508397695231, + "grad_norm": 0.8023495991307018, + "learning_rate": 5.356797176820291e-06, + "loss": 0.4278, + "step": 16084 + }, + { + "epoch": 0.4929814882922643, + "grad_norm": 1.6384652022164499, + "learning_rate": 5.3563021225048525e-06, + "loss": 0.6545, + "step": 16085 + }, + { + "epoch": 0.4930121368150055, + "grad_norm": 1.895882311942351, + "learning_rate": 5.35580706467868e-06, + "loss": 0.7035, + "step": 16086 + }, + { + "epoch": 0.4930427853377467, + "grad_norm": 1.6962408982455934, + "learning_rate": 5.355312003346648e-06, + "loss": 0.6736, + "step": 16087 + }, + { + "epoch": 0.4930734338604879, + "grad_norm": 1.6752623617084637, + "learning_rate": 5.354816938513638e-06, + "loss": 0.6023, + "step": 16088 + }, + { + "epoch": 0.4931040823832291, + "grad_norm": 0.8257996723600691, + "learning_rate": 5.354321870184522e-06, + "loss": 0.4358, + "step": 16089 + }, + { + "epoch": 0.4931347309059703, + "grad_norm": 1.7224151595334953, + "learning_rate": 5.3538267983641855e-06, + "loss": 0.6016, + "step": 16090 + }, + { + "epoch": 0.4931653794287115, + "grad_norm": 1.6218723957159369, + "learning_rate": 5.353331723057501e-06, + "loss": 0.6189, + "step": 16091 + }, + { + "epoch": 0.49319602795145273, + "grad_norm": 1.6965035974744578, + "learning_rate": 5.35283664426935e-06, + "loss": 0.562, + "step": 16092 + }, + { + "epoch": 0.49322667647419394, + "grad_norm": 2.025777452606229, + "learning_rate": 5.352341562004608e-06, + "loss": 0.5523, + "step": 16093 + }, + { + "epoch": 0.49325732499693514, + "grad_norm": 1.7122617154478668, + "learning_rate": 5.351846476268157e-06, + "loss": 0.711, + "step": 16094 + }, + { + "epoch": 0.49328797351967635, + "grad_norm": 1.7492718136280472, + "learning_rate": 5.35135138706487e-06, + "loss": 0.6293, + "step": 16095 + }, + { + "epoch": 0.49331862204241755, + "grad_norm": 1.6450832027007465, + "learning_rate": 5.3508562943996275e-06, + "loss": 0.6207, + "step": 16096 + }, + { + "epoch": 0.49334927056515876, + "grad_norm": 1.592709184826643, + "learning_rate": 5.35036119827731e-06, + "loss": 0.738, + "step": 16097 + }, + { + "epoch": 0.49337991908789997, + "grad_norm": 1.8266120830799573, + "learning_rate": 5.349866098702792e-06, + "loss": 0.687, + "step": 16098 + }, + { + "epoch": 0.49341056761064117, + "grad_norm": 1.6562999484237644, + "learning_rate": 5.349370995680957e-06, + "loss": 0.6494, + "step": 16099 + }, + { + "epoch": 0.4934412161333824, + "grad_norm": 1.7903513001659743, + "learning_rate": 5.3488758892166785e-06, + "loss": 0.6334, + "step": 16100 + }, + { + "epoch": 0.4934718646561236, + "grad_norm": 1.72033900506453, + "learning_rate": 5.3483807793148355e-06, + "loss": 0.5864, + "step": 16101 + }, + { + "epoch": 0.4935025131788648, + "grad_norm": 0.8449237529045672, + "learning_rate": 5.347885665980308e-06, + "loss": 0.444, + "step": 16102 + }, + { + "epoch": 0.493533161701606, + "grad_norm": 1.8415143303075792, + "learning_rate": 5.347390549217976e-06, + "loss": 0.6208, + "step": 16103 + }, + { + "epoch": 0.4935638102243472, + "grad_norm": 1.758567592894372, + "learning_rate": 5.346895429032714e-06, + "loss": 
0.5669, + "step": 16104 + }, + { + "epoch": 0.4935944587470884, + "grad_norm": 0.7820506252407078, + "learning_rate": 5.346400305429403e-06, + "loss": 0.4371, + "step": 16105 + }, + { + "epoch": 0.4936251072698296, + "grad_norm": 0.7585942154671667, + "learning_rate": 5.34590517841292e-06, + "loss": 0.4303, + "step": 16106 + }, + { + "epoch": 0.4936557557925708, + "grad_norm": 1.5223223136800024, + "learning_rate": 5.345410047988148e-06, + "loss": 0.5048, + "step": 16107 + }, + { + "epoch": 0.493686404315312, + "grad_norm": 1.8456920624253488, + "learning_rate": 5.34491491415996e-06, + "loss": 0.5949, + "step": 16108 + }, + { + "epoch": 0.4937170528380532, + "grad_norm": 1.8608814739146342, + "learning_rate": 5.344419776933237e-06, + "loss": 0.5769, + "step": 16109 + }, + { + "epoch": 0.49374770136079443, + "grad_norm": 1.8645847542253176, + "learning_rate": 5.343924636312858e-06, + "loss": 0.5884, + "step": 16110 + }, + { + "epoch": 0.49377834988353564, + "grad_norm": 1.8273493248223822, + "learning_rate": 5.343429492303702e-06, + "loss": 0.6908, + "step": 16111 + }, + { + "epoch": 0.49380899840627684, + "grad_norm": 1.5365324086708663, + "learning_rate": 5.342934344910648e-06, + "loss": 0.6089, + "step": 16112 + }, + { + "epoch": 0.493839646929018, + "grad_norm": 1.8261958989629672, + "learning_rate": 5.3424391941385724e-06, + "loss": 0.6138, + "step": 16113 + }, + { + "epoch": 0.4938702954517592, + "grad_norm": 0.8507387982940463, + "learning_rate": 5.341944039992357e-06, + "loss": 0.4485, + "step": 16114 + }, + { + "epoch": 0.4939009439745004, + "grad_norm": 0.7919326049805275, + "learning_rate": 5.34144888247688e-06, + "loss": 0.4564, + "step": 16115 + }, + { + "epoch": 0.4939315924972416, + "grad_norm": 1.7096240266069145, + "learning_rate": 5.340953721597019e-06, + "loss": 0.7025, + "step": 16116 + }, + { + "epoch": 0.4939622410199828, + "grad_norm": 1.874086685661637, + "learning_rate": 5.340458557357653e-06, + "loss": 0.5857, + "step": 16117 + }, + { + "epoch": 0.493992889542724, + "grad_norm": 1.676411800845107, + "learning_rate": 5.339963389763663e-06, + "loss": 0.5827, + "step": 16118 + }, + { + "epoch": 0.49402353806546523, + "grad_norm": 1.7010345239292484, + "learning_rate": 5.339468218819926e-06, + "loss": 0.6038, + "step": 16119 + }, + { + "epoch": 0.49405418658820643, + "grad_norm": 1.5461111317603349, + "learning_rate": 5.338973044531323e-06, + "loss": 0.5705, + "step": 16120 + }, + { + "epoch": 0.49408483511094764, + "grad_norm": 1.7168873143514527, + "learning_rate": 5.33847786690273e-06, + "loss": 0.5741, + "step": 16121 + }, + { + "epoch": 0.49411548363368885, + "grad_norm": 1.9585186254912619, + "learning_rate": 5.337982685939029e-06, + "loss": 0.6281, + "step": 16122 + }, + { + "epoch": 0.49414613215643005, + "grad_norm": 1.6753766543350317, + "learning_rate": 5.337487501645099e-06, + "loss": 0.535, + "step": 16123 + }, + { + "epoch": 0.49417678067917126, + "grad_norm": 1.73926828742593, + "learning_rate": 5.3369923140258165e-06, + "loss": 0.6772, + "step": 16124 + }, + { + "epoch": 0.49420742920191246, + "grad_norm": 1.6744237066173218, + "learning_rate": 5.336497123086063e-06, + "loss": 0.6424, + "step": 16125 + }, + { + "epoch": 0.49423807772465367, + "grad_norm": 1.739840410241923, + "learning_rate": 5.336001928830719e-06, + "loss": 0.6284, + "step": 16126 + }, + { + "epoch": 0.4942687262473949, + "grad_norm": 1.8017811406263042, + "learning_rate": 5.3355067312646605e-06, + "loss": 0.5964, + "step": 16127 + }, + { + "epoch": 0.4942993747701361, + "grad_norm": 
1.8155654579925051, + "learning_rate": 5.335011530392767e-06, + "loss": 0.6425, + "step": 16128 + }, + { + "epoch": 0.4943300232928773, + "grad_norm": 1.7995735017755548, + "learning_rate": 5.334516326219921e-06, + "loss": 0.7261, + "step": 16129 + }, + { + "epoch": 0.4943606718156185, + "grad_norm": 1.6865349304016821, + "learning_rate": 5.334021118751e-06, + "loss": 0.7361, + "step": 16130 + }, + { + "epoch": 0.4943913203383597, + "grad_norm": 1.657318445262679, + "learning_rate": 5.3335259079908845e-06, + "loss": 0.5254, + "step": 16131 + }, + { + "epoch": 0.4944219688611009, + "grad_norm": 1.6897935390057186, + "learning_rate": 5.33303069394445e-06, + "loss": 0.6309, + "step": 16132 + }, + { + "epoch": 0.4944526173838421, + "grad_norm": 2.0220340857851005, + "learning_rate": 5.33253547661658e-06, + "loss": 0.6675, + "step": 16133 + }, + { + "epoch": 0.4944832659065833, + "grad_norm": 1.742923156425449, + "learning_rate": 5.332040256012154e-06, + "loss": 0.6368, + "step": 16134 + }, + { + "epoch": 0.4945139144293245, + "grad_norm": 1.8299956365878243, + "learning_rate": 5.331545032136049e-06, + "loss": 0.617, + "step": 16135 + }, + { + "epoch": 0.4945445629520657, + "grad_norm": 1.683319225850707, + "learning_rate": 5.331049804993147e-06, + "loss": 0.6885, + "step": 16136 + }, + { + "epoch": 0.49457521147480693, + "grad_norm": 1.8889825190185927, + "learning_rate": 5.330554574588327e-06, + "loss": 0.7122, + "step": 16137 + }, + { + "epoch": 0.49460585999754814, + "grad_norm": 1.7776190105939396, + "learning_rate": 5.330059340926466e-06, + "loss": 0.6996, + "step": 16138 + }, + { + "epoch": 0.49463650852028934, + "grad_norm": 1.5858780518470335, + "learning_rate": 5.329564104012448e-06, + "loss": 0.6117, + "step": 16139 + }, + { + "epoch": 0.49466715704303055, + "grad_norm": 0.8304596776468546, + "learning_rate": 5.32906886385115e-06, + "loss": 0.4489, + "step": 16140 + }, + { + "epoch": 0.49469780556577175, + "grad_norm": 1.6674749541351133, + "learning_rate": 5.328573620447452e-06, + "loss": 0.5991, + "step": 16141 + }, + { + "epoch": 0.49472845408851296, + "grad_norm": 1.690890285465648, + "learning_rate": 5.328078373806235e-06, + "loss": 0.5536, + "step": 16142 + }, + { + "epoch": 0.49475910261125416, + "grad_norm": 1.7327810974324842, + "learning_rate": 5.327583123932376e-06, + "loss": 0.5985, + "step": 16143 + }, + { + "epoch": 0.4947897511339953, + "grad_norm": 0.8183580116624664, + "learning_rate": 5.327087870830757e-06, + "loss": 0.4386, + "step": 16144 + }, + { + "epoch": 0.4948203996567365, + "grad_norm": 1.6485506247209176, + "learning_rate": 5.3265926145062585e-06, + "loss": 0.6152, + "step": 16145 + }, + { + "epoch": 0.4948510481794777, + "grad_norm": 1.557531114862725, + "learning_rate": 5.326097354963759e-06, + "loss": 0.6382, + "step": 16146 + }, + { + "epoch": 0.49488169670221893, + "grad_norm": 1.7555343476759528, + "learning_rate": 5.325602092208139e-06, + "loss": 0.6882, + "step": 16147 + }, + { + "epoch": 0.49491234522496014, + "grad_norm": 1.6297461909751965, + "learning_rate": 5.325106826244278e-06, + "loss": 0.5432, + "step": 16148 + }, + { + "epoch": 0.49494299374770134, + "grad_norm": 1.5056530114780466, + "learning_rate": 5.324611557077057e-06, + "loss": 0.5581, + "step": 16149 + }, + { + "epoch": 0.49497364227044255, + "grad_norm": 1.6549291043364578, + "learning_rate": 5.324116284711355e-06, + "loss": 0.6232, + "step": 16150 + }, + { + "epoch": 0.49500429079318375, + "grad_norm": 1.5794959077069513, + "learning_rate": 5.323621009152051e-06, + "loss": 0.5635, 
+ "step": 16151 + }, + { + "epoch": 0.49503493931592496, + "grad_norm": 1.6133686537495864, + "learning_rate": 5.323125730404029e-06, + "loss": 0.5869, + "step": 16152 + }, + { + "epoch": 0.49506558783866617, + "grad_norm": 1.72088771615478, + "learning_rate": 5.322630448472165e-06, + "loss": 0.6065, + "step": 16153 + }, + { + "epoch": 0.49509623636140737, + "grad_norm": 1.4770866896563803, + "learning_rate": 5.322135163361339e-06, + "loss": 0.6449, + "step": 16154 + }, + { + "epoch": 0.4951268848841486, + "grad_norm": 1.509101474892127, + "learning_rate": 5.321639875076435e-06, + "loss": 0.621, + "step": 16155 + }, + { + "epoch": 0.4951575334068898, + "grad_norm": 0.9209557293979038, + "learning_rate": 5.32114458362233e-06, + "loss": 0.4145, + "step": 16156 + }, + { + "epoch": 0.495188181929631, + "grad_norm": 1.7355348289044947, + "learning_rate": 5.320649289003906e-06, + "loss": 0.6542, + "step": 16157 + }, + { + "epoch": 0.4952188304523722, + "grad_norm": 1.7643293475657649, + "learning_rate": 5.3201539912260426e-06, + "loss": 0.6746, + "step": 16158 + }, + { + "epoch": 0.4952494789751134, + "grad_norm": 1.662570495906049, + "learning_rate": 5.31965869029362e-06, + "loss": 0.6277, + "step": 16159 + }, + { + "epoch": 0.4952801274978546, + "grad_norm": 0.8394003373892022, + "learning_rate": 5.319163386211517e-06, + "loss": 0.4255, + "step": 16160 + }, + { + "epoch": 0.4953107760205958, + "grad_norm": 1.8864210202635516, + "learning_rate": 5.318668078984618e-06, + "loss": 0.6711, + "step": 16161 + }, + { + "epoch": 0.495341424543337, + "grad_norm": 1.7275420536381418, + "learning_rate": 5.318172768617798e-06, + "loss": 0.6595, + "step": 16162 + }, + { + "epoch": 0.4953720730660782, + "grad_norm": 1.875916715623536, + "learning_rate": 5.317677455115943e-06, + "loss": 0.6342, + "step": 16163 + }, + { + "epoch": 0.49540272158881943, + "grad_norm": 1.770625012265086, + "learning_rate": 5.3171821384839284e-06, + "loss": 0.6685, + "step": 16164 + }, + { + "epoch": 0.49543337011156063, + "grad_norm": 2.0676067015392805, + "learning_rate": 5.316686818726639e-06, + "loss": 0.5896, + "step": 16165 + }, + { + "epoch": 0.49546401863430184, + "grad_norm": 1.7257031049954823, + "learning_rate": 5.3161914958489525e-06, + "loss": 0.647, + "step": 16166 + }, + { + "epoch": 0.49549466715704304, + "grad_norm": 1.8106321701864991, + "learning_rate": 5.3156961698557495e-06, + "loss": 0.6189, + "step": 16167 + }, + { + "epoch": 0.49552531567978425, + "grad_norm": 1.8035464175017997, + "learning_rate": 5.315200840751912e-06, + "loss": 0.6261, + "step": 16168 + }, + { + "epoch": 0.49555596420252546, + "grad_norm": 1.5712568598212209, + "learning_rate": 5.314705508542321e-06, + "loss": 0.6496, + "step": 16169 + }, + { + "epoch": 0.49558661272526666, + "grad_norm": 0.8209638044908621, + "learning_rate": 5.314210173231855e-06, + "loss": 0.4421, + "step": 16170 + }, + { + "epoch": 0.49561726124800787, + "grad_norm": 1.612611032282978, + "learning_rate": 5.313714834825395e-06, + "loss": 0.6723, + "step": 16171 + }, + { + "epoch": 0.4956479097707491, + "grad_norm": 1.7551779682344304, + "learning_rate": 5.313219493327826e-06, + "loss": 0.6736, + "step": 16172 + }, + { + "epoch": 0.4956785582934903, + "grad_norm": 1.8345278738951796, + "learning_rate": 5.312724148744022e-06, + "loss": 0.6942, + "step": 16173 + }, + { + "epoch": 0.4957092068162315, + "grad_norm": 1.560907074380846, + "learning_rate": 5.312228801078867e-06, + "loss": 0.6287, + "step": 16174 + }, + { + "epoch": 0.49573985533897263, + "grad_norm": 
1.7392768381872878, + "learning_rate": 5.311733450337242e-06, + "loss": 0.7522, + "step": 16175 + }, + { + "epoch": 0.49577050386171384, + "grad_norm": 1.5029904089714354, + "learning_rate": 5.311238096524027e-06, + "loss": 0.6901, + "step": 16176 + }, + { + "epoch": 0.49580115238445505, + "grad_norm": 1.5447338638097288, + "learning_rate": 5.310742739644106e-06, + "loss": 0.6415, + "step": 16177 + }, + { + "epoch": 0.49583180090719625, + "grad_norm": 1.695844799278974, + "learning_rate": 5.310247379702356e-06, + "loss": 0.5341, + "step": 16178 + }, + { + "epoch": 0.49586244942993746, + "grad_norm": 1.769239966859743, + "learning_rate": 5.309752016703657e-06, + "loss": 0.6164, + "step": 16179 + }, + { + "epoch": 0.49589309795267866, + "grad_norm": 1.7504371953086837, + "learning_rate": 5.309256650652894e-06, + "loss": 0.6406, + "step": 16180 + }, + { + "epoch": 0.49592374647541987, + "grad_norm": 1.6429172009580546, + "learning_rate": 5.3087612815549476e-06, + "loss": 0.6942, + "step": 16181 + }, + { + "epoch": 0.4959543949981611, + "grad_norm": 1.6822166067989301, + "learning_rate": 5.308265909414694e-06, + "loss": 0.6394, + "step": 16182 + }, + { + "epoch": 0.4959850435209023, + "grad_norm": 1.8553854800087966, + "learning_rate": 5.3077705342370204e-06, + "loss": 0.7206, + "step": 16183 + }, + { + "epoch": 0.4960156920436435, + "grad_norm": 1.6674787367669293, + "learning_rate": 5.307275156026804e-06, + "loss": 0.6675, + "step": 16184 + }, + { + "epoch": 0.4960463405663847, + "grad_norm": 1.8334103009925462, + "learning_rate": 5.306779774788926e-06, + "loss": 0.6957, + "step": 16185 + }, + { + "epoch": 0.4960769890891259, + "grad_norm": 0.8119664043884982, + "learning_rate": 5.306284390528269e-06, + "loss": 0.4395, + "step": 16186 + }, + { + "epoch": 0.4961076376118671, + "grad_norm": 1.6640080831687534, + "learning_rate": 5.305789003249714e-06, + "loss": 0.654, + "step": 16187 + }, + { + "epoch": 0.4961382861346083, + "grad_norm": 1.8682878902423141, + "learning_rate": 5.305293612958141e-06, + "loss": 0.7101, + "step": 16188 + }, + { + "epoch": 0.4961689346573495, + "grad_norm": 1.7571001029791942, + "learning_rate": 5.304798219658433e-06, + "loss": 0.6778, + "step": 16189 + }, + { + "epoch": 0.4961995831800907, + "grad_norm": 1.831878486024662, + "learning_rate": 5.304302823355468e-06, + "loss": 0.7178, + "step": 16190 + }, + { + "epoch": 0.4962302317028319, + "grad_norm": 1.818354053696709, + "learning_rate": 5.303807424054131e-06, + "loss": 0.6431, + "step": 16191 + }, + { + "epoch": 0.49626088022557313, + "grad_norm": 2.005465493661631, + "learning_rate": 5.303312021759302e-06, + "loss": 0.7799, + "step": 16192 + }, + { + "epoch": 0.49629152874831434, + "grad_norm": 1.7675363482898265, + "learning_rate": 5.302816616475861e-06, + "loss": 0.604, + "step": 16193 + }, + { + "epoch": 0.49632217727105554, + "grad_norm": 1.716426175801718, + "learning_rate": 5.302321208208692e-06, + "loss": 0.6508, + "step": 16194 + }, + { + "epoch": 0.49635282579379675, + "grad_norm": 0.7972839268053123, + "learning_rate": 5.301825796962672e-06, + "loss": 0.4653, + "step": 16195 + }, + { + "epoch": 0.49638347431653795, + "grad_norm": 1.6739983135889211, + "learning_rate": 5.3013303827426885e-06, + "loss": 0.651, + "step": 16196 + }, + { + "epoch": 0.49641412283927916, + "grad_norm": 0.7680385314093521, + "learning_rate": 5.300834965553617e-06, + "loss": 0.4339, + "step": 16197 + }, + { + "epoch": 0.49644477136202037, + "grad_norm": 0.7915307905164682, + "learning_rate": 5.300339545400344e-06, + "loss": 
0.4438, + "step": 16198 + }, + { + "epoch": 0.49647541988476157, + "grad_norm": 1.7703923123806267, + "learning_rate": 5.2998441222877475e-06, + "loss": 0.718, + "step": 16199 + }, + { + "epoch": 0.4965060684075028, + "grad_norm": 0.7690494188980301, + "learning_rate": 5.2993486962207095e-06, + "loss": 0.4615, + "step": 16200 + }, + { + "epoch": 0.496536716930244, + "grad_norm": 1.7950803477735118, + "learning_rate": 5.298853267204111e-06, + "loss": 0.7, + "step": 16201 + }, + { + "epoch": 0.4965673654529852, + "grad_norm": 1.840365279950168, + "learning_rate": 5.298357835242838e-06, + "loss": 0.6415, + "step": 16202 + }, + { + "epoch": 0.4965980139757264, + "grad_norm": 1.4608339818560758, + "learning_rate": 5.297862400341768e-06, + "loss": 0.5663, + "step": 16203 + }, + { + "epoch": 0.4966286624984676, + "grad_norm": 1.4069978828142073, + "learning_rate": 5.2973669625057825e-06, + "loss": 0.5058, + "step": 16204 + }, + { + "epoch": 0.4966593110212088, + "grad_norm": 1.9520320704779057, + "learning_rate": 5.296871521739763e-06, + "loss": 0.646, + "step": 16205 + }, + { + "epoch": 0.49668995954394995, + "grad_norm": 1.6122230248968379, + "learning_rate": 5.296376078048595e-06, + "loss": 0.5713, + "step": 16206 + }, + { + "epoch": 0.49672060806669116, + "grad_norm": 1.8517604359538629, + "learning_rate": 5.295880631437158e-06, + "loss": 0.6668, + "step": 16207 + }, + { + "epoch": 0.49675125658943237, + "grad_norm": 1.796154948994176, + "learning_rate": 5.2953851819103305e-06, + "loss": 0.6152, + "step": 16208 + }, + { + "epoch": 0.49678190511217357, + "grad_norm": 1.5440177565700564, + "learning_rate": 5.2948897294729995e-06, + "loss": 0.6492, + "step": 16209 + }, + { + "epoch": 0.4968125536349148, + "grad_norm": 1.9489949259313937, + "learning_rate": 5.294394274130044e-06, + "loss": 0.6266, + "step": 16210 + }, + { + "epoch": 0.496843202157656, + "grad_norm": 1.7831261087492747, + "learning_rate": 5.293898815886347e-06, + "loss": 0.6687, + "step": 16211 + }, + { + "epoch": 0.4968738506803972, + "grad_norm": 1.8334078195838506, + "learning_rate": 5.293403354746789e-06, + "loss": 0.6943, + "step": 16212 + }, + { + "epoch": 0.4969044992031384, + "grad_norm": 2.123253820739389, + "learning_rate": 5.292907890716252e-06, + "loss": 0.6984, + "step": 16213 + }, + { + "epoch": 0.4969351477258796, + "grad_norm": 1.9978680966328148, + "learning_rate": 5.292412423799619e-06, + "loss": 0.7318, + "step": 16214 + }, + { + "epoch": 0.4969657962486208, + "grad_norm": 0.919874092658123, + "learning_rate": 5.291916954001773e-06, + "loss": 0.4447, + "step": 16215 + }, + { + "epoch": 0.496996444771362, + "grad_norm": 1.812503962233617, + "learning_rate": 5.2914214813275935e-06, + "loss": 0.7352, + "step": 16216 + }, + { + "epoch": 0.4970270932941032, + "grad_norm": 0.8717694223571926, + "learning_rate": 5.290926005781964e-06, + "loss": 0.4255, + "step": 16217 + }, + { + "epoch": 0.4970577418168444, + "grad_norm": 1.7891434508945134, + "learning_rate": 5.290430527369764e-06, + "loss": 0.6048, + "step": 16218 + }, + { + "epoch": 0.49708839033958563, + "grad_norm": 1.7905600502992196, + "learning_rate": 5.28993504609588e-06, + "loss": 0.703, + "step": 16219 + }, + { + "epoch": 0.49711903886232683, + "grad_norm": 1.793874606883862, + "learning_rate": 5.289439561965192e-06, + "loss": 0.6611, + "step": 16220 + }, + { + "epoch": 0.49714968738506804, + "grad_norm": 1.6234893246014805, + "learning_rate": 5.28894407498258e-06, + "loss": 0.6318, + "step": 16221 + }, + { + "epoch": 0.49718033590780925, + "grad_norm": 
1.580442383981767, + "learning_rate": 5.28844858515293e-06, + "loss": 0.6184, + "step": 16222 + }, + { + "epoch": 0.49721098443055045, + "grad_norm": 0.8350527760856444, + "learning_rate": 5.287953092481122e-06, + "loss": 0.4357, + "step": 16223 + }, + { + "epoch": 0.49724163295329166, + "grad_norm": 1.700320671032362, + "learning_rate": 5.287457596972039e-06, + "loss": 0.6289, + "step": 16224 + }, + { + "epoch": 0.49727228147603286, + "grad_norm": 1.769881324416044, + "learning_rate": 5.28696209863056e-06, + "loss": 0.6022, + "step": 16225 + }, + { + "epoch": 0.49730292999877407, + "grad_norm": 1.788719610764909, + "learning_rate": 5.286466597461574e-06, + "loss": 0.6665, + "step": 16226 + }, + { + "epoch": 0.4973335785215153, + "grad_norm": 1.824438730953216, + "learning_rate": 5.285971093469956e-06, + "loss": 0.612, + "step": 16227 + }, + { + "epoch": 0.4973642270442565, + "grad_norm": 0.8097451778139932, + "learning_rate": 5.285475586660593e-06, + "loss": 0.413, + "step": 16228 + }, + { + "epoch": 0.4973948755669977, + "grad_norm": 1.7968016562304905, + "learning_rate": 5.284980077038365e-06, + "loss": 0.5431, + "step": 16229 + }, + { + "epoch": 0.4974255240897389, + "grad_norm": 2.002233256316254, + "learning_rate": 5.284484564608158e-06, + "loss": 0.6203, + "step": 16230 + }, + { + "epoch": 0.4974561726124801, + "grad_norm": 1.7073010477759207, + "learning_rate": 5.2839890493748495e-06, + "loss": 0.6811, + "step": 16231 + }, + { + "epoch": 0.4974868211352213, + "grad_norm": 1.6150762709998148, + "learning_rate": 5.283493531343324e-06, + "loss": 0.5472, + "step": 16232 + }, + { + "epoch": 0.4975174696579625, + "grad_norm": 1.5958402866539139, + "learning_rate": 5.282998010518465e-06, + "loss": 0.6107, + "step": 16233 + }, + { + "epoch": 0.4975481181807037, + "grad_norm": 1.6903615160656917, + "learning_rate": 5.282502486905154e-06, + "loss": 0.6662, + "step": 16234 + }, + { + "epoch": 0.4975787667034449, + "grad_norm": 1.767014846876855, + "learning_rate": 5.282006960508275e-06, + "loss": 0.7521, + "step": 16235 + }, + { + "epoch": 0.4976094152261861, + "grad_norm": 0.8454458262549763, + "learning_rate": 5.281511431332707e-06, + "loss": 0.422, + "step": 16236 + }, + { + "epoch": 0.4976400637489273, + "grad_norm": 1.9094308214094078, + "learning_rate": 5.281015899383336e-06, + "loss": 0.7358, + "step": 16237 + }, + { + "epoch": 0.4976707122716685, + "grad_norm": 1.6448684113136975, + "learning_rate": 5.280520364665044e-06, + "loss": 0.6635, + "step": 16238 + }, + { + "epoch": 0.4977013607944097, + "grad_norm": 1.7222719072113675, + "learning_rate": 5.2800248271827124e-06, + "loss": 0.6148, + "step": 16239 + }, + { + "epoch": 0.4977320093171509, + "grad_norm": 1.8100061231442117, + "learning_rate": 5.279529286941224e-06, + "loss": 0.6559, + "step": 16240 + }, + { + "epoch": 0.4977626578398921, + "grad_norm": 1.7384075191448602, + "learning_rate": 5.279033743945463e-06, + "loss": 0.6291, + "step": 16241 + }, + { + "epoch": 0.4977933063626333, + "grad_norm": 1.720311553237497, + "learning_rate": 5.27853819820031e-06, + "loss": 0.6188, + "step": 16242 + }, + { + "epoch": 0.4978239548853745, + "grad_norm": 1.8045505104989654, + "learning_rate": 5.278042649710651e-06, + "loss": 0.6069, + "step": 16243 + }, + { + "epoch": 0.4978546034081157, + "grad_norm": 1.6628289077077685, + "learning_rate": 5.277547098481364e-06, + "loss": 0.6235, + "step": 16244 + }, + { + "epoch": 0.4978852519308569, + "grad_norm": 1.6206118828382994, + "learning_rate": 5.277051544517337e-06, + "loss": 0.5787, + "step": 
16245 + }, + { + "epoch": 0.4979159004535981, + "grad_norm": 1.837455715115055, + "learning_rate": 5.276555987823448e-06, + "loss": 0.6938, + "step": 16246 + }, + { + "epoch": 0.49794654897633933, + "grad_norm": 1.6948273196895736, + "learning_rate": 5.276060428404582e-06, + "loss": 0.6434, + "step": 16247 + }, + { + "epoch": 0.49797719749908054, + "grad_norm": 1.7000129518177407, + "learning_rate": 5.275564866265624e-06, + "loss": 0.6892, + "step": 16248 + }, + { + "epoch": 0.49800784602182174, + "grad_norm": 1.77846245291664, + "learning_rate": 5.275069301411454e-06, + "loss": 0.6229, + "step": 16249 + }, + { + "epoch": 0.49803849454456295, + "grad_norm": 1.6688412467240554, + "learning_rate": 5.274573733846956e-06, + "loss": 0.6232, + "step": 16250 + }, + { + "epoch": 0.49806914306730415, + "grad_norm": 0.7980025072903154, + "learning_rate": 5.274078163577011e-06, + "loss": 0.42, + "step": 16251 + }, + { + "epoch": 0.49809979159004536, + "grad_norm": 1.521336437218647, + "learning_rate": 5.2735825906065065e-06, + "loss": 0.6144, + "step": 16252 + }, + { + "epoch": 0.49813044011278657, + "grad_norm": 1.7716418736111956, + "learning_rate": 5.273087014940321e-06, + "loss": 0.6745, + "step": 16253 + }, + { + "epoch": 0.49816108863552777, + "grad_norm": 0.7905851258965905, + "learning_rate": 5.27259143658334e-06, + "loss": 0.4363, + "step": 16254 + }, + { + "epoch": 0.498191737158269, + "grad_norm": 1.5557504153925894, + "learning_rate": 5.272095855540444e-06, + "loss": 0.6408, + "step": 16255 + }, + { + "epoch": 0.4982223856810102, + "grad_norm": 1.7963213155596618, + "learning_rate": 5.271600271816521e-06, + "loss": 0.5913, + "step": 16256 + }, + { + "epoch": 0.4982530342037514, + "grad_norm": 0.7790407052117992, + "learning_rate": 5.271104685416449e-06, + "loss": 0.4346, + "step": 16257 + }, + { + "epoch": 0.4982836827264926, + "grad_norm": 1.6110818940469427, + "learning_rate": 5.270609096345114e-06, + "loss": 0.6725, + "step": 16258 + }, + { + "epoch": 0.4983143312492338, + "grad_norm": 1.8265561375613275, + "learning_rate": 5.270113504607397e-06, + "loss": 0.6801, + "step": 16259 + }, + { + "epoch": 0.498344979771975, + "grad_norm": 1.6646586729173434, + "learning_rate": 5.269617910208183e-06, + "loss": 0.6108, + "step": 16260 + }, + { + "epoch": 0.4983756282947162, + "grad_norm": 1.686067223342425, + "learning_rate": 5.269122313152356e-06, + "loss": 0.6495, + "step": 16261 + }, + { + "epoch": 0.4984062768174574, + "grad_norm": 1.7748592639676952, + "learning_rate": 5.268626713444797e-06, + "loss": 0.6169, + "step": 16262 + }, + { + "epoch": 0.4984369253401986, + "grad_norm": 1.6198817000523027, + "learning_rate": 5.26813111109039e-06, + "loss": 0.5902, + "step": 16263 + }, + { + "epoch": 0.49846757386293983, + "grad_norm": 0.7860097356646888, + "learning_rate": 5.267635506094019e-06, + "loss": 0.4214, + "step": 16264 + }, + { + "epoch": 0.49849822238568103, + "grad_norm": 0.8191357742909876, + "learning_rate": 5.267139898460568e-06, + "loss": 0.4262, + "step": 16265 + }, + { + "epoch": 0.49852887090842224, + "grad_norm": 1.908414770195076, + "learning_rate": 5.266644288194918e-06, + "loss": 0.7291, + "step": 16266 + }, + { + "epoch": 0.49855951943116344, + "grad_norm": 1.76886958000415, + "learning_rate": 5.266148675301953e-06, + "loss": 0.6264, + "step": 16267 + }, + { + "epoch": 0.4985901679539046, + "grad_norm": 0.7627568303602125, + "learning_rate": 5.265653059786558e-06, + "loss": 0.4263, + "step": 16268 + }, + { + "epoch": 0.4986208164766458, + "grad_norm": 1.6841948377035079, + 
"learning_rate": 5.265157441653616e-06, + "loss": 0.7191, + "step": 16269 + }, + { + "epoch": 0.498651464999387, + "grad_norm": 1.5603145520628245, + "learning_rate": 5.264661820908008e-06, + "loss": 0.6346, + "step": 16270 + }, + { + "epoch": 0.4986821135221282, + "grad_norm": 1.840343568434805, + "learning_rate": 5.264166197554621e-06, + "loss": 0.6464, + "step": 16271 + }, + { + "epoch": 0.4987127620448694, + "grad_norm": 1.859546966377986, + "learning_rate": 5.263670571598335e-06, + "loss": 0.5968, + "step": 16272 + }, + { + "epoch": 0.4987434105676106, + "grad_norm": 1.7147258379655788, + "learning_rate": 5.263174943044037e-06, + "loss": 0.6938, + "step": 16273 + }, + { + "epoch": 0.49877405909035183, + "grad_norm": 1.621776423814195, + "learning_rate": 5.262679311896609e-06, + "loss": 0.5585, + "step": 16274 + }, + { + "epoch": 0.49880470761309303, + "grad_norm": 1.6731265003059677, + "learning_rate": 5.262183678160935e-06, + "loss": 0.6914, + "step": 16275 + }, + { + "epoch": 0.49883535613583424, + "grad_norm": 1.703191286198811, + "learning_rate": 5.261688041841897e-06, + "loss": 0.5998, + "step": 16276 + }, + { + "epoch": 0.49886600465857545, + "grad_norm": 0.8639307050473861, + "learning_rate": 5.26119240294438e-06, + "loss": 0.4623, + "step": 16277 + }, + { + "epoch": 0.49889665318131665, + "grad_norm": 1.6972000764700064, + "learning_rate": 5.260696761473268e-06, + "loss": 0.6294, + "step": 16278 + }, + { + "epoch": 0.49892730170405786, + "grad_norm": 1.5310949750416512, + "learning_rate": 5.260201117433441e-06, + "loss": 0.515, + "step": 16279 + }, + { + "epoch": 0.49895795022679906, + "grad_norm": 1.4901175947335066, + "learning_rate": 5.25970547082979e-06, + "loss": 0.6177, + "step": 16280 + }, + { + "epoch": 0.49898859874954027, + "grad_norm": 1.8768603831651067, + "learning_rate": 5.259209821667193e-06, + "loss": 0.5895, + "step": 16281 + }, + { + "epoch": 0.4990192472722815, + "grad_norm": 1.8674696908807167, + "learning_rate": 5.2587141699505355e-06, + "loss": 0.7497, + "step": 16282 + }, + { + "epoch": 0.4990498957950227, + "grad_norm": 1.7737205111573622, + "learning_rate": 5.2582185156847e-06, + "loss": 0.5776, + "step": 16283 + }, + { + "epoch": 0.4990805443177639, + "grad_norm": 1.7946524073136, + "learning_rate": 5.2577228588745736e-06, + "loss": 0.6157, + "step": 16284 + }, + { + "epoch": 0.4991111928405051, + "grad_norm": 1.813336946980399, + "learning_rate": 5.257227199525035e-06, + "loss": 0.699, + "step": 16285 + }, + { + "epoch": 0.4991418413632463, + "grad_norm": 1.7007717789448482, + "learning_rate": 5.256731537640973e-06, + "loss": 0.649, + "step": 16286 + }, + { + "epoch": 0.4991724898859875, + "grad_norm": 1.612380028412785, + "learning_rate": 5.256235873227268e-06, + "loss": 0.5839, + "step": 16287 + }, + { + "epoch": 0.4992031384087287, + "grad_norm": 1.7615788705834585, + "learning_rate": 5.255740206288808e-06, + "loss": 0.6956, + "step": 16288 + }, + { + "epoch": 0.4992337869314699, + "grad_norm": 1.9208165932561232, + "learning_rate": 5.255244536830472e-06, + "loss": 0.6514, + "step": 16289 + }, + { + "epoch": 0.4992644354542111, + "grad_norm": 1.7206759729800838, + "learning_rate": 5.254748864857147e-06, + "loss": 0.6156, + "step": 16290 + }, + { + "epoch": 0.4992950839769523, + "grad_norm": 1.7918312037134048, + "learning_rate": 5.254253190373716e-06, + "loss": 0.6189, + "step": 16291 + }, + { + "epoch": 0.49932573249969353, + "grad_norm": 3.75120288104131, + "learning_rate": 5.253757513385064e-06, + "loss": 0.6748, + "step": 16292 + }, + { + 
"epoch": 0.49935638102243474, + "grad_norm": 1.8889172895671205, + "learning_rate": 5.253261833896074e-06, + "loss": 0.6507, + "step": 16293 + }, + { + "epoch": 0.49938702954517594, + "grad_norm": 0.8121156518107633, + "learning_rate": 5.252766151911629e-06, + "loss": 0.4404, + "step": 16294 + }, + { + "epoch": 0.49941767806791715, + "grad_norm": 1.799940445593437, + "learning_rate": 5.252270467436615e-06, + "loss": 0.705, + "step": 16295 + }, + { + "epoch": 0.49944832659065835, + "grad_norm": 1.5711672431608186, + "learning_rate": 5.251774780475916e-06, + "loss": 0.5993, + "step": 16296 + }, + { + "epoch": 0.49947897511339956, + "grad_norm": 1.824715967088055, + "learning_rate": 5.251279091034417e-06, + "loss": 0.6737, + "step": 16297 + }, + { + "epoch": 0.49950962363614076, + "grad_norm": 1.6604012975519888, + "learning_rate": 5.250783399116998e-06, + "loss": 0.5614, + "step": 16298 + }, + { + "epoch": 0.4995402721588819, + "grad_norm": 1.5899715253850586, + "learning_rate": 5.25028770472855e-06, + "loss": 0.6012, + "step": 16299 + }, + { + "epoch": 0.4995709206816231, + "grad_norm": 1.6880487577160896, + "learning_rate": 5.24979200787395e-06, + "loss": 0.6059, + "step": 16300 + }, + { + "epoch": 0.4996015692043643, + "grad_norm": 1.6857333305320914, + "learning_rate": 5.249296308558086e-06, + "loss": 0.6195, + "step": 16301 + }, + { + "epoch": 0.49963221772710553, + "grad_norm": 1.6958205771900965, + "learning_rate": 5.248800606785842e-06, + "loss": 0.6395, + "step": 16302 + }, + { + "epoch": 0.49966286624984674, + "grad_norm": 1.7037810207950388, + "learning_rate": 5.2483049025621025e-06, + "loss": 0.6467, + "step": 16303 + }, + { + "epoch": 0.49969351477258794, + "grad_norm": 1.6769534437608078, + "learning_rate": 5.24780919589175e-06, + "loss": 0.5895, + "step": 16304 + }, + { + "epoch": 0.49972416329532915, + "grad_norm": 1.8124097444123612, + "learning_rate": 5.247313486779671e-06, + "loss": 0.5557, + "step": 16305 + }, + { + "epoch": 0.49975481181807035, + "grad_norm": 1.843210857369111, + "learning_rate": 5.246817775230748e-06, + "loss": 0.6138, + "step": 16306 + }, + { + "epoch": 0.49978546034081156, + "grad_norm": 1.6187795158968563, + "learning_rate": 5.2463220612498675e-06, + "loss": 0.649, + "step": 16307 + }, + { + "epoch": 0.49981610886355277, + "grad_norm": 1.6126184957535543, + "learning_rate": 5.245826344841912e-06, + "loss": 0.6016, + "step": 16308 + }, + { + "epoch": 0.49984675738629397, + "grad_norm": 2.0045313036592507, + "learning_rate": 5.2453306260117665e-06, + "loss": 0.7202, + "step": 16309 + }, + { + "epoch": 0.4998774059090352, + "grad_norm": 1.5120911188937776, + "learning_rate": 5.2448349047643165e-06, + "loss": 0.5785, + "step": 16310 + }, + { + "epoch": 0.4999080544317764, + "grad_norm": 1.9371628488922454, + "learning_rate": 5.244339181104446e-06, + "loss": 0.6586, + "step": 16311 + }, + { + "epoch": 0.4999387029545176, + "grad_norm": 1.6392489978148133, + "learning_rate": 5.243843455037038e-06, + "loss": 0.5062, + "step": 16312 + }, + { + "epoch": 0.4999693514772588, + "grad_norm": 1.7071353056238667, + "learning_rate": 5.243347726566977e-06, + "loss": 0.6652, + "step": 16313 + }, + { + "epoch": 0.5, + "grad_norm": 1.7085496659286128, + "learning_rate": 5.242851995699149e-06, + "loss": 0.6943, + "step": 16314 + }, + { + "epoch": 0.5000306485227412, + "grad_norm": 1.4232604348469646, + "learning_rate": 5.2423562624384394e-06, + "loss": 0.5121, + "step": 16315 + }, + { + "epoch": 0.5000612970454824, + "grad_norm": 0.8034202790709951, + "learning_rate": 
5.24186052678973e-06, + "loss": 0.4203, + "step": 16316 + }, + { + "epoch": 0.5000919455682236, + "grad_norm": 0.7712192049248275, + "learning_rate": 5.241364788757907e-06, + "loss": 0.4398, + "step": 16317 + }, + { + "epoch": 0.5001225940909648, + "grad_norm": 1.7398843801418735, + "learning_rate": 5.240869048347857e-06, + "loss": 0.8038, + "step": 16318 + }, + { + "epoch": 0.500153242613706, + "grad_norm": 0.8007505981024847, + "learning_rate": 5.240373305564463e-06, + "loss": 0.4529, + "step": 16319 + }, + { + "epoch": 0.5001838911364472, + "grad_norm": 0.8187166920887079, + "learning_rate": 5.239877560412606e-06, + "loss": 0.4558, + "step": 16320 + }, + { + "epoch": 0.5002145396591884, + "grad_norm": 1.7358015678308318, + "learning_rate": 5.239381812897176e-06, + "loss": 0.626, + "step": 16321 + }, + { + "epoch": 0.5002451881819296, + "grad_norm": 1.8374229936663662, + "learning_rate": 5.238886063023055e-06, + "loss": 0.6259, + "step": 16322 + }, + { + "epoch": 0.5002758367046708, + "grad_norm": 0.7929078909328924, + "learning_rate": 5.2383903107951305e-06, + "loss": 0.44, + "step": 16323 + }, + { + "epoch": 0.5003064852274121, + "grad_norm": 1.8592195605745958, + "learning_rate": 5.2378945562182825e-06, + "loss": 0.6532, + "step": 16324 + }, + { + "epoch": 0.5003371337501532, + "grad_norm": 1.7577750151745173, + "learning_rate": 5.2373987992974005e-06, + "loss": 0.5914, + "step": 16325 + }, + { + "epoch": 0.5003677822728945, + "grad_norm": 0.8121926722166212, + "learning_rate": 5.236903040037366e-06, + "loss": 0.4404, + "step": 16326 + }, + { + "epoch": 0.5003984307956356, + "grad_norm": 0.7357677652080362, + "learning_rate": 5.236407278443068e-06, + "loss": 0.4284, + "step": 16327 + }, + { + "epoch": 0.5004290793183769, + "grad_norm": 2.185834114512189, + "learning_rate": 5.235911514519385e-06, + "loss": 0.6035, + "step": 16328 + }, + { + "epoch": 0.500459727841118, + "grad_norm": 1.968677390757535, + "learning_rate": 5.235415748271208e-06, + "loss": 0.6599, + "step": 16329 + }, + { + "epoch": 0.5004903763638593, + "grad_norm": 1.7463360558518084, + "learning_rate": 5.234919979703419e-06, + "loss": 0.6499, + "step": 16330 + }, + { + "epoch": 0.5005210248866004, + "grad_norm": 1.6938498870460819, + "learning_rate": 5.234424208820902e-06, + "loss": 0.6597, + "step": 16331 + }, + { + "epoch": 0.5005516734093417, + "grad_norm": 1.625344724512027, + "learning_rate": 5.233928435628543e-06, + "loss": 0.5517, + "step": 16332 + }, + { + "epoch": 0.5005823219320829, + "grad_norm": 1.8206913524669353, + "learning_rate": 5.233432660131228e-06, + "loss": 0.7007, + "step": 16333 + }, + { + "epoch": 0.5006129704548241, + "grad_norm": 0.8636490727381161, + "learning_rate": 5.232936882333844e-06, + "loss": 0.4532, + "step": 16334 + }, + { + "epoch": 0.5006436189775653, + "grad_norm": 1.9464114763207305, + "learning_rate": 5.232441102241269e-06, + "loss": 0.6595, + "step": 16335 + }, + { + "epoch": 0.5006742675003065, + "grad_norm": 1.7446098239383134, + "learning_rate": 5.231945319858395e-06, + "loss": 0.6882, + "step": 16336 + }, + { + "epoch": 0.5007049160230477, + "grad_norm": 1.6209058553054676, + "learning_rate": 5.231449535190103e-06, + "loss": 0.5224, + "step": 16337 + }, + { + "epoch": 0.5007355645457889, + "grad_norm": 1.613797266678913, + "learning_rate": 5.230953748241282e-06, + "loss": 0.6035, + "step": 16338 + }, + { + "epoch": 0.5007662130685301, + "grad_norm": 2.0417044462908356, + "learning_rate": 5.230457959016812e-06, + "loss": 0.724, + "step": 16339 + }, + { + "epoch": 
0.5007968615912713, + "grad_norm": 1.699747156963151, + "learning_rate": 5.229962167521582e-06, + "loss": 0.645, + "step": 16340 + }, + { + "epoch": 0.5008275101140125, + "grad_norm": 0.7916076353534405, + "learning_rate": 5.229466373760474e-06, + "loss": 0.4343, + "step": 16341 + }, + { + "epoch": 0.5008581586367538, + "grad_norm": 1.713436246116799, + "learning_rate": 5.228970577738377e-06, + "loss": 0.6589, + "step": 16342 + }, + { + "epoch": 0.5008888071594949, + "grad_norm": 1.8107658326149483, + "learning_rate": 5.2284747794601745e-06, + "loss": 0.6173, + "step": 16343 + }, + { + "epoch": 0.5009194556822362, + "grad_norm": 1.8952091913890687, + "learning_rate": 5.2279789789307515e-06, + "loss": 0.5981, + "step": 16344 + }, + { + "epoch": 0.5009501042049773, + "grad_norm": 0.769785475040978, + "learning_rate": 5.227483176154991e-06, + "loss": 0.4494, + "step": 16345 + }, + { + "epoch": 0.5009807527277185, + "grad_norm": 1.830190590187198, + "learning_rate": 5.226987371137784e-06, + "loss": 0.6045, + "step": 16346 + }, + { + "epoch": 0.5010114012504597, + "grad_norm": 1.6993455690968213, + "learning_rate": 5.226491563884011e-06, + "loss": 0.517, + "step": 16347 + }, + { + "epoch": 0.5010420497732009, + "grad_norm": 1.8241267647276642, + "learning_rate": 5.225995754398557e-06, + "loss": 0.6577, + "step": 16348 + }, + { + "epoch": 0.5010726982959421, + "grad_norm": 1.8443171506669793, + "learning_rate": 5.22549994268631e-06, + "loss": 0.8224, + "step": 16349 + }, + { + "epoch": 0.5011033468186833, + "grad_norm": 0.8046175597249243, + "learning_rate": 5.225004128752156e-06, + "loss": 0.4213, + "step": 16350 + }, + { + "epoch": 0.5011339953414246, + "grad_norm": 1.7521672300941715, + "learning_rate": 5.224508312600978e-06, + "loss": 0.5898, + "step": 16351 + }, + { + "epoch": 0.5011646438641657, + "grad_norm": 1.7794519363756232, + "learning_rate": 5.224012494237661e-06, + "loss": 0.6366, + "step": 16352 + }, + { + "epoch": 0.501195292386907, + "grad_norm": 1.7394510358593496, + "learning_rate": 5.2235166736670925e-06, + "loss": 0.6632, + "step": 16353 + }, + { + "epoch": 0.5012259409096481, + "grad_norm": 1.6803937730349394, + "learning_rate": 5.2230208508941575e-06, + "loss": 0.61, + "step": 16354 + }, + { + "epoch": 0.5012565894323894, + "grad_norm": 1.9458492904751878, + "learning_rate": 5.22252502592374e-06, + "loss": 0.6297, + "step": 16355 + }, + { + "epoch": 0.5012872379551305, + "grad_norm": 1.4628792149300358, + "learning_rate": 5.222029198760725e-06, + "loss": 0.6371, + "step": 16356 + }, + { + "epoch": 0.5013178864778718, + "grad_norm": 1.6957565882799441, + "learning_rate": 5.221533369410002e-06, + "loss": 0.6267, + "step": 16357 + }, + { + "epoch": 0.5013485350006129, + "grad_norm": 1.6303688467977027, + "learning_rate": 5.221037537876454e-06, + "loss": 0.6511, + "step": 16358 + }, + { + "epoch": 0.5013791835233542, + "grad_norm": 1.5959910067231247, + "learning_rate": 5.2205417041649655e-06, + "loss": 0.6516, + "step": 16359 + }, + { + "epoch": 0.5014098320460953, + "grad_norm": 1.816700526335924, + "learning_rate": 5.220045868280424e-06, + "loss": 0.658, + "step": 16360 + }, + { + "epoch": 0.5014404805688366, + "grad_norm": 1.824679980470203, + "learning_rate": 5.219550030227714e-06, + "loss": 0.6632, + "step": 16361 + }, + { + "epoch": 0.5014711290915778, + "grad_norm": 0.8472591312979034, + "learning_rate": 5.219054190011721e-06, + "loss": 0.4346, + "step": 16362 + }, + { + "epoch": 0.501501777614319, + "grad_norm": 1.7141144040859, + "learning_rate": 
5.2185583476373306e-06, + "loss": 0.5925, + "step": 16363 + }, + { + "epoch": 0.5015324261370602, + "grad_norm": 1.687023972751839, + "learning_rate": 5.218062503109429e-06, + "loss": 0.6641, + "step": 16364 + }, + { + "epoch": 0.5015630746598014, + "grad_norm": 1.833332475335243, + "learning_rate": 5.217566656432903e-06, + "loss": 0.6644, + "step": 16365 + }, + { + "epoch": 0.5015937231825426, + "grad_norm": 1.7227173312105346, + "learning_rate": 5.217070807612636e-06, + "loss": 0.5759, + "step": 16366 + }, + { + "epoch": 0.5016243717052838, + "grad_norm": 1.81961316429812, + "learning_rate": 5.216574956653515e-06, + "loss": 0.6126, + "step": 16367 + }, + { + "epoch": 0.501655020228025, + "grad_norm": 1.833671521069984, + "learning_rate": 5.216079103560425e-06, + "loss": 0.5978, + "step": 16368 + }, + { + "epoch": 0.5016856687507663, + "grad_norm": 1.6595428438714865, + "learning_rate": 5.215583248338254e-06, + "loss": 0.6174, + "step": 16369 + }, + { + "epoch": 0.5017163172735074, + "grad_norm": 1.8215476850085126, + "learning_rate": 5.215087390991885e-06, + "loss": 0.7372, + "step": 16370 + }, + { + "epoch": 0.5017469657962487, + "grad_norm": 1.8834038316734116, + "learning_rate": 5.214591531526204e-06, + "loss": 0.6347, + "step": 16371 + }, + { + "epoch": 0.5017776143189898, + "grad_norm": 1.70171923400506, + "learning_rate": 5.2140956699460986e-06, + "loss": 0.653, + "step": 16372 + }, + { + "epoch": 0.5018082628417311, + "grad_norm": 1.8131182977295943, + "learning_rate": 5.213599806256455e-06, + "loss": 0.6385, + "step": 16373 + }, + { + "epoch": 0.5018389113644722, + "grad_norm": 1.8445262151564548, + "learning_rate": 5.213103940462155e-06, + "loss": 0.7024, + "step": 16374 + }, + { + "epoch": 0.5018695598872135, + "grad_norm": 1.5404185292782906, + "learning_rate": 5.212608072568089e-06, + "loss": 0.6583, + "step": 16375 + }, + { + "epoch": 0.5019002084099546, + "grad_norm": 0.8659058955799628, + "learning_rate": 5.2121122025791415e-06, + "loss": 0.4379, + "step": 16376 + }, + { + "epoch": 0.5019308569326958, + "grad_norm": 1.9665325580161315, + "learning_rate": 5.2116163305002e-06, + "loss": 0.585, + "step": 16377 + }, + { + "epoch": 0.501961505455437, + "grad_norm": 1.699035975494023, + "learning_rate": 5.211120456336145e-06, + "loss": 0.6566, + "step": 16378 + }, + { + "epoch": 0.5019921539781782, + "grad_norm": 1.747303270034652, + "learning_rate": 5.210624580091869e-06, + "loss": 0.6185, + "step": 16379 + }, + { + "epoch": 0.5020228025009195, + "grad_norm": 1.8880806024529684, + "learning_rate": 5.210128701772254e-06, + "loss": 0.6059, + "step": 16380 + }, + { + "epoch": 0.5020534510236606, + "grad_norm": 0.7524720484572776, + "learning_rate": 5.209632821382187e-06, + "loss": 0.4345, + "step": 16381 + }, + { + "epoch": 0.5020840995464019, + "grad_norm": 1.6260139446263293, + "learning_rate": 5.209136938926553e-06, + "loss": 0.7231, + "step": 16382 + }, + { + "epoch": 0.502114748069143, + "grad_norm": 1.7830771813515192, + "learning_rate": 5.2086410544102405e-06, + "loss": 0.6429, + "step": 16383 + }, + { + "epoch": 0.5021453965918843, + "grad_norm": 1.6420524639987784, + "learning_rate": 5.208145167838134e-06, + "loss": 0.6396, + "step": 16384 + }, + { + "epoch": 0.5021760451146254, + "grad_norm": 1.7723835195834188, + "learning_rate": 5.20764927921512e-06, + "loss": 0.6103, + "step": 16385 + }, + { + "epoch": 0.5022066936373667, + "grad_norm": 0.7892216086320889, + "learning_rate": 5.207153388546085e-06, + "loss": 0.4468, + "step": 16386 + }, + { + "epoch": 
0.5022373421601078, + "grad_norm": 1.6889049752006338, + "learning_rate": 5.206657495835914e-06, + "loss": 0.6158, + "step": 16387 + }, + { + "epoch": 0.5022679906828491, + "grad_norm": 0.8267107630666807, + "learning_rate": 5.206161601089495e-06, + "loss": 0.4456, + "step": 16388 + }, + { + "epoch": 0.5022986392055903, + "grad_norm": 1.6490568154923917, + "learning_rate": 5.2056657043117124e-06, + "loss": 0.6303, + "step": 16389 + }, + { + "epoch": 0.5023292877283315, + "grad_norm": 1.6611740002475412, + "learning_rate": 5.2051698055074526e-06, + "loss": 0.6517, + "step": 16390 + }, + { + "epoch": 0.5023599362510727, + "grad_norm": 1.5905615473700974, + "learning_rate": 5.204673904681601e-06, + "loss": 0.5918, + "step": 16391 + }, + { + "epoch": 0.5023905847738139, + "grad_norm": 1.7401483363759334, + "learning_rate": 5.204178001839049e-06, + "loss": 0.6542, + "step": 16392 + }, + { + "epoch": 0.5024212332965551, + "grad_norm": 1.7309457650908167, + "learning_rate": 5.203682096984674e-06, + "loss": 0.6441, + "step": 16393 + }, + { + "epoch": 0.5024518818192963, + "grad_norm": 1.647256329175675, + "learning_rate": 5.203186190123371e-06, + "loss": 0.5689, + "step": 16394 + }, + { + "epoch": 0.5024825303420375, + "grad_norm": 1.7766902691821342, + "learning_rate": 5.20269028126002e-06, + "loss": 0.7493, + "step": 16395 + }, + { + "epoch": 0.5025131788647788, + "grad_norm": 0.800668979858291, + "learning_rate": 5.202194370399511e-06, + "loss": 0.4176, + "step": 16396 + }, + { + "epoch": 0.5025438273875199, + "grad_norm": 1.7030691888110956, + "learning_rate": 5.201698457546729e-06, + "loss": 0.7288, + "step": 16397 + }, + { + "epoch": 0.5025744759102612, + "grad_norm": 1.7868735328712786, + "learning_rate": 5.2012025427065606e-06, + "loss": 0.7233, + "step": 16398 + }, + { + "epoch": 0.5026051244330023, + "grad_norm": 0.7977131654290214, + "learning_rate": 5.200706625883891e-06, + "loss": 0.4355, + "step": 16399 + }, + { + "epoch": 0.5026357729557436, + "grad_norm": 0.787530818590338, + "learning_rate": 5.2002107070836095e-06, + "loss": 0.4279, + "step": 16400 + }, + { + "epoch": 0.5026664214784847, + "grad_norm": 1.930972509686876, + "learning_rate": 5.199714786310599e-06, + "loss": 0.6357, + "step": 16401 + }, + { + "epoch": 0.502697070001226, + "grad_norm": 0.8255140472709614, + "learning_rate": 5.199218863569748e-06, + "loss": 0.4229, + "step": 16402 + }, + { + "epoch": 0.5027277185239671, + "grad_norm": 1.6794701153258933, + "learning_rate": 5.198722938865944e-06, + "loss": 0.6377, + "step": 16403 + }, + { + "epoch": 0.5027583670467084, + "grad_norm": 1.6400125683556792, + "learning_rate": 5.19822701220407e-06, + "loss": 0.6586, + "step": 16404 + }, + { + "epoch": 0.5027890155694495, + "grad_norm": 1.983444478888393, + "learning_rate": 5.197731083589016e-06, + "loss": 0.7031, + "step": 16405 + }, + { + "epoch": 0.5028196640921908, + "grad_norm": 1.6521482577120876, + "learning_rate": 5.197235153025666e-06, + "loss": 0.6127, + "step": 16406 + }, + { + "epoch": 0.502850312614932, + "grad_norm": 1.6124288008394059, + "learning_rate": 5.1967392205189094e-06, + "loss": 0.6463, + "step": 16407 + }, + { + "epoch": 0.5028809611376731, + "grad_norm": 1.58039096942022, + "learning_rate": 5.196243286073629e-06, + "loss": 0.6802, + "step": 16408 + }, + { + "epoch": 0.5029116096604144, + "grad_norm": 1.6194907007739434, + "learning_rate": 5.195747349694714e-06, + "loss": 0.5906, + "step": 16409 + }, + { + "epoch": 0.5029422581831555, + "grad_norm": 1.9389613849782321, + "learning_rate": 
5.195251411387049e-06, + "loss": 0.6762, + "step": 16410 + }, + { + "epoch": 0.5029729067058968, + "grad_norm": 1.7690152689793615, + "learning_rate": 5.1947554711555235e-06, + "loss": 0.6773, + "step": 16411 + }, + { + "epoch": 0.5030035552286379, + "grad_norm": 1.5593080065750222, + "learning_rate": 5.1942595290050225e-06, + "loss": 0.6313, + "step": 16412 + }, + { + "epoch": 0.5030342037513792, + "grad_norm": 0.8512457984447679, + "learning_rate": 5.193763584940431e-06, + "loss": 0.4399, + "step": 16413 + }, + { + "epoch": 0.5030648522741203, + "grad_norm": 1.9279154881714364, + "learning_rate": 5.1932676389666395e-06, + "loss": 0.6361, + "step": 16414 + }, + { + "epoch": 0.5030955007968616, + "grad_norm": 1.8774186697265347, + "learning_rate": 5.1927716910885314e-06, + "loss": 0.723, + "step": 16415 + }, + { + "epoch": 0.5031261493196028, + "grad_norm": 0.8087053711140475, + "learning_rate": 5.192275741310995e-06, + "loss": 0.4313, + "step": 16416 + }, + { + "epoch": 0.503156797842344, + "grad_norm": 1.9502507527510404, + "learning_rate": 5.1917797896389155e-06, + "loss": 0.6677, + "step": 16417 + }, + { + "epoch": 0.5031874463650852, + "grad_norm": 1.6090226935990428, + "learning_rate": 5.191283836077181e-06, + "loss": 0.6047, + "step": 16418 + }, + { + "epoch": 0.5032180948878264, + "grad_norm": 0.7877908220975133, + "learning_rate": 5.190787880630679e-06, + "loss": 0.452, + "step": 16419 + }, + { + "epoch": 0.5032487434105676, + "grad_norm": 0.790970469270689, + "learning_rate": 5.190291923304295e-06, + "loss": 0.4513, + "step": 16420 + }, + { + "epoch": 0.5032793919333088, + "grad_norm": 0.7728051879143654, + "learning_rate": 5.189795964102915e-06, + "loss": 0.4541, + "step": 16421 + }, + { + "epoch": 0.50331004045605, + "grad_norm": 1.6572003334527796, + "learning_rate": 5.189300003031426e-06, + "loss": 0.6064, + "step": 16422 + }, + { + "epoch": 0.5033406889787913, + "grad_norm": 1.919210952821336, + "learning_rate": 5.188804040094718e-06, + "loss": 0.7982, + "step": 16423 + }, + { + "epoch": 0.5033713375015324, + "grad_norm": 0.7657940120104079, + "learning_rate": 5.188308075297674e-06, + "loss": 0.4394, + "step": 16424 + }, + { + "epoch": 0.5034019860242737, + "grad_norm": 1.7008733426553317, + "learning_rate": 5.1878121086451824e-06, + "loss": 0.7197, + "step": 16425 + }, + { + "epoch": 0.5034326345470148, + "grad_norm": 1.5992639449604167, + "learning_rate": 5.187316140142131e-06, + "loss": 0.6781, + "step": 16426 + }, + { + "epoch": 0.5034632830697561, + "grad_norm": 1.688827071153518, + "learning_rate": 5.1868201697934054e-06, + "loss": 0.5985, + "step": 16427 + }, + { + "epoch": 0.5034939315924972, + "grad_norm": 0.7851086686139455, + "learning_rate": 5.1863241976038915e-06, + "loss": 0.4331, + "step": 16428 + }, + { + "epoch": 0.5035245801152385, + "grad_norm": 1.561314192078714, + "learning_rate": 5.185828223578479e-06, + "loss": 0.6257, + "step": 16429 + }, + { + "epoch": 0.5035552286379796, + "grad_norm": 0.7721637322230792, + "learning_rate": 5.185332247722053e-06, + "loss": 0.4286, + "step": 16430 + }, + { + "epoch": 0.5035858771607209, + "grad_norm": 1.6504057400589631, + "learning_rate": 5.184836270039503e-06, + "loss": 0.5756, + "step": 16431 + }, + { + "epoch": 0.503616525683462, + "grad_norm": 1.625534162062772, + "learning_rate": 5.184340290535711e-06, + "loss": 0.6172, + "step": 16432 + }, + { + "epoch": 0.5036471742062033, + "grad_norm": 1.7106699371645537, + "learning_rate": 5.183844309215567e-06, + "loss": 0.6772, + "step": 16433 + }, + { + "epoch": 
0.5036778227289445, + "grad_norm": 0.8159812131705697, + "learning_rate": 5.18334832608396e-06, + "loss": 0.4702, + "step": 16434 + }, + { + "epoch": 0.5037084712516857, + "grad_norm": 1.7912311245333268, + "learning_rate": 5.182852341145774e-06, + "loss": 0.6018, + "step": 16435 + }, + { + "epoch": 0.5037391197744269, + "grad_norm": 1.843528858684504, + "learning_rate": 5.182356354405896e-06, + "loss": 0.6271, + "step": 16436 + }, + { + "epoch": 0.5037697682971681, + "grad_norm": 1.733952764597927, + "learning_rate": 5.1818603658692155e-06, + "loss": 0.5423, + "step": 16437 + }, + { + "epoch": 0.5038004168199093, + "grad_norm": 1.5686446874669024, + "learning_rate": 5.18136437554062e-06, + "loss": 0.6242, + "step": 16438 + }, + { + "epoch": 0.5038310653426504, + "grad_norm": 1.7615272989767325, + "learning_rate": 5.18086838342499e-06, + "loss": 0.6448, + "step": 16439 + }, + { + "epoch": 0.5038617138653917, + "grad_norm": 1.8145377991078273, + "learning_rate": 5.180372389527221e-06, + "loss": 0.6398, + "step": 16440 + }, + { + "epoch": 0.5038923623881328, + "grad_norm": 1.5525283844844067, + "learning_rate": 5.179876393852198e-06, + "loss": 0.5832, + "step": 16441 + }, + { + "epoch": 0.5039230109108741, + "grad_norm": 1.7952425055601344, + "learning_rate": 5.179380396404805e-06, + "loss": 0.6491, + "step": 16442 + }, + { + "epoch": 0.5039536594336153, + "grad_norm": 1.7639816597976385, + "learning_rate": 5.178884397189931e-06, + "loss": 0.701, + "step": 16443 + }, + { + "epoch": 0.5039843079563565, + "grad_norm": 1.912288024355431, + "learning_rate": 5.178388396212462e-06, + "loss": 0.7148, + "step": 16444 + }, + { + "epoch": 0.5040149564790977, + "grad_norm": 1.7049496906843082, + "learning_rate": 5.1778923934772885e-06, + "loss": 0.6396, + "step": 16445 + }, + { + "epoch": 0.5040456050018389, + "grad_norm": 1.9240747464869432, + "learning_rate": 5.177396388989296e-06, + "loss": 0.6788, + "step": 16446 + }, + { + "epoch": 0.5040762535245801, + "grad_norm": 1.9648843308003117, + "learning_rate": 5.176900382753369e-06, + "loss": 0.6798, + "step": 16447 + }, + { + "epoch": 0.5041069020473213, + "grad_norm": 1.1195803483547244, + "learning_rate": 5.1764043747744e-06, + "loss": 0.42, + "step": 16448 + }, + { + "epoch": 0.5041375505700625, + "grad_norm": 1.5766218216585426, + "learning_rate": 5.175908365057272e-06, + "loss": 0.6079, + "step": 16449 + }, + { + "epoch": 0.5041681990928037, + "grad_norm": 1.7417569850769459, + "learning_rate": 5.175412353606876e-06, + "loss": 0.6609, + "step": 16450 + }, + { + "epoch": 0.5041988476155449, + "grad_norm": 1.847910230371017, + "learning_rate": 5.1749163404280945e-06, + "loss": 0.6747, + "step": 16451 + }, + { + "epoch": 0.5042294961382862, + "grad_norm": 1.7757876791010692, + "learning_rate": 5.1744203255258185e-06, + "loss": 0.5758, + "step": 16452 + }, + { + "epoch": 0.5042601446610273, + "grad_norm": 1.899517307078199, + "learning_rate": 5.173924308904934e-06, + "loss": 0.5631, + "step": 16453 + }, + { + "epoch": 0.5042907931837686, + "grad_norm": 0.7669006664090152, + "learning_rate": 5.1734282905703295e-06, + "loss": 0.4045, + "step": 16454 + }, + { + "epoch": 0.5043214417065097, + "grad_norm": 1.7276354525079387, + "learning_rate": 5.172932270526891e-06, + "loss": 0.6248, + "step": 16455 + }, + { + "epoch": 0.504352090229251, + "grad_norm": 1.8096953881860876, + "learning_rate": 5.172436248779507e-06, + "loss": 0.7555, + "step": 16456 + }, + { + "epoch": 0.5043827387519921, + "grad_norm": 1.665913137031637, + "learning_rate": 
5.171940225333065e-06, + "loss": 0.6167, + "step": 16457 + }, + { + "epoch": 0.5044133872747334, + "grad_norm": 1.8271846458256975, + "learning_rate": 5.171444200192451e-06, + "loss": 0.684, + "step": 16458 + }, + { + "epoch": 0.5044440357974745, + "grad_norm": 1.862076035316359, + "learning_rate": 5.170948173362555e-06, + "loss": 0.5654, + "step": 16459 + }, + { + "epoch": 0.5044746843202158, + "grad_norm": 1.7865794599192861, + "learning_rate": 5.17045214484826e-06, + "loss": 0.6177, + "step": 16460 + }, + { + "epoch": 0.504505332842957, + "grad_norm": 1.6102993650358641, + "learning_rate": 5.1699561146544595e-06, + "loss": 0.5879, + "step": 16461 + }, + { + "epoch": 0.5045359813656982, + "grad_norm": 1.7245204123909585, + "learning_rate": 5.1694600827860365e-06, + "loss": 0.6653, + "step": 16462 + }, + { + "epoch": 0.5045666298884394, + "grad_norm": 1.5264327135337878, + "learning_rate": 5.16896404924788e-06, + "loss": 0.5736, + "step": 16463 + }, + { + "epoch": 0.5045972784111806, + "grad_norm": 1.6164775157317615, + "learning_rate": 5.1684680140448775e-06, + "loss": 0.5964, + "step": 16464 + }, + { + "epoch": 0.5046279269339218, + "grad_norm": 2.347423555381885, + "learning_rate": 5.167971977181916e-06, + "loss": 0.6315, + "step": 16465 + }, + { + "epoch": 0.504658575456663, + "grad_norm": 1.6661450017207857, + "learning_rate": 5.167475938663885e-06, + "loss": 0.6191, + "step": 16466 + }, + { + "epoch": 0.5046892239794042, + "grad_norm": 1.6814004359985797, + "learning_rate": 5.16697989849567e-06, + "loss": 0.5969, + "step": 16467 + }, + { + "epoch": 0.5047198725021455, + "grad_norm": 1.7023741821388505, + "learning_rate": 5.166483856682158e-06, + "loss": 0.6462, + "step": 16468 + }, + { + "epoch": 0.5047505210248866, + "grad_norm": 0.8355568840966475, + "learning_rate": 5.1659878132282406e-06, + "loss": 0.445, + "step": 16469 + }, + { + "epoch": 0.5047811695476278, + "grad_norm": 2.009294029112707, + "learning_rate": 5.165491768138801e-06, + "loss": 0.6506, + "step": 16470 + }, + { + "epoch": 0.504811818070369, + "grad_norm": 1.8208208997537734, + "learning_rate": 5.164995721418729e-06, + "loss": 0.5737, + "step": 16471 + }, + { + "epoch": 0.5048424665931102, + "grad_norm": 1.7161904547849451, + "learning_rate": 5.164499673072913e-06, + "loss": 0.6674, + "step": 16472 + }, + { + "epoch": 0.5048731151158514, + "grad_norm": 2.066929894258744, + "learning_rate": 5.164003623106238e-06, + "loss": 0.6584, + "step": 16473 + }, + { + "epoch": 0.5049037636385926, + "grad_norm": 1.5447745459653626, + "learning_rate": 5.163507571523595e-06, + "loss": 0.6246, + "step": 16474 + }, + { + "epoch": 0.5049344121613338, + "grad_norm": 1.8441282266235328, + "learning_rate": 5.163011518329868e-06, + "loss": 0.6573, + "step": 16475 + }, + { + "epoch": 0.504965060684075, + "grad_norm": 1.5766287402655723, + "learning_rate": 5.162515463529949e-06, + "loss": 0.661, + "step": 16476 + }, + { + "epoch": 0.5049957092068162, + "grad_norm": 1.5978239886238539, + "learning_rate": 5.162019407128722e-06, + "loss": 0.6292, + "step": 16477 + }, + { + "epoch": 0.5050263577295574, + "grad_norm": 1.6378858075792189, + "learning_rate": 5.161523349131078e-06, + "loss": 0.6603, + "step": 16478 + }, + { + "epoch": 0.5050570062522987, + "grad_norm": 1.865585045673601, + "learning_rate": 5.1610272895419e-06, + "loss": 0.6631, + "step": 16479 + }, + { + "epoch": 0.5050876547750398, + "grad_norm": 1.7170592252475723, + "learning_rate": 5.160531228366081e-06, + "loss": 0.7054, + "step": 16480 + }, + { + "epoch": 
0.5051183032977811, + "grad_norm": 1.5433191609909485, + "learning_rate": 5.160035165608508e-06, + "loss": 0.6071, + "step": 16481 + }, + { + "epoch": 0.5051489518205222, + "grad_norm": 0.7863732404403623, + "learning_rate": 5.159539101274065e-06, + "loss": 0.4189, + "step": 16482 + }, + { + "epoch": 0.5051796003432635, + "grad_norm": 1.6014702013848106, + "learning_rate": 5.159043035367643e-06, + "loss": 0.6212, + "step": 16483 + }, + { + "epoch": 0.5052102488660046, + "grad_norm": 2.0119903213204076, + "learning_rate": 5.158546967894131e-06, + "loss": 0.6744, + "step": 16484 + }, + { + "epoch": 0.5052408973887459, + "grad_norm": 1.948202892119166, + "learning_rate": 5.158050898858415e-06, + "loss": 0.6646, + "step": 16485 + }, + { + "epoch": 0.505271545911487, + "grad_norm": 1.5207390850910099, + "learning_rate": 5.157554828265381e-06, + "loss": 0.6818, + "step": 16486 + }, + { + "epoch": 0.5053021944342283, + "grad_norm": 1.6425177848313863, + "learning_rate": 5.15705875611992e-06, + "loss": 0.656, + "step": 16487 + }, + { + "epoch": 0.5053328429569695, + "grad_norm": 1.6983397849315847, + "learning_rate": 5.156562682426919e-06, + "loss": 0.5788, + "step": 16488 + }, + { + "epoch": 0.5053634914797107, + "grad_norm": 0.7815899895609385, + "learning_rate": 5.156066607191266e-06, + "loss": 0.4431, + "step": 16489 + }, + { + "epoch": 0.5053941400024519, + "grad_norm": 1.689937729965657, + "learning_rate": 5.155570530417848e-06, + "loss": 0.6512, + "step": 16490 + }, + { + "epoch": 0.5054247885251931, + "grad_norm": 1.7412087800748157, + "learning_rate": 5.155074452111555e-06, + "loss": 0.6531, + "step": 16491 + }, + { + "epoch": 0.5054554370479343, + "grad_norm": 2.0361935926581656, + "learning_rate": 5.1545783722772725e-06, + "loss": 0.6177, + "step": 16492 + }, + { + "epoch": 0.5054860855706755, + "grad_norm": 1.8999023804416704, + "learning_rate": 5.154082290919891e-06, + "loss": 0.5519, + "step": 16493 + }, + { + "epoch": 0.5055167340934167, + "grad_norm": 1.6748936782607546, + "learning_rate": 5.153586208044296e-06, + "loss": 0.6906, + "step": 16494 + }, + { + "epoch": 0.505547382616158, + "grad_norm": 1.6496813462314315, + "learning_rate": 5.153090123655378e-06, + "loss": 0.6852, + "step": 16495 + }, + { + "epoch": 0.5055780311388991, + "grad_norm": 1.7971226965583647, + "learning_rate": 5.152594037758023e-06, + "loss": 0.606, + "step": 16496 + }, + { + "epoch": 0.5056086796616404, + "grad_norm": 1.8899865983745279, + "learning_rate": 5.152097950357119e-06, + "loss": 0.6078, + "step": 16497 + }, + { + "epoch": 0.5056393281843815, + "grad_norm": 1.8569161112508512, + "learning_rate": 5.151601861457557e-06, + "loss": 0.6251, + "step": 16498 + }, + { + "epoch": 0.5056699767071228, + "grad_norm": 1.9052389916576613, + "learning_rate": 5.151105771064221e-06, + "loss": 0.6563, + "step": 16499 + }, + { + "epoch": 0.5057006252298639, + "grad_norm": 1.9734729181821575, + "learning_rate": 5.150609679182004e-06, + "loss": 0.6477, + "step": 16500 + }, + { + "epoch": 0.5057312737526051, + "grad_norm": 1.8978409683743318, + "learning_rate": 5.150113585815788e-06, + "loss": 0.6725, + "step": 16501 + }, + { + "epoch": 0.5057619222753463, + "grad_norm": 1.7306185248130053, + "learning_rate": 5.149617490970466e-06, + "loss": 0.6571, + "step": 16502 + }, + { + "epoch": 0.5057925707980875, + "grad_norm": 0.8406443690366311, + "learning_rate": 5.149121394650924e-06, + "loss": 0.4602, + "step": 16503 + }, + { + "epoch": 0.5058232193208287, + "grad_norm": 0.8228719623863187, + "learning_rate": 
5.148625296862053e-06, + "loss": 0.446, + "step": 16504 + }, + { + "epoch": 0.5058538678435699, + "grad_norm": 1.7073988594679055, + "learning_rate": 5.148129197608737e-06, + "loss": 0.5933, + "step": 16505 + }, + { + "epoch": 0.5058845163663112, + "grad_norm": 1.887921234869021, + "learning_rate": 5.147633096895866e-06, + "loss": 0.6482, + "step": 16506 + }, + { + "epoch": 0.5059151648890523, + "grad_norm": 1.948955293320806, + "learning_rate": 5.14713699472833e-06, + "loss": 0.7271, + "step": 16507 + }, + { + "epoch": 0.5059458134117936, + "grad_norm": 1.8452443499105233, + "learning_rate": 5.146640891111013e-06, + "loss": 0.7399, + "step": 16508 + }, + { + "epoch": 0.5059764619345347, + "grad_norm": 0.7992083949327303, + "learning_rate": 5.146144786048808e-06, + "loss": 0.4609, + "step": 16509 + }, + { + "epoch": 0.506007110457276, + "grad_norm": 1.9596159717576735, + "learning_rate": 5.145648679546598e-06, + "loss": 0.6105, + "step": 16510 + }, + { + "epoch": 0.5060377589800171, + "grad_norm": 1.6542685006920215, + "learning_rate": 5.145152571609279e-06, + "loss": 0.61, + "step": 16511 + }, + { + "epoch": 0.5060684075027584, + "grad_norm": 1.6129870380282274, + "learning_rate": 5.14465646224173e-06, + "loss": 0.6301, + "step": 16512 + }, + { + "epoch": 0.5060990560254995, + "grad_norm": 1.6200020496269343, + "learning_rate": 5.144160351448847e-06, + "loss": 0.5586, + "step": 16513 + }, + { + "epoch": 0.5061297045482408, + "grad_norm": 1.8755837001892885, + "learning_rate": 5.143664239235513e-06, + "loss": 0.709, + "step": 16514 + }, + { + "epoch": 0.506160353070982, + "grad_norm": 1.6184928387260251, + "learning_rate": 5.143168125606621e-06, + "loss": 0.6707, + "step": 16515 + }, + { + "epoch": 0.5061910015937232, + "grad_norm": 1.5611143836863175, + "learning_rate": 5.1426720105670545e-06, + "loss": 0.548, + "step": 16516 + }, + { + "epoch": 0.5062216501164644, + "grad_norm": 1.6777668021235337, + "learning_rate": 5.142175894121706e-06, + "loss": 0.6266, + "step": 16517 + }, + { + "epoch": 0.5062522986392056, + "grad_norm": 1.6380846453427689, + "learning_rate": 5.14167977627546e-06, + "loss": 0.6052, + "step": 16518 + }, + { + "epoch": 0.5062829471619468, + "grad_norm": 1.5977901020439138, + "learning_rate": 5.141183657033208e-06, + "loss": 0.6433, + "step": 16519 + }, + { + "epoch": 0.506313595684688, + "grad_norm": 1.6583143964478475, + "learning_rate": 5.140687536399838e-06, + "loss": 0.5115, + "step": 16520 + }, + { + "epoch": 0.5063442442074292, + "grad_norm": 1.8997875549038241, + "learning_rate": 5.140191414380236e-06, + "loss": 0.6529, + "step": 16521 + }, + { + "epoch": 0.5063748927301704, + "grad_norm": 1.6862302256917647, + "learning_rate": 5.139695290979293e-06, + "loss": 0.6413, + "step": 16522 + }, + { + "epoch": 0.5064055412529116, + "grad_norm": 1.7755934924127905, + "learning_rate": 5.139199166201897e-06, + "loss": 0.6806, + "step": 16523 + }, + { + "epoch": 0.5064361897756529, + "grad_norm": 1.628391254984528, + "learning_rate": 5.138703040052936e-06, + "loss": 0.6969, + "step": 16524 + }, + { + "epoch": 0.506466838298394, + "grad_norm": 1.9424973806416195, + "learning_rate": 5.138206912537297e-06, + "loss": 0.6746, + "step": 16525 + }, + { + "epoch": 0.5064974868211353, + "grad_norm": 2.001181916495822, + "learning_rate": 5.1377107836598715e-06, + "loss": 0.6834, + "step": 16526 + }, + { + "epoch": 0.5065281353438764, + "grad_norm": 1.50796224152637, + "learning_rate": 5.137214653425546e-06, + "loss": 0.575, + "step": 16527 + }, + { + "epoch": 0.5065587838666177, + 
"grad_norm": 1.9290156972633083, + "learning_rate": 5.13671852183921e-06, + "loss": 0.7115, + "step": 16528 + }, + { + "epoch": 0.5065894323893588, + "grad_norm": 1.6585229457871986, + "learning_rate": 5.13622238890575e-06, + "loss": 0.5905, + "step": 16529 + }, + { + "epoch": 0.5066200809121001, + "grad_norm": 1.6876171383178384, + "learning_rate": 5.1357262546300565e-06, + "loss": 0.6206, + "step": 16530 + }, + { + "epoch": 0.5066507294348412, + "grad_norm": 1.4615693505853247, + "learning_rate": 5.135230119017019e-06, + "loss": 0.5717, + "step": 16531 + }, + { + "epoch": 0.5066813779575824, + "grad_norm": 1.6804395487927641, + "learning_rate": 5.134733982071523e-06, + "loss": 0.6441, + "step": 16532 + }, + { + "epoch": 0.5067120264803237, + "grad_norm": 1.7259429053251878, + "learning_rate": 5.134237843798457e-06, + "loss": 0.639, + "step": 16533 + }, + { + "epoch": 0.5067426750030648, + "grad_norm": 1.9869901019522667, + "learning_rate": 5.133741704202714e-06, + "loss": 0.6095, + "step": 16534 + }, + { + "epoch": 0.5067733235258061, + "grad_norm": 1.7910956649758425, + "learning_rate": 5.13324556328918e-06, + "loss": 0.712, + "step": 16535 + }, + { + "epoch": 0.5068039720485472, + "grad_norm": 1.880427796750579, + "learning_rate": 5.13274942106274e-06, + "loss": 0.6772, + "step": 16536 + }, + { + "epoch": 0.5068346205712885, + "grad_norm": 1.8901723823205918, + "learning_rate": 5.13225327752829e-06, + "loss": 0.5224, + "step": 16537 + }, + { + "epoch": 0.5068652690940296, + "grad_norm": 0.8756519717001764, + "learning_rate": 5.131757132690713e-06, + "loss": 0.4397, + "step": 16538 + }, + { + "epoch": 0.5068959176167709, + "grad_norm": 1.6715913969719296, + "learning_rate": 5.131260986554899e-06, + "loss": 0.6127, + "step": 16539 + }, + { + "epoch": 0.506926566139512, + "grad_norm": 1.6856992032584381, + "learning_rate": 5.130764839125736e-06, + "loss": 0.605, + "step": 16540 + }, + { + "epoch": 0.5069572146622533, + "grad_norm": 0.8344946313080327, + "learning_rate": 5.130268690408114e-06, + "loss": 0.4526, + "step": 16541 + }, + { + "epoch": 0.5069878631849944, + "grad_norm": 1.6505724495729683, + "learning_rate": 5.1297725404069234e-06, + "loss": 0.6138, + "step": 16542 + }, + { + "epoch": 0.5070185117077357, + "grad_norm": 1.7268680748169492, + "learning_rate": 5.129276389127049e-06, + "loss": 0.6141, + "step": 16543 + }, + { + "epoch": 0.5070491602304769, + "grad_norm": 0.8339183101179434, + "learning_rate": 5.128780236573381e-06, + "loss": 0.4427, + "step": 16544 + }, + { + "epoch": 0.5070798087532181, + "grad_norm": 1.79679996170508, + "learning_rate": 5.1282840827508085e-06, + "loss": 0.5738, + "step": 16545 + }, + { + "epoch": 0.5071104572759593, + "grad_norm": 0.8473451249088759, + "learning_rate": 5.1277879276642206e-06, + "loss": 0.4314, + "step": 16546 + }, + { + "epoch": 0.5071411057987005, + "grad_norm": 1.9353112632246114, + "learning_rate": 5.1272917713185055e-06, + "loss": 0.7055, + "step": 16547 + }, + { + "epoch": 0.5071717543214417, + "grad_norm": 1.556253293908934, + "learning_rate": 5.1267956137185514e-06, + "loss": 0.6741, + "step": 16548 + }, + { + "epoch": 0.5072024028441829, + "grad_norm": 0.8324071054569203, + "learning_rate": 5.126299454869248e-06, + "loss": 0.4488, + "step": 16549 + }, + { + "epoch": 0.5072330513669241, + "grad_norm": 1.8067353680768719, + "learning_rate": 5.1258032947754845e-06, + "loss": 0.6194, + "step": 16550 + }, + { + "epoch": 0.5072636998896654, + "grad_norm": 1.7662907609904506, + "learning_rate": 5.125307133442148e-06, + "loss": 
0.5531, + "step": 16551 + }, + { + "epoch": 0.5072943484124065, + "grad_norm": 1.6043539277939718, + "learning_rate": 5.124810970874129e-06, + "loss": 0.6193, + "step": 16552 + }, + { + "epoch": 0.5073249969351478, + "grad_norm": 1.765744368690706, + "learning_rate": 5.124314807076314e-06, + "loss": 0.6178, + "step": 16553 + }, + { + "epoch": 0.5073556454578889, + "grad_norm": 1.7738083975688614, + "learning_rate": 5.1238186420535965e-06, + "loss": 0.6691, + "step": 16554 + }, + { + "epoch": 0.5073862939806302, + "grad_norm": 1.6847843516466485, + "learning_rate": 5.123322475810859e-06, + "loss": 0.6585, + "step": 16555 + }, + { + "epoch": 0.5074169425033713, + "grad_norm": 1.9554465817331617, + "learning_rate": 5.122826308352995e-06, + "loss": 0.6937, + "step": 16556 + }, + { + "epoch": 0.5074475910261126, + "grad_norm": 1.5818666403321944, + "learning_rate": 5.122330139684892e-06, + "loss": 0.6204, + "step": 16557 + }, + { + "epoch": 0.5074782395488537, + "grad_norm": 0.7928578901864669, + "learning_rate": 5.1218339698114395e-06, + "loss": 0.4396, + "step": 16558 + }, + { + "epoch": 0.507508888071595, + "grad_norm": 0.7935888084713261, + "learning_rate": 5.121337798737523e-06, + "loss": 0.4264, + "step": 16559 + }, + { + "epoch": 0.5075395365943361, + "grad_norm": 1.5382674406139831, + "learning_rate": 5.1208416264680376e-06, + "loss": 0.5624, + "step": 16560 + }, + { + "epoch": 0.5075701851170774, + "grad_norm": 1.6539417807122114, + "learning_rate": 5.120345453007867e-06, + "loss": 0.5946, + "step": 16561 + }, + { + "epoch": 0.5076008336398186, + "grad_norm": 1.5339180494977012, + "learning_rate": 5.1198492783619015e-06, + "loss": 0.6232, + "step": 16562 + }, + { + "epoch": 0.5076314821625597, + "grad_norm": 0.8310511447819374, + "learning_rate": 5.119353102535031e-06, + "loss": 0.4219, + "step": 16563 + }, + { + "epoch": 0.507662130685301, + "grad_norm": 1.7692949832014848, + "learning_rate": 5.118856925532144e-06, + "loss": 0.7074, + "step": 16564 + }, + { + "epoch": 0.5076927792080421, + "grad_norm": 1.4709249398646456, + "learning_rate": 5.11836074735813e-06, + "loss": 0.643, + "step": 16565 + }, + { + "epoch": 0.5077234277307834, + "grad_norm": 1.9576575214570655, + "learning_rate": 5.117864568017875e-06, + "loss": 0.6749, + "step": 16566 + }, + { + "epoch": 0.5077540762535245, + "grad_norm": 2.055552537843354, + "learning_rate": 5.117368387516272e-06, + "loss": 0.6839, + "step": 16567 + }, + { + "epoch": 0.5077847247762658, + "grad_norm": 1.6322853624318365, + "learning_rate": 5.116872205858207e-06, + "loss": 0.6135, + "step": 16568 + }, + { + "epoch": 0.507815373299007, + "grad_norm": 1.6417880635793942, + "learning_rate": 5.116376023048573e-06, + "loss": 0.6697, + "step": 16569 + }, + { + "epoch": 0.5078460218217482, + "grad_norm": 0.8082503119717082, + "learning_rate": 5.115879839092253e-06, + "loss": 0.4551, + "step": 16570 + }, + { + "epoch": 0.5078766703444894, + "grad_norm": 1.8715230939690712, + "learning_rate": 5.115383653994141e-06, + "loss": 0.528, + "step": 16571 + }, + { + "epoch": 0.5079073188672306, + "grad_norm": 1.639533431693074, + "learning_rate": 5.114887467759123e-06, + "loss": 0.5898, + "step": 16572 + }, + { + "epoch": 0.5079379673899718, + "grad_norm": 1.5424226973031185, + "learning_rate": 5.114391280392092e-06, + "loss": 0.663, + "step": 16573 + }, + { + "epoch": 0.507968615912713, + "grad_norm": 0.795055979961058, + "learning_rate": 5.1138950918979315e-06, + "loss": 0.4298, + "step": 16574 + }, + { + "epoch": 0.5079992644354542, + "grad_norm": 
0.7607218036512566, + "learning_rate": 5.113398902281536e-06, + "loss": 0.4308, + "step": 16575 + }, + { + "epoch": 0.5080299129581954, + "grad_norm": 0.8007539860986522, + "learning_rate": 5.112902711547789e-06, + "loss": 0.4265, + "step": 16576 + }, + { + "epoch": 0.5080605614809366, + "grad_norm": 1.5671908145126856, + "learning_rate": 5.112406519701586e-06, + "loss": 0.588, + "step": 16577 + }, + { + "epoch": 0.5080912100036779, + "grad_norm": 1.9290030401197347, + "learning_rate": 5.111910326747811e-06, + "loss": 0.7106, + "step": 16578 + }, + { + "epoch": 0.508121858526419, + "grad_norm": 1.5511219307937947, + "learning_rate": 5.111414132691355e-06, + "loss": 0.5985, + "step": 16579 + }, + { + "epoch": 0.5081525070491603, + "grad_norm": 1.6444393538888173, + "learning_rate": 5.110917937537108e-06, + "loss": 0.6138, + "step": 16580 + }, + { + "epoch": 0.5081831555719014, + "grad_norm": 1.6516313543048207, + "learning_rate": 5.110421741289957e-06, + "loss": 0.7043, + "step": 16581 + }, + { + "epoch": 0.5082138040946427, + "grad_norm": 1.5938358308092428, + "learning_rate": 5.109925543954793e-06, + "loss": 0.6161, + "step": 16582 + }, + { + "epoch": 0.5082444526173838, + "grad_norm": 1.8268019354864102, + "learning_rate": 5.109429345536504e-06, + "loss": 0.6348, + "step": 16583 + }, + { + "epoch": 0.5082751011401251, + "grad_norm": 1.6933117944895244, + "learning_rate": 5.108933146039981e-06, + "loss": 0.6669, + "step": 16584 + }, + { + "epoch": 0.5083057496628662, + "grad_norm": 1.7440116404825006, + "learning_rate": 5.108436945470111e-06, + "loss": 0.6934, + "step": 16585 + }, + { + "epoch": 0.5083363981856075, + "grad_norm": 1.7925062880183893, + "learning_rate": 5.107940743831784e-06, + "loss": 0.6785, + "step": 16586 + }, + { + "epoch": 0.5083670467083486, + "grad_norm": 1.6974423589882115, + "learning_rate": 5.107444541129889e-06, + "loss": 0.705, + "step": 16587 + }, + { + "epoch": 0.5083976952310899, + "grad_norm": 0.890895075248693, + "learning_rate": 5.106948337369315e-06, + "loss": 0.4303, + "step": 16588 + }, + { + "epoch": 0.5084283437538311, + "grad_norm": 1.673266533487511, + "learning_rate": 5.106452132554953e-06, + "loss": 0.706, + "step": 16589 + }, + { + "epoch": 0.5084589922765723, + "grad_norm": 1.6869038484320233, + "learning_rate": 5.10595592669169e-06, + "loss": 0.6374, + "step": 16590 + }, + { + "epoch": 0.5084896407993135, + "grad_norm": 1.7188855951982582, + "learning_rate": 5.105459719784416e-06, + "loss": 0.5973, + "step": 16591 + }, + { + "epoch": 0.5085202893220547, + "grad_norm": 1.872573629480021, + "learning_rate": 5.104963511838021e-06, + "loss": 0.625, + "step": 16592 + }, + { + "epoch": 0.5085509378447959, + "grad_norm": 1.8770167603271255, + "learning_rate": 5.104467302857393e-06, + "loss": 0.6694, + "step": 16593 + }, + { + "epoch": 0.508581586367537, + "grad_norm": 1.6295323104599158, + "learning_rate": 5.103971092847422e-06, + "loss": 0.7595, + "step": 16594 + }, + { + "epoch": 0.5086122348902783, + "grad_norm": 2.1127233408424617, + "learning_rate": 5.103474881812998e-06, + "loss": 0.7081, + "step": 16595 + }, + { + "epoch": 0.5086428834130194, + "grad_norm": 1.766917463341789, + "learning_rate": 5.102978669759009e-06, + "loss": 0.6215, + "step": 16596 + }, + { + "epoch": 0.5086735319357607, + "grad_norm": 1.7416576662328043, + "learning_rate": 5.102482456690345e-06, + "loss": 0.663, + "step": 16597 + }, + { + "epoch": 0.5087041804585019, + "grad_norm": 1.6376289579244854, + "learning_rate": 5.101986242611895e-06, + "loss": 0.6222, + "step": 
16598 + }, + { + "epoch": 0.5087348289812431, + "grad_norm": 1.7150034732445698, + "learning_rate": 5.101490027528548e-06, + "loss": 0.6569, + "step": 16599 + }, + { + "epoch": 0.5087654775039843, + "grad_norm": 1.9963710839911188, + "learning_rate": 5.100993811445195e-06, + "loss": 0.6207, + "step": 16600 + }, + { + "epoch": 0.5087961260267255, + "grad_norm": 1.7164991747831064, + "learning_rate": 5.100497594366724e-06, + "loss": 0.7528, + "step": 16601 + }, + { + "epoch": 0.5088267745494667, + "grad_norm": 1.7771327015421947, + "learning_rate": 5.100001376298023e-06, + "loss": 0.6336, + "step": 16602 + }, + { + "epoch": 0.5088574230722079, + "grad_norm": 0.8252116208123415, + "learning_rate": 5.099505157243984e-06, + "loss": 0.4252, + "step": 16603 + }, + { + "epoch": 0.5088880715949491, + "grad_norm": 2.0454714699404866, + "learning_rate": 5.099008937209495e-06, + "loss": 0.6498, + "step": 16604 + }, + { + "epoch": 0.5089187201176903, + "grad_norm": 1.578317503498672, + "learning_rate": 5.098512716199445e-06, + "loss": 0.5945, + "step": 16605 + }, + { + "epoch": 0.5089493686404315, + "grad_norm": 0.8122556790039557, + "learning_rate": 5.098016494218725e-06, + "loss": 0.4403, + "step": 16606 + }, + { + "epoch": 0.5089800171631728, + "grad_norm": 1.6009318073707135, + "learning_rate": 5.097520271272223e-06, + "loss": 0.6706, + "step": 16607 + }, + { + "epoch": 0.5090106656859139, + "grad_norm": 1.7763312873508732, + "learning_rate": 5.097024047364829e-06, + "loss": 0.6678, + "step": 16608 + }, + { + "epoch": 0.5090413142086552, + "grad_norm": 1.615911367238483, + "learning_rate": 5.096527822501431e-06, + "loss": 0.6557, + "step": 16609 + }, + { + "epoch": 0.5090719627313963, + "grad_norm": 1.9047688909353881, + "learning_rate": 5.0960315966869215e-06, + "loss": 0.7186, + "step": 16610 + }, + { + "epoch": 0.5091026112541376, + "grad_norm": 1.6577068912301882, + "learning_rate": 5.095535369926188e-06, + "loss": 0.6349, + "step": 16611 + }, + { + "epoch": 0.5091332597768787, + "grad_norm": 0.7684380086842336, + "learning_rate": 5.09503914222412e-06, + "loss": 0.4349, + "step": 16612 + }, + { + "epoch": 0.50916390829962, + "grad_norm": 1.5120200632142837, + "learning_rate": 5.094542913585605e-06, + "loss": 0.5806, + "step": 16613 + }, + { + "epoch": 0.5091945568223611, + "grad_norm": 2.0412922169943672, + "learning_rate": 5.094046684015536e-06, + "loss": 0.6901, + "step": 16614 + }, + { + "epoch": 0.5092252053451024, + "grad_norm": 2.089075678030547, + "learning_rate": 5.0935504535188005e-06, + "loss": 0.6694, + "step": 16615 + }, + { + "epoch": 0.5092558538678436, + "grad_norm": 1.6779574584113024, + "learning_rate": 5.09305422210029e-06, + "loss": 0.6836, + "step": 16616 + }, + { + "epoch": 0.5092865023905848, + "grad_norm": 1.6859163046075056, + "learning_rate": 5.09255798976489e-06, + "loss": 0.6414, + "step": 16617 + }, + { + "epoch": 0.509317150913326, + "grad_norm": 1.6202194963499454, + "learning_rate": 5.092061756517494e-06, + "loss": 0.6384, + "step": 16618 + }, + { + "epoch": 0.5093477994360672, + "grad_norm": 1.7366789329398986, + "learning_rate": 5.09156552236299e-06, + "loss": 0.6602, + "step": 16619 + }, + { + "epoch": 0.5093784479588084, + "grad_norm": 1.797772328087729, + "learning_rate": 5.091069287306266e-06, + "loss": 0.6867, + "step": 16620 + }, + { + "epoch": 0.5094090964815496, + "grad_norm": 1.88535543845707, + "learning_rate": 5.090573051352215e-06, + "loss": 0.6992, + "step": 16621 + }, + { + "epoch": 0.5094397450042908, + "grad_norm": 1.738989543568604, + 
"learning_rate": 5.0900768145057224e-06, + "loss": 0.6221, + "step": 16622 + }, + { + "epoch": 0.509470393527032, + "grad_norm": 0.7959536547606337, + "learning_rate": 5.089580576771682e-06, + "loss": 0.4433, + "step": 16623 + }, + { + "epoch": 0.5095010420497732, + "grad_norm": 2.105915670859422, + "learning_rate": 5.089084338154981e-06, + "loss": 0.6709, + "step": 16624 + }, + { + "epoch": 0.5095316905725144, + "grad_norm": 1.925676717397161, + "learning_rate": 5.088588098660508e-06, + "loss": 0.7061, + "step": 16625 + }, + { + "epoch": 0.5095623390952556, + "grad_norm": 1.712737425267269, + "learning_rate": 5.088091858293153e-06, + "loss": 0.6286, + "step": 16626 + }, + { + "epoch": 0.5095929876179968, + "grad_norm": 1.9058372356259903, + "learning_rate": 5.08759561705781e-06, + "loss": 0.6182, + "step": 16627 + }, + { + "epoch": 0.509623636140738, + "grad_norm": 0.787540705046771, + "learning_rate": 5.087099374959362e-06, + "loss": 0.4305, + "step": 16628 + }, + { + "epoch": 0.5096542846634792, + "grad_norm": 0.7774440912231363, + "learning_rate": 5.086603132002702e-06, + "loss": 0.4386, + "step": 16629 + }, + { + "epoch": 0.5096849331862204, + "grad_norm": 0.7855677519019597, + "learning_rate": 5.0861068881927185e-06, + "loss": 0.4326, + "step": 16630 + }, + { + "epoch": 0.5097155817089616, + "grad_norm": 1.7953346806997177, + "learning_rate": 5.085610643534305e-06, + "loss": 0.5832, + "step": 16631 + }, + { + "epoch": 0.5097462302317028, + "grad_norm": 1.920384367520524, + "learning_rate": 5.0851143980323445e-06, + "loss": 0.6072, + "step": 16632 + }, + { + "epoch": 0.509776878754444, + "grad_norm": 2.047628865616506, + "learning_rate": 5.084618151691733e-06, + "loss": 0.6591, + "step": 16633 + }, + { + "epoch": 0.5098075272771853, + "grad_norm": 1.7296103994352794, + "learning_rate": 5.084121904517354e-06, + "loss": 0.6765, + "step": 16634 + }, + { + "epoch": 0.5098381757999264, + "grad_norm": 1.7279915165446573, + "learning_rate": 5.083625656514101e-06, + "loss": 0.6116, + "step": 16635 + }, + { + "epoch": 0.5098688243226677, + "grad_norm": 0.8048913592943445, + "learning_rate": 5.083129407686865e-06, + "loss": 0.4361, + "step": 16636 + }, + { + "epoch": 0.5098994728454088, + "grad_norm": 1.6142868345620867, + "learning_rate": 5.082633158040532e-06, + "loss": 0.5841, + "step": 16637 + }, + { + "epoch": 0.5099301213681501, + "grad_norm": 1.9058185283286786, + "learning_rate": 5.082136907579995e-06, + "loss": 0.7384, + "step": 16638 + }, + { + "epoch": 0.5099607698908912, + "grad_norm": 1.7582084946518337, + "learning_rate": 5.081640656310141e-06, + "loss": 0.6551, + "step": 16639 + }, + { + "epoch": 0.5099914184136325, + "grad_norm": 2.042596754512278, + "learning_rate": 5.081144404235861e-06, + "loss": 0.5952, + "step": 16640 + }, + { + "epoch": 0.5100220669363736, + "grad_norm": 1.670862687646357, + "learning_rate": 5.080648151362043e-06, + "loss": 0.623, + "step": 16641 + }, + { + "epoch": 0.5100527154591149, + "grad_norm": 1.6051717380065846, + "learning_rate": 5.080151897693581e-06, + "loss": 0.5758, + "step": 16642 + }, + { + "epoch": 0.510083363981856, + "grad_norm": 0.8323571743652639, + "learning_rate": 5.079655643235358e-06, + "loss": 0.4428, + "step": 16643 + }, + { + "epoch": 0.5101140125045973, + "grad_norm": 1.9860084266980464, + "learning_rate": 5.079159387992271e-06, + "loss": 0.6733, + "step": 16644 + }, + { + "epoch": 0.5101446610273385, + "grad_norm": 1.6021040975817178, + "learning_rate": 5.0786631319692034e-06, + "loss": 0.5792, + "step": 16645 + }, + { + 
"epoch": 0.5101753095500797, + "grad_norm": 1.6569002863063784, + "learning_rate": 5.07816687517105e-06, + "loss": 0.6671, + "step": 16646 + }, + { + "epoch": 0.5102059580728209, + "grad_norm": 1.722738663565071, + "learning_rate": 5.077670617602698e-06, + "loss": 0.6325, + "step": 16647 + }, + { + "epoch": 0.5102366065955621, + "grad_norm": 1.826350485433878, + "learning_rate": 5.0771743592690356e-06, + "loss": 0.6794, + "step": 16648 + }, + { + "epoch": 0.5102672551183033, + "grad_norm": 1.764768102809877, + "learning_rate": 5.076678100174958e-06, + "loss": 0.6873, + "step": 16649 + }, + { + "epoch": 0.5102979036410445, + "grad_norm": 0.7727740071744215, + "learning_rate": 5.0761818403253496e-06, + "loss": 0.4342, + "step": 16650 + }, + { + "epoch": 0.5103285521637857, + "grad_norm": 1.6984275512209068, + "learning_rate": 5.0756855797251015e-06, + "loss": 0.5989, + "step": 16651 + }, + { + "epoch": 0.510359200686527, + "grad_norm": 1.7245040303151982, + "learning_rate": 5.0751893183791046e-06, + "loss": 0.6946, + "step": 16652 + }, + { + "epoch": 0.5103898492092681, + "grad_norm": 1.7542849475563307, + "learning_rate": 5.074693056292248e-06, + "loss": 0.6186, + "step": 16653 + }, + { + "epoch": 0.5104204977320094, + "grad_norm": 1.9360052077787713, + "learning_rate": 5.0741967934694224e-06, + "loss": 0.7045, + "step": 16654 + }, + { + "epoch": 0.5104511462547505, + "grad_norm": 0.7850153195581879, + "learning_rate": 5.073700529915516e-06, + "loss": 0.424, + "step": 16655 + }, + { + "epoch": 0.5104817947774917, + "grad_norm": 1.7041433312238679, + "learning_rate": 5.073204265635418e-06, + "loss": 0.5961, + "step": 16656 + }, + { + "epoch": 0.5105124433002329, + "grad_norm": 1.8132667382703833, + "learning_rate": 5.072708000634023e-06, + "loss": 0.596, + "step": 16657 + }, + { + "epoch": 0.5105430918229741, + "grad_norm": 1.771185586738061, + "learning_rate": 5.072211734916215e-06, + "loss": 0.8201, + "step": 16658 + }, + { + "epoch": 0.5105737403457153, + "grad_norm": 1.831947735760908, + "learning_rate": 5.071715468486887e-06, + "loss": 0.5798, + "step": 16659 + }, + { + "epoch": 0.5106043888684565, + "grad_norm": 1.9024385755426036, + "learning_rate": 5.071219201350928e-06, + "loss": 0.6805, + "step": 16660 + }, + { + "epoch": 0.5106350373911978, + "grad_norm": 1.8063815317172558, + "learning_rate": 5.070722933513228e-06, + "loss": 0.6671, + "step": 16661 + }, + { + "epoch": 0.5106656859139389, + "grad_norm": 1.8279155296261653, + "learning_rate": 5.070226664978677e-06, + "loss": 0.669, + "step": 16662 + }, + { + "epoch": 0.5106963344366802, + "grad_norm": 1.8865738798772875, + "learning_rate": 5.069730395752164e-06, + "loss": 0.6944, + "step": 16663 + }, + { + "epoch": 0.5107269829594213, + "grad_norm": 1.8650065715243809, + "learning_rate": 5.06923412583858e-06, + "loss": 0.5712, + "step": 16664 + }, + { + "epoch": 0.5107576314821626, + "grad_norm": 1.8949602276012965, + "learning_rate": 5.068737855242816e-06, + "loss": 0.7681, + "step": 16665 + }, + { + "epoch": 0.5107882800049037, + "grad_norm": 1.665156179196561, + "learning_rate": 5.0682415839697585e-06, + "loss": 0.61, + "step": 16666 + }, + { + "epoch": 0.510818928527645, + "grad_norm": 1.7719173059695073, + "learning_rate": 5.0677453120242995e-06, + "loss": 0.6876, + "step": 16667 + }, + { + "epoch": 0.5108495770503861, + "grad_norm": 0.8308416845123301, + "learning_rate": 5.067249039411329e-06, + "loss": 0.4291, + "step": 16668 + }, + { + "epoch": 0.5108802255731274, + "grad_norm": 1.5653981009876003, + "learning_rate": 
5.0667527661357365e-06, + "loss": 0.596, + "step": 16669 + }, + { + "epoch": 0.5109108740958686, + "grad_norm": 1.7540540114839285, + "learning_rate": 5.0662564922024115e-06, + "loss": 0.6921, + "step": 16670 + }, + { + "epoch": 0.5109415226186098, + "grad_norm": 0.7976576219284609, + "learning_rate": 5.065760217616243e-06, + "loss": 0.4473, + "step": 16671 + }, + { + "epoch": 0.510972171141351, + "grad_norm": 1.7752676760218316, + "learning_rate": 5.065263942382125e-06, + "loss": 0.5376, + "step": 16672 + }, + { + "epoch": 0.5110028196640922, + "grad_norm": 1.6558367149712905, + "learning_rate": 5.064767666504944e-06, + "loss": 0.6602, + "step": 16673 + }, + { + "epoch": 0.5110334681868334, + "grad_norm": 1.5500348794221321, + "learning_rate": 5.064271389989589e-06, + "loss": 0.6661, + "step": 16674 + }, + { + "epoch": 0.5110641167095746, + "grad_norm": 1.9123511918461595, + "learning_rate": 5.063775112840953e-06, + "loss": 0.7058, + "step": 16675 + }, + { + "epoch": 0.5110947652323158, + "grad_norm": 0.783679078355533, + "learning_rate": 5.063278835063923e-06, + "loss": 0.4464, + "step": 16676 + }, + { + "epoch": 0.511125413755057, + "grad_norm": 1.8493430201114467, + "learning_rate": 5.062782556663393e-06, + "loss": 0.6477, + "step": 16677 + }, + { + "epoch": 0.5111560622777982, + "grad_norm": 1.779986715713368, + "learning_rate": 5.062286277644248e-06, + "loss": 0.6622, + "step": 16678 + }, + { + "epoch": 0.5111867108005395, + "grad_norm": 1.6333065915302722, + "learning_rate": 5.0617899980113815e-06, + "loss": 0.6999, + "step": 16679 + }, + { + "epoch": 0.5112173593232806, + "grad_norm": 1.6176284463590915, + "learning_rate": 5.061293717769682e-06, + "loss": 0.5551, + "step": 16680 + }, + { + "epoch": 0.5112480078460219, + "grad_norm": 1.7155259408521952, + "learning_rate": 5.060797436924041e-06, + "loss": 0.6665, + "step": 16681 + }, + { + "epoch": 0.511278656368763, + "grad_norm": 1.5772693088365102, + "learning_rate": 5.060301155479346e-06, + "loss": 0.6291, + "step": 16682 + }, + { + "epoch": 0.5113093048915043, + "grad_norm": 0.8537961193343604, + "learning_rate": 5.059804873440488e-06, + "loss": 0.4337, + "step": 16683 + }, + { + "epoch": 0.5113399534142454, + "grad_norm": 0.8519380647951952, + "learning_rate": 5.059308590812357e-06, + "loss": 0.4601, + "step": 16684 + }, + { + "epoch": 0.5113706019369867, + "grad_norm": 1.6683983461629002, + "learning_rate": 5.058812307599846e-06, + "loss": 0.6806, + "step": 16685 + }, + { + "epoch": 0.5114012504597278, + "grad_norm": 1.6724514764665523, + "learning_rate": 5.05831602380784e-06, + "loss": 0.7253, + "step": 16686 + }, + { + "epoch": 0.511431898982469, + "grad_norm": 2.0025827563695384, + "learning_rate": 5.057819739441231e-06, + "loss": 0.5922, + "step": 16687 + }, + { + "epoch": 0.5114625475052103, + "grad_norm": 1.6840213153216583, + "learning_rate": 5.057323454504911e-06, + "loss": 0.5626, + "step": 16688 + }, + { + "epoch": 0.5114931960279514, + "grad_norm": 0.9286339747896172, + "learning_rate": 5.056827169003766e-06, + "loss": 0.4297, + "step": 16689 + }, + { + "epoch": 0.5115238445506927, + "grad_norm": 0.7262810023402565, + "learning_rate": 5.05633088294269e-06, + "loss": 0.404, + "step": 16690 + }, + { + "epoch": 0.5115544930734338, + "grad_norm": 1.740734196821161, + "learning_rate": 5.055834596326571e-06, + "loss": 0.6063, + "step": 16691 + }, + { + "epoch": 0.5115851415961751, + "grad_norm": 1.769630723250134, + "learning_rate": 5.055338309160301e-06, + "loss": 0.6273, + "step": 16692 + }, + { + "epoch": 
0.5116157901189162, + "grad_norm": 1.4718689878196287, + "learning_rate": 5.054842021448766e-06, + "loss": 0.5981, + "step": 16693 + }, + { + "epoch": 0.5116464386416575, + "grad_norm": 1.52203863068357, + "learning_rate": 5.05434573319686e-06, + "loss": 0.5737, + "step": 16694 + }, + { + "epoch": 0.5116770871643986, + "grad_norm": 1.6655993847450528, + "learning_rate": 5.05384944440947e-06, + "loss": 0.6246, + "step": 16695 + }, + { + "epoch": 0.5117077356871399, + "grad_norm": 0.7799954119040073, + "learning_rate": 5.053353155091491e-06, + "loss": 0.4275, + "step": 16696 + }, + { + "epoch": 0.511738384209881, + "grad_norm": 1.6574652498600435, + "learning_rate": 5.052856865247806e-06, + "loss": 0.5996, + "step": 16697 + }, + { + "epoch": 0.5117690327326223, + "grad_norm": 1.806700721068525, + "learning_rate": 5.05236057488331e-06, + "loss": 0.5808, + "step": 16698 + }, + { + "epoch": 0.5117996812553635, + "grad_norm": 1.7325659816124204, + "learning_rate": 5.051864284002892e-06, + "loss": 0.6429, + "step": 16699 + }, + { + "epoch": 0.5118303297781047, + "grad_norm": 1.8678996883749153, + "learning_rate": 5.051367992611442e-06, + "loss": 0.6917, + "step": 16700 + }, + { + "epoch": 0.5118609783008459, + "grad_norm": 1.7399624684884967, + "learning_rate": 5.050871700713851e-06, + "loss": 0.6464, + "step": 16701 + }, + { + "epoch": 0.5118916268235871, + "grad_norm": 1.790926613895976, + "learning_rate": 5.050375408315006e-06, + "loss": 0.6584, + "step": 16702 + }, + { + "epoch": 0.5119222753463283, + "grad_norm": 1.7969859326063136, + "learning_rate": 5.049879115419801e-06, + "loss": 0.6152, + "step": 16703 + }, + { + "epoch": 0.5119529238690695, + "grad_norm": 1.7665936480993718, + "learning_rate": 5.049382822033124e-06, + "loss": 0.6468, + "step": 16704 + }, + { + "epoch": 0.5119835723918107, + "grad_norm": 1.9021180126592463, + "learning_rate": 5.048886528159865e-06, + "loss": 0.6254, + "step": 16705 + }, + { + "epoch": 0.512014220914552, + "grad_norm": 1.836767818091365, + "learning_rate": 5.048390233804914e-06, + "loss": 0.6252, + "step": 16706 + }, + { + "epoch": 0.5120448694372931, + "grad_norm": 1.896850295335641, + "learning_rate": 5.047893938973163e-06, + "loss": 0.6939, + "step": 16707 + }, + { + "epoch": 0.5120755179600344, + "grad_norm": 1.8088054514254945, + "learning_rate": 5.0473976436695e-06, + "loss": 0.6643, + "step": 16708 + }, + { + "epoch": 0.5121061664827755, + "grad_norm": 0.8484643176129377, + "learning_rate": 5.046901347898816e-06, + "loss": 0.4316, + "step": 16709 + }, + { + "epoch": 0.5121368150055168, + "grad_norm": 1.8815086346555165, + "learning_rate": 5.046405051666001e-06, + "loss": 0.7031, + "step": 16710 + }, + { + "epoch": 0.5121674635282579, + "grad_norm": 1.8344637635243364, + "learning_rate": 5.045908754975946e-06, + "loss": 0.5773, + "step": 16711 + }, + { + "epoch": 0.5121981120509992, + "grad_norm": 1.6820896017032565, + "learning_rate": 5.0454124578335395e-06, + "loss": 0.6393, + "step": 16712 + }, + { + "epoch": 0.5122287605737403, + "grad_norm": 2.120392257302482, + "learning_rate": 5.044916160243673e-06, + "loss": 0.6876, + "step": 16713 + }, + { + "epoch": 0.5122594090964816, + "grad_norm": 1.813141000836346, + "learning_rate": 5.044419862211234e-06, + "loss": 0.7223, + "step": 16714 + }, + { + "epoch": 0.5122900576192227, + "grad_norm": 1.4207924160602077, + "learning_rate": 5.043923563741117e-06, + "loss": 0.7115, + "step": 16715 + }, + { + "epoch": 0.512320706141964, + "grad_norm": 1.6797579156074673, + "learning_rate": 5.043427264838209e-06, 
+ "loss": 0.6595, + "step": 16716 + }, + { + "epoch": 0.5123513546647052, + "grad_norm": 1.5186138140983008, + "learning_rate": 5.0429309655074e-06, + "loss": 0.5844, + "step": 16717 + }, + { + "epoch": 0.5123820031874463, + "grad_norm": 1.8961736556658266, + "learning_rate": 5.042434665753582e-06, + "loss": 0.7173, + "step": 16718 + }, + { + "epoch": 0.5124126517101876, + "grad_norm": 1.626272101835465, + "learning_rate": 5.041938365581646e-06, + "loss": 0.5468, + "step": 16719 + }, + { + "epoch": 0.5124433002329287, + "grad_norm": 1.3929187277709718, + "learning_rate": 5.041442064996479e-06, + "loss": 0.5786, + "step": 16720 + }, + { + "epoch": 0.51247394875567, + "grad_norm": 1.7240589633728656, + "learning_rate": 5.040945764002972e-06, + "loss": 0.6657, + "step": 16721 + }, + { + "epoch": 0.5125045972784111, + "grad_norm": 1.7060234804966457, + "learning_rate": 5.0404494626060175e-06, + "loss": 0.6184, + "step": 16722 + }, + { + "epoch": 0.5125352458011524, + "grad_norm": 1.7393922881269246, + "learning_rate": 5.039953160810504e-06, + "loss": 0.6391, + "step": 16723 + }, + { + "epoch": 0.5125658943238935, + "grad_norm": 1.6003798675373873, + "learning_rate": 5.039456858621322e-06, + "loss": 0.6356, + "step": 16724 + }, + { + "epoch": 0.5125965428466348, + "grad_norm": 1.7147772003661779, + "learning_rate": 5.03896055604336e-06, + "loss": 0.607, + "step": 16725 + }, + { + "epoch": 0.512627191369376, + "grad_norm": 1.8800477708655865, + "learning_rate": 5.038464253081511e-06, + "loss": 0.7048, + "step": 16726 + }, + { + "epoch": 0.5126578398921172, + "grad_norm": 1.570328128331986, + "learning_rate": 5.037967949740663e-06, + "loss": 0.5976, + "step": 16727 + }, + { + "epoch": 0.5126884884148584, + "grad_norm": 1.5667093340572957, + "learning_rate": 5.037471646025707e-06, + "loss": 0.5368, + "step": 16728 + }, + { + "epoch": 0.5127191369375996, + "grad_norm": 0.8216373685711106, + "learning_rate": 5.0369753419415335e-06, + "loss": 0.4149, + "step": 16729 + }, + { + "epoch": 0.5127497854603408, + "grad_norm": 1.7618709787575615, + "learning_rate": 5.036479037493034e-06, + "loss": 0.5916, + "step": 16730 + }, + { + "epoch": 0.512780433983082, + "grad_norm": 0.7791855760265488, + "learning_rate": 5.035982732685095e-06, + "loss": 0.4379, + "step": 16731 + }, + { + "epoch": 0.5128110825058232, + "grad_norm": 1.8964985526247733, + "learning_rate": 5.03548642752261e-06, + "loss": 0.6243, + "step": 16732 + }, + { + "epoch": 0.5128417310285645, + "grad_norm": 1.6017012739429086, + "learning_rate": 5.034990122010468e-06, + "loss": 0.5842, + "step": 16733 + }, + { + "epoch": 0.5128723795513056, + "grad_norm": 1.6030317385252228, + "learning_rate": 5.034493816153558e-06, + "loss": 0.6113, + "step": 16734 + }, + { + "epoch": 0.5129030280740469, + "grad_norm": 0.8025091368748026, + "learning_rate": 5.033997509956775e-06, + "loss": 0.4304, + "step": 16735 + }, + { + "epoch": 0.512933676596788, + "grad_norm": 1.9685189574750683, + "learning_rate": 5.0335012034250034e-06, + "loss": 0.6671, + "step": 16736 + }, + { + "epoch": 0.5129643251195293, + "grad_norm": 1.8793286593363046, + "learning_rate": 5.033004896563136e-06, + "loss": 0.7047, + "step": 16737 + }, + { + "epoch": 0.5129949736422704, + "grad_norm": 1.7730555881408558, + "learning_rate": 5.0325085893760625e-06, + "loss": 0.7382, + "step": 16738 + }, + { + "epoch": 0.5130256221650117, + "grad_norm": 0.7776382692731904, + "learning_rate": 5.032012281868676e-06, + "loss": 0.4465, + "step": 16739 + }, + { + "epoch": 0.5130562706877528, + "grad_norm": 
1.747695737361174, + "learning_rate": 5.031515974045861e-06, + "loss": 0.6192, + "step": 16740 + }, + { + "epoch": 0.5130869192104941, + "grad_norm": 1.7639315398066115, + "learning_rate": 5.031019665912513e-06, + "loss": 0.6307, + "step": 16741 + }, + { + "epoch": 0.5131175677332352, + "grad_norm": 1.663788343360232, + "learning_rate": 5.0305233574735205e-06, + "loss": 0.7053, + "step": 16742 + }, + { + "epoch": 0.5131482162559765, + "grad_norm": 0.7734895196948302, + "learning_rate": 5.030027048733772e-06, + "loss": 0.4356, + "step": 16743 + }, + { + "epoch": 0.5131788647787177, + "grad_norm": 1.7869702621406827, + "learning_rate": 5.029530739698161e-06, + "loss": 0.5934, + "step": 16744 + }, + { + "epoch": 0.5132095133014589, + "grad_norm": 0.7795696575753962, + "learning_rate": 5.029034430371574e-06, + "loss": 0.4514, + "step": 16745 + }, + { + "epoch": 0.5132401618242001, + "grad_norm": 1.7284357318942734, + "learning_rate": 5.0285381207589055e-06, + "loss": 0.6413, + "step": 16746 + }, + { + "epoch": 0.5132708103469413, + "grad_norm": 1.8361409853838824, + "learning_rate": 5.028041810865042e-06, + "loss": 0.5938, + "step": 16747 + }, + { + "epoch": 0.5133014588696825, + "grad_norm": 1.6842727699108009, + "learning_rate": 5.027545500694877e-06, + "loss": 0.5726, + "step": 16748 + }, + { + "epoch": 0.5133321073924236, + "grad_norm": 1.7633849337403136, + "learning_rate": 5.027049190253297e-06, + "loss": 0.5612, + "step": 16749 + }, + { + "epoch": 0.5133627559151649, + "grad_norm": 1.6891998353196989, + "learning_rate": 5.026552879545197e-06, + "loss": 0.6487, + "step": 16750 + }, + { + "epoch": 0.513393404437906, + "grad_norm": 0.8305478455846715, + "learning_rate": 5.026056568575462e-06, + "loss": 0.4497, + "step": 16751 + }, + { + "epoch": 0.5134240529606473, + "grad_norm": 0.8019127049567744, + "learning_rate": 5.025560257348987e-06, + "loss": 0.4571, + "step": 16752 + }, + { + "epoch": 0.5134547014833885, + "grad_norm": 1.801382933585202, + "learning_rate": 5.025063945870659e-06, + "loss": 0.6042, + "step": 16753 + }, + { + "epoch": 0.5134853500061297, + "grad_norm": 1.7742925247902974, + "learning_rate": 5.02456763414537e-06, + "loss": 0.6519, + "step": 16754 + }, + { + "epoch": 0.5135159985288709, + "grad_norm": 1.6439945843920953, + "learning_rate": 5.024071322178011e-06, + "loss": 0.6316, + "step": 16755 + }, + { + "epoch": 0.5135466470516121, + "grad_norm": 1.5722252387298017, + "learning_rate": 5.0235750099734695e-06, + "loss": 0.574, + "step": 16756 + }, + { + "epoch": 0.5135772955743533, + "grad_norm": 1.786779758888415, + "learning_rate": 5.023078697536637e-06, + "loss": 0.6708, + "step": 16757 + }, + { + "epoch": 0.5136079440970945, + "grad_norm": 1.7454431608550334, + "learning_rate": 5.0225823848724054e-06, + "loss": 0.6624, + "step": 16758 + }, + { + "epoch": 0.5136385926198357, + "grad_norm": 1.778499933947951, + "learning_rate": 5.022086071985663e-06, + "loss": 0.6062, + "step": 16759 + }, + { + "epoch": 0.513669241142577, + "grad_norm": 1.3891536891920129, + "learning_rate": 5.021589758881301e-06, + "loss": 0.5903, + "step": 16760 + }, + { + "epoch": 0.5136998896653181, + "grad_norm": 1.902986628138991, + "learning_rate": 5.02109344556421e-06, + "loss": 0.664, + "step": 16761 + }, + { + "epoch": 0.5137305381880594, + "grad_norm": 2.050241162056447, + "learning_rate": 5.020597132039281e-06, + "loss": 0.6689, + "step": 16762 + }, + { + "epoch": 0.5137611867108005, + "grad_norm": 2.3819396794366017, + "learning_rate": 5.020100818311402e-06, + "loss": 0.6082, + "step": 
16763 + }, + { + "epoch": 0.5137918352335418, + "grad_norm": 1.8423839589409516, + "learning_rate": 5.019604504385464e-06, + "loss": 0.7397, + "step": 16764 + }, + { + "epoch": 0.5138224837562829, + "grad_norm": 1.8243980028869868, + "learning_rate": 5.019108190266358e-06, + "loss": 0.6366, + "step": 16765 + }, + { + "epoch": 0.5138531322790242, + "grad_norm": 1.8108750322953673, + "learning_rate": 5.018611875958974e-06, + "loss": 0.645, + "step": 16766 + }, + { + "epoch": 0.5138837808017653, + "grad_norm": 1.7697368991722777, + "learning_rate": 5.0181155614682045e-06, + "loss": 0.6219, + "step": 16767 + }, + { + "epoch": 0.5139144293245066, + "grad_norm": 1.761686672371292, + "learning_rate": 5.017619246798935e-06, + "loss": 0.6209, + "step": 16768 + }, + { + "epoch": 0.5139450778472477, + "grad_norm": 1.5453974770303638, + "learning_rate": 5.017122931956059e-06, + "loss": 0.5393, + "step": 16769 + }, + { + "epoch": 0.513975726369989, + "grad_norm": 1.6204529827747167, + "learning_rate": 5.0166266169444675e-06, + "loss": 0.6279, + "step": 16770 + }, + { + "epoch": 0.5140063748927302, + "grad_norm": 1.6870014749558886, + "learning_rate": 5.016130301769049e-06, + "loss": 0.6161, + "step": 16771 + }, + { + "epoch": 0.5140370234154714, + "grad_norm": 1.7959726108937533, + "learning_rate": 5.015633986434695e-06, + "loss": 0.7309, + "step": 16772 + }, + { + "epoch": 0.5140676719382126, + "grad_norm": 2.0028285542927073, + "learning_rate": 5.015137670946295e-06, + "loss": 0.6375, + "step": 16773 + }, + { + "epoch": 0.5140983204609538, + "grad_norm": 1.6635870584260324, + "learning_rate": 5.014641355308739e-06, + "loss": 0.6739, + "step": 16774 + }, + { + "epoch": 0.514128968983695, + "grad_norm": 1.8129805843104227, + "learning_rate": 5.0141450395269184e-06, + "loss": 0.6619, + "step": 16775 + }, + { + "epoch": 0.5141596175064362, + "grad_norm": 1.679200869892332, + "learning_rate": 5.013648723605724e-06, + "loss": 0.5954, + "step": 16776 + }, + { + "epoch": 0.5141902660291774, + "grad_norm": 1.62374099503961, + "learning_rate": 5.013152407550045e-06, + "loss": 0.5057, + "step": 16777 + }, + { + "epoch": 0.5142209145519187, + "grad_norm": 1.54335791604734, + "learning_rate": 5.012656091364772e-06, + "loss": 0.5635, + "step": 16778 + }, + { + "epoch": 0.5142515630746598, + "grad_norm": 1.7022645203522246, + "learning_rate": 5.012159775054793e-06, + "loss": 0.5672, + "step": 16779 + }, + { + "epoch": 0.514282211597401, + "grad_norm": 1.5113274834705837, + "learning_rate": 5.011663458625004e-06, + "loss": 0.4871, + "step": 16780 + }, + { + "epoch": 0.5143128601201422, + "grad_norm": 1.689261912624616, + "learning_rate": 5.01116714208029e-06, + "loss": 0.5824, + "step": 16781 + }, + { + "epoch": 0.5143435086428834, + "grad_norm": 1.7148732775549822, + "learning_rate": 5.010670825425543e-06, + "loss": 0.5885, + "step": 16782 + }, + { + "epoch": 0.5143741571656246, + "grad_norm": 1.0183560186039746, + "learning_rate": 5.010174508665654e-06, + "loss": 0.442, + "step": 16783 + }, + { + "epoch": 0.5144048056883658, + "grad_norm": 1.6365315870098895, + "learning_rate": 5.009678191805514e-06, + "loss": 0.6141, + "step": 16784 + }, + { + "epoch": 0.514435454211107, + "grad_norm": 1.6849489133354507, + "learning_rate": 5.009181874850012e-06, + "loss": 0.5981, + "step": 16785 + }, + { + "epoch": 0.5144661027338482, + "grad_norm": 1.8960059724476888, + "learning_rate": 5.008685557804038e-06, + "loss": 0.7661, + "step": 16786 + }, + { + "epoch": 0.5144967512565894, + "grad_norm": 1.7700218834564643, + 
"learning_rate": 5.008189240672483e-06, + "loss": 0.6488, + "step": 16787 + }, + { + "epoch": 0.5145273997793306, + "grad_norm": 1.809875651751608, + "learning_rate": 5.007692923460236e-06, + "loss": 0.6912, + "step": 16788 + }, + { + "epoch": 0.5145580483020719, + "grad_norm": 1.703211372662847, + "learning_rate": 5.007196606172192e-06, + "loss": 0.6577, + "step": 16789 + }, + { + "epoch": 0.514588696824813, + "grad_norm": 1.9524893827861125, + "learning_rate": 5.0067002888132355e-06, + "loss": 0.6462, + "step": 16790 + }, + { + "epoch": 0.5146193453475543, + "grad_norm": 1.7660517450134408, + "learning_rate": 5.00620397138826e-06, + "loss": 0.7219, + "step": 16791 + }, + { + "epoch": 0.5146499938702954, + "grad_norm": 1.65300778228227, + "learning_rate": 5.005707653902155e-06, + "loss": 0.5249, + "step": 16792 + }, + { + "epoch": 0.5146806423930367, + "grad_norm": 1.559381592785846, + "learning_rate": 5.005211336359812e-06, + "loss": 0.5726, + "step": 16793 + }, + { + "epoch": 0.5147112909157778, + "grad_norm": 1.68856451024282, + "learning_rate": 5.004715018766119e-06, + "loss": 0.6416, + "step": 16794 + }, + { + "epoch": 0.5147419394385191, + "grad_norm": 1.5141813358205498, + "learning_rate": 5.004218701125968e-06, + "loss": 0.5836, + "step": 16795 + }, + { + "epoch": 0.5147725879612602, + "grad_norm": 1.7515316985033822, + "learning_rate": 5.00372238344425e-06, + "loss": 0.5466, + "step": 16796 + }, + { + "epoch": 0.5148032364840015, + "grad_norm": 1.6442378079429838, + "learning_rate": 5.003226065725853e-06, + "loss": 0.677, + "step": 16797 + }, + { + "epoch": 0.5148338850067427, + "grad_norm": 1.6262850690280355, + "learning_rate": 5.00272974797567e-06, + "loss": 0.5338, + "step": 16798 + }, + { + "epoch": 0.5148645335294839, + "grad_norm": 1.7228265883193716, + "learning_rate": 5.002233430198588e-06, + "loss": 0.6744, + "step": 16799 + }, + { + "epoch": 0.5148951820522251, + "grad_norm": 1.9657140393862493, + "learning_rate": 5.001737112399503e-06, + "loss": 0.639, + "step": 16800 + }, + { + "epoch": 0.5149258305749663, + "grad_norm": 1.8081693710701554, + "learning_rate": 5.001240794583298e-06, + "loss": 0.6028, + "step": 16801 + }, + { + "epoch": 0.5149564790977075, + "grad_norm": 1.7444426038881085, + "learning_rate": 5.000744476754871e-06, + "loss": 0.7551, + "step": 16802 + }, + { + "epoch": 0.5149871276204487, + "grad_norm": 1.7071269093820365, + "learning_rate": 5.000248158919106e-06, + "loss": 0.5789, + "step": 16803 + }, + { + "epoch": 0.5150177761431899, + "grad_norm": 1.7751548575123934, + "learning_rate": 4.999751841080895e-06, + "loss": 0.66, + "step": 16804 + }, + { + "epoch": 0.5150484246659311, + "grad_norm": 1.7560833557342863, + "learning_rate": 4.999255523245132e-06, + "loss": 0.7171, + "step": 16805 + }, + { + "epoch": 0.5150790731886723, + "grad_norm": 1.6672691272845472, + "learning_rate": 4.9987592054167026e-06, + "loss": 0.7323, + "step": 16806 + }, + { + "epoch": 0.5151097217114136, + "grad_norm": 1.7449511989401012, + "learning_rate": 4.998262887600499e-06, + "loss": 0.5476, + "step": 16807 + }, + { + "epoch": 0.5151403702341547, + "grad_norm": 1.7780887856705785, + "learning_rate": 4.997766569801413e-06, + "loss": 0.6577, + "step": 16808 + }, + { + "epoch": 0.515171018756896, + "grad_norm": 1.7192919721004363, + "learning_rate": 4.997270252024331e-06, + "loss": 0.6771, + "step": 16809 + }, + { + "epoch": 0.5152016672796371, + "grad_norm": 1.7172850620798432, + "learning_rate": 4.996773934274148e-06, + "loss": 0.5776, + "step": 16810 + }, + { + "epoch": 
0.5152323158023783, + "grad_norm": 1.8283193483055933, + "learning_rate": 4.996277616555753e-06, + "loss": 0.6851, + "step": 16811 + }, + { + "epoch": 0.5152629643251195, + "grad_norm": 1.0189498377245694, + "learning_rate": 4.995781298874033e-06, + "loss": 0.4453, + "step": 16812 + }, + { + "epoch": 0.5152936128478607, + "grad_norm": 1.6152804703757748, + "learning_rate": 4.995284981233883e-06, + "loss": 0.6511, + "step": 16813 + }, + { + "epoch": 0.5153242613706019, + "grad_norm": 1.8861994538611366, + "learning_rate": 4.99478866364019e-06, + "loss": 0.6149, + "step": 16814 + }, + { + "epoch": 0.5153549098933431, + "grad_norm": 1.819535403584399, + "learning_rate": 4.994292346097846e-06, + "loss": 0.615, + "step": 16815 + }, + { + "epoch": 0.5153855584160844, + "grad_norm": 1.5750288868742364, + "learning_rate": 4.9937960286117415e-06, + "loss": 0.5871, + "step": 16816 + }, + { + "epoch": 0.5154162069388255, + "grad_norm": 1.8377756384464083, + "learning_rate": 4.993299711186768e-06, + "loss": 0.5545, + "step": 16817 + }, + { + "epoch": 0.5154468554615668, + "grad_norm": 1.8124052726149937, + "learning_rate": 4.99280339382781e-06, + "loss": 0.5864, + "step": 16818 + }, + { + "epoch": 0.5154775039843079, + "grad_norm": 1.732338704741199, + "learning_rate": 4.992307076539765e-06, + "loss": 0.6094, + "step": 16819 + }, + { + "epoch": 0.5155081525070492, + "grad_norm": 1.7556154467995984, + "learning_rate": 4.991810759327518e-06, + "loss": 0.6754, + "step": 16820 + }, + { + "epoch": 0.5155388010297903, + "grad_norm": 0.7959342686898984, + "learning_rate": 4.991314442195964e-06, + "loss": 0.4377, + "step": 16821 + }, + { + "epoch": 0.5155694495525316, + "grad_norm": 1.5335239143502757, + "learning_rate": 4.990818125149991e-06, + "loss": 0.5769, + "step": 16822 + }, + { + "epoch": 0.5156000980752727, + "grad_norm": 0.8015793539696515, + "learning_rate": 4.990321808194488e-06, + "loss": 0.4379, + "step": 16823 + }, + { + "epoch": 0.515630746598014, + "grad_norm": 1.4284303128274503, + "learning_rate": 4.989825491334347e-06, + "loss": 0.605, + "step": 16824 + }, + { + "epoch": 0.5156613951207552, + "grad_norm": 0.7520845987203412, + "learning_rate": 4.9893291745744586e-06, + "loss": 0.4497, + "step": 16825 + }, + { + "epoch": 0.5156920436434964, + "grad_norm": 1.8450896569542177, + "learning_rate": 4.988832857919711e-06, + "loss": 0.6665, + "step": 16826 + }, + { + "epoch": 0.5157226921662376, + "grad_norm": 1.7938764480515146, + "learning_rate": 4.988336541374998e-06, + "loss": 0.5979, + "step": 16827 + }, + { + "epoch": 0.5157533406889788, + "grad_norm": 0.7555575575177971, + "learning_rate": 4.987840224945207e-06, + "loss": 0.4367, + "step": 16828 + }, + { + "epoch": 0.51578398921172, + "grad_norm": 1.8741092962466228, + "learning_rate": 4.987343908635231e-06, + "loss": 0.6315, + "step": 16829 + }, + { + "epoch": 0.5158146377344612, + "grad_norm": 1.801666603470692, + "learning_rate": 4.986847592449958e-06, + "loss": 0.735, + "step": 16830 + }, + { + "epoch": 0.5158452862572024, + "grad_norm": 1.8027728559211564, + "learning_rate": 4.986351276394277e-06, + "loss": 0.6628, + "step": 16831 + }, + { + "epoch": 0.5158759347799436, + "grad_norm": 1.7915078770641693, + "learning_rate": 4.985854960473083e-06, + "loss": 0.7225, + "step": 16832 + }, + { + "epoch": 0.5159065833026848, + "grad_norm": 1.6470047449399408, + "learning_rate": 4.985358644691263e-06, + "loss": 0.6252, + "step": 16833 + }, + { + "epoch": 0.5159372318254261, + "grad_norm": 1.8579488701753963, + "learning_rate": 
4.9848623290537065e-06, + "loss": 0.7197, + "step": 16834 + }, + { + "epoch": 0.5159678803481672, + "grad_norm": 1.8920835331385712, + "learning_rate": 4.984366013565307e-06, + "loss": 0.6769, + "step": 16835 + }, + { + "epoch": 0.5159985288709085, + "grad_norm": 2.031154996380277, + "learning_rate": 4.983869698230952e-06, + "loss": 0.6689, + "step": 16836 + }, + { + "epoch": 0.5160291773936496, + "grad_norm": 1.637382306297846, + "learning_rate": 4.983373383055533e-06, + "loss": 0.6417, + "step": 16837 + }, + { + "epoch": 0.5160598259163909, + "grad_norm": 1.7483302521249937, + "learning_rate": 4.982877068043942e-06, + "loss": 0.6602, + "step": 16838 + }, + { + "epoch": 0.516090474439132, + "grad_norm": 1.8823956933649746, + "learning_rate": 4.982380753201066e-06, + "loss": 0.5932, + "step": 16839 + }, + { + "epoch": 0.5161211229618733, + "grad_norm": 1.5222710297466922, + "learning_rate": 4.981884438531798e-06, + "loss": 0.6266, + "step": 16840 + }, + { + "epoch": 0.5161517714846144, + "grad_norm": 1.6655458403791918, + "learning_rate": 4.981388124041028e-06, + "loss": 0.5854, + "step": 16841 + }, + { + "epoch": 0.5161824200073556, + "grad_norm": 1.587456151792444, + "learning_rate": 4.980891809733643e-06, + "loss": 0.6538, + "step": 16842 + }, + { + "epoch": 0.5162130685300969, + "grad_norm": 0.8365586305731375, + "learning_rate": 4.980395495614538e-06, + "loss": 0.4312, + "step": 16843 + }, + { + "epoch": 0.516243717052838, + "grad_norm": 0.8378747979895028, + "learning_rate": 4.9798991816886e-06, + "loss": 0.4269, + "step": 16844 + }, + { + "epoch": 0.5162743655755793, + "grad_norm": 1.4963144562793105, + "learning_rate": 4.979402867960721e-06, + "loss": 0.6119, + "step": 16845 + }, + { + "epoch": 0.5163050140983204, + "grad_norm": 1.544742380112405, + "learning_rate": 4.978906554435791e-06, + "loss": 0.5714, + "step": 16846 + }, + { + "epoch": 0.5163356626210617, + "grad_norm": 1.7100793363169235, + "learning_rate": 4.978410241118699e-06, + "loss": 0.6495, + "step": 16847 + }, + { + "epoch": 0.5163663111438028, + "grad_norm": 1.7089078329816965, + "learning_rate": 4.977913928014338e-06, + "loss": 0.6483, + "step": 16848 + }, + { + "epoch": 0.5163969596665441, + "grad_norm": 0.8192593219844878, + "learning_rate": 4.977417615127596e-06, + "loss": 0.4313, + "step": 16849 + }, + { + "epoch": 0.5164276081892852, + "grad_norm": 0.8234860721617654, + "learning_rate": 4.976921302463364e-06, + "loss": 0.4559, + "step": 16850 + }, + { + "epoch": 0.5164582567120265, + "grad_norm": 1.7815085624780902, + "learning_rate": 4.976424990026532e-06, + "loss": 0.5876, + "step": 16851 + }, + { + "epoch": 0.5164889052347676, + "grad_norm": 0.7767211468647158, + "learning_rate": 4.975928677821992e-06, + "loss": 0.4457, + "step": 16852 + }, + { + "epoch": 0.5165195537575089, + "grad_norm": 1.7831188491767305, + "learning_rate": 4.975432365854631e-06, + "loss": 0.7124, + "step": 16853 + }, + { + "epoch": 0.5165502022802501, + "grad_norm": 1.6061108144902712, + "learning_rate": 4.974936054129343e-06, + "loss": 0.6722, + "step": 16854 + }, + { + "epoch": 0.5165808508029913, + "grad_norm": 1.659981202209019, + "learning_rate": 4.974439742651014e-06, + "loss": 0.5711, + "step": 16855 + }, + { + "epoch": 0.5166114993257325, + "grad_norm": 1.6772849713898936, + "learning_rate": 4.9739434314245385e-06, + "loss": 0.5922, + "step": 16856 + }, + { + "epoch": 0.5166421478484737, + "grad_norm": 1.662060810709716, + "learning_rate": 4.973447120454805e-06, + "loss": 0.6788, + "step": 16857 + }, + { + "epoch": 
0.5166727963712149, + "grad_norm": 1.5650362078382387, + "learning_rate": 4.972950809746703e-06, + "loss": 0.6421, + "step": 16858 + }, + { + "epoch": 0.5167034448939561, + "grad_norm": 0.8022992644019968, + "learning_rate": 4.972454499305125e-06, + "loss": 0.4307, + "step": 16859 + }, + { + "epoch": 0.5167340934166973, + "grad_norm": 1.6773647391912567, + "learning_rate": 4.97195818913496e-06, + "loss": 0.6837, + "step": 16860 + }, + { + "epoch": 0.5167647419394386, + "grad_norm": 1.6034791655845648, + "learning_rate": 4.971461879241095e-06, + "loss": 0.5688, + "step": 16861 + }, + { + "epoch": 0.5167953904621797, + "grad_norm": 1.5726923290527615, + "learning_rate": 4.970965569628428e-06, + "loss": 0.5897, + "step": 16862 + }, + { + "epoch": 0.516826038984921, + "grad_norm": 1.812984816911528, + "learning_rate": 4.97046926030184e-06, + "loss": 0.6265, + "step": 16863 + }, + { + "epoch": 0.5168566875076621, + "grad_norm": 1.727578524569593, + "learning_rate": 4.96997295126623e-06, + "loss": 0.6233, + "step": 16864 + }, + { + "epoch": 0.5168873360304034, + "grad_norm": 1.7077181817945997, + "learning_rate": 4.969476642526482e-06, + "loss": 0.5832, + "step": 16865 + }, + { + "epoch": 0.5169179845531445, + "grad_norm": 1.7269807959390826, + "learning_rate": 4.9689803340874886e-06, + "loss": 0.6379, + "step": 16866 + }, + { + "epoch": 0.5169486330758858, + "grad_norm": 1.6509495396775626, + "learning_rate": 4.96848402595414e-06, + "loss": 0.5856, + "step": 16867 + }, + { + "epoch": 0.5169792815986269, + "grad_norm": 1.8038852233091276, + "learning_rate": 4.967987718131327e-06, + "loss": 0.6254, + "step": 16868 + }, + { + "epoch": 0.5170099301213682, + "grad_norm": 0.8224450160205817, + "learning_rate": 4.967491410623938e-06, + "loss": 0.4452, + "step": 16869 + }, + { + "epoch": 0.5170405786441093, + "grad_norm": 2.0687276428165706, + "learning_rate": 4.9669951034368655e-06, + "loss": 0.6292, + "step": 16870 + }, + { + "epoch": 0.5170712271668506, + "grad_norm": 0.8373893536023814, + "learning_rate": 4.966498796575e-06, + "loss": 0.449, + "step": 16871 + }, + { + "epoch": 0.5171018756895918, + "grad_norm": 1.7646238809920483, + "learning_rate": 4.9660024900432265e-06, + "loss": 0.6846, + "step": 16872 + }, + { + "epoch": 0.5171325242123329, + "grad_norm": 1.7595468331045838, + "learning_rate": 4.965506183846443e-06, + "loss": 0.632, + "step": 16873 + }, + { + "epoch": 0.5171631727350742, + "grad_norm": 1.7567077188357234, + "learning_rate": 4.965009877989532e-06, + "loss": 0.6725, + "step": 16874 + }, + { + "epoch": 0.5171938212578153, + "grad_norm": 1.7553916548883073, + "learning_rate": 4.964513572477392e-06, + "loss": 0.6497, + "step": 16875 + }, + { + "epoch": 0.5172244697805566, + "grad_norm": 1.5977031664152423, + "learning_rate": 4.964017267314907e-06, + "loss": 0.6862, + "step": 16876 + }, + { + "epoch": 0.5172551183032977, + "grad_norm": 1.7547623536841053, + "learning_rate": 4.963520962506968e-06, + "loss": 0.6293, + "step": 16877 + }, + { + "epoch": 0.517285766826039, + "grad_norm": 2.0182395712863226, + "learning_rate": 4.963024658058467e-06, + "loss": 0.6764, + "step": 16878 + }, + { + "epoch": 0.5173164153487801, + "grad_norm": 1.9477659869900796, + "learning_rate": 4.962528353974295e-06, + "loss": 0.6247, + "step": 16879 + }, + { + "epoch": 0.5173470638715214, + "grad_norm": 0.7731345170515894, + "learning_rate": 4.962032050259339e-06, + "loss": 0.4101, + "step": 16880 + }, + { + "epoch": 0.5173777123942626, + "grad_norm": 0.8173393629135701, + "learning_rate": 
4.961535746918491e-06, + "loss": 0.4549, + "step": 16881 + }, + { + "epoch": 0.5174083609170038, + "grad_norm": 1.7283787025622366, + "learning_rate": 4.96103944395664e-06, + "loss": 0.593, + "step": 16882 + }, + { + "epoch": 0.517439009439745, + "grad_norm": 1.6489820067584284, + "learning_rate": 4.9605431413786795e-06, + "loss": 0.5761, + "step": 16883 + }, + { + "epoch": 0.5174696579624862, + "grad_norm": 1.8780766887381897, + "learning_rate": 4.960046839189498e-06, + "loss": 0.7073, + "step": 16884 + }, + { + "epoch": 0.5175003064852274, + "grad_norm": 1.8268830231731863, + "learning_rate": 4.9595505373939825e-06, + "loss": 0.5998, + "step": 16885 + }, + { + "epoch": 0.5175309550079686, + "grad_norm": 1.8718339580678296, + "learning_rate": 4.959054235997029e-06, + "loss": 0.661, + "step": 16886 + }, + { + "epoch": 0.5175616035307098, + "grad_norm": 1.8193711475529633, + "learning_rate": 4.958557935003523e-06, + "loss": 0.6248, + "step": 16887 + }, + { + "epoch": 0.517592252053451, + "grad_norm": 0.9160463213104721, + "learning_rate": 4.958061634418356e-06, + "loss": 0.4496, + "step": 16888 + }, + { + "epoch": 0.5176229005761922, + "grad_norm": 2.206094660782556, + "learning_rate": 4.957565334246418e-06, + "loss": 0.7393, + "step": 16889 + }, + { + "epoch": 0.5176535490989335, + "grad_norm": 1.7636842820370255, + "learning_rate": 4.9570690344926e-06, + "loss": 0.6658, + "step": 16890 + }, + { + "epoch": 0.5176841976216746, + "grad_norm": 1.5810495431341802, + "learning_rate": 4.956572735161793e-06, + "loss": 0.7259, + "step": 16891 + }, + { + "epoch": 0.5177148461444159, + "grad_norm": 1.6410053767799577, + "learning_rate": 4.956076436258885e-06, + "loss": 0.706, + "step": 16892 + }, + { + "epoch": 0.517745494667157, + "grad_norm": 1.73543764158164, + "learning_rate": 4.955580137788766e-06, + "loss": 0.7492, + "step": 16893 + }, + { + "epoch": 0.5177761431898983, + "grad_norm": 1.690570419899828, + "learning_rate": 4.95508383975633e-06, + "loss": 0.6748, + "step": 16894 + }, + { + "epoch": 0.5178067917126394, + "grad_norm": 1.584928422208863, + "learning_rate": 4.954587542166464e-06, + "loss": 0.6054, + "step": 16895 + }, + { + "epoch": 0.5178374402353807, + "grad_norm": 1.742615524881434, + "learning_rate": 4.954091245024055e-06, + "loss": 0.6387, + "step": 16896 + }, + { + "epoch": 0.5178680887581218, + "grad_norm": 1.8683364985266493, + "learning_rate": 4.9535949483340005e-06, + "loss": 0.6637, + "step": 16897 + }, + { + "epoch": 0.5178987372808631, + "grad_norm": 1.6361374887693365, + "learning_rate": 4.9530986521011855e-06, + "loss": 0.6355, + "step": 16898 + }, + { + "epoch": 0.5179293858036043, + "grad_norm": 1.7084640038431604, + "learning_rate": 4.952602356330501e-06, + "loss": 0.6597, + "step": 16899 + }, + { + "epoch": 0.5179600343263455, + "grad_norm": 1.8876326107192634, + "learning_rate": 4.9521060610268385e-06, + "loss": 0.7178, + "step": 16900 + }, + { + "epoch": 0.5179906828490867, + "grad_norm": 1.8505098798079178, + "learning_rate": 4.951609766195086e-06, + "loss": 0.6858, + "step": 16901 + }, + { + "epoch": 0.5180213313718279, + "grad_norm": 1.7511575811722757, + "learning_rate": 4.951113471840136e-06, + "loss": 0.6196, + "step": 16902 + }, + { + "epoch": 0.5180519798945691, + "grad_norm": 1.56039059983696, + "learning_rate": 4.9506171779668776e-06, + "loss": 0.542, + "step": 16903 + }, + { + "epoch": 0.5180826284173102, + "grad_norm": 0.7565857055906011, + "learning_rate": 4.9501208845802e-06, + "loss": 0.4416, + "step": 16904 + }, + { + "epoch": 0.5181132769400515, 
+ "grad_norm": 1.6568503807694643, + "learning_rate": 4.9496245916849955e-06, + "loss": 0.6379, + "step": 16905 + }, + { + "epoch": 0.5181439254627926, + "grad_norm": 0.7775028818185872, + "learning_rate": 4.9491282992861515e-06, + "loss": 0.4116, + "step": 16906 + }, + { + "epoch": 0.5181745739855339, + "grad_norm": 1.8915640781701175, + "learning_rate": 4.948632007388559e-06, + "loss": 0.6079, + "step": 16907 + }, + { + "epoch": 0.518205222508275, + "grad_norm": 1.575382459207478, + "learning_rate": 4.9481357159971096e-06, + "loss": 0.5967, + "step": 16908 + }, + { + "epoch": 0.5182358710310163, + "grad_norm": 1.5512602824602462, + "learning_rate": 4.947639425116691e-06, + "loss": 0.6122, + "step": 16909 + }, + { + "epoch": 0.5182665195537575, + "grad_norm": 0.8090771895309409, + "learning_rate": 4.947143134752195e-06, + "loss": 0.4301, + "step": 16910 + }, + { + "epoch": 0.5182971680764987, + "grad_norm": 1.6695598890951422, + "learning_rate": 4.946646844908513e-06, + "loss": 0.6512, + "step": 16911 + }, + { + "epoch": 0.5183278165992399, + "grad_norm": 1.7518357944635403, + "learning_rate": 4.946150555590531e-06, + "loss": 0.6756, + "step": 16912 + }, + { + "epoch": 0.5183584651219811, + "grad_norm": 1.630818011692094, + "learning_rate": 4.945654266803141e-06, + "loss": 0.6518, + "step": 16913 + }, + { + "epoch": 0.5183891136447223, + "grad_norm": 1.7267739746688098, + "learning_rate": 4.945157978551237e-06, + "loss": 0.6191, + "step": 16914 + }, + { + "epoch": 0.5184197621674635, + "grad_norm": 1.4915259279929538, + "learning_rate": 4.944661690839701e-06, + "loss": 0.5889, + "step": 16915 + }, + { + "epoch": 0.5184504106902047, + "grad_norm": 1.9399412926028674, + "learning_rate": 4.944165403673431e-06, + "loss": 0.6273, + "step": 16916 + }, + { + "epoch": 0.518481059212946, + "grad_norm": 1.8190302128303675, + "learning_rate": 4.94366911705731e-06, + "loss": 0.5623, + "step": 16917 + }, + { + "epoch": 0.5185117077356871, + "grad_norm": 1.4882206573126098, + "learning_rate": 4.9431728309962355e-06, + "loss": 0.5568, + "step": 16918 + }, + { + "epoch": 0.5185423562584284, + "grad_norm": 1.8542676601905688, + "learning_rate": 4.942676545495092e-06, + "loss": 0.5801, + "step": 16919 + }, + { + "epoch": 0.5185730047811695, + "grad_norm": 1.880383958160186, + "learning_rate": 4.94218026055877e-06, + "loss": 0.7101, + "step": 16920 + }, + { + "epoch": 0.5186036533039108, + "grad_norm": 1.7412675829916018, + "learning_rate": 4.941683976192162e-06, + "loss": 0.6432, + "step": 16921 + }, + { + "epoch": 0.5186343018266519, + "grad_norm": 2.1247090269854283, + "learning_rate": 4.941187692400157e-06, + "loss": 0.6419, + "step": 16922 + }, + { + "epoch": 0.5186649503493932, + "grad_norm": 1.917376636341976, + "learning_rate": 4.940691409187643e-06, + "loss": 0.7008, + "step": 16923 + }, + { + "epoch": 0.5186955988721343, + "grad_norm": 1.8857827823152158, + "learning_rate": 4.940195126559514e-06, + "loss": 0.5912, + "step": 16924 + }, + { + "epoch": 0.5187262473948756, + "grad_norm": 1.7134333506264554, + "learning_rate": 4.9396988445206575e-06, + "loss": 0.7135, + "step": 16925 + }, + { + "epoch": 0.5187568959176168, + "grad_norm": 1.8922396565414825, + "learning_rate": 4.93920256307596e-06, + "loss": 0.6563, + "step": 16926 + }, + { + "epoch": 0.518787544440358, + "grad_norm": 1.5709854663019318, + "learning_rate": 4.93870628223032e-06, + "loss": 0.5601, + "step": 16927 + }, + { + "epoch": 0.5188181929630992, + "grad_norm": 1.6681457693578687, + "learning_rate": 4.9382100019886185e-06, + "loss": 
0.6474, + "step": 16928 + }, + { + "epoch": 0.5188488414858404, + "grad_norm": 1.7160266653960097, + "learning_rate": 4.937713722355754e-06, + "loss": 0.629, + "step": 16929 + }, + { + "epoch": 0.5188794900085816, + "grad_norm": 1.4948352903625366, + "learning_rate": 4.937217443336609e-06, + "loss": 0.568, + "step": 16930 + }, + { + "epoch": 0.5189101385313228, + "grad_norm": 1.5824361903753192, + "learning_rate": 4.936721164936077e-06, + "loss": 0.5792, + "step": 16931 + }, + { + "epoch": 0.518940787054064, + "grad_norm": 1.6363847116072543, + "learning_rate": 4.936224887159049e-06, + "loss": 0.6641, + "step": 16932 + }, + { + "epoch": 0.5189714355768053, + "grad_norm": 1.8941626796456101, + "learning_rate": 4.935728610010412e-06, + "loss": 0.6981, + "step": 16933 + }, + { + "epoch": 0.5190020840995464, + "grad_norm": 1.8106200992892043, + "learning_rate": 4.935232333495058e-06, + "loss": 0.6548, + "step": 16934 + }, + { + "epoch": 0.5190327326222876, + "grad_norm": 1.7149286104811057, + "learning_rate": 4.934736057617877e-06, + "loss": 0.6713, + "step": 16935 + }, + { + "epoch": 0.5190633811450288, + "grad_norm": 1.7727897033304172, + "learning_rate": 4.9342397823837565e-06, + "loss": 0.583, + "step": 16936 + }, + { + "epoch": 0.51909402966777, + "grad_norm": 1.46052384846951, + "learning_rate": 4.93374350779759e-06, + "loss": 0.5636, + "step": 16937 + }, + { + "epoch": 0.5191246781905112, + "grad_norm": 1.852587043371929, + "learning_rate": 4.933247233864267e-06, + "loss": 0.6641, + "step": 16938 + }, + { + "epoch": 0.5191553267132524, + "grad_norm": 1.8671484940741503, + "learning_rate": 4.932750960588672e-06, + "loss": 0.6135, + "step": 16939 + }, + { + "epoch": 0.5191859752359936, + "grad_norm": 1.640900123135185, + "learning_rate": 4.932254687975703e-06, + "loss": 0.706, + "step": 16940 + }, + { + "epoch": 0.5192166237587348, + "grad_norm": 1.62184115191935, + "learning_rate": 4.931758416030244e-06, + "loss": 0.633, + "step": 16941 + }, + { + "epoch": 0.519247272281476, + "grad_norm": 1.7824767646102269, + "learning_rate": 4.931262144757186e-06, + "loss": 0.6946, + "step": 16942 + }, + { + "epoch": 0.5192779208042172, + "grad_norm": 1.6550657027367572, + "learning_rate": 4.9307658741614216e-06, + "loss": 0.5596, + "step": 16943 + }, + { + "epoch": 0.5193085693269585, + "grad_norm": 1.5243600206309897, + "learning_rate": 4.930269604247836e-06, + "loss": 0.6192, + "step": 16944 + }, + { + "epoch": 0.5193392178496996, + "grad_norm": 0.8933675509720981, + "learning_rate": 4.929773335021324e-06, + "loss": 0.4382, + "step": 16945 + }, + { + "epoch": 0.5193698663724409, + "grad_norm": 1.7220643861082068, + "learning_rate": 4.929277066486774e-06, + "loss": 0.7221, + "step": 16946 + }, + { + "epoch": 0.519400514895182, + "grad_norm": 1.7715419130537096, + "learning_rate": 4.928780798649073e-06, + "loss": 0.6519, + "step": 16947 + }, + { + "epoch": 0.5194311634179233, + "grad_norm": 1.7754084625604056, + "learning_rate": 4.928284531513114e-06, + "loss": 0.6567, + "step": 16948 + }, + { + "epoch": 0.5194618119406644, + "grad_norm": 1.778344230284368, + "learning_rate": 4.927788265083787e-06, + "loss": 0.6392, + "step": 16949 + }, + { + "epoch": 0.5194924604634057, + "grad_norm": 1.7437840332755496, + "learning_rate": 4.927291999365979e-06, + "loss": 0.7318, + "step": 16950 + }, + { + "epoch": 0.5195231089861468, + "grad_norm": 1.6209376045306012, + "learning_rate": 4.9267957343645824e-06, + "loss": 0.6388, + "step": 16951 + }, + { + "epoch": 0.5195537575088881, + "grad_norm": 
1.6743395605666906, + "learning_rate": 4.926299470084486e-06, + "loss": 0.569, + "step": 16952 + }, + { + "epoch": 0.5195844060316293, + "grad_norm": 1.6902294358063024, + "learning_rate": 4.925803206530579e-06, + "loss": 0.6388, + "step": 16953 + }, + { + "epoch": 0.5196150545543705, + "grad_norm": 1.6380549542209486, + "learning_rate": 4.925306943707753e-06, + "loss": 0.6653, + "step": 16954 + }, + { + "epoch": 0.5196457030771117, + "grad_norm": 1.7777642997850966, + "learning_rate": 4.924810681620896e-06, + "loss": 0.6937, + "step": 16955 + }, + { + "epoch": 0.5196763515998529, + "grad_norm": 1.646764569098974, + "learning_rate": 4.924314420274899e-06, + "loss": 0.606, + "step": 16956 + }, + { + "epoch": 0.5197070001225941, + "grad_norm": 1.784709998098849, + "learning_rate": 4.923818159674653e-06, + "loss": 0.5884, + "step": 16957 + }, + { + "epoch": 0.5197376486453353, + "grad_norm": 0.8406116974917864, + "learning_rate": 4.923321899825043e-06, + "loss": 0.4326, + "step": 16958 + }, + { + "epoch": 0.5197682971680765, + "grad_norm": 1.8895004000180284, + "learning_rate": 4.922825640730965e-06, + "loss": 0.7034, + "step": 16959 + }, + { + "epoch": 0.5197989456908177, + "grad_norm": 0.8105089317301916, + "learning_rate": 4.9223293823973045e-06, + "loss": 0.4327, + "step": 16960 + }, + { + "epoch": 0.5198295942135589, + "grad_norm": 1.7706559234180637, + "learning_rate": 4.921833124828952e-06, + "loss": 0.6451, + "step": 16961 + }, + { + "epoch": 0.5198602427363002, + "grad_norm": 1.6992840258529105, + "learning_rate": 4.921336868030797e-06, + "loss": 0.6574, + "step": 16962 + }, + { + "epoch": 0.5198908912590413, + "grad_norm": 1.6238294855884947, + "learning_rate": 4.920840612007731e-06, + "loss": 0.6546, + "step": 16963 + }, + { + "epoch": 0.5199215397817826, + "grad_norm": 1.6439756537366395, + "learning_rate": 4.9203443567646434e-06, + "loss": 0.6158, + "step": 16964 + }, + { + "epoch": 0.5199521883045237, + "grad_norm": 1.6955385320353187, + "learning_rate": 4.919848102306422e-06, + "loss": 0.5116, + "step": 16965 + }, + { + "epoch": 0.5199828368272649, + "grad_norm": 1.7910006501872733, + "learning_rate": 4.919351848637958e-06, + "loss": 0.6458, + "step": 16966 + }, + { + "epoch": 0.5200134853500061, + "grad_norm": 1.7997525628018416, + "learning_rate": 4.918855595764141e-06, + "loss": 0.6301, + "step": 16967 + }, + { + "epoch": 0.5200441338727473, + "grad_norm": 1.6865933205455803, + "learning_rate": 4.9183593436898615e-06, + "loss": 0.6644, + "step": 16968 + }, + { + "epoch": 0.5200747823954885, + "grad_norm": 2.4812009815670195, + "learning_rate": 4.917863092420006e-06, + "loss": 0.6819, + "step": 16969 + }, + { + "epoch": 0.5201054309182297, + "grad_norm": 1.6583746967497688, + "learning_rate": 4.91736684195947e-06, + "loss": 0.5759, + "step": 16970 + }, + { + "epoch": 0.520136079440971, + "grad_norm": 1.7031935961048175, + "learning_rate": 4.916870592313135e-06, + "loss": 0.7058, + "step": 16971 + }, + { + "epoch": 0.5201667279637121, + "grad_norm": 1.3827902970051071, + "learning_rate": 4.9163743434858994e-06, + "loss": 0.6677, + "step": 16972 + }, + { + "epoch": 0.5201973764864534, + "grad_norm": 1.8109147973588795, + "learning_rate": 4.9158780954826475e-06, + "loss": 0.5792, + "step": 16973 + }, + { + "epoch": 0.5202280250091945, + "grad_norm": 1.8531461981848825, + "learning_rate": 4.915381848308269e-06, + "loss": 0.6117, + "step": 16974 + }, + { + "epoch": 0.5202586735319358, + "grad_norm": 1.6373835147927698, + "learning_rate": 4.914885601967656e-06, + "loss": 0.6056, + 
"step": 16975 + }, + { + "epoch": 0.5202893220546769, + "grad_norm": 1.5922841190959631, + "learning_rate": 4.914389356465698e-06, + "loss": 0.6572, + "step": 16976 + }, + { + "epoch": 0.5203199705774182, + "grad_norm": 1.7524763948035587, + "learning_rate": 4.913893111807281e-06, + "loss": 0.6447, + "step": 16977 + }, + { + "epoch": 0.5203506191001593, + "grad_norm": 1.9102653946138992, + "learning_rate": 4.913396867997299e-06, + "loss": 0.627, + "step": 16978 + }, + { + "epoch": 0.5203812676229006, + "grad_norm": 1.5481181945376665, + "learning_rate": 4.912900625040641e-06, + "loss": 0.6171, + "step": 16979 + }, + { + "epoch": 0.5204119161456418, + "grad_norm": 1.7976346910196521, + "learning_rate": 4.912404382942191e-06, + "loss": 0.5742, + "step": 16980 + }, + { + "epoch": 0.520442564668383, + "grad_norm": 1.06725352017923, + "learning_rate": 4.9119081417068474e-06, + "loss": 0.4324, + "step": 16981 + }, + { + "epoch": 0.5204732131911242, + "grad_norm": 0.9676789237873205, + "learning_rate": 4.9114119013394925e-06, + "loss": 0.4332, + "step": 16982 + }, + { + "epoch": 0.5205038617138654, + "grad_norm": 1.875049387779561, + "learning_rate": 4.910915661845021e-06, + "loss": 0.5978, + "step": 16983 + }, + { + "epoch": 0.5205345102366066, + "grad_norm": 1.6341496827638953, + "learning_rate": 4.91041942322832e-06, + "loss": 0.5874, + "step": 16984 + }, + { + "epoch": 0.5205651587593478, + "grad_norm": 1.7936292576128097, + "learning_rate": 4.909923185494278e-06, + "loss": 0.6438, + "step": 16985 + }, + { + "epoch": 0.520595807282089, + "grad_norm": 1.9894908650925682, + "learning_rate": 4.909426948647787e-06, + "loss": 0.6055, + "step": 16986 + }, + { + "epoch": 0.5206264558048302, + "grad_norm": 1.9210438657612634, + "learning_rate": 4.908930712693735e-06, + "loss": 0.6723, + "step": 16987 + }, + { + "epoch": 0.5206571043275714, + "grad_norm": 1.7273503041623284, + "learning_rate": 4.908434477637011e-06, + "loss": 0.6371, + "step": 16988 + }, + { + "epoch": 0.5206877528503127, + "grad_norm": 1.6981500377548027, + "learning_rate": 4.907938243482507e-06, + "loss": 0.6735, + "step": 16989 + }, + { + "epoch": 0.5207184013730538, + "grad_norm": 1.6974214406103398, + "learning_rate": 4.90744201023511e-06, + "loss": 0.6279, + "step": 16990 + }, + { + "epoch": 0.5207490498957951, + "grad_norm": 1.924236073083775, + "learning_rate": 4.906945777899712e-06, + "loss": 0.683, + "step": 16991 + }, + { + "epoch": 0.5207796984185362, + "grad_norm": 1.7998773463865687, + "learning_rate": 4.906449546481201e-06, + "loss": 0.598, + "step": 16992 + }, + { + "epoch": 0.5208103469412775, + "grad_norm": 1.8012896246732764, + "learning_rate": 4.905953315984465e-06, + "loss": 0.6499, + "step": 16993 + }, + { + "epoch": 0.5208409954640186, + "grad_norm": 1.70209347177317, + "learning_rate": 4.905457086414397e-06, + "loss": 0.6835, + "step": 16994 + }, + { + "epoch": 0.5208716439867599, + "grad_norm": 1.172476393120044, + "learning_rate": 4.904960857775883e-06, + "loss": 0.4429, + "step": 16995 + }, + { + "epoch": 0.520902292509501, + "grad_norm": 1.8146425099183057, + "learning_rate": 4.904464630073814e-06, + "loss": 0.6476, + "step": 16996 + }, + { + "epoch": 0.5209329410322422, + "grad_norm": 1.807797119539539, + "learning_rate": 4.90396840331308e-06, + "loss": 0.6071, + "step": 16997 + }, + { + "epoch": 0.5209635895549835, + "grad_norm": 1.7186103281517107, + "learning_rate": 4.90347217749857e-06, + "loss": 0.5784, + "step": 16998 + }, + { + "epoch": 0.5209942380777246, + "grad_norm": 1.658683011160559, + 
"learning_rate": 4.9029759526351726e-06, + "loss": 0.6516, + "step": 16999 + }, + { + "epoch": 0.5210248866004659, + "grad_norm": 1.7330237133058386, + "learning_rate": 4.902479728727778e-06, + "loss": 0.6084, + "step": 17000 + }, + { + "epoch": 0.521055535123207, + "grad_norm": 0.7965315745129088, + "learning_rate": 4.901983505781276e-06, + "loss": 0.4459, + "step": 17001 + }, + { + "epoch": 0.5210861836459483, + "grad_norm": 1.8523551376340945, + "learning_rate": 4.901487283800556e-06, + "loss": 0.5662, + "step": 17002 + }, + { + "epoch": 0.5211168321686894, + "grad_norm": 1.7862190517852343, + "learning_rate": 4.900991062790507e-06, + "loss": 0.6753, + "step": 17003 + }, + { + "epoch": 0.5211474806914307, + "grad_norm": 1.5883991100988089, + "learning_rate": 4.900494842756017e-06, + "loss": 0.6585, + "step": 17004 + }, + { + "epoch": 0.5211781292141718, + "grad_norm": 1.586064625567306, + "learning_rate": 4.899998623701979e-06, + "loss": 0.5331, + "step": 17005 + }, + { + "epoch": 0.5212087777369131, + "grad_norm": 1.8280799795098563, + "learning_rate": 4.899502405633279e-06, + "loss": 0.6215, + "step": 17006 + }, + { + "epoch": 0.5212394262596542, + "grad_norm": 2.1704948815682674, + "learning_rate": 4.899006188554807e-06, + "loss": 0.6351, + "step": 17007 + }, + { + "epoch": 0.5212700747823955, + "grad_norm": 1.7574471722911922, + "learning_rate": 4.898509972471453e-06, + "loss": 0.5806, + "step": 17008 + }, + { + "epoch": 0.5213007233051367, + "grad_norm": 1.765858404344238, + "learning_rate": 4.898013757388106e-06, + "loss": 0.6428, + "step": 17009 + }, + { + "epoch": 0.5213313718278779, + "grad_norm": 0.871862504518672, + "learning_rate": 4.897517543309656e-06, + "loss": 0.439, + "step": 17010 + }, + { + "epoch": 0.5213620203506191, + "grad_norm": 1.7067487874332927, + "learning_rate": 4.897021330240993e-06, + "loss": 0.6182, + "step": 17011 + }, + { + "epoch": 0.5213926688733603, + "grad_norm": 1.9188754999508932, + "learning_rate": 4.896525118187002e-06, + "loss": 0.6375, + "step": 17012 + }, + { + "epoch": 0.5214233173961015, + "grad_norm": 1.6338089479838578, + "learning_rate": 4.896028907152579e-06, + "loss": 0.539, + "step": 17013 + }, + { + "epoch": 0.5214539659188427, + "grad_norm": 1.7051061908923555, + "learning_rate": 4.8955326971426085e-06, + "loss": 0.6169, + "step": 17014 + }, + { + "epoch": 0.5214846144415839, + "grad_norm": 1.619131840388474, + "learning_rate": 4.895036488161981e-06, + "loss": 0.7052, + "step": 17015 + }, + { + "epoch": 0.5215152629643252, + "grad_norm": 1.8307494709606906, + "learning_rate": 4.894540280215586e-06, + "loss": 0.6744, + "step": 17016 + }, + { + "epoch": 0.5215459114870663, + "grad_norm": 1.75408867207646, + "learning_rate": 4.894044073308311e-06, + "loss": 0.6043, + "step": 17017 + }, + { + "epoch": 0.5215765600098076, + "grad_norm": 1.5343640208590235, + "learning_rate": 4.893547867445049e-06, + "loss": 0.6034, + "step": 17018 + }, + { + "epoch": 0.5216072085325487, + "grad_norm": 2.005032456448723, + "learning_rate": 4.893051662630686e-06, + "loss": 0.562, + "step": 17019 + }, + { + "epoch": 0.52163785705529, + "grad_norm": 1.5633696164366582, + "learning_rate": 4.892555458870112e-06, + "loss": 0.5683, + "step": 17020 + }, + { + "epoch": 0.5216685055780311, + "grad_norm": 1.69980321503174, + "learning_rate": 4.892059256168217e-06, + "loss": 0.6215, + "step": 17021 + }, + { + "epoch": 0.5216991541007724, + "grad_norm": 1.8333220345002965, + "learning_rate": 4.891563054529892e-06, + "loss": 0.7097, + "step": 17022 + }, + { + "epoch": 
0.5217298026235135, + "grad_norm": 1.8459481119755226, + "learning_rate": 4.89106685396002e-06, + "loss": 0.6974, + "step": 17023 + }, + { + "epoch": 0.5217604511462548, + "grad_norm": 0.8760926579581677, + "learning_rate": 4.890570654463497e-06, + "loss": 0.4484, + "step": 17024 + }, + { + "epoch": 0.521791099668996, + "grad_norm": 1.7020822454475906, + "learning_rate": 4.890074456045209e-06, + "loss": 0.7302, + "step": 17025 + }, + { + "epoch": 0.5218217481917372, + "grad_norm": 2.0983877441906253, + "learning_rate": 4.8895782587100434e-06, + "loss": 0.7605, + "step": 17026 + }, + { + "epoch": 0.5218523967144784, + "grad_norm": 0.7861760790469434, + "learning_rate": 4.889082062462894e-06, + "loss": 0.4284, + "step": 17027 + }, + { + "epoch": 0.5218830452372195, + "grad_norm": 1.672417787533036, + "learning_rate": 4.888585867308646e-06, + "loss": 0.5644, + "step": 17028 + }, + { + "epoch": 0.5219136937599608, + "grad_norm": 0.7881033297392507, + "learning_rate": 4.8880896732521905e-06, + "loss": 0.4364, + "step": 17029 + }, + { + "epoch": 0.5219443422827019, + "grad_norm": 1.720186180390703, + "learning_rate": 4.887593480298416e-06, + "loss": 0.5592, + "step": 17030 + }, + { + "epoch": 0.5219749908054432, + "grad_norm": 1.632510456219012, + "learning_rate": 4.88709728845221e-06, + "loss": 0.6143, + "step": 17031 + }, + { + "epoch": 0.5220056393281843, + "grad_norm": 1.6644353719821199, + "learning_rate": 4.886601097718466e-06, + "loss": 0.6572, + "step": 17032 + }, + { + "epoch": 0.5220362878509256, + "grad_norm": 1.7349109720375673, + "learning_rate": 4.886104908102071e-06, + "loss": 0.6786, + "step": 17033 + }, + { + "epoch": 0.5220669363736667, + "grad_norm": 1.7975205622528903, + "learning_rate": 4.885608719607909e-06, + "loss": 0.6578, + "step": 17034 + }, + { + "epoch": 0.522097584896408, + "grad_norm": 1.674332272762426, + "learning_rate": 4.8851125322408786e-06, + "loss": 0.6269, + "step": 17035 + }, + { + "epoch": 0.5221282334191492, + "grad_norm": 0.9036243602069247, + "learning_rate": 4.884616346005859e-06, + "loss": 0.4535, + "step": 17036 + }, + { + "epoch": 0.5221588819418904, + "grad_norm": 1.703224627793162, + "learning_rate": 4.884120160907749e-06, + "loss": 0.6559, + "step": 17037 + }, + { + "epoch": 0.5221895304646316, + "grad_norm": 1.6221360467605745, + "learning_rate": 4.8836239769514305e-06, + "loss": 0.6161, + "step": 17038 + }, + { + "epoch": 0.5222201789873728, + "grad_norm": 1.7270171409897994, + "learning_rate": 4.883127794141794e-06, + "loss": 0.6896, + "step": 17039 + }, + { + "epoch": 0.522250827510114, + "grad_norm": 0.8270425069983794, + "learning_rate": 4.88263161248373e-06, + "loss": 0.4474, + "step": 17040 + }, + { + "epoch": 0.5222814760328552, + "grad_norm": 1.7110806504547709, + "learning_rate": 4.882135431982127e-06, + "loss": 0.692, + "step": 17041 + }, + { + "epoch": 0.5223121245555964, + "grad_norm": 1.6720971430577531, + "learning_rate": 4.881639252641872e-06, + "loss": 0.6577, + "step": 17042 + }, + { + "epoch": 0.5223427730783377, + "grad_norm": 0.7767060766257146, + "learning_rate": 4.881143074467857e-06, + "loss": 0.4118, + "step": 17043 + }, + { + "epoch": 0.5223734216010788, + "grad_norm": 1.5786850247438298, + "learning_rate": 4.88064689746497e-06, + "loss": 0.6438, + "step": 17044 + }, + { + "epoch": 0.5224040701238201, + "grad_norm": 1.5818658392728036, + "learning_rate": 4.8801507216381e-06, + "loss": 0.5506, + "step": 17045 + }, + { + "epoch": 0.5224347186465612, + "grad_norm": 1.7146417189228107, + "learning_rate": 
4.879654546992136e-06, + "loss": 0.7042, + "step": 17046 + }, + { + "epoch": 0.5224653671693025, + "grad_norm": 1.752115686803171, + "learning_rate": 4.879158373531964e-06, + "loss": 0.6482, + "step": 17047 + }, + { + "epoch": 0.5224960156920436, + "grad_norm": 1.7534179361700273, + "learning_rate": 4.878662201262478e-06, + "loss": 0.7135, + "step": 17048 + }, + { + "epoch": 0.5225266642147849, + "grad_norm": 1.5656321366911845, + "learning_rate": 4.878166030188564e-06, + "loss": 0.507, + "step": 17049 + }, + { + "epoch": 0.522557312737526, + "grad_norm": 1.8220782700615172, + "learning_rate": 4.877669860315109e-06, + "loss": 0.6682, + "step": 17050 + }, + { + "epoch": 0.5225879612602673, + "grad_norm": 1.5914949559408034, + "learning_rate": 4.877173691647006e-06, + "loss": 0.6344, + "step": 17051 + }, + { + "epoch": 0.5226186097830084, + "grad_norm": 1.734461469228005, + "learning_rate": 4.876677524189144e-06, + "loss": 0.7412, + "step": 17052 + }, + { + "epoch": 0.5226492583057497, + "grad_norm": 1.6348906631641804, + "learning_rate": 4.876181357946406e-06, + "loss": 0.6866, + "step": 17053 + }, + { + "epoch": 0.5226799068284909, + "grad_norm": 1.6910516968289573, + "learning_rate": 4.875685192923688e-06, + "loss": 0.5792, + "step": 17054 + }, + { + "epoch": 0.5227105553512321, + "grad_norm": 1.7790227854397607, + "learning_rate": 4.875189029125872e-06, + "loss": 0.6883, + "step": 17055 + }, + { + "epoch": 0.5227412038739733, + "grad_norm": 1.7457968582398515, + "learning_rate": 4.8746928665578545e-06, + "loss": 0.6557, + "step": 17056 + }, + { + "epoch": 0.5227718523967145, + "grad_norm": 3.6371806710095784, + "learning_rate": 4.874196705224518e-06, + "loss": 0.6332, + "step": 17057 + }, + { + "epoch": 0.5228025009194557, + "grad_norm": 1.82654687838075, + "learning_rate": 4.8737005451307524e-06, + "loss": 0.5508, + "step": 17058 + }, + { + "epoch": 0.5228331494421968, + "grad_norm": 1.6980754065307655, + "learning_rate": 4.87320438628145e-06, + "loss": 0.681, + "step": 17059 + }, + { + "epoch": 0.5228637979649381, + "grad_norm": 1.6727651078998442, + "learning_rate": 4.872708228681497e-06, + "loss": 0.6037, + "step": 17060 + }, + { + "epoch": 0.5228944464876792, + "grad_norm": 1.7096193293590223, + "learning_rate": 4.872212072335781e-06, + "loss": 0.5938, + "step": 17061 + }, + { + "epoch": 0.5229250950104205, + "grad_norm": 1.7384597552957244, + "learning_rate": 4.871715917249193e-06, + "loss": 0.6763, + "step": 17062 + }, + { + "epoch": 0.5229557435331617, + "grad_norm": 1.5457490075342322, + "learning_rate": 4.87121976342662e-06, + "loss": 0.5938, + "step": 17063 + }, + { + "epoch": 0.5229863920559029, + "grad_norm": 1.765384526532429, + "learning_rate": 4.870723610872953e-06, + "loss": 0.6644, + "step": 17064 + }, + { + "epoch": 0.5230170405786441, + "grad_norm": 1.9441641521602462, + "learning_rate": 4.87022745959308e-06, + "loss": 0.5993, + "step": 17065 + }, + { + "epoch": 0.5230476891013853, + "grad_norm": 1.6738861716657556, + "learning_rate": 4.8697313095918856e-06, + "loss": 0.6636, + "step": 17066 + }, + { + "epoch": 0.5230783376241265, + "grad_norm": 0.8880871644601603, + "learning_rate": 4.8692351608742654e-06, + "loss": 0.4356, + "step": 17067 + }, + { + "epoch": 0.5231089861468677, + "grad_norm": 1.8185481019155614, + "learning_rate": 4.868739013445104e-06, + "loss": 0.6021, + "step": 17068 + }, + { + "epoch": 0.5231396346696089, + "grad_norm": 1.5704372621431872, + "learning_rate": 4.868242867309289e-06, + "loss": 0.5956, + "step": 17069 + }, + { + "epoch": 
0.5231702831923501, + "grad_norm": 1.741981138894768, + "learning_rate": 4.8677467224717125e-06, + "loss": 0.6472, + "step": 17070 + }, + { + "epoch": 0.5232009317150913, + "grad_norm": 1.8912408142454404, + "learning_rate": 4.86725057893726e-06, + "loss": 0.6731, + "step": 17071 + }, + { + "epoch": 0.5232315802378326, + "grad_norm": 0.7972190645111145, + "learning_rate": 4.8667544367108225e-06, + "loss": 0.4427, + "step": 17072 + }, + { + "epoch": 0.5232622287605737, + "grad_norm": 1.7817018949629047, + "learning_rate": 4.866258295797287e-06, + "loss": 0.6273, + "step": 17073 + }, + { + "epoch": 0.523292877283315, + "grad_norm": 2.0626439360129805, + "learning_rate": 4.865762156201543e-06, + "loss": 0.723, + "step": 17074 + }, + { + "epoch": 0.5233235258060561, + "grad_norm": 1.4495203735309783, + "learning_rate": 4.865266017928478e-06, + "loss": 0.6418, + "step": 17075 + }, + { + "epoch": 0.5233541743287974, + "grad_norm": 1.9086884666018467, + "learning_rate": 4.864769880982984e-06, + "loss": 0.6525, + "step": 17076 + }, + { + "epoch": 0.5233848228515385, + "grad_norm": 0.8582215729123063, + "learning_rate": 4.8642737453699434e-06, + "loss": 0.4593, + "step": 17077 + }, + { + "epoch": 0.5234154713742798, + "grad_norm": 1.6456139399498477, + "learning_rate": 4.863777611094251e-06, + "loss": 0.5283, + "step": 17078 + }, + { + "epoch": 0.523446119897021, + "grad_norm": 1.6949837367378313, + "learning_rate": 4.863281478160793e-06, + "loss": 0.6119, + "step": 17079 + }, + { + "epoch": 0.5234767684197622, + "grad_norm": 1.5559098817790988, + "learning_rate": 4.8627853465744555e-06, + "loss": 0.5491, + "step": 17080 + }, + { + "epoch": 0.5235074169425034, + "grad_norm": 1.7506674151080455, + "learning_rate": 4.862289216340129e-06, + "loss": 0.6898, + "step": 17081 + }, + { + "epoch": 0.5235380654652446, + "grad_norm": 1.7437919827675303, + "learning_rate": 4.861793087462703e-06, + "loss": 0.7485, + "step": 17082 + }, + { + "epoch": 0.5235687139879858, + "grad_norm": 1.6794634060081068, + "learning_rate": 4.861296959947066e-06, + "loss": 0.6009, + "step": 17083 + }, + { + "epoch": 0.523599362510727, + "grad_norm": 1.5451356252903896, + "learning_rate": 4.860800833798104e-06, + "loss": 0.5576, + "step": 17084 + }, + { + "epoch": 0.5236300110334682, + "grad_norm": 1.6894846831666308, + "learning_rate": 4.860304709020707e-06, + "loss": 0.5901, + "step": 17085 + }, + { + "epoch": 0.5236606595562094, + "grad_norm": 1.6001408317693697, + "learning_rate": 4.8598085856197644e-06, + "loss": 0.6181, + "step": 17086 + }, + { + "epoch": 0.5236913080789506, + "grad_norm": 1.718248327060134, + "learning_rate": 4.859312463600165e-06, + "loss": 0.6349, + "step": 17087 + }, + { + "epoch": 0.5237219566016919, + "grad_norm": 1.9773619735117494, + "learning_rate": 4.8588163429667926e-06, + "loss": 0.6263, + "step": 17088 + }, + { + "epoch": 0.523752605124433, + "grad_norm": 1.7262271205693918, + "learning_rate": 4.858320223724542e-06, + "loss": 0.7219, + "step": 17089 + }, + { + "epoch": 0.5237832536471742, + "grad_norm": 1.689127607930628, + "learning_rate": 4.857824105878296e-06, + "loss": 0.6185, + "step": 17090 + }, + { + "epoch": 0.5238139021699154, + "grad_norm": 1.690519035681251, + "learning_rate": 4.857327989432946e-06, + "loss": 0.7174, + "step": 17091 + }, + { + "epoch": 0.5238445506926566, + "grad_norm": 1.5693398036364816, + "learning_rate": 4.856831874393382e-06, + "loss": 0.5804, + "step": 17092 + }, + { + "epoch": 0.5238751992153978, + "grad_norm": 1.737404562004694, + "learning_rate": 
4.856335760764488e-06, + "loss": 0.6456, + "step": 17093 + }, + { + "epoch": 0.523905847738139, + "grad_norm": 1.5646416624762218, + "learning_rate": 4.855839648551154e-06, + "loss": 0.6257, + "step": 17094 + }, + { + "epoch": 0.5239364962608802, + "grad_norm": 1.7763059916731716, + "learning_rate": 4.8553435377582704e-06, + "loss": 0.5636, + "step": 17095 + }, + { + "epoch": 0.5239671447836214, + "grad_norm": 1.7631208600547137, + "learning_rate": 4.854847428390723e-06, + "loss": 0.6525, + "step": 17096 + }, + { + "epoch": 0.5239977933063626, + "grad_norm": 1.7832688618474113, + "learning_rate": 4.854351320453402e-06, + "loss": 0.7332, + "step": 17097 + }, + { + "epoch": 0.5240284418291038, + "grad_norm": 1.6506457185362275, + "learning_rate": 4.853855213951193e-06, + "loss": 0.5974, + "step": 17098 + }, + { + "epoch": 0.5240590903518451, + "grad_norm": 1.595681378365227, + "learning_rate": 4.8533591088889885e-06, + "loss": 0.5742, + "step": 17099 + }, + { + "epoch": 0.5240897388745862, + "grad_norm": 1.8454627808166426, + "learning_rate": 4.852863005271673e-06, + "loss": 0.6724, + "step": 17100 + }, + { + "epoch": 0.5241203873973275, + "grad_norm": 1.6609261359809686, + "learning_rate": 4.8523669031041345e-06, + "loss": 0.6089, + "step": 17101 + }, + { + "epoch": 0.5241510359200686, + "grad_norm": 1.6249474378448918, + "learning_rate": 4.851870802391264e-06, + "loss": 0.5306, + "step": 17102 + }, + { + "epoch": 0.5241816844428099, + "grad_norm": 0.9053065289305129, + "learning_rate": 4.851374703137949e-06, + "loss": 0.457, + "step": 17103 + }, + { + "epoch": 0.524212332965551, + "grad_norm": 1.6245926281614902, + "learning_rate": 4.850878605349076e-06, + "loss": 0.6224, + "step": 17104 + }, + { + "epoch": 0.5242429814882923, + "grad_norm": 1.6806781834888036, + "learning_rate": 4.8503825090295345e-06, + "loss": 0.5842, + "step": 17105 + }, + { + "epoch": 0.5242736300110334, + "grad_norm": 1.7712685894132418, + "learning_rate": 4.8498864141842136e-06, + "loss": 0.5676, + "step": 17106 + }, + { + "epoch": 0.5243042785337747, + "grad_norm": 0.8266488900246675, + "learning_rate": 4.849390320817998e-06, + "loss": 0.4362, + "step": 17107 + }, + { + "epoch": 0.5243349270565159, + "grad_norm": 1.5913070388961013, + "learning_rate": 4.84889422893578e-06, + "loss": 0.6171, + "step": 17108 + }, + { + "epoch": 0.5243655755792571, + "grad_norm": 1.9916119038612572, + "learning_rate": 4.848398138542444e-06, + "loss": 0.6929, + "step": 17109 + }, + { + "epoch": 0.5243962241019983, + "grad_norm": 1.7267276635931346, + "learning_rate": 4.847902049642882e-06, + "loss": 0.5995, + "step": 17110 + }, + { + "epoch": 0.5244268726247395, + "grad_norm": 1.7218402056813835, + "learning_rate": 4.847405962241979e-06, + "loss": 0.6409, + "step": 17111 + }, + { + "epoch": 0.5244575211474807, + "grad_norm": 1.7407402283883886, + "learning_rate": 4.846909876344624e-06, + "loss": 0.5992, + "step": 17112 + }, + { + "epoch": 0.5244881696702219, + "grad_norm": 1.64874140995681, + "learning_rate": 4.846413791955706e-06, + "loss": 0.7112, + "step": 17113 + }, + { + "epoch": 0.5245188181929631, + "grad_norm": 0.807139467443859, + "learning_rate": 4.845917709080111e-06, + "loss": 0.4313, + "step": 17114 + }, + { + "epoch": 0.5245494667157043, + "grad_norm": 1.8651488293266465, + "learning_rate": 4.845421627722728e-06, + "loss": 0.6373, + "step": 17115 + }, + { + "epoch": 0.5245801152384455, + "grad_norm": 0.7916988821247838, + "learning_rate": 4.8449255478884465e-06, + "loss": 0.4237, + "step": 17116 + }, + { + "epoch": 
0.5246107637611868, + "grad_norm": 1.711239815996429, + "learning_rate": 4.844429469582153e-06, + "loss": 0.6025, + "step": 17117 + }, + { + "epoch": 0.5246414122839279, + "grad_norm": 1.5869358071936313, + "learning_rate": 4.843933392808735e-06, + "loss": 0.6621, + "step": 17118 + }, + { + "epoch": 0.5246720608066692, + "grad_norm": 1.7842641535578783, + "learning_rate": 4.843437317573083e-06, + "loss": 0.6061, + "step": 17119 + }, + { + "epoch": 0.5247027093294103, + "grad_norm": 0.7953345819892482, + "learning_rate": 4.84294124388008e-06, + "loss": 0.4244, + "step": 17120 + }, + { + "epoch": 0.5247333578521515, + "grad_norm": 1.845213644839652, + "learning_rate": 4.842445171734621e-06, + "loss": 0.6404, + "step": 17121 + }, + { + "epoch": 0.5247640063748927, + "grad_norm": 1.7292590873595455, + "learning_rate": 4.841949101141588e-06, + "loss": 0.5994, + "step": 17122 + }, + { + "epoch": 0.5247946548976339, + "grad_norm": 1.7021214325504308, + "learning_rate": 4.841453032105871e-06, + "loss": 0.6141, + "step": 17123 + }, + { + "epoch": 0.5248253034203751, + "grad_norm": 1.778158646691678, + "learning_rate": 4.840956964632358e-06, + "loss": 0.6443, + "step": 17124 + }, + { + "epoch": 0.5248559519431163, + "grad_norm": 1.6261328677563838, + "learning_rate": 4.840460898725935e-06, + "loss": 0.6227, + "step": 17125 + }, + { + "epoch": 0.5248866004658576, + "grad_norm": 1.9376292890107263, + "learning_rate": 4.839964834391494e-06, + "loss": 0.7415, + "step": 17126 + }, + { + "epoch": 0.5249172489885987, + "grad_norm": 1.8617963270217008, + "learning_rate": 4.83946877163392e-06, + "loss": 0.7643, + "step": 17127 + }, + { + "epoch": 0.52494789751134, + "grad_norm": 1.6479371044029174, + "learning_rate": 4.8389727104581e-06, + "loss": 0.5998, + "step": 17128 + }, + { + "epoch": 0.5249785460340811, + "grad_norm": 1.6877157100841063, + "learning_rate": 4.838476650868924e-06, + "loss": 0.5528, + "step": 17129 + }, + { + "epoch": 0.5250091945568224, + "grad_norm": 1.6878401112465242, + "learning_rate": 4.83798059287128e-06, + "loss": 0.5417, + "step": 17130 + }, + { + "epoch": 0.5250398430795635, + "grad_norm": 0.7831747133725416, + "learning_rate": 4.837484536470052e-06, + "loss": 0.4112, + "step": 17131 + }, + { + "epoch": 0.5250704916023048, + "grad_norm": 1.7471344736977412, + "learning_rate": 4.836988481670133e-06, + "loss": 0.667, + "step": 17132 + }, + { + "epoch": 0.5251011401250459, + "grad_norm": 1.7778452136819562, + "learning_rate": 4.836492428476407e-06, + "loss": 0.6611, + "step": 17133 + }, + { + "epoch": 0.5251317886477872, + "grad_norm": 1.7260945201031328, + "learning_rate": 4.835996376893763e-06, + "loss": 0.6066, + "step": 17134 + }, + { + "epoch": 0.5251624371705284, + "grad_norm": 1.8380271648123656, + "learning_rate": 4.835500326927089e-06, + "loss": 0.6683, + "step": 17135 + }, + { + "epoch": 0.5251930856932696, + "grad_norm": 1.5853829483615471, + "learning_rate": 4.835004278581271e-06, + "loss": 0.6537, + "step": 17136 + }, + { + "epoch": 0.5252237342160108, + "grad_norm": 1.7695596964673834, + "learning_rate": 4.8345082318611995e-06, + "loss": 0.6467, + "step": 17137 + }, + { + "epoch": 0.525254382738752, + "grad_norm": 1.6777004908049418, + "learning_rate": 4.834012186771761e-06, + "loss": 0.6171, + "step": 17138 + }, + { + "epoch": 0.5252850312614932, + "grad_norm": 1.9067987299153926, + "learning_rate": 4.833516143317841e-06, + "loss": 0.7037, + "step": 17139 + }, + { + "epoch": 0.5253156797842344, + "grad_norm": 0.8056202906208232, + "learning_rate": 
4.833020101504331e-06, + "loss": 0.423, + "step": 17140 + }, + { + "epoch": 0.5253463283069756, + "grad_norm": 1.706420846834696, + "learning_rate": 4.832524061336118e-06, + "loss": 0.5924, + "step": 17141 + }, + { + "epoch": 0.5253769768297168, + "grad_norm": 0.81406978763941, + "learning_rate": 4.832028022818085e-06, + "loss": 0.422, + "step": 17142 + }, + { + "epoch": 0.525407625352458, + "grad_norm": 1.5582440757106253, + "learning_rate": 4.831531985955124e-06, + "loss": 0.642, + "step": 17143 + }, + { + "epoch": 0.5254382738751993, + "grad_norm": 1.503745926711847, + "learning_rate": 4.8310359507521205e-06, + "loss": 0.5779, + "step": 17144 + }, + { + "epoch": 0.5254689223979404, + "grad_norm": 2.233660561697853, + "learning_rate": 4.830539917213965e-06, + "loss": 0.5773, + "step": 17145 + }, + { + "epoch": 0.5254995709206817, + "grad_norm": 1.530078692292291, + "learning_rate": 4.830043885345543e-06, + "loss": 0.5203, + "step": 17146 + }, + { + "epoch": 0.5255302194434228, + "grad_norm": 0.7942454546406149, + "learning_rate": 4.82954785515174e-06, + "loss": 0.4302, + "step": 17147 + }, + { + "epoch": 0.5255608679661641, + "grad_norm": 2.0296809158167335, + "learning_rate": 4.829051826637447e-06, + "loss": 0.6322, + "step": 17148 + }, + { + "epoch": 0.5255915164889052, + "grad_norm": 1.738029253042746, + "learning_rate": 4.828555799807552e-06, + "loss": 0.6903, + "step": 17149 + }, + { + "epoch": 0.5256221650116465, + "grad_norm": 1.7678465862971313, + "learning_rate": 4.828059774666936e-06, + "loss": 0.599, + "step": 17150 + }, + { + "epoch": 0.5256528135343876, + "grad_norm": 1.735563544151993, + "learning_rate": 4.827563751220495e-06, + "loss": 0.6993, + "step": 17151 + }, + { + "epoch": 0.5256834620571288, + "grad_norm": 1.73029548958537, + "learning_rate": 4.827067729473111e-06, + "loss": 0.6098, + "step": 17152 + }, + { + "epoch": 0.52571411057987, + "grad_norm": 1.7539600459425346, + "learning_rate": 4.826571709429673e-06, + "loss": 0.6264, + "step": 17153 + }, + { + "epoch": 0.5257447591026112, + "grad_norm": 1.7499220988923991, + "learning_rate": 4.826075691095068e-06, + "loss": 0.6751, + "step": 17154 + }, + { + "epoch": 0.5257754076253525, + "grad_norm": 1.6674189856372772, + "learning_rate": 4.825579674474183e-06, + "loss": 0.6204, + "step": 17155 + }, + { + "epoch": 0.5258060561480936, + "grad_norm": 1.856615989711433, + "learning_rate": 4.825083659571907e-06, + "loss": 0.6761, + "step": 17156 + }, + { + "epoch": 0.5258367046708349, + "grad_norm": 1.712933251244837, + "learning_rate": 4.824587646393127e-06, + "loss": 0.6359, + "step": 17157 + }, + { + "epoch": 0.525867353193576, + "grad_norm": 0.7717226670886485, + "learning_rate": 4.824091634942728e-06, + "loss": 0.4392, + "step": 17158 + }, + { + "epoch": 0.5258980017163173, + "grad_norm": 1.7474548943278343, + "learning_rate": 4.823595625225602e-06, + "loss": 0.6131, + "step": 17159 + }, + { + "epoch": 0.5259286502390584, + "grad_norm": 1.4539476670238116, + "learning_rate": 4.8230996172466325e-06, + "loss": 0.641, + "step": 17160 + }, + { + "epoch": 0.5259592987617997, + "grad_norm": 1.8233237726731177, + "learning_rate": 4.8226036110107055e-06, + "loss": 0.6432, + "step": 17161 + }, + { + "epoch": 0.5259899472845408, + "grad_norm": 1.8285494626863437, + "learning_rate": 4.822107606522713e-06, + "loss": 0.6551, + "step": 17162 + }, + { + "epoch": 0.5260205958072821, + "grad_norm": 1.6122956138315412, + "learning_rate": 4.821611603787538e-06, + "loss": 0.6213, + "step": 17163 + }, + { + "epoch": 0.5260512443300233, + 
"grad_norm": 1.6223657692445321, + "learning_rate": 4.821115602810072e-06, + "loss": 0.5187, + "step": 17164 + }, + { + "epoch": 0.5260818928527645, + "grad_norm": 1.8015720009948046, + "learning_rate": 4.8206196035951975e-06, + "loss": 0.5842, + "step": 17165 + }, + { + "epoch": 0.5261125413755057, + "grad_norm": 1.5379970564411394, + "learning_rate": 4.820123606147804e-06, + "loss": 0.6024, + "step": 17166 + }, + { + "epoch": 0.5261431898982469, + "grad_norm": 1.5986733140818317, + "learning_rate": 4.8196276104727795e-06, + "loss": 0.5382, + "step": 17167 + }, + { + "epoch": 0.5261738384209881, + "grad_norm": 1.750120289650125, + "learning_rate": 4.8191316165750105e-06, + "loss": 0.6306, + "step": 17168 + }, + { + "epoch": 0.5262044869437293, + "grad_norm": 1.7825014049278702, + "learning_rate": 4.818635624459383e-06, + "loss": 0.702, + "step": 17169 + }, + { + "epoch": 0.5262351354664705, + "grad_norm": 1.6027114555117885, + "learning_rate": 4.818139634130785e-06, + "loss": 0.7004, + "step": 17170 + }, + { + "epoch": 0.5262657839892118, + "grad_norm": 2.026083699633362, + "learning_rate": 4.817643645594104e-06, + "loss": 0.7008, + "step": 17171 + }, + { + "epoch": 0.5262964325119529, + "grad_norm": 1.586960862822858, + "learning_rate": 4.817147658854227e-06, + "loss": 0.6083, + "step": 17172 + }, + { + "epoch": 0.5263270810346942, + "grad_norm": 1.6336131791063735, + "learning_rate": 4.816651673916043e-06, + "loss": 0.6107, + "step": 17173 + }, + { + "epoch": 0.5263577295574353, + "grad_norm": 1.8016804330540437, + "learning_rate": 4.816155690784433e-06, + "loss": 0.6859, + "step": 17174 + }, + { + "epoch": 0.5263883780801766, + "grad_norm": 1.7709248575826808, + "learning_rate": 4.815659709464291e-06, + "loss": 0.7495, + "step": 17175 + }, + { + "epoch": 0.5264190266029177, + "grad_norm": 1.7520757175741535, + "learning_rate": 4.8151637299605e-06, + "loss": 0.599, + "step": 17176 + }, + { + "epoch": 0.526449675125659, + "grad_norm": 1.6142858403883846, + "learning_rate": 4.814667752277948e-06, + "loss": 0.6275, + "step": 17177 + }, + { + "epoch": 0.5264803236484001, + "grad_norm": 1.790179590647761, + "learning_rate": 4.814171776421521e-06, + "loss": 0.639, + "step": 17178 + }, + { + "epoch": 0.5265109721711414, + "grad_norm": 1.7067881007484789, + "learning_rate": 4.813675802396109e-06, + "loss": 0.563, + "step": 17179 + }, + { + "epoch": 0.5265416206938826, + "grad_norm": 1.8248770304789035, + "learning_rate": 4.813179830206595e-06, + "loss": 0.6625, + "step": 17180 + }, + { + "epoch": 0.5265722692166238, + "grad_norm": 1.851950377854255, + "learning_rate": 4.81268385985787e-06, + "loss": 0.5963, + "step": 17181 + }, + { + "epoch": 0.526602917739365, + "grad_norm": 1.7555870894656678, + "learning_rate": 4.8121878913548175e-06, + "loss": 0.6658, + "step": 17182 + }, + { + "epoch": 0.5266335662621061, + "grad_norm": 1.474687578149039, + "learning_rate": 4.811691924702327e-06, + "loss": 0.6254, + "step": 17183 + }, + { + "epoch": 0.5266642147848474, + "grad_norm": 1.7221195133169622, + "learning_rate": 4.811195959905284e-06, + "loss": 0.633, + "step": 17184 + }, + { + "epoch": 0.5266948633075885, + "grad_norm": 1.7366335763742569, + "learning_rate": 4.810699996968573e-06, + "loss": 0.6502, + "step": 17185 + }, + { + "epoch": 0.5267255118303298, + "grad_norm": 1.812002497541649, + "learning_rate": 4.810204035897087e-06, + "loss": 0.6727, + "step": 17186 + }, + { + "epoch": 0.5267561603530709, + "grad_norm": 0.853726006548383, + "learning_rate": 4.809708076695708e-06, + "loss": 0.427, + 
"step": 17187 + }, + { + "epoch": 0.5267868088758122, + "grad_norm": 1.8384216668622864, + "learning_rate": 4.8092121193693225e-06, + "loss": 0.6605, + "step": 17188 + }, + { + "epoch": 0.5268174573985533, + "grad_norm": 1.7414530334723433, + "learning_rate": 4.80871616392282e-06, + "loss": 0.646, + "step": 17189 + }, + { + "epoch": 0.5268481059212946, + "grad_norm": 1.675690771260797, + "learning_rate": 4.8082202103610844e-06, + "loss": 0.5839, + "step": 17190 + }, + { + "epoch": 0.5268787544440358, + "grad_norm": 1.6331095483442852, + "learning_rate": 4.807724258689006e-06, + "loss": 0.6279, + "step": 17191 + }, + { + "epoch": 0.526909402966777, + "grad_norm": 1.801610252329306, + "learning_rate": 4.807228308911469e-06, + "loss": 0.7115, + "step": 17192 + }, + { + "epoch": 0.5269400514895182, + "grad_norm": 0.7868744373444476, + "learning_rate": 4.806732361033361e-06, + "loss": 0.4483, + "step": 17193 + }, + { + "epoch": 0.5269707000122594, + "grad_norm": 1.7158987049444174, + "learning_rate": 4.8062364150595704e-06, + "loss": 0.5975, + "step": 17194 + }, + { + "epoch": 0.5270013485350006, + "grad_norm": 1.4138613292425384, + "learning_rate": 4.80574047099498e-06, + "loss": 0.5137, + "step": 17195 + }, + { + "epoch": 0.5270319970577418, + "grad_norm": 1.6098086066223996, + "learning_rate": 4.805244528844477e-06, + "loss": 0.6087, + "step": 17196 + }, + { + "epoch": 0.527062645580483, + "grad_norm": 0.7848175023566697, + "learning_rate": 4.8047485886129516e-06, + "loss": 0.4448, + "step": 17197 + }, + { + "epoch": 0.5270932941032243, + "grad_norm": 0.768995854891381, + "learning_rate": 4.8042526503052875e-06, + "loss": 0.4297, + "step": 17198 + }, + { + "epoch": 0.5271239426259654, + "grad_norm": 1.6146732422161552, + "learning_rate": 4.803756713926373e-06, + "loss": 0.6352, + "step": 17199 + }, + { + "epoch": 0.5271545911487067, + "grad_norm": 1.6582042504612244, + "learning_rate": 4.803260779481093e-06, + "loss": 0.6703, + "step": 17200 + }, + { + "epoch": 0.5271852396714478, + "grad_norm": 0.7588305034737717, + "learning_rate": 4.802764846974334e-06, + "loss": 0.444, + "step": 17201 + }, + { + "epoch": 0.5272158881941891, + "grad_norm": 1.6117881249106338, + "learning_rate": 4.8022689164109855e-06, + "loss": 0.5731, + "step": 17202 + }, + { + "epoch": 0.5272465367169302, + "grad_norm": 1.5517049220402739, + "learning_rate": 4.801772987795932e-06, + "loss": 0.661, + "step": 17203 + }, + { + "epoch": 0.5272771852396715, + "grad_norm": 1.8719403349860624, + "learning_rate": 4.801277061134057e-06, + "loss": 0.6799, + "step": 17204 + }, + { + "epoch": 0.5273078337624126, + "grad_norm": 1.73707006610965, + "learning_rate": 4.800781136430254e-06, + "loss": 0.5652, + "step": 17205 + }, + { + "epoch": 0.5273384822851539, + "grad_norm": 1.5491633411256756, + "learning_rate": 4.800285213689403e-06, + "loss": 0.6948, + "step": 17206 + }, + { + "epoch": 0.527369130807895, + "grad_norm": 1.7186945872038482, + "learning_rate": 4.799789292916392e-06, + "loss": 0.6216, + "step": 17207 + }, + { + "epoch": 0.5273997793306363, + "grad_norm": 1.695614892653983, + "learning_rate": 4.79929337411611e-06, + "loss": 0.6391, + "step": 17208 + }, + { + "epoch": 0.5274304278533775, + "grad_norm": 1.6671875284868296, + "learning_rate": 4.798797457293441e-06, + "loss": 0.6439, + "step": 17209 + }, + { + "epoch": 0.5274610763761187, + "grad_norm": 1.6545055712022458, + "learning_rate": 4.798301542453273e-06, + "loss": 0.6111, + "step": 17210 + }, + { + "epoch": 0.5274917248988599, + "grad_norm": 1.9945788443576267, + 
"learning_rate": 4.7978056296004905e-06, + "loss": 0.6328, + "step": 17211 + }, + { + "epoch": 0.5275223734216011, + "grad_norm": 1.9436594005771344, + "learning_rate": 4.797309718739981e-06, + "loss": 0.6533, + "step": 17212 + }, + { + "epoch": 0.5275530219443423, + "grad_norm": 1.7890725964680083, + "learning_rate": 4.796813809876631e-06, + "loss": 0.6425, + "step": 17213 + }, + { + "epoch": 0.5275836704670834, + "grad_norm": 0.9204319404143607, + "learning_rate": 4.7963179030153275e-06, + "loss": 0.4536, + "step": 17214 + }, + { + "epoch": 0.5276143189898247, + "grad_norm": 1.785641258888243, + "learning_rate": 4.7958219981609535e-06, + "loss": 0.6453, + "step": 17215 + }, + { + "epoch": 0.5276449675125658, + "grad_norm": 1.7680720383291342, + "learning_rate": 4.7953260953184e-06, + "loss": 0.7418, + "step": 17216 + }, + { + "epoch": 0.5276756160353071, + "grad_norm": 2.1320505483650463, + "learning_rate": 4.794830194492548e-06, + "loss": 0.7467, + "step": 17217 + }, + { + "epoch": 0.5277062645580483, + "grad_norm": 1.6840482300551434, + "learning_rate": 4.79433429568829e-06, + "loss": 0.5998, + "step": 17218 + }, + { + "epoch": 0.5277369130807895, + "grad_norm": 1.8290067106437784, + "learning_rate": 4.7938383989105065e-06, + "loss": 0.6398, + "step": 17219 + }, + { + "epoch": 0.5277675616035307, + "grad_norm": 1.732989315698536, + "learning_rate": 4.793342504164086e-06, + "loss": 0.6908, + "step": 17220 + }, + { + "epoch": 0.5277982101262719, + "grad_norm": 1.6322539008798695, + "learning_rate": 4.792846611453916e-06, + "loss": 0.6907, + "step": 17221 + }, + { + "epoch": 0.5278288586490131, + "grad_norm": 1.5827517869705778, + "learning_rate": 4.792350720784881e-06, + "loss": 0.6149, + "step": 17222 + }, + { + "epoch": 0.5278595071717543, + "grad_norm": 1.8476533022041348, + "learning_rate": 4.791854832161867e-06, + "loss": 0.6813, + "step": 17223 + }, + { + "epoch": 0.5278901556944955, + "grad_norm": 1.7090230308213228, + "learning_rate": 4.79135894558976e-06, + "loss": 0.6978, + "step": 17224 + }, + { + "epoch": 0.5279208042172367, + "grad_norm": 1.8528096613738392, + "learning_rate": 4.790863061073447e-06, + "loss": 0.6738, + "step": 17225 + }, + { + "epoch": 0.5279514527399779, + "grad_norm": 1.8955910760635117, + "learning_rate": 4.790367178617815e-06, + "loss": 0.5973, + "step": 17226 + }, + { + "epoch": 0.5279821012627192, + "grad_norm": 1.626529933362141, + "learning_rate": 4.789871298227749e-06, + "loss": 0.6088, + "step": 17227 + }, + { + "epoch": 0.5280127497854603, + "grad_norm": 1.7181974120120118, + "learning_rate": 4.789375419908132e-06, + "loss": 0.7504, + "step": 17228 + }, + { + "epoch": 0.5280433983082016, + "grad_norm": 1.6023713474284769, + "learning_rate": 4.788879543663856e-06, + "loss": 0.6209, + "step": 17229 + }, + { + "epoch": 0.5280740468309427, + "grad_norm": 1.9257321143820183, + "learning_rate": 4.788383669499803e-06, + "loss": 0.7341, + "step": 17230 + }, + { + "epoch": 0.528104695353684, + "grad_norm": 1.6388267736852218, + "learning_rate": 4.7878877974208585e-06, + "loss": 0.6304, + "step": 17231 + }, + { + "epoch": 0.5281353438764251, + "grad_norm": 0.8355628602810383, + "learning_rate": 4.7873919274319116e-06, + "loss": 0.4426, + "step": 17232 + }, + { + "epoch": 0.5281659923991664, + "grad_norm": 1.7686750311664399, + "learning_rate": 4.7868960595378465e-06, + "loss": 0.6069, + "step": 17233 + }, + { + "epoch": 0.5281966409219075, + "grad_norm": 1.7660057880937554, + "learning_rate": 4.7864001937435465e-06, + "loss": 0.5426, + "step": 17234 + }, + 
{ + "epoch": 0.5282272894446488, + "grad_norm": 1.5635934702774634, + "learning_rate": 4.785904330053902e-06, + "loss": 0.6657, + "step": 17235 + }, + { + "epoch": 0.52825793796739, + "grad_norm": 1.783283255279513, + "learning_rate": 4.785408468473796e-06, + "loss": 0.6319, + "step": 17236 + }, + { + "epoch": 0.5282885864901312, + "grad_norm": 1.976854199068246, + "learning_rate": 4.784912609008116e-06, + "loss": 0.7026, + "step": 17237 + }, + { + "epoch": 0.5283192350128724, + "grad_norm": 1.6218324155779908, + "learning_rate": 4.784416751661749e-06, + "loss": 0.6478, + "step": 17238 + }, + { + "epoch": 0.5283498835356136, + "grad_norm": 1.6033860425834292, + "learning_rate": 4.783920896439576e-06, + "loss": 0.5818, + "step": 17239 + }, + { + "epoch": 0.5283805320583548, + "grad_norm": 1.6671168844609632, + "learning_rate": 4.7834250433464866e-06, + "loss": 0.5622, + "step": 17240 + }, + { + "epoch": 0.528411180581096, + "grad_norm": 1.7074672979711438, + "learning_rate": 4.782929192387366e-06, + "loss": 0.5361, + "step": 17241 + }, + { + "epoch": 0.5284418291038372, + "grad_norm": 1.6176187457773716, + "learning_rate": 4.782433343567099e-06, + "loss": 0.5527, + "step": 17242 + }, + { + "epoch": 0.5284724776265785, + "grad_norm": 1.6583588853768034, + "learning_rate": 4.7819374968905725e-06, + "loss": 0.6126, + "step": 17243 + }, + { + "epoch": 0.5285031261493196, + "grad_norm": 0.7975195078231418, + "learning_rate": 4.78144165236267e-06, + "loss": 0.4344, + "step": 17244 + }, + { + "epoch": 0.5285337746720608, + "grad_norm": 0.7509303862454239, + "learning_rate": 4.780945809988281e-06, + "loss": 0.4126, + "step": 17245 + }, + { + "epoch": 0.528564423194802, + "grad_norm": 1.6544242382648242, + "learning_rate": 4.780449969772289e-06, + "loss": 0.7066, + "step": 17246 + }, + { + "epoch": 0.5285950717175432, + "grad_norm": 1.8486776806908434, + "learning_rate": 4.779954131719577e-06, + "loss": 0.6146, + "step": 17247 + }, + { + "epoch": 0.5286257202402844, + "grad_norm": 0.7687319353798829, + "learning_rate": 4.779458295835037e-06, + "loss": 0.4406, + "step": 17248 + }, + { + "epoch": 0.5286563687630256, + "grad_norm": 1.871691491095665, + "learning_rate": 4.778962462123549e-06, + "loss": 0.6812, + "step": 17249 + }, + { + "epoch": 0.5286870172857668, + "grad_norm": 1.6845053095250593, + "learning_rate": 4.7784666305899995e-06, + "loss": 0.5572, + "step": 17250 + }, + { + "epoch": 0.528717665808508, + "grad_norm": 1.8520043427621828, + "learning_rate": 4.777970801239276e-06, + "loss": 0.6121, + "step": 17251 + }, + { + "epoch": 0.5287483143312492, + "grad_norm": 1.3969640391993392, + "learning_rate": 4.777474974076261e-06, + "loss": 0.6084, + "step": 17252 + }, + { + "epoch": 0.5287789628539904, + "grad_norm": 1.6840116783855337, + "learning_rate": 4.776979149105845e-06, + "loss": 0.6691, + "step": 17253 + }, + { + "epoch": 0.5288096113767317, + "grad_norm": 1.7264314035830175, + "learning_rate": 4.776483326332909e-06, + "loss": 0.7001, + "step": 17254 + }, + { + "epoch": 0.5288402598994728, + "grad_norm": 1.6175882359229714, + "learning_rate": 4.775987505762339e-06, + "loss": 0.5588, + "step": 17255 + }, + { + "epoch": 0.5288709084222141, + "grad_norm": 1.8956011464951619, + "learning_rate": 4.775491687399024e-06, + "loss": 0.7083, + "step": 17256 + }, + { + "epoch": 0.5289015569449552, + "grad_norm": 1.5561925127979281, + "learning_rate": 4.774995871247847e-06, + "loss": 0.5948, + "step": 17257 + }, + { + "epoch": 0.5289322054676965, + "grad_norm": 1.7006201674374801, + "learning_rate": 
4.77450005731369e-06, + "loss": 0.5277, + "step": 17258 + }, + { + "epoch": 0.5289628539904376, + "grad_norm": 1.5978000820953848, + "learning_rate": 4.774004245601444e-06, + "loss": 0.645, + "step": 17259 + }, + { + "epoch": 0.5289935025131789, + "grad_norm": 1.5898610182881117, + "learning_rate": 4.773508436115992e-06, + "loss": 0.6029, + "step": 17260 + }, + { + "epoch": 0.52902415103592, + "grad_norm": 0.837955642563936, + "learning_rate": 4.773012628862218e-06, + "loss": 0.4293, + "step": 17261 + }, + { + "epoch": 0.5290547995586613, + "grad_norm": 1.7306541026147857, + "learning_rate": 4.7725168238450096e-06, + "loss": 0.6075, + "step": 17262 + }, + { + "epoch": 0.5290854480814025, + "grad_norm": 1.8434289764427656, + "learning_rate": 4.772021021069249e-06, + "loss": 0.6469, + "step": 17263 + }, + { + "epoch": 0.5291160966041437, + "grad_norm": 0.8155201705705245, + "learning_rate": 4.771525220539826e-06, + "loss": 0.4197, + "step": 17264 + }, + { + "epoch": 0.5291467451268849, + "grad_norm": 1.613456636109744, + "learning_rate": 4.771029422261624e-06, + "loss": 0.5367, + "step": 17265 + }, + { + "epoch": 0.5291773936496261, + "grad_norm": 1.5714594387811749, + "learning_rate": 4.770533626239526e-06, + "loss": 0.5993, + "step": 17266 + }, + { + "epoch": 0.5292080421723673, + "grad_norm": 1.9392410478405497, + "learning_rate": 4.7700378324784195e-06, + "loss": 0.6741, + "step": 17267 + }, + { + "epoch": 0.5292386906951085, + "grad_norm": 1.687661185874196, + "learning_rate": 4.769542040983191e-06, + "loss": 0.5867, + "step": 17268 + }, + { + "epoch": 0.5292693392178497, + "grad_norm": 0.7956364828317848, + "learning_rate": 4.76904625175872e-06, + "loss": 0.4391, + "step": 17269 + }, + { + "epoch": 0.529299987740591, + "grad_norm": 1.8481733931300461, + "learning_rate": 4.7685504648098984e-06, + "loss": 0.6456, + "step": 17270 + }, + { + "epoch": 0.5293306362633321, + "grad_norm": 1.651676826079063, + "learning_rate": 4.768054680141605e-06, + "loss": 0.6816, + "step": 17271 + }, + { + "epoch": 0.5293612847860734, + "grad_norm": 1.4729384296963288, + "learning_rate": 4.767558897758732e-06, + "loss": 0.6267, + "step": 17272 + }, + { + "epoch": 0.5293919333088145, + "grad_norm": 0.7830221536932322, + "learning_rate": 4.767063117666159e-06, + "loss": 0.4181, + "step": 17273 + }, + { + "epoch": 0.5294225818315558, + "grad_norm": 1.4408752682092214, + "learning_rate": 4.766567339868772e-06, + "loss": 0.5856, + "step": 17274 + }, + { + "epoch": 0.5294532303542969, + "grad_norm": 1.5585574867967098, + "learning_rate": 4.766071564371458e-06, + "loss": 0.5904, + "step": 17275 + }, + { + "epoch": 0.5294838788770381, + "grad_norm": 1.651319164765854, + "learning_rate": 4.7655757911791e-06, + "loss": 0.6009, + "step": 17276 + }, + { + "epoch": 0.5295145273997793, + "grad_norm": 1.7590451585123954, + "learning_rate": 4.765080020296583e-06, + "loss": 0.6616, + "step": 17277 + }, + { + "epoch": 0.5295451759225205, + "grad_norm": 1.9199398829602519, + "learning_rate": 4.764584251728794e-06, + "loss": 0.6193, + "step": 17278 + }, + { + "epoch": 0.5295758244452617, + "grad_norm": 1.8596489406111945, + "learning_rate": 4.764088485480615e-06, + "loss": 0.6134, + "step": 17279 + }, + { + "epoch": 0.5296064729680029, + "grad_norm": 0.7894409275315628, + "learning_rate": 4.763592721556934e-06, + "loss": 0.447, + "step": 17280 + }, + { + "epoch": 0.5296371214907442, + "grad_norm": 1.788530637335222, + "learning_rate": 4.763096959962635e-06, + "loss": 0.5588, + "step": 17281 + }, + { + "epoch": 
0.5296677700134853, + "grad_norm": 1.7348902528998993, + "learning_rate": 4.7626012007026e-06, + "loss": 0.5903, + "step": 17282 + }, + { + "epoch": 0.5296984185362266, + "grad_norm": 1.7182126787869396, + "learning_rate": 4.762105443781719e-06, + "loss": 0.6108, + "step": 17283 + }, + { + "epoch": 0.5297290670589677, + "grad_norm": 1.7030567623163886, + "learning_rate": 4.761609689204872e-06, + "loss": 0.672, + "step": 17284 + }, + { + "epoch": 0.529759715581709, + "grad_norm": 1.7495629465588356, + "learning_rate": 4.7611139369769455e-06, + "loss": 0.6362, + "step": 17285 + }, + { + "epoch": 0.5297903641044501, + "grad_norm": 1.8767808781165431, + "learning_rate": 4.760618187102825e-06, + "loss": 0.7226, + "step": 17286 + }, + { + "epoch": 0.5298210126271914, + "grad_norm": 1.8523885344987623, + "learning_rate": 4.7601224395873955e-06, + "loss": 0.66, + "step": 17287 + }, + { + "epoch": 0.5298516611499325, + "grad_norm": 1.5786533841662505, + "learning_rate": 4.75962669443554e-06, + "loss": 0.6895, + "step": 17288 + }, + { + "epoch": 0.5298823096726738, + "grad_norm": 1.7007152732064195, + "learning_rate": 4.759130951652144e-06, + "loss": 0.6005, + "step": 17289 + }, + { + "epoch": 0.529912958195415, + "grad_norm": 0.7643042567541958, + "learning_rate": 4.758635211242092e-06, + "loss": 0.4214, + "step": 17290 + }, + { + "epoch": 0.5299436067181562, + "grad_norm": 1.9143212053048484, + "learning_rate": 4.7581394732102714e-06, + "loss": 0.5246, + "step": 17291 + }, + { + "epoch": 0.5299742552408974, + "grad_norm": 1.670531176956011, + "learning_rate": 4.757643737561563e-06, + "loss": 0.4754, + "step": 17292 + }, + { + "epoch": 0.5300049037636386, + "grad_norm": 0.7946409515079994, + "learning_rate": 4.757148004300852e-06, + "loss": 0.4526, + "step": 17293 + }, + { + "epoch": 0.5300355522863798, + "grad_norm": 0.8065203509276236, + "learning_rate": 4.756652273433025e-06, + "loss": 0.4354, + "step": 17294 + }, + { + "epoch": 0.530066200809121, + "grad_norm": 1.6931048577276477, + "learning_rate": 4.756156544962966e-06, + "loss": 0.5456, + "step": 17295 + }, + { + "epoch": 0.5300968493318622, + "grad_norm": 1.6968635321023553, + "learning_rate": 4.755660818895557e-06, + "loss": 0.7213, + "step": 17296 + }, + { + "epoch": 0.5301274978546034, + "grad_norm": 2.010371036913974, + "learning_rate": 4.755165095235685e-06, + "loss": 0.6324, + "step": 17297 + }, + { + "epoch": 0.5301581463773446, + "grad_norm": 1.4147544506074008, + "learning_rate": 4.7546693739882335e-06, + "loss": 0.5975, + "step": 17298 + }, + { + "epoch": 0.5301887949000859, + "grad_norm": 1.6396368788547724, + "learning_rate": 4.7541736551580885e-06, + "loss": 0.633, + "step": 17299 + }, + { + "epoch": 0.530219443422827, + "grad_norm": 1.666758350066823, + "learning_rate": 4.753677938750135e-06, + "loss": 0.6311, + "step": 17300 + }, + { + "epoch": 0.5302500919455683, + "grad_norm": 1.7739046172153572, + "learning_rate": 4.753182224769252e-06, + "loss": 0.6042, + "step": 17301 + }, + { + "epoch": 0.5302807404683094, + "grad_norm": 1.7348513295345827, + "learning_rate": 4.752686513220331e-06, + "loss": 0.7174, + "step": 17302 + }, + { + "epoch": 0.5303113889910507, + "grad_norm": 0.833745379375176, + "learning_rate": 4.752190804108252e-06, + "loss": 0.4531, + "step": 17303 + }, + { + "epoch": 0.5303420375137918, + "grad_norm": 0.7806270803157008, + "learning_rate": 4.751695097437899e-06, + "loss": 0.4387, + "step": 17304 + }, + { + "epoch": 0.5303726860365331, + "grad_norm": 1.790174128184844, + "learning_rate": 
4.75119939321416e-06, + "loss": 0.66, + "step": 17305 + }, + { + "epoch": 0.5304033345592742, + "grad_norm": 0.7883534153533815, + "learning_rate": 4.750703691441915e-06, + "loss": 0.4267, + "step": 17306 + }, + { + "epoch": 0.5304339830820154, + "grad_norm": 1.858341676105463, + "learning_rate": 4.750207992126051e-06, + "loss": 0.7125, + "step": 17307 + }, + { + "epoch": 0.5304646316047567, + "grad_norm": 1.6716993272649117, + "learning_rate": 4.749712295271453e-06, + "loss": 0.6115, + "step": 17308 + }, + { + "epoch": 0.5304952801274978, + "grad_norm": 1.860477275440261, + "learning_rate": 4.749216600883002e-06, + "loss": 0.6962, + "step": 17309 + }, + { + "epoch": 0.5305259286502391, + "grad_norm": 1.7039607192730066, + "learning_rate": 4.748720908965584e-06, + "loss": 0.6242, + "step": 17310 + }, + { + "epoch": 0.5305565771729802, + "grad_norm": 1.6111812937975867, + "learning_rate": 4.748225219524085e-06, + "loss": 0.5207, + "step": 17311 + }, + { + "epoch": 0.5305872256957215, + "grad_norm": 1.662487718425246, + "learning_rate": 4.747729532563384e-06, + "loss": 0.6178, + "step": 17312 + }, + { + "epoch": 0.5306178742184626, + "grad_norm": 1.800616336527116, + "learning_rate": 4.747233848088373e-06, + "loss": 0.6744, + "step": 17313 + }, + { + "epoch": 0.5306485227412039, + "grad_norm": 1.7453617605470468, + "learning_rate": 4.746738166103929e-06, + "loss": 0.6317, + "step": 17314 + }, + { + "epoch": 0.530679171263945, + "grad_norm": 1.7820022573319692, + "learning_rate": 4.746242486614938e-06, + "loss": 0.5814, + "step": 17315 + }, + { + "epoch": 0.5307098197866863, + "grad_norm": 2.01022850204285, + "learning_rate": 4.745746809626286e-06, + "loss": 0.7112, + "step": 17316 + }, + { + "epoch": 0.5307404683094274, + "grad_norm": 0.9240377632547886, + "learning_rate": 4.745251135142854e-06, + "loss": 0.4307, + "step": 17317 + }, + { + "epoch": 0.5307711168321687, + "grad_norm": 1.6281654156093992, + "learning_rate": 4.744755463169529e-06, + "loss": 0.5953, + "step": 17318 + }, + { + "epoch": 0.5308017653549099, + "grad_norm": 1.7389066552614156, + "learning_rate": 4.744259793711195e-06, + "loss": 0.6269, + "step": 17319 + }, + { + "epoch": 0.5308324138776511, + "grad_norm": 1.7756814829324075, + "learning_rate": 4.743764126772732e-06, + "loss": 0.6232, + "step": 17320 + }, + { + "epoch": 0.5308630624003923, + "grad_norm": 1.7512484742826546, + "learning_rate": 4.743268462359029e-06, + "loss": 0.6754, + "step": 17321 + }, + { + "epoch": 0.5308937109231335, + "grad_norm": 1.61357078430452, + "learning_rate": 4.742772800474967e-06, + "loss": 0.6053, + "step": 17322 + }, + { + "epoch": 0.5309243594458747, + "grad_norm": 1.559182473194278, + "learning_rate": 4.742277141125428e-06, + "loss": 0.5594, + "step": 17323 + }, + { + "epoch": 0.5309550079686159, + "grad_norm": 1.6067152217764857, + "learning_rate": 4.741781484315302e-06, + "loss": 0.6821, + "step": 17324 + }, + { + "epoch": 0.5309856564913571, + "grad_norm": 1.7917843615236915, + "learning_rate": 4.741285830049465e-06, + "loss": 0.6084, + "step": 17325 + }, + { + "epoch": 0.5310163050140984, + "grad_norm": 1.6150977333252372, + "learning_rate": 4.7407901783328096e-06, + "loss": 0.6064, + "step": 17326 + }, + { + "epoch": 0.5310469535368395, + "grad_norm": 1.8747437687135688, + "learning_rate": 4.740294529170212e-06, + "loss": 0.6551, + "step": 17327 + }, + { + "epoch": 0.5310776020595808, + "grad_norm": 1.9158381908188231, + "learning_rate": 4.739798882566558e-06, + "loss": 0.6977, + "step": 17328 + }, + { + "epoch": 
0.5311082505823219, + "grad_norm": 1.7358481106776238, + "learning_rate": 4.739303238526735e-06, + "loss": 0.6859, + "step": 17329 + }, + { + "epoch": 0.5311388991050632, + "grad_norm": 1.7490067603125963, + "learning_rate": 4.738807597055623e-06, + "loss": 0.7022, + "step": 17330 + }, + { + "epoch": 0.5311695476278043, + "grad_norm": 1.7801993594601209, + "learning_rate": 4.738311958158104e-06, + "loss": 0.6664, + "step": 17331 + }, + { + "epoch": 0.5312001961505456, + "grad_norm": 1.8581312312450577, + "learning_rate": 4.7378163218390674e-06, + "loss": 0.6838, + "step": 17332 + }, + { + "epoch": 0.5312308446732867, + "grad_norm": 1.6285573917468785, + "learning_rate": 4.737320688103394e-06, + "loss": 0.7062, + "step": 17333 + }, + { + "epoch": 0.531261493196028, + "grad_norm": 1.9562388786785843, + "learning_rate": 4.736825056955964e-06, + "loss": 0.6773, + "step": 17334 + }, + { + "epoch": 0.5312921417187692, + "grad_norm": 1.5012379085111924, + "learning_rate": 4.736329428401667e-06, + "loss": 0.5545, + "step": 17335 + }, + { + "epoch": 0.5313227902415104, + "grad_norm": 1.6104223935291704, + "learning_rate": 4.735833802445381e-06, + "loss": 0.6133, + "step": 17336 + }, + { + "epoch": 0.5313534387642516, + "grad_norm": 1.9447647394972383, + "learning_rate": 4.735338179091994e-06, + "loss": 0.6939, + "step": 17337 + }, + { + "epoch": 0.5313840872869927, + "grad_norm": 1.739667733959929, + "learning_rate": 4.734842558346387e-06, + "loss": 0.6083, + "step": 17338 + }, + { + "epoch": 0.531414735809734, + "grad_norm": 0.8177238246054879, + "learning_rate": 4.734346940213443e-06, + "loss": 0.4485, + "step": 17339 + }, + { + "epoch": 0.5314453843324751, + "grad_norm": 1.755282246787942, + "learning_rate": 4.733851324698048e-06, + "loss": 0.5504, + "step": 17340 + }, + { + "epoch": 0.5314760328552164, + "grad_norm": 1.6443370176714054, + "learning_rate": 4.733355711805085e-06, + "loss": 0.6545, + "step": 17341 + }, + { + "epoch": 0.5315066813779575, + "grad_norm": 1.8413756430387385, + "learning_rate": 4.732860101539434e-06, + "loss": 0.6756, + "step": 17342 + }, + { + "epoch": 0.5315373299006988, + "grad_norm": 1.6017954834102575, + "learning_rate": 4.732364493905983e-06, + "loss": 0.618, + "step": 17343 + }, + { + "epoch": 0.53156797842344, + "grad_norm": 1.6218330038856283, + "learning_rate": 4.73186888890961e-06, + "loss": 0.5864, + "step": 17344 + }, + { + "epoch": 0.5315986269461812, + "grad_norm": 1.6766392469466636, + "learning_rate": 4.731373286555205e-06, + "loss": 0.6341, + "step": 17345 + }, + { + "epoch": 0.5316292754689224, + "grad_norm": 0.7865121432313098, + "learning_rate": 4.730877686847647e-06, + "loss": 0.4325, + "step": 17346 + }, + { + "epoch": 0.5316599239916636, + "grad_norm": 1.6055371076046627, + "learning_rate": 4.730382089791818e-06, + "loss": 0.6541, + "step": 17347 + }, + { + "epoch": 0.5316905725144048, + "grad_norm": 0.7882927694190845, + "learning_rate": 4.729886495392604e-06, + "loss": 0.4599, + "step": 17348 + }, + { + "epoch": 0.531721221037146, + "grad_norm": 1.9228042899864932, + "learning_rate": 4.729390903654888e-06, + "loss": 0.6556, + "step": 17349 + }, + { + "epoch": 0.5317518695598872, + "grad_norm": 1.6809297701837864, + "learning_rate": 4.728895314583553e-06, + "loss": 0.6058, + "step": 17350 + }, + { + "epoch": 0.5317825180826284, + "grad_norm": 1.8199639887737653, + "learning_rate": 4.728399728183481e-06, + "loss": 0.6225, + "step": 17351 + }, + { + "epoch": 0.5318131666053696, + "grad_norm": 1.6193562502522914, + "learning_rate": 
4.727904144459556e-06, + "loss": 0.6659, + "step": 17352 + }, + { + "epoch": 0.5318438151281109, + "grad_norm": 1.6845345544070387, + "learning_rate": 4.727408563416661e-06, + "loss": 0.6249, + "step": 17353 + }, + { + "epoch": 0.531874463650852, + "grad_norm": 1.5193356822826136, + "learning_rate": 4.7269129850596815e-06, + "loss": 0.6522, + "step": 17354 + }, + { + "epoch": 0.5319051121735933, + "grad_norm": 0.7875476957111055, + "learning_rate": 4.726417409393494e-06, + "loss": 0.4231, + "step": 17355 + }, + { + "epoch": 0.5319357606963344, + "grad_norm": 1.8008035698900495, + "learning_rate": 4.7259218364229896e-06, + "loss": 0.6555, + "step": 17356 + }, + { + "epoch": 0.5319664092190757, + "grad_norm": 1.938149068686533, + "learning_rate": 4.725426266153046e-06, + "loss": 0.7016, + "step": 17357 + }, + { + "epoch": 0.5319970577418168, + "grad_norm": 1.8535081134615932, + "learning_rate": 4.724930698588548e-06, + "loss": 0.6424, + "step": 17358 + }, + { + "epoch": 0.5320277062645581, + "grad_norm": 1.7962530021981367, + "learning_rate": 4.724435133734377e-06, + "loss": 0.5268, + "step": 17359 + }, + { + "epoch": 0.5320583547872992, + "grad_norm": 1.666529499492938, + "learning_rate": 4.723939571595419e-06, + "loss": 0.659, + "step": 17360 + }, + { + "epoch": 0.5320890033100405, + "grad_norm": 1.6625246630491226, + "learning_rate": 4.7234440121765525e-06, + "loss": 0.5415, + "step": 17361 + }, + { + "epoch": 0.5321196518327816, + "grad_norm": 0.7812775593935175, + "learning_rate": 4.722948455482665e-06, + "loss": 0.4155, + "step": 17362 + }, + { + "epoch": 0.5321503003555229, + "grad_norm": 1.7164224140798499, + "learning_rate": 4.722452901518636e-06, + "loss": 0.6736, + "step": 17363 + }, + { + "epoch": 0.5321809488782641, + "grad_norm": 1.7489995863654606, + "learning_rate": 4.721957350289351e-06, + "loss": 0.6588, + "step": 17364 + }, + { + "epoch": 0.5322115974010053, + "grad_norm": 1.6673207778020314, + "learning_rate": 4.721461801799692e-06, + "loss": 0.5176, + "step": 17365 + }, + { + "epoch": 0.5322422459237465, + "grad_norm": 1.9295135705185664, + "learning_rate": 4.720966256054538e-06, + "loss": 0.6055, + "step": 17366 + }, + { + "epoch": 0.5322728944464877, + "grad_norm": 0.8453705304029162, + "learning_rate": 4.720470713058777e-06, + "loss": 0.4598, + "step": 17367 + }, + { + "epoch": 0.5323035429692289, + "grad_norm": 1.862749101430272, + "learning_rate": 4.71997517281729e-06, + "loss": 0.6289, + "step": 17368 + }, + { + "epoch": 0.53233419149197, + "grad_norm": 1.5752993177258843, + "learning_rate": 4.719479635334958e-06, + "loss": 0.5676, + "step": 17369 + }, + { + "epoch": 0.5323648400147113, + "grad_norm": 1.7780947997798453, + "learning_rate": 4.718984100616665e-06, + "loss": 0.5891, + "step": 17370 + }, + { + "epoch": 0.5323954885374524, + "grad_norm": 1.7488745183070764, + "learning_rate": 4.718488568667294e-06, + "loss": 0.6543, + "step": 17371 + }, + { + "epoch": 0.5324261370601937, + "grad_norm": 1.57252868329563, + "learning_rate": 4.7179930394917274e-06, + "loss": 0.6066, + "step": 17372 + }, + { + "epoch": 0.5324567855829349, + "grad_norm": 1.785267869353525, + "learning_rate": 4.717497513094847e-06, + "loss": 0.6635, + "step": 17373 + }, + { + "epoch": 0.5324874341056761, + "grad_norm": 1.919343062907238, + "learning_rate": 4.717001989481536e-06, + "loss": 0.6804, + "step": 17374 + }, + { + "epoch": 0.5325180826284173, + "grad_norm": 0.7766154425214823, + "learning_rate": 4.716506468656677e-06, + "loss": 0.4208, + "step": 17375 + }, + { + "epoch": 
0.5325487311511585, + "grad_norm": 1.7776887988087653, + "learning_rate": 4.716010950625153e-06, + "loss": 0.6594, + "step": 17376 + }, + { + "epoch": 0.5325793796738997, + "grad_norm": 1.5590940555787751, + "learning_rate": 4.7155154353918436e-06, + "loss": 0.5618, + "step": 17377 + }, + { + "epoch": 0.5326100281966409, + "grad_norm": 2.0968936640465388, + "learning_rate": 4.715019922961636e-06, + "loss": 0.7284, + "step": 17378 + }, + { + "epoch": 0.5326406767193821, + "grad_norm": 1.6275810923852276, + "learning_rate": 4.7145244133394085e-06, + "loss": 0.7255, + "step": 17379 + }, + { + "epoch": 0.5326713252421234, + "grad_norm": 0.80166551437377, + "learning_rate": 4.714028906530046e-06, + "loss": 0.442, + "step": 17380 + }, + { + "epoch": 0.5327019737648645, + "grad_norm": 0.7751135902909128, + "learning_rate": 4.713533402538429e-06, + "loss": 0.4236, + "step": 17381 + }, + { + "epoch": 0.5327326222876058, + "grad_norm": 1.7599711159665565, + "learning_rate": 4.71303790136944e-06, + "loss": 0.6749, + "step": 17382 + }, + { + "epoch": 0.5327632708103469, + "grad_norm": 0.807103217825373, + "learning_rate": 4.712542403027963e-06, + "loss": 0.4346, + "step": 17383 + }, + { + "epoch": 0.5327939193330882, + "grad_norm": 1.7552099863913764, + "learning_rate": 4.71204690751888e-06, + "loss": 0.5393, + "step": 17384 + }, + { + "epoch": 0.5328245678558293, + "grad_norm": 1.6069148771053636, + "learning_rate": 4.71155141484707e-06, + "loss": 0.6222, + "step": 17385 + }, + { + "epoch": 0.5328552163785706, + "grad_norm": 0.811925758811521, + "learning_rate": 4.711055925017421e-06, + "loss": 0.4381, + "step": 17386 + }, + { + "epoch": 0.5328858649013117, + "grad_norm": 1.5931454560327925, + "learning_rate": 4.710560438034811e-06, + "loss": 0.5208, + "step": 17387 + }, + { + "epoch": 0.532916513424053, + "grad_norm": 1.6311021522335705, + "learning_rate": 4.710064953904121e-06, + "loss": 0.548, + "step": 17388 + }, + { + "epoch": 0.5329471619467941, + "grad_norm": 1.6307841395936935, + "learning_rate": 4.7095694726302365e-06, + "loss": 0.5733, + "step": 17389 + }, + { + "epoch": 0.5329778104695354, + "grad_norm": 1.8118281487018577, + "learning_rate": 4.709073994218038e-06, + "loss": 0.6615, + "step": 17390 + }, + { + "epoch": 0.5330084589922766, + "grad_norm": 1.4712879990338006, + "learning_rate": 4.708578518672408e-06, + "loss": 0.5896, + "step": 17391 + }, + { + "epoch": 0.5330391075150178, + "grad_norm": 1.65539411916748, + "learning_rate": 4.708083045998229e-06, + "loss": 0.6249, + "step": 17392 + }, + { + "epoch": 0.533069756037759, + "grad_norm": 3.86392957132895, + "learning_rate": 4.707587576200381e-06, + "loss": 0.6376, + "step": 17393 + }, + { + "epoch": 0.5331004045605002, + "grad_norm": 1.6778635959080694, + "learning_rate": 4.707092109283749e-06, + "loss": 0.5486, + "step": 17394 + }, + { + "epoch": 0.5331310530832414, + "grad_norm": 1.921019520590209, + "learning_rate": 4.706596645253214e-06, + "loss": 0.6537, + "step": 17395 + }, + { + "epoch": 0.5331617016059826, + "grad_norm": 1.651423589237798, + "learning_rate": 4.706101184113655e-06, + "loss": 0.6331, + "step": 17396 + }, + { + "epoch": 0.5331923501287238, + "grad_norm": 1.5827359696947825, + "learning_rate": 4.7056057258699585e-06, + "loss": 0.5865, + "step": 17397 + }, + { + "epoch": 0.533222998651465, + "grad_norm": 1.7352272739975976, + "learning_rate": 4.7051102705270004e-06, + "loss": 0.6355, + "step": 17398 + }, + { + "epoch": 0.5332536471742062, + "grad_norm": 1.3631896387907119, + "learning_rate": 
4.70461481808967e-06, + "loss": 0.5957, + "step": 17399 + }, + { + "epoch": 0.5332842956969474, + "grad_norm": 1.7547896271096028, + "learning_rate": 4.704119368562845e-06, + "loss": 0.6797, + "step": 17400 + }, + { + "epoch": 0.5333149442196886, + "grad_norm": 1.7107684067427555, + "learning_rate": 4.703623921951406e-06, + "loss": 0.6353, + "step": 17401 + }, + { + "epoch": 0.5333455927424298, + "grad_norm": 1.8343553151659104, + "learning_rate": 4.703128478260237e-06, + "loss": 0.5933, + "step": 17402 + }, + { + "epoch": 0.533376241265171, + "grad_norm": 1.7228409511752263, + "learning_rate": 4.70263303749422e-06, + "loss": 0.6433, + "step": 17403 + }, + { + "epoch": 0.5334068897879122, + "grad_norm": 0.8965781895634278, + "learning_rate": 4.702137599658234e-06, + "loss": 0.4192, + "step": 17404 + }, + { + "epoch": 0.5334375383106534, + "grad_norm": 1.7633118103942416, + "learning_rate": 4.701642164757164e-06, + "loss": 0.6808, + "step": 17405 + }, + { + "epoch": 0.5334681868333946, + "grad_norm": 1.9085685602146683, + "learning_rate": 4.7011467327958886e-06, + "loss": 0.6755, + "step": 17406 + }, + { + "epoch": 0.5334988353561358, + "grad_norm": 1.9143283875381887, + "learning_rate": 4.700651303779291e-06, + "loss": 0.6435, + "step": 17407 + }, + { + "epoch": 0.533529483878877, + "grad_norm": 1.7785454056349488, + "learning_rate": 4.700155877712256e-06, + "loss": 0.6725, + "step": 17408 + }, + { + "epoch": 0.5335601324016183, + "grad_norm": 1.7862301101239575, + "learning_rate": 4.699660454599657e-06, + "loss": 0.6526, + "step": 17409 + }, + { + "epoch": 0.5335907809243594, + "grad_norm": 0.8216241891188147, + "learning_rate": 4.699165034446384e-06, + "loss": 0.4451, + "step": 17410 + }, + { + "epoch": 0.5336214294471007, + "grad_norm": 1.7216055143924445, + "learning_rate": 4.698669617257314e-06, + "loss": 0.6387, + "step": 17411 + }, + { + "epoch": 0.5336520779698418, + "grad_norm": 1.8037427382173488, + "learning_rate": 4.698174203037328e-06, + "loss": 0.5352, + "step": 17412 + }, + { + "epoch": 0.5336827264925831, + "grad_norm": 1.6585639964118144, + "learning_rate": 4.697678791791311e-06, + "loss": 0.61, + "step": 17413 + }, + { + "epoch": 0.5337133750153242, + "grad_norm": 1.75961674562892, + "learning_rate": 4.697183383524141e-06, + "loss": 0.6232, + "step": 17414 + }, + { + "epoch": 0.5337440235380655, + "grad_norm": 1.7254250942639184, + "learning_rate": 4.696687978240699e-06, + "loss": 0.6973, + "step": 17415 + }, + { + "epoch": 0.5337746720608066, + "grad_norm": 1.965535205643349, + "learning_rate": 4.69619257594587e-06, + "loss": 0.6848, + "step": 17416 + }, + { + "epoch": 0.5338053205835479, + "grad_norm": 1.5685902534491583, + "learning_rate": 4.695697176644532e-06, + "loss": 0.5247, + "step": 17417 + }, + { + "epoch": 0.533835969106289, + "grad_norm": 1.659117683356685, + "learning_rate": 4.695201780341569e-06, + "loss": 0.5845, + "step": 17418 + }, + { + "epoch": 0.5338666176290303, + "grad_norm": 0.8321450019374609, + "learning_rate": 4.694706387041861e-06, + "loss": 0.4209, + "step": 17419 + }, + { + "epoch": 0.5338972661517715, + "grad_norm": 1.9280753857137967, + "learning_rate": 4.694210996750287e-06, + "loss": 0.7321, + "step": 17420 + }, + { + "epoch": 0.5339279146745127, + "grad_norm": 1.7037347934850866, + "learning_rate": 4.693715609471733e-06, + "loss": 0.5363, + "step": 17421 + }, + { + "epoch": 0.5339585631972539, + "grad_norm": 1.7794590857256791, + "learning_rate": 4.693220225211076e-06, + "loss": 0.7235, + "step": 17422 + }, + { + "epoch": 
0.5339892117199951, + "grad_norm": 1.5656455912730742, + "learning_rate": 4.692724843973198e-06, + "loss": 0.6121, + "step": 17423 + }, + { + "epoch": 0.5340198602427363, + "grad_norm": 0.8523990110085031, + "learning_rate": 4.692229465762982e-06, + "loss": 0.4183, + "step": 17424 + }, + { + "epoch": 0.5340505087654775, + "grad_norm": 2.006781146918783, + "learning_rate": 4.691734090585306e-06, + "loss": 0.6604, + "step": 17425 + }, + { + "epoch": 0.5340811572882187, + "grad_norm": 2.047552398178804, + "learning_rate": 4.691238718445055e-06, + "loss": 0.6279, + "step": 17426 + }, + { + "epoch": 0.53411180581096, + "grad_norm": 0.7784702801663548, + "learning_rate": 4.690743349347107e-06, + "loss": 0.4438, + "step": 17427 + }, + { + "epoch": 0.5341424543337011, + "grad_norm": 1.7683459008514772, + "learning_rate": 4.690247983296343e-06, + "loss": 0.5391, + "step": 17428 + }, + { + "epoch": 0.5341731028564424, + "grad_norm": 1.8808039605569389, + "learning_rate": 4.689752620297647e-06, + "loss": 0.5806, + "step": 17429 + }, + { + "epoch": 0.5342037513791835, + "grad_norm": 1.6915865834666528, + "learning_rate": 4.6892572603558975e-06, + "loss": 0.6233, + "step": 17430 + }, + { + "epoch": 0.5342343999019247, + "grad_norm": 1.745113051310851, + "learning_rate": 4.6887619034759735e-06, + "loss": 0.5535, + "step": 17431 + }, + { + "epoch": 0.5342650484246659, + "grad_norm": 1.8540900819387875, + "learning_rate": 4.68826654966276e-06, + "loss": 0.6082, + "step": 17432 + }, + { + "epoch": 0.5342956969474071, + "grad_norm": 1.668333741919333, + "learning_rate": 4.687771198921134e-06, + "loss": 0.5627, + "step": 17433 + }, + { + "epoch": 0.5343263454701483, + "grad_norm": 0.820795075249107, + "learning_rate": 4.68727585125598e-06, + "loss": 0.4185, + "step": 17434 + }, + { + "epoch": 0.5343569939928895, + "grad_norm": 1.7699058753051515, + "learning_rate": 4.686780506672177e-06, + "loss": 0.627, + "step": 17435 + }, + { + "epoch": 0.5343876425156308, + "grad_norm": 0.8533906321372862, + "learning_rate": 4.686285165174605e-06, + "loss": 0.4547, + "step": 17436 + }, + { + "epoch": 0.5344182910383719, + "grad_norm": 1.7358494544047864, + "learning_rate": 4.6857898267681465e-06, + "loss": 0.6803, + "step": 17437 + }, + { + "epoch": 0.5344489395611132, + "grad_norm": 1.9964383418018095, + "learning_rate": 4.685294491457682e-06, + "loss": 0.7346, + "step": 17438 + }, + { + "epoch": 0.5344795880838543, + "grad_norm": 1.9421560347131803, + "learning_rate": 4.684799159248088e-06, + "loss": 0.6518, + "step": 17439 + }, + { + "epoch": 0.5345102366065956, + "grad_norm": 1.7669441418484868, + "learning_rate": 4.684303830144252e-06, + "loss": 0.6721, + "step": 17440 + }, + { + "epoch": 0.5345408851293367, + "grad_norm": 1.8963572988035078, + "learning_rate": 4.683808504151051e-06, + "loss": 0.6547, + "step": 17441 + }, + { + "epoch": 0.534571533652078, + "grad_norm": 1.7718825634277051, + "learning_rate": 4.683313181273363e-06, + "loss": 0.615, + "step": 17442 + }, + { + "epoch": 0.5346021821748191, + "grad_norm": 0.7837113321562795, + "learning_rate": 4.682817861516073e-06, + "loss": 0.4063, + "step": 17443 + }, + { + "epoch": 0.5346328306975604, + "grad_norm": 1.9606521514471704, + "learning_rate": 4.682322544884059e-06, + "loss": 0.7004, + "step": 17444 + }, + { + "epoch": 0.5346634792203016, + "grad_norm": 2.060733504784079, + "learning_rate": 4.681827231382203e-06, + "loss": 0.6663, + "step": 17445 + }, + { + "epoch": 0.5346941277430428, + "grad_norm": 0.7887008172007915, + "learning_rate": 
4.681331921015385e-06, + "loss": 0.4302, + "step": 17446 + }, + { + "epoch": 0.534724776265784, + "grad_norm": 1.640639041540056, + "learning_rate": 4.680836613788483e-06, + "loss": 0.5087, + "step": 17447 + }, + { + "epoch": 0.5347554247885252, + "grad_norm": 1.5049863910641401, + "learning_rate": 4.680341309706382e-06, + "loss": 0.5506, + "step": 17448 + }, + { + "epoch": 0.5347860733112664, + "grad_norm": 1.4818873629340832, + "learning_rate": 4.679846008773961e-06, + "loss": 0.552, + "step": 17449 + }, + { + "epoch": 0.5348167218340076, + "grad_norm": 1.5015109994487552, + "learning_rate": 4.679350710996094e-06, + "loss": 0.639, + "step": 17450 + }, + { + "epoch": 0.5348473703567488, + "grad_norm": 1.857116234969272, + "learning_rate": 4.678855416377672e-06, + "loss": 0.71, + "step": 17451 + }, + { + "epoch": 0.53487801887949, + "grad_norm": 1.7887488950489252, + "learning_rate": 4.6783601249235655e-06, + "loss": 0.6726, + "step": 17452 + }, + { + "epoch": 0.5349086674022312, + "grad_norm": 1.7690231001653263, + "learning_rate": 4.677864836638662e-06, + "loss": 0.6544, + "step": 17453 + }, + { + "epoch": 0.5349393159249725, + "grad_norm": 1.8243269843805365, + "learning_rate": 4.6773695515278376e-06, + "loss": 0.6377, + "step": 17454 + }, + { + "epoch": 0.5349699644477136, + "grad_norm": 1.5440934894483838, + "learning_rate": 4.676874269595973e-06, + "loss": 0.643, + "step": 17455 + }, + { + "epoch": 0.5350006129704549, + "grad_norm": 1.7061081918491148, + "learning_rate": 4.6763789908479495e-06, + "loss": 0.5334, + "step": 17456 + }, + { + "epoch": 0.535031261493196, + "grad_norm": 1.7598536954058113, + "learning_rate": 4.6758837152886475e-06, + "loss": 0.6666, + "step": 17457 + }, + { + "epoch": 0.5350619100159373, + "grad_norm": 1.9435858580289669, + "learning_rate": 4.675388442922944e-06, + "loss": 0.6548, + "step": 17458 + }, + { + "epoch": 0.5350925585386784, + "grad_norm": 1.7188050258801022, + "learning_rate": 4.674893173755723e-06, + "loss": 0.5775, + "step": 17459 + }, + { + "epoch": 0.5351232070614197, + "grad_norm": 1.624583850774213, + "learning_rate": 4.674397907791861e-06, + "loss": 0.594, + "step": 17460 + }, + { + "epoch": 0.5351538555841608, + "grad_norm": 1.5383826608176432, + "learning_rate": 4.673902645036242e-06, + "loss": 0.5194, + "step": 17461 + }, + { + "epoch": 0.535184504106902, + "grad_norm": 1.866535091473537, + "learning_rate": 4.673407385493743e-06, + "loss": 0.7335, + "step": 17462 + }, + { + "epoch": 0.5352151526296433, + "grad_norm": 1.736551778702997, + "learning_rate": 4.672912129169242e-06, + "loss": 0.6074, + "step": 17463 + }, + { + "epoch": 0.5352458011523844, + "grad_norm": 1.8693085953318962, + "learning_rate": 4.672416876067626e-06, + "loss": 0.6149, + "step": 17464 + }, + { + "epoch": 0.5352764496751257, + "grad_norm": 0.7968505713467741, + "learning_rate": 4.6719216261937685e-06, + "loss": 0.4272, + "step": 17465 + }, + { + "epoch": 0.5353070981978668, + "grad_norm": 1.6960538273968202, + "learning_rate": 4.671426379552549e-06, + "loss": 0.6388, + "step": 17466 + }, + { + "epoch": 0.5353377467206081, + "grad_norm": 0.79897595154646, + "learning_rate": 4.6709311361488515e-06, + "loss": 0.4372, + "step": 17467 + }, + { + "epoch": 0.5353683952433492, + "grad_norm": 1.7191674005873434, + "learning_rate": 4.670435895987554e-06, + "loss": 0.541, + "step": 17468 + }, + { + "epoch": 0.5353990437660905, + "grad_norm": 1.7976442919707867, + "learning_rate": 4.6699406590735345e-06, + "loss": 0.6613, + "step": 17469 + }, + { + "epoch": 
0.5354296922888316, + "grad_norm": 0.7877296379709249, + "learning_rate": 4.669445425411675e-06, + "loss": 0.4365, + "step": 17470 + }, + { + "epoch": 0.5354603408115729, + "grad_norm": 1.8919733766804185, + "learning_rate": 4.668950195006854e-06, + "loss": 0.6162, + "step": 17471 + }, + { + "epoch": 0.535490989334314, + "grad_norm": 1.7099961845969132, + "learning_rate": 4.668454967863952e-06, + "loss": 0.6342, + "step": 17472 + }, + { + "epoch": 0.5355216378570553, + "grad_norm": 1.5904102591297422, + "learning_rate": 4.667959743987848e-06, + "loss": 0.619, + "step": 17473 + }, + { + "epoch": 0.5355522863797965, + "grad_norm": 1.87597906891774, + "learning_rate": 4.6674645233834196e-06, + "loss": 0.6146, + "step": 17474 + }, + { + "epoch": 0.5355829349025377, + "grad_norm": 0.7587581558725377, + "learning_rate": 4.666969306055552e-06, + "loss": 0.4171, + "step": 17475 + }, + { + "epoch": 0.5356135834252789, + "grad_norm": 1.5377594542052784, + "learning_rate": 4.666474092009119e-06, + "loss": 0.6, + "step": 17476 + }, + { + "epoch": 0.5356442319480201, + "grad_norm": 1.6031128770066834, + "learning_rate": 4.665978881249001e-06, + "loss": 0.6076, + "step": 17477 + }, + { + "epoch": 0.5356748804707613, + "grad_norm": 1.7641091832266138, + "learning_rate": 4.66548367378008e-06, + "loss": 0.7032, + "step": 17478 + }, + { + "epoch": 0.5357055289935025, + "grad_norm": 1.5253355317447215, + "learning_rate": 4.664988469607233e-06, + "loss": 0.5674, + "step": 17479 + }, + { + "epoch": 0.5357361775162437, + "grad_norm": 1.701736460638395, + "learning_rate": 4.664493268735341e-06, + "loss": 0.6537, + "step": 17480 + }, + { + "epoch": 0.535766826038985, + "grad_norm": 1.6364806008632202, + "learning_rate": 4.663998071169283e-06, + "loss": 0.6339, + "step": 17481 + }, + { + "epoch": 0.5357974745617261, + "grad_norm": 1.6165709039944625, + "learning_rate": 4.663502876913937e-06, + "loss": 0.5736, + "step": 17482 + }, + { + "epoch": 0.5358281230844674, + "grad_norm": 1.7850996953329834, + "learning_rate": 4.663007685974185e-06, + "loss": 0.6591, + "step": 17483 + }, + { + "epoch": 0.5358587716072085, + "grad_norm": 1.6340938024165512, + "learning_rate": 4.662512498354904e-06, + "loss": 0.6413, + "step": 17484 + }, + { + "epoch": 0.5358894201299498, + "grad_norm": 1.9765376399019996, + "learning_rate": 4.662017314060972e-06, + "loss": 0.6973, + "step": 17485 + }, + { + "epoch": 0.5359200686526909, + "grad_norm": 1.5360247835886294, + "learning_rate": 4.6615221330972715e-06, + "loss": 0.6936, + "step": 17486 + }, + { + "epoch": 0.5359507171754322, + "grad_norm": 1.8488541310938362, + "learning_rate": 4.661026955468678e-06, + "loss": 0.7267, + "step": 17487 + }, + { + "epoch": 0.5359813656981733, + "grad_norm": 1.7119325689041593, + "learning_rate": 4.660531781180075e-06, + "loss": 0.7226, + "step": 17488 + }, + { + "epoch": 0.5360120142209146, + "grad_norm": 1.821243587766829, + "learning_rate": 4.660036610236339e-06, + "loss": 0.546, + "step": 17489 + }, + { + "epoch": 0.5360426627436558, + "grad_norm": 1.8165130256794393, + "learning_rate": 4.659541442642348e-06, + "loss": 0.6414, + "step": 17490 + }, + { + "epoch": 0.536073311266397, + "grad_norm": 1.4128958850810973, + "learning_rate": 4.659046278402982e-06, + "loss": 0.627, + "step": 17491 + }, + { + "epoch": 0.5361039597891382, + "grad_norm": 1.9643769848359478, + "learning_rate": 4.658551117523123e-06, + "loss": 0.643, + "step": 17492 + }, + { + "epoch": 0.5361346083118793, + "grad_norm": 1.9253533876941342, + "learning_rate": 
4.6580559600076435e-06, + "loss": 0.6025, + "step": 17493 + }, + { + "epoch": 0.5361652568346206, + "grad_norm": 1.7844004145722765, + "learning_rate": 4.657560805861429e-06, + "loss": 0.6714, + "step": 17494 + }, + { + "epoch": 0.5361959053573617, + "grad_norm": 1.7171344160168804, + "learning_rate": 4.657065655089355e-06, + "loss": 0.6764, + "step": 17495 + }, + { + "epoch": 0.536226553880103, + "grad_norm": 1.7611496052980515, + "learning_rate": 4.6565705076962995e-06, + "loss": 0.6188, + "step": 17496 + }, + { + "epoch": 0.5362572024028441, + "grad_norm": 2.129702622380898, + "learning_rate": 4.6560753636871435e-06, + "loss": 0.7117, + "step": 17497 + }, + { + "epoch": 0.5362878509255854, + "grad_norm": 1.6608176235250323, + "learning_rate": 4.655580223066764e-06, + "loss": 0.5524, + "step": 17498 + }, + { + "epoch": 0.5363184994483265, + "grad_norm": 1.8774057818609087, + "learning_rate": 4.655085085840042e-06, + "loss": 0.6407, + "step": 17499 + }, + { + "epoch": 0.5363491479710678, + "grad_norm": 1.5862069561443715, + "learning_rate": 4.6545899520118545e-06, + "loss": 0.524, + "step": 17500 + }, + { + "epoch": 0.536379796493809, + "grad_norm": 1.8360700104213774, + "learning_rate": 4.654094821587079e-06, + "loss": 0.7467, + "step": 17501 + }, + { + "epoch": 0.5364104450165502, + "grad_norm": 1.6772064500183315, + "learning_rate": 4.653599694570598e-06, + "loss": 0.6356, + "step": 17502 + }, + { + "epoch": 0.5364410935392914, + "grad_norm": 1.6423352373633942, + "learning_rate": 4.6531045709672886e-06, + "loss": 0.6036, + "step": 17503 + }, + { + "epoch": 0.5364717420620326, + "grad_norm": 1.7828317970788472, + "learning_rate": 4.652609450782026e-06, + "loss": 0.6791, + "step": 17504 + }, + { + "epoch": 0.5365023905847738, + "grad_norm": 1.7878776261221654, + "learning_rate": 4.652114334019693e-06, + "loss": 0.5877, + "step": 17505 + }, + { + "epoch": 0.536533039107515, + "grad_norm": 1.6242212295318736, + "learning_rate": 4.6516192206851644e-06, + "loss": 0.6609, + "step": 17506 + }, + { + "epoch": 0.5365636876302562, + "grad_norm": 1.6283773690681793, + "learning_rate": 4.651124110783324e-06, + "loss": 0.599, + "step": 17507 + }, + { + "epoch": 0.5365943361529975, + "grad_norm": 1.7634986474847194, + "learning_rate": 4.650629004319046e-06, + "loss": 0.5907, + "step": 17508 + }, + { + "epoch": 0.5366249846757386, + "grad_norm": 1.7633620916825672, + "learning_rate": 4.6501339012972076e-06, + "loss": 0.5555, + "step": 17509 + }, + { + "epoch": 0.5366556331984799, + "grad_norm": 1.6470221138802814, + "learning_rate": 4.649638801722691e-06, + "loss": 0.6183, + "step": 17510 + }, + { + "epoch": 0.536686281721221, + "grad_norm": 1.8033389836160474, + "learning_rate": 4.649143705600373e-06, + "loss": 0.5461, + "step": 17511 + }, + { + "epoch": 0.5367169302439623, + "grad_norm": 1.8078899533108668, + "learning_rate": 4.648648612935131e-06, + "loss": 0.6786, + "step": 17512 + }, + { + "epoch": 0.5367475787667034, + "grad_norm": 1.6415963294332685, + "learning_rate": 4.648153523731846e-06, + "loss": 0.6331, + "step": 17513 + }, + { + "epoch": 0.5367782272894447, + "grad_norm": 1.7334936364871543, + "learning_rate": 4.647658437995394e-06, + "loss": 0.6428, + "step": 17514 + }, + { + "epoch": 0.5368088758121858, + "grad_norm": 1.8599611414607589, + "learning_rate": 4.647163355730651e-06, + "loss": 0.576, + "step": 17515 + }, + { + "epoch": 0.5368395243349271, + "grad_norm": 1.5856218074649338, + "learning_rate": 4.6466682769425e-06, + "loss": 0.5068, + "step": 17516 + }, + { + "epoch": 
0.5368701728576682, + "grad_norm": 1.923896620151049, + "learning_rate": 4.646173201635815e-06, + "loss": 0.6534, + "step": 17517 + }, + { + "epoch": 0.5369008213804095, + "grad_norm": 1.59436844953436, + "learning_rate": 4.6456781298154784e-06, + "loss": 0.5715, + "step": 17518 + }, + { + "epoch": 0.5369314699031507, + "grad_norm": 1.7731316097057306, + "learning_rate": 4.645183061486365e-06, + "loss": 0.6155, + "step": 17519 + }, + { + "epoch": 0.5369621184258919, + "grad_norm": 1.8518999781946077, + "learning_rate": 4.644687996653353e-06, + "loss": 0.6675, + "step": 17520 + }, + { + "epoch": 0.5369927669486331, + "grad_norm": 1.9426094579747684, + "learning_rate": 4.644192935321322e-06, + "loss": 0.5915, + "step": 17521 + }, + { + "epoch": 0.5370234154713743, + "grad_norm": 0.8197038764504234, + "learning_rate": 4.643697877495148e-06, + "loss": 0.4332, + "step": 17522 + }, + { + "epoch": 0.5370540639941155, + "grad_norm": 1.7723641202421343, + "learning_rate": 4.6432028231797095e-06, + "loss": 0.6636, + "step": 17523 + }, + { + "epoch": 0.5370847125168566, + "grad_norm": 1.7951272004977603, + "learning_rate": 4.642707772379887e-06, + "loss": 0.6707, + "step": 17524 + }, + { + "epoch": 0.5371153610395979, + "grad_norm": 1.7628178535629493, + "learning_rate": 4.642212725100554e-06, + "loss": 0.6888, + "step": 17525 + }, + { + "epoch": 0.537146009562339, + "grad_norm": 1.7069182026507492, + "learning_rate": 4.641717681346592e-06, + "loss": 0.6658, + "step": 17526 + }, + { + "epoch": 0.5371766580850803, + "grad_norm": 1.7655484777497246, + "learning_rate": 4.641222641122878e-06, + "loss": 0.6182, + "step": 17527 + }, + { + "epoch": 0.5372073066078215, + "grad_norm": 1.8358988222958716, + "learning_rate": 4.640727604434288e-06, + "loss": 0.6891, + "step": 17528 + }, + { + "epoch": 0.5372379551305627, + "grad_norm": 1.7324311592918684, + "learning_rate": 4.640232571285701e-06, + "loss": 0.6729, + "step": 17529 + }, + { + "epoch": 0.5372686036533039, + "grad_norm": 1.9353915224622003, + "learning_rate": 4.639737541681995e-06, + "loss": 0.6051, + "step": 17530 + }, + { + "epoch": 0.5372992521760451, + "grad_norm": 0.7953096366952886, + "learning_rate": 4.639242515628046e-06, + "loss": 0.4384, + "step": 17531 + }, + { + "epoch": 0.5373299006987863, + "grad_norm": 1.7161721120275237, + "learning_rate": 4.638747493128733e-06, + "loss": 0.5593, + "step": 17532 + }, + { + "epoch": 0.5373605492215275, + "grad_norm": 1.7572442734177502, + "learning_rate": 4.638252474188934e-06, + "loss": 0.6576, + "step": 17533 + }, + { + "epoch": 0.5373911977442687, + "grad_norm": 1.9016698226438857, + "learning_rate": 4.637757458813526e-06, + "loss": 0.5807, + "step": 17534 + }, + { + "epoch": 0.53742184626701, + "grad_norm": 1.9198658031142983, + "learning_rate": 4.637262447007387e-06, + "loss": 0.6403, + "step": 17535 + }, + { + "epoch": 0.5374524947897511, + "grad_norm": 1.7009507523470109, + "learning_rate": 4.636767438775392e-06, + "loss": 0.5994, + "step": 17536 + }, + { + "epoch": 0.5374831433124924, + "grad_norm": 1.813481717278424, + "learning_rate": 4.636272434122422e-06, + "loss": 0.689, + "step": 17537 + }, + { + "epoch": 0.5375137918352335, + "grad_norm": 1.6616503820981718, + "learning_rate": 4.635777433053353e-06, + "loss": 0.5629, + "step": 17538 + }, + { + "epoch": 0.5375444403579748, + "grad_norm": 1.601579859527588, + "learning_rate": 4.63528243557306e-06, + "loss": 0.5659, + "step": 17539 + }, + { + "epoch": 0.5375750888807159, + "grad_norm": 1.566825768530324, + "learning_rate": 
4.634787441686425e-06, + "loss": 0.6269, + "step": 17540 + }, + { + "epoch": 0.5376057374034572, + "grad_norm": 1.6865376891208697, + "learning_rate": 4.634292451398322e-06, + "loss": 0.6025, + "step": 17541 + }, + { + "epoch": 0.5376363859261983, + "grad_norm": 0.7787034671389703, + "learning_rate": 4.633797464713628e-06, + "loss": 0.4215, + "step": 17542 + }, + { + "epoch": 0.5376670344489396, + "grad_norm": 1.6153887920808847, + "learning_rate": 4.633302481637222e-06, + "loss": 0.6452, + "step": 17543 + }, + { + "epoch": 0.5376976829716807, + "grad_norm": 2.003379299834303, + "learning_rate": 4.63280750217398e-06, + "loss": 0.7262, + "step": 17544 + }, + { + "epoch": 0.537728331494422, + "grad_norm": 1.7055866969321853, + "learning_rate": 4.6323125263287805e-06, + "loss": 0.5946, + "step": 17545 + }, + { + "epoch": 0.5377589800171632, + "grad_norm": 1.7929354510509643, + "learning_rate": 4.631817554106501e-06, + "loss": 0.7037, + "step": 17546 + }, + { + "epoch": 0.5377896285399044, + "grad_norm": 0.7571217668281346, + "learning_rate": 4.631322585512014e-06, + "loss": 0.4146, + "step": 17547 + }, + { + "epoch": 0.5378202770626456, + "grad_norm": 1.6845245521779615, + "learning_rate": 4.6308276205502026e-06, + "loss": 0.5777, + "step": 17548 + }, + { + "epoch": 0.5378509255853868, + "grad_norm": 1.5058862868817973, + "learning_rate": 4.6303326592259405e-06, + "loss": 0.4617, + "step": 17549 + }, + { + "epoch": 0.537881574108128, + "grad_norm": 1.7204142710283536, + "learning_rate": 4.629837701544104e-06, + "loss": 0.5361, + "step": 17550 + }, + { + "epoch": 0.5379122226308692, + "grad_norm": 1.892926279214769, + "learning_rate": 4.6293427475095725e-06, + "loss": 0.6448, + "step": 17551 + }, + { + "epoch": 0.5379428711536104, + "grad_norm": 1.5826965107483093, + "learning_rate": 4.62884779712722e-06, + "loss": 0.6525, + "step": 17552 + }, + { + "epoch": 0.5379735196763517, + "grad_norm": 1.8252805427709768, + "learning_rate": 4.628352850401928e-06, + "loss": 0.6138, + "step": 17553 + }, + { + "epoch": 0.5380041681990928, + "grad_norm": 1.711546374259232, + "learning_rate": 4.6278579073385685e-06, + "loss": 0.6115, + "step": 17554 + }, + { + "epoch": 0.538034816721834, + "grad_norm": 1.8645625511794803, + "learning_rate": 4.627362967942021e-06, + "loss": 0.682, + "step": 17555 + }, + { + "epoch": 0.5380654652445752, + "grad_norm": 1.6121102806121255, + "learning_rate": 4.626868032217161e-06, + "loss": 0.5575, + "step": 17556 + }, + { + "epoch": 0.5380961137673164, + "grad_norm": 1.7662489518095745, + "learning_rate": 4.6263731001688676e-06, + "loss": 0.5843, + "step": 17557 + }, + { + "epoch": 0.5381267622900576, + "grad_norm": 2.027681562737041, + "learning_rate": 4.625878171802012e-06, + "loss": 0.7012, + "step": 17558 + }, + { + "epoch": 0.5381574108127988, + "grad_norm": 1.7696810544355521, + "learning_rate": 4.625383247121479e-06, + "loss": 0.7027, + "step": 17559 + }, + { + "epoch": 0.53818805933554, + "grad_norm": 1.6085792698348345, + "learning_rate": 4.624888326132136e-06, + "loss": 0.685, + "step": 17560 + }, + { + "epoch": 0.5382187078582812, + "grad_norm": 1.8941846194087497, + "learning_rate": 4.624393408838868e-06, + "loss": 0.6353, + "step": 17561 + }, + { + "epoch": 0.5382493563810224, + "grad_norm": 1.844048185542162, + "learning_rate": 4.623898495246547e-06, + "loss": 0.6826, + "step": 17562 + }, + { + "epoch": 0.5382800049037636, + "grad_norm": 0.830198673082686, + "learning_rate": 4.6234035853600486e-06, + "loss": 0.4562, + "step": 17563 + }, + { + "epoch": 
0.5383106534265049, + "grad_norm": 1.669581943558097, + "learning_rate": 4.622908679184253e-06, + "loss": 0.6015, + "step": 17564 + }, + { + "epoch": 0.538341301949246, + "grad_norm": 2.0535649449069764, + "learning_rate": 4.6224137767240344e-06, + "loss": 0.5386, + "step": 17565 + }, + { + "epoch": 0.5383719504719873, + "grad_norm": 1.617730494024874, + "learning_rate": 4.621918877984268e-06, + "loss": 0.6435, + "step": 17566 + }, + { + "epoch": 0.5384025989947284, + "grad_norm": 1.758104235713165, + "learning_rate": 4.621423982969833e-06, + "loss": 0.633, + "step": 17567 + }, + { + "epoch": 0.5384332475174697, + "grad_norm": 1.5642357264534503, + "learning_rate": 4.620929091685605e-06, + "loss": 0.6164, + "step": 17568 + }, + { + "epoch": 0.5384638960402108, + "grad_norm": 2.1425240227995506, + "learning_rate": 4.620434204136457e-06, + "loss": 0.6765, + "step": 17569 + }, + { + "epoch": 0.5384945445629521, + "grad_norm": 1.8446214949804751, + "learning_rate": 4.619939320327271e-06, + "loss": 0.6728, + "step": 17570 + }, + { + "epoch": 0.5385251930856932, + "grad_norm": 1.9072995959112091, + "learning_rate": 4.619444440262918e-06, + "loss": 0.6672, + "step": 17571 + }, + { + "epoch": 0.5385558416084345, + "grad_norm": 1.6997148976526117, + "learning_rate": 4.618949563948277e-06, + "loss": 0.7333, + "step": 17572 + }, + { + "epoch": 0.5385864901311757, + "grad_norm": 0.8118490176043107, + "learning_rate": 4.618454691388224e-06, + "loss": 0.4252, + "step": 17573 + }, + { + "epoch": 0.5386171386539169, + "grad_norm": 1.7678023475562967, + "learning_rate": 4.617959822587633e-06, + "loss": 0.6672, + "step": 17574 + }, + { + "epoch": 0.5386477871766581, + "grad_norm": 0.8365517008767226, + "learning_rate": 4.617464957551383e-06, + "loss": 0.4334, + "step": 17575 + }, + { + "epoch": 0.5386784356993993, + "grad_norm": 1.9812689914142947, + "learning_rate": 4.616970096284348e-06, + "loss": 0.6874, + "step": 17576 + }, + { + "epoch": 0.5387090842221405, + "grad_norm": 1.8269668405387463, + "learning_rate": 4.616475238791405e-06, + "loss": 0.6667, + "step": 17577 + }, + { + "epoch": 0.5387397327448817, + "grad_norm": 2.0602429063014474, + "learning_rate": 4.61598038507743e-06, + "loss": 0.7186, + "step": 17578 + }, + { + "epoch": 0.5387703812676229, + "grad_norm": 1.837986856200391, + "learning_rate": 4.615485535147296e-06, + "loss": 0.657, + "step": 17579 + }, + { + "epoch": 0.5388010297903641, + "grad_norm": 1.7227763637405138, + "learning_rate": 4.6149906890058855e-06, + "loss": 0.5887, + "step": 17580 + }, + { + "epoch": 0.5388316783131053, + "grad_norm": 0.7617413155753756, + "learning_rate": 4.614495846658068e-06, + "loss": 0.4199, + "step": 17581 + }, + { + "epoch": 0.5388623268358466, + "grad_norm": 0.8222344697515642, + "learning_rate": 4.614001008108721e-06, + "loss": 0.4361, + "step": 17582 + }, + { + "epoch": 0.5388929753585877, + "grad_norm": 1.6740708253868053, + "learning_rate": 4.613506173362722e-06, + "loss": 0.6694, + "step": 17583 + }, + { + "epoch": 0.538923623881329, + "grad_norm": 1.6372386271201653, + "learning_rate": 4.613011342424945e-06, + "loss": 0.5919, + "step": 17584 + }, + { + "epoch": 0.5389542724040701, + "grad_norm": 1.7316131004982889, + "learning_rate": 4.612516515300266e-06, + "loss": 0.6867, + "step": 17585 + }, + { + "epoch": 0.5389849209268113, + "grad_norm": 1.6498884138828251, + "learning_rate": 4.612021691993561e-06, + "loss": 0.6502, + "step": 17586 + }, + { + "epoch": 0.5390155694495525, + "grad_norm": 1.707395833914051, + "learning_rate": 
4.611526872509705e-06, + "loss": 0.6814, + "step": 17587 + }, + { + "epoch": 0.5390462179722937, + "grad_norm": 1.7381626987687764, + "learning_rate": 4.611032056853575e-06, + "loss": 0.6685, + "step": 17588 + }, + { + "epoch": 0.539076866495035, + "grad_norm": 0.7539755395751385, + "learning_rate": 4.610537245030048e-06, + "loss": 0.405, + "step": 17589 + }, + { + "epoch": 0.5391075150177761, + "grad_norm": 1.9417382504316334, + "learning_rate": 4.610042437043993e-06, + "loss": 0.6605, + "step": 17590 + }, + { + "epoch": 0.5391381635405174, + "grad_norm": 0.7718212915858734, + "learning_rate": 4.609547632900292e-06, + "loss": 0.438, + "step": 17591 + }, + { + "epoch": 0.5391688120632585, + "grad_norm": 1.9754201819387798, + "learning_rate": 4.609052832603818e-06, + "loss": 0.6997, + "step": 17592 + }, + { + "epoch": 0.5391994605859998, + "grad_norm": 1.6190644775435916, + "learning_rate": 4.608558036159445e-06, + "loss": 0.5825, + "step": 17593 + }, + { + "epoch": 0.5392301091087409, + "grad_norm": 1.9650608834941805, + "learning_rate": 4.608063243572051e-06, + "loss": 0.6804, + "step": 17594 + }, + { + "epoch": 0.5392607576314822, + "grad_norm": 1.4890727587803767, + "learning_rate": 4.60756845484651e-06, + "loss": 0.6177, + "step": 17595 + }, + { + "epoch": 0.5392914061542233, + "grad_norm": 1.69009044304811, + "learning_rate": 4.607073669987698e-06, + "loss": 0.6627, + "step": 17596 + }, + { + "epoch": 0.5393220546769646, + "grad_norm": 0.8021892575134817, + "learning_rate": 4.606578889000489e-06, + "loss": 0.4287, + "step": 17597 + }, + { + "epoch": 0.5393527031997057, + "grad_norm": 1.6983851360429785, + "learning_rate": 4.606084111889758e-06, + "loss": 0.5965, + "step": 17598 + }, + { + "epoch": 0.539383351722447, + "grad_norm": 1.6167262129315196, + "learning_rate": 4.605589338660382e-06, + "loss": 0.5229, + "step": 17599 + }, + { + "epoch": 0.5394140002451882, + "grad_norm": 1.7025145208114847, + "learning_rate": 4.605094569317236e-06, + "loss": 0.5699, + "step": 17600 + }, + { + "epoch": 0.5394446487679294, + "grad_norm": 1.8366056249121303, + "learning_rate": 4.604599803865192e-06, + "loss": 0.59, + "step": 17601 + }, + { + "epoch": 0.5394752972906706, + "grad_norm": 1.8427729283263359, + "learning_rate": 4.60410504230913e-06, + "loss": 0.6141, + "step": 17602 + }, + { + "epoch": 0.5395059458134118, + "grad_norm": 0.8122929303995454, + "learning_rate": 4.603610284653921e-06, + "loss": 0.441, + "step": 17603 + }, + { + "epoch": 0.539536594336153, + "grad_norm": 2.0380254810317457, + "learning_rate": 4.603115530904441e-06, + "loss": 0.6441, + "step": 17604 + }, + { + "epoch": 0.5395672428588942, + "grad_norm": 1.7519598277305464, + "learning_rate": 4.602620781065565e-06, + "loss": 0.6501, + "step": 17605 + }, + { + "epoch": 0.5395978913816354, + "grad_norm": 0.8069288284807751, + "learning_rate": 4.602126035142168e-06, + "loss": 0.4353, + "step": 17606 + }, + { + "epoch": 0.5396285399043766, + "grad_norm": 1.8653625391536162, + "learning_rate": 4.601631293139126e-06, + "loss": 0.6604, + "step": 17607 + }, + { + "epoch": 0.5396591884271178, + "grad_norm": 1.6663182749105099, + "learning_rate": 4.601136555061312e-06, + "loss": 0.6223, + "step": 17608 + }, + { + "epoch": 0.5396898369498591, + "grad_norm": 1.6763593148805533, + "learning_rate": 4.600641820913601e-06, + "loss": 0.6167, + "step": 17609 + }, + { + "epoch": 0.5397204854726002, + "grad_norm": 1.6382956368722918, + "learning_rate": 4.60014709070087e-06, + "loss": 0.644, + "step": 17610 + }, + { + "epoch": 
0.5397511339953415, + "grad_norm": 1.4546480767527832, + "learning_rate": 4.599652364427993e-06, + "loss": 0.5174, + "step": 17611 + }, + { + "epoch": 0.5397817825180826, + "grad_norm": 1.6607607020820372, + "learning_rate": 4.59915764209984e-06, + "loss": 0.5818, + "step": 17612 + }, + { + "epoch": 0.5398124310408239, + "grad_norm": 1.8882570302008985, + "learning_rate": 4.598662923721293e-06, + "loss": 0.5741, + "step": 17613 + }, + { + "epoch": 0.539843079563565, + "grad_norm": 1.6322820927504615, + "learning_rate": 4.598168209297218e-06, + "loss": 0.5604, + "step": 17614 + }, + { + "epoch": 0.5398737280863063, + "grad_norm": 1.853404273483041, + "learning_rate": 4.5976734988325e-06, + "loss": 0.6492, + "step": 17615 + }, + { + "epoch": 0.5399043766090474, + "grad_norm": 1.749047736904495, + "learning_rate": 4.5971787923320055e-06, + "loss": 0.6735, + "step": 17616 + }, + { + "epoch": 0.5399350251317886, + "grad_norm": 1.6272244465125114, + "learning_rate": 4.59668408980061e-06, + "loss": 0.6869, + "step": 17617 + }, + { + "epoch": 0.5399656736545299, + "grad_norm": 1.6830356429540214, + "learning_rate": 4.596189391243192e-06, + "loss": 0.5992, + "step": 17618 + }, + { + "epoch": 0.539996322177271, + "grad_norm": 1.747424875684164, + "learning_rate": 4.595694696664622e-06, + "loss": 0.6045, + "step": 17619 + }, + { + "epoch": 0.5400269707000123, + "grad_norm": 1.5136068708402437, + "learning_rate": 4.595200006069775e-06, + "loss": 0.6991, + "step": 17620 + }, + { + "epoch": 0.5400576192227534, + "grad_norm": 1.5665415784500456, + "learning_rate": 4.5947053194635275e-06, + "loss": 0.6305, + "step": 17621 + }, + { + "epoch": 0.5400882677454947, + "grad_norm": 0.8510747002605299, + "learning_rate": 4.594210636850752e-06, + "loss": 0.4117, + "step": 17622 + }, + { + "epoch": 0.5401189162682358, + "grad_norm": 0.8413596459777912, + "learning_rate": 4.593715958236322e-06, + "loss": 0.4346, + "step": 17623 + }, + { + "epoch": 0.5401495647909771, + "grad_norm": 1.920039172146737, + "learning_rate": 4.593221283625113e-06, + "loss": 0.7484, + "step": 17624 + }, + { + "epoch": 0.5401802133137182, + "grad_norm": 1.7548456350558261, + "learning_rate": 4.592726613021997e-06, + "loss": 0.638, + "step": 17625 + }, + { + "epoch": 0.5402108618364595, + "grad_norm": 1.8449324372000444, + "learning_rate": 4.5922319464318524e-06, + "loss": 0.7039, + "step": 17626 + }, + { + "epoch": 0.5402415103592006, + "grad_norm": 1.802542276565198, + "learning_rate": 4.59173728385955e-06, + "loss": 0.6364, + "step": 17627 + }, + { + "epoch": 0.5402721588819419, + "grad_norm": 1.643725752212191, + "learning_rate": 4.591242625309963e-06, + "loss": 0.6416, + "step": 17628 + }, + { + "epoch": 0.5403028074046831, + "grad_norm": 0.84760985989571, + "learning_rate": 4.5907479707879694e-06, + "loss": 0.4239, + "step": 17629 + }, + { + "epoch": 0.5403334559274243, + "grad_norm": 1.8300249813916136, + "learning_rate": 4.59025332029844e-06, + "loss": 0.6501, + "step": 17630 + }, + { + "epoch": 0.5403641044501655, + "grad_norm": 0.8458453978573444, + "learning_rate": 4.589758673846249e-06, + "loss": 0.4457, + "step": 17631 + }, + { + "epoch": 0.5403947529729067, + "grad_norm": 1.779474044068733, + "learning_rate": 4.589264031436272e-06, + "loss": 0.6201, + "step": 17632 + }, + { + "epoch": 0.5404254014956479, + "grad_norm": 1.803896001402324, + "learning_rate": 4.588769393073379e-06, + "loss": 0.6029, + "step": 17633 + }, + { + "epoch": 0.5404560500183891, + "grad_norm": 0.7688540676230105, + "learning_rate": 4.588274758762449e-06, 
+ "loss": 0.4159, + "step": 17634 + }, + { + "epoch": 0.5404866985411303, + "grad_norm": 1.5515604185396306, + "learning_rate": 4.587780128508352e-06, + "loss": 0.5932, + "step": 17635 + }, + { + "epoch": 0.5405173470638716, + "grad_norm": 1.6904193233354707, + "learning_rate": 4.587285502315963e-06, + "loss": 0.6037, + "step": 17636 + }, + { + "epoch": 0.5405479955866127, + "grad_norm": 1.5508868954267974, + "learning_rate": 4.586790880190155e-06, + "loss": 0.572, + "step": 17637 + }, + { + "epoch": 0.540578644109354, + "grad_norm": 1.6366509247393741, + "learning_rate": 4.586296262135804e-06, + "loss": 0.5614, + "step": 17638 + }, + { + "epoch": 0.5406092926320951, + "grad_norm": 1.8052982593092142, + "learning_rate": 4.58580164815778e-06, + "loss": 0.6676, + "step": 17639 + }, + { + "epoch": 0.5406399411548364, + "grad_norm": 0.8654964407717722, + "learning_rate": 4.5853070382609584e-06, + "loss": 0.4433, + "step": 17640 + }, + { + "epoch": 0.5406705896775775, + "grad_norm": 1.5879520377885867, + "learning_rate": 4.584812432450212e-06, + "loss": 0.6214, + "step": 17641 + }, + { + "epoch": 0.5407012382003188, + "grad_norm": 1.6249013707326456, + "learning_rate": 4.584317830730417e-06, + "loss": 0.6059, + "step": 17642 + }, + { + "epoch": 0.5407318867230599, + "grad_norm": 1.900926058089687, + "learning_rate": 4.583823233106445e-06, + "loss": 0.6797, + "step": 17643 + }, + { + "epoch": 0.5407625352458012, + "grad_norm": 1.5820046512235821, + "learning_rate": 4.583328639583166e-06, + "loss": 0.5501, + "step": 17644 + }, + { + "epoch": 0.5407931837685424, + "grad_norm": 2.083161397365613, + "learning_rate": 4.58283405016546e-06, + "loss": 0.6476, + "step": 17645 + }, + { + "epoch": 0.5408238322912836, + "grad_norm": 1.671333491442986, + "learning_rate": 4.582339464858195e-06, + "loss": 0.5462, + "step": 17646 + }, + { + "epoch": 0.5408544808140248, + "grad_norm": 1.532197060603869, + "learning_rate": 4.581844883666246e-06, + "loss": 0.5914, + "step": 17647 + }, + { + "epoch": 0.5408851293367659, + "grad_norm": 1.6471351011722661, + "learning_rate": 4.581350306594487e-06, + "loss": 0.614, + "step": 17648 + }, + { + "epoch": 0.5409157778595072, + "grad_norm": 1.6142025286353607, + "learning_rate": 4.580855733647791e-06, + "loss": 0.6123, + "step": 17649 + }, + { + "epoch": 0.5409464263822483, + "grad_norm": 1.8245377410349914, + "learning_rate": 4.5803611648310295e-06, + "loss": 0.677, + "step": 17650 + }, + { + "epoch": 0.5409770749049896, + "grad_norm": 1.610996475444641, + "learning_rate": 4.579866600149077e-06, + "loss": 0.6409, + "step": 17651 + }, + { + "epoch": 0.5410077234277307, + "grad_norm": 1.9198730319542496, + "learning_rate": 4.579372039606806e-06, + "loss": 0.7084, + "step": 17652 + }, + { + "epoch": 0.541038371950472, + "grad_norm": 1.845562613303968, + "learning_rate": 4.578877483209091e-06, + "loss": 0.5968, + "step": 17653 + }, + { + "epoch": 0.5410690204732131, + "grad_norm": 1.8642463029950775, + "learning_rate": 4.578382930960805e-06, + "loss": 0.5921, + "step": 17654 + }, + { + "epoch": 0.5410996689959544, + "grad_norm": 1.926048269462034, + "learning_rate": 4.5778883828668165e-06, + "loss": 0.7613, + "step": 17655 + }, + { + "epoch": 0.5411303175186956, + "grad_norm": 1.745050066980814, + "learning_rate": 4.577393838932006e-06, + "loss": 0.7213, + "step": 17656 + }, + { + "epoch": 0.5411609660414368, + "grad_norm": 2.046749051212574, + "learning_rate": 4.576899299161239e-06, + "loss": 0.574, + "step": 17657 + }, + { + "epoch": 0.541191614564178, + "grad_norm": 
1.704244457282487, + "learning_rate": 4.576404763559392e-06, + "loss": 0.6423, + "step": 17658 + }, + { + "epoch": 0.5412222630869192, + "grad_norm": 1.7362915295233061, + "learning_rate": 4.575910232131338e-06, + "loss": 0.5706, + "step": 17659 + }, + { + "epoch": 0.5412529116096604, + "grad_norm": 2.5682852712724644, + "learning_rate": 4.575415704881947e-06, + "loss": 0.5566, + "step": 17660 + }, + { + "epoch": 0.5412835601324016, + "grad_norm": 1.7151689906236192, + "learning_rate": 4.5749211818160964e-06, + "loss": 0.5542, + "step": 17661 + }, + { + "epoch": 0.5413142086551428, + "grad_norm": 0.8218532004310775, + "learning_rate": 4.574426662938655e-06, + "loss": 0.4395, + "step": 17662 + }, + { + "epoch": 0.541344857177884, + "grad_norm": 0.8226924372252994, + "learning_rate": 4.573932148254496e-06, + "loss": 0.4341, + "step": 17663 + }, + { + "epoch": 0.5413755057006252, + "grad_norm": 1.9284633242911315, + "learning_rate": 4.573437637768493e-06, + "loss": 0.5817, + "step": 17664 + }, + { + "epoch": 0.5414061542233665, + "grad_norm": 1.6033323616235038, + "learning_rate": 4.57294313148552e-06, + "loss": 0.5475, + "step": 17665 + }, + { + "epoch": 0.5414368027461076, + "grad_norm": 0.7395819894560498, + "learning_rate": 4.572448629410444e-06, + "loss": 0.4237, + "step": 17666 + }, + { + "epoch": 0.5414674512688489, + "grad_norm": 1.7511253015307648, + "learning_rate": 4.571954131548144e-06, + "loss": 0.6203, + "step": 17667 + }, + { + "epoch": 0.54149809979159, + "grad_norm": 1.7821488044446898, + "learning_rate": 4.571459637903489e-06, + "loss": 0.592, + "step": 17668 + }, + { + "epoch": 0.5415287483143313, + "grad_norm": 0.7548903277764495, + "learning_rate": 4.57096514848135e-06, + "loss": 0.4291, + "step": 17669 + }, + { + "epoch": 0.5415593968370724, + "grad_norm": 1.5674317127161344, + "learning_rate": 4.570470663286603e-06, + "loss": 0.5845, + "step": 17670 + }, + { + "epoch": 0.5415900453598137, + "grad_norm": 1.6614465122646258, + "learning_rate": 4.569976182324116e-06, + "loss": 0.6663, + "step": 17671 + }, + { + "epoch": 0.5416206938825548, + "grad_norm": 1.6346949506100015, + "learning_rate": 4.569481705598766e-06, + "loss": 0.665, + "step": 17672 + }, + { + "epoch": 0.5416513424052961, + "grad_norm": 1.7378742118833246, + "learning_rate": 4.568987233115423e-06, + "loss": 0.6446, + "step": 17673 + }, + { + "epoch": 0.5416819909280373, + "grad_norm": 0.7967893385189685, + "learning_rate": 4.568492764878958e-06, + "loss": 0.436, + "step": 17674 + }, + { + "epoch": 0.5417126394507785, + "grad_norm": 1.6243462388896752, + "learning_rate": 4.567998300894245e-06, + "loss": 0.5336, + "step": 17675 + }, + { + "epoch": 0.5417432879735197, + "grad_norm": 1.4976830342854692, + "learning_rate": 4.567503841166155e-06, + "loss": 0.5898, + "step": 17676 + }, + { + "epoch": 0.5417739364962609, + "grad_norm": 0.8149854375377007, + "learning_rate": 4.56700938569956e-06, + "loss": 0.4467, + "step": 17677 + }, + { + "epoch": 0.5418045850190021, + "grad_norm": 1.784888445689272, + "learning_rate": 4.566514934499333e-06, + "loss": 0.6371, + "step": 17678 + }, + { + "epoch": 0.5418352335417432, + "grad_norm": 0.791000054416554, + "learning_rate": 4.566020487570344e-06, + "loss": 0.4344, + "step": 17679 + }, + { + "epoch": 0.5418658820644845, + "grad_norm": 0.7919366502065541, + "learning_rate": 4.565526044917467e-06, + "loss": 0.4323, + "step": 17680 + }, + { + "epoch": 0.5418965305872256, + "grad_norm": 1.607695619405609, + "learning_rate": 4.565031606545574e-06, + "loss": 0.593, + "step": 
17681 + }, + { + "epoch": 0.5419271791099669, + "grad_norm": 1.8915467980014762, + "learning_rate": 4.564537172459533e-06, + "loss": 0.6468, + "step": 17682 + }, + { + "epoch": 0.5419578276327081, + "grad_norm": 1.8995501434892683, + "learning_rate": 4.564042742664221e-06, + "loss": 0.5775, + "step": 17683 + }, + { + "epoch": 0.5419884761554493, + "grad_norm": 1.729199033181408, + "learning_rate": 4.563548317164509e-06, + "loss": 0.6322, + "step": 17684 + }, + { + "epoch": 0.5420191246781905, + "grad_norm": 0.7472899651763578, + "learning_rate": 4.563053895965263e-06, + "loss": 0.4316, + "step": 17685 + }, + { + "epoch": 0.5420497732009317, + "grad_norm": 0.7653618975033067, + "learning_rate": 4.562559479071362e-06, + "loss": 0.4298, + "step": 17686 + }, + { + "epoch": 0.5420804217236729, + "grad_norm": 1.7200489089867528, + "learning_rate": 4.562065066487672e-06, + "loss": 0.6377, + "step": 17687 + }, + { + "epoch": 0.5421110702464141, + "grad_norm": 1.919456705124247, + "learning_rate": 4.561570658219069e-06, + "loss": 0.5621, + "step": 17688 + }, + { + "epoch": 0.5421417187691553, + "grad_norm": 1.789498328220634, + "learning_rate": 4.561076254270422e-06, + "loss": 0.7298, + "step": 17689 + }, + { + "epoch": 0.5421723672918966, + "grad_norm": 1.6163430559447385, + "learning_rate": 4.560581854646602e-06, + "loss": 0.697, + "step": 17690 + }, + { + "epoch": 0.5422030158146377, + "grad_norm": 0.7528295582423489, + "learning_rate": 4.560087459352482e-06, + "loss": 0.4255, + "step": 17691 + }, + { + "epoch": 0.542233664337379, + "grad_norm": 1.635806447265174, + "learning_rate": 4.559593068392933e-06, + "loss": 0.7233, + "step": 17692 + }, + { + "epoch": 0.5422643128601201, + "grad_norm": 0.7453117964427001, + "learning_rate": 4.559098681772826e-06, + "loss": 0.4326, + "step": 17693 + }, + { + "epoch": 0.5422949613828614, + "grad_norm": 1.9392253328872127, + "learning_rate": 4.558604299497032e-06, + "loss": 0.6495, + "step": 17694 + }, + { + "epoch": 0.5423256099056025, + "grad_norm": 1.6216712775967965, + "learning_rate": 4.558109921570425e-06, + "loss": 0.5789, + "step": 17695 + }, + { + "epoch": 0.5423562584283438, + "grad_norm": 1.7083230583708844, + "learning_rate": 4.55761554799787e-06, + "loss": 0.6163, + "step": 17696 + }, + { + "epoch": 0.5423869069510849, + "grad_norm": 1.4745365964016377, + "learning_rate": 4.557121178784246e-06, + "loss": 0.6032, + "step": 17697 + }, + { + "epoch": 0.5424175554738262, + "grad_norm": 1.9769656935374627, + "learning_rate": 4.556626813934417e-06, + "loss": 0.76, + "step": 17698 + }, + { + "epoch": 0.5424482039965673, + "grad_norm": 1.8408664532289456, + "learning_rate": 4.55613245345326e-06, + "loss": 0.6547, + "step": 17699 + }, + { + "epoch": 0.5424788525193086, + "grad_norm": 2.017258163515001, + "learning_rate": 4.555638097345644e-06, + "loss": 0.7954, + "step": 17700 + }, + { + "epoch": 0.5425095010420498, + "grad_norm": 1.5771825239637463, + "learning_rate": 4.555143745616437e-06, + "loss": 0.558, + "step": 17701 + }, + { + "epoch": 0.542540149564791, + "grad_norm": 1.5377720532409196, + "learning_rate": 4.554649398270515e-06, + "loss": 0.7183, + "step": 17702 + }, + { + "epoch": 0.5425707980875322, + "grad_norm": 1.6864092832240816, + "learning_rate": 4.5541550553127445e-06, + "loss": 0.642, + "step": 17703 + }, + { + "epoch": 0.5426014466102734, + "grad_norm": 0.8129662582627311, + "learning_rate": 4.553660716747998e-06, + "loss": 0.4309, + "step": 17704 + }, + { + "epoch": 0.5426320951330146, + "grad_norm": 1.643457922473451, + 
"learning_rate": 4.5531663825811486e-06, + "loss": 0.6953, + "step": 17705 + }, + { + "epoch": 0.5426627436557558, + "grad_norm": 1.6790247209942524, + "learning_rate": 4.5526720528170635e-06, + "loss": 0.6521, + "step": 17706 + }, + { + "epoch": 0.542693392178497, + "grad_norm": 0.7885802499598368, + "learning_rate": 4.552177727460616e-06, + "loss": 0.4334, + "step": 17707 + }, + { + "epoch": 0.5427240407012383, + "grad_norm": 1.9897194363011719, + "learning_rate": 4.551683406516677e-06, + "loss": 0.6925, + "step": 17708 + }, + { + "epoch": 0.5427546892239794, + "grad_norm": 1.6417376733917681, + "learning_rate": 4.551189089990113e-06, + "loss": 0.6186, + "step": 17709 + }, + { + "epoch": 0.5427853377467206, + "grad_norm": 1.7767864205641597, + "learning_rate": 4.550694777885801e-06, + "loss": 0.5758, + "step": 17710 + }, + { + "epoch": 0.5428159862694618, + "grad_norm": 0.8208620156887023, + "learning_rate": 4.5502004702086076e-06, + "loss": 0.4536, + "step": 17711 + }, + { + "epoch": 0.542846634792203, + "grad_norm": 1.7480246771752055, + "learning_rate": 4.549706166963402e-06, + "loss": 0.6504, + "step": 17712 + }, + { + "epoch": 0.5428772833149442, + "grad_norm": 1.5277585472716297, + "learning_rate": 4.549211868155059e-06, + "loss": 0.6305, + "step": 17713 + }, + { + "epoch": 0.5429079318376854, + "grad_norm": 1.741557855401053, + "learning_rate": 4.548717573788445e-06, + "loss": 0.5862, + "step": 17714 + }, + { + "epoch": 0.5429385803604266, + "grad_norm": 1.6587569435153602, + "learning_rate": 4.548223283868433e-06, + "loss": 0.6431, + "step": 17715 + }, + { + "epoch": 0.5429692288831678, + "grad_norm": 1.5840105584153017, + "learning_rate": 4.547728998399894e-06, + "loss": 0.6644, + "step": 17716 + }, + { + "epoch": 0.542999877405909, + "grad_norm": 1.7112338391954547, + "learning_rate": 4.547234717387694e-06, + "loss": 0.5567, + "step": 17717 + }, + { + "epoch": 0.5430305259286502, + "grad_norm": 1.6526737771617686, + "learning_rate": 4.546740440836709e-06, + "loss": 0.6194, + "step": 17718 + }, + { + "epoch": 0.5430611744513915, + "grad_norm": 0.8005817321315349, + "learning_rate": 4.546246168751806e-06, + "loss": 0.4389, + "step": 17719 + }, + { + "epoch": 0.5430918229741326, + "grad_norm": 0.7889826237809647, + "learning_rate": 4.545751901137853e-06, + "loss": 0.4423, + "step": 17720 + }, + { + "epoch": 0.5431224714968739, + "grad_norm": 1.7031415738401445, + "learning_rate": 4.545257637999726e-06, + "loss": 0.5726, + "step": 17721 + }, + { + "epoch": 0.543153120019615, + "grad_norm": 1.9030953099172105, + "learning_rate": 4.544763379342291e-06, + "loss": 0.5909, + "step": 17722 + }, + { + "epoch": 0.5431837685423563, + "grad_norm": 1.8432190045343297, + "learning_rate": 4.544269125170417e-06, + "loss": 0.6218, + "step": 17723 + }, + { + "epoch": 0.5432144170650974, + "grad_norm": 1.8136666518691704, + "learning_rate": 4.543774875488978e-06, + "loss": 0.5553, + "step": 17724 + }, + { + "epoch": 0.5432450655878387, + "grad_norm": 1.8094399349317667, + "learning_rate": 4.54328063030284e-06, + "loss": 0.6848, + "step": 17725 + }, + { + "epoch": 0.5432757141105798, + "grad_norm": 1.7001537972808813, + "learning_rate": 4.542786389616876e-06, + "loss": 0.6143, + "step": 17726 + }, + { + "epoch": 0.5433063626333211, + "grad_norm": 1.9733683566527267, + "learning_rate": 4.542292153435956e-06, + "loss": 0.5914, + "step": 17727 + }, + { + "epoch": 0.5433370111560623, + "grad_norm": 1.5592892391943707, + "learning_rate": 4.541797921764945e-06, + "loss": 0.663, + "step": 17728 + }, + { + 
"epoch": 0.5433676596788035, + "grad_norm": 2.1144256810068622, + "learning_rate": 4.5413036946087195e-06, + "loss": 0.7197, + "step": 17729 + }, + { + "epoch": 0.5433983082015447, + "grad_norm": 0.838147891945538, + "learning_rate": 4.540809471972146e-06, + "loss": 0.4382, + "step": 17730 + }, + { + "epoch": 0.5434289567242859, + "grad_norm": 1.786210689512305, + "learning_rate": 4.540315253860092e-06, + "loss": 0.647, + "step": 17731 + }, + { + "epoch": 0.5434596052470271, + "grad_norm": 1.6604030523167796, + "learning_rate": 4.5398210402774315e-06, + "loss": 0.628, + "step": 17732 + }, + { + "epoch": 0.5434902537697683, + "grad_norm": 0.7907591791453016, + "learning_rate": 4.5393268312290304e-06, + "loss": 0.3897, + "step": 17733 + }, + { + "epoch": 0.5435209022925095, + "grad_norm": 1.8015430482632553, + "learning_rate": 4.53883262671976e-06, + "loss": 0.774, + "step": 17734 + }, + { + "epoch": 0.5435515508152508, + "grad_norm": 1.9445042614360954, + "learning_rate": 4.538338426754491e-06, + "loss": 0.7507, + "step": 17735 + }, + { + "epoch": 0.5435821993379919, + "grad_norm": 1.8054009219267504, + "learning_rate": 4.537844231338091e-06, + "loss": 0.5381, + "step": 17736 + }, + { + "epoch": 0.5436128478607332, + "grad_norm": 0.789775594921309, + "learning_rate": 4.53735004047543e-06, + "loss": 0.4244, + "step": 17737 + }, + { + "epoch": 0.5436434963834743, + "grad_norm": 2.0525862018075025, + "learning_rate": 4.536855854171378e-06, + "loss": 0.773, + "step": 17738 + }, + { + "epoch": 0.5436741449062156, + "grad_norm": 1.8010504131351226, + "learning_rate": 4.536361672430802e-06, + "loss": 0.6334, + "step": 17739 + }, + { + "epoch": 0.5437047934289567, + "grad_norm": 1.6595741489686038, + "learning_rate": 4.535867495258576e-06, + "loss": 0.6306, + "step": 17740 + }, + { + "epoch": 0.5437354419516979, + "grad_norm": 1.749773070384078, + "learning_rate": 4.535373322659563e-06, + "loss": 0.6273, + "step": 17741 + }, + { + "epoch": 0.5437660904744391, + "grad_norm": 1.7712893665090237, + "learning_rate": 4.534879154638637e-06, + "loss": 0.61, + "step": 17742 + }, + { + "epoch": 0.5437967389971803, + "grad_norm": 1.6682396330507823, + "learning_rate": 4.534384991200667e-06, + "loss": 0.6669, + "step": 17743 + }, + { + "epoch": 0.5438273875199215, + "grad_norm": 1.7866080511035853, + "learning_rate": 4.533890832350519e-06, + "loss": 0.6837, + "step": 17744 + }, + { + "epoch": 0.5438580360426627, + "grad_norm": 1.7678232918048749, + "learning_rate": 4.533396678093065e-06, + "loss": 0.7423, + "step": 17745 + }, + { + "epoch": 0.543888684565404, + "grad_norm": 1.8654023339915968, + "learning_rate": 4.532902528433173e-06, + "loss": 0.6231, + "step": 17746 + }, + { + "epoch": 0.5439193330881451, + "grad_norm": 1.7618082516569078, + "learning_rate": 4.53240838337571e-06, + "loss": 0.7428, + "step": 17747 + }, + { + "epoch": 0.5439499816108864, + "grad_norm": 1.88214088364251, + "learning_rate": 4.531914242925548e-06, + "loss": 0.6546, + "step": 17748 + }, + { + "epoch": 0.5439806301336275, + "grad_norm": 1.5549247706266358, + "learning_rate": 4.531420107087557e-06, + "loss": 0.6162, + "step": 17749 + }, + { + "epoch": 0.5440112786563688, + "grad_norm": 1.7969118656444631, + "learning_rate": 4.530925975866599e-06, + "loss": 0.6255, + "step": 17750 + }, + { + "epoch": 0.5440419271791099, + "grad_norm": 1.895764012229568, + "learning_rate": 4.530431849267552e-06, + "loss": 0.5838, + "step": 17751 + }, + { + "epoch": 0.5440725757018512, + "grad_norm": 0.8530756769434482, + "learning_rate": 
4.529937727295276e-06, + "loss": 0.4302, + "step": 17752 + }, + { + "epoch": 0.5441032242245923, + "grad_norm": 0.8065621605087043, + "learning_rate": 4.529443609954647e-06, + "loss": 0.415, + "step": 17753 + }, + { + "epoch": 0.5441338727473336, + "grad_norm": 1.6027471193016103, + "learning_rate": 4.52894949725053e-06, + "loss": 0.6383, + "step": 17754 + }, + { + "epoch": 0.5441645212700748, + "grad_norm": 1.6655112986859137, + "learning_rate": 4.528455389187792e-06, + "loss": 0.6715, + "step": 17755 + }, + { + "epoch": 0.544195169792816, + "grad_norm": 1.6664141226969442, + "learning_rate": 4.527961285771306e-06, + "loss": 0.6181, + "step": 17756 + }, + { + "epoch": 0.5442258183155572, + "grad_norm": 1.6941680878704755, + "learning_rate": 4.527467187005938e-06, + "loss": 0.6016, + "step": 17757 + }, + { + "epoch": 0.5442564668382984, + "grad_norm": 1.8842036362105707, + "learning_rate": 4.526973092896556e-06, + "loss": 0.5957, + "step": 17758 + }, + { + "epoch": 0.5442871153610396, + "grad_norm": 1.835780720966295, + "learning_rate": 4.526479003448029e-06, + "loss": 0.6796, + "step": 17759 + }, + { + "epoch": 0.5443177638837808, + "grad_norm": 1.7228081604703158, + "learning_rate": 4.525984918665225e-06, + "loss": 0.6086, + "step": 17760 + }, + { + "epoch": 0.544348412406522, + "grad_norm": 1.8617677996487387, + "learning_rate": 4.5254908385530135e-06, + "loss": 0.6252, + "step": 17761 + }, + { + "epoch": 0.5443790609292632, + "grad_norm": 1.7314200137706983, + "learning_rate": 4.524996763116265e-06, + "loss": 0.61, + "step": 17762 + }, + { + "epoch": 0.5444097094520044, + "grad_norm": 1.9979647040854245, + "learning_rate": 4.5245026923598396e-06, + "loss": 0.6339, + "step": 17763 + }, + { + "epoch": 0.5444403579747457, + "grad_norm": 1.8252543120648126, + "learning_rate": 4.5240086262886154e-06, + "loss": 0.6487, + "step": 17764 + }, + { + "epoch": 0.5444710064974868, + "grad_norm": 1.8247773792153628, + "learning_rate": 4.523514564907454e-06, + "loss": 0.6219, + "step": 17765 + }, + { + "epoch": 0.5445016550202281, + "grad_norm": 1.632783606684848, + "learning_rate": 4.523020508221225e-06, + "loss": 0.7046, + "step": 17766 + }, + { + "epoch": 0.5445323035429692, + "grad_norm": 1.6787779343262474, + "learning_rate": 4.522526456234797e-06, + "loss": 0.6198, + "step": 17767 + }, + { + "epoch": 0.5445629520657105, + "grad_norm": 1.6615006582913383, + "learning_rate": 4.522032408953039e-06, + "loss": 0.5636, + "step": 17768 + }, + { + "epoch": 0.5445936005884516, + "grad_norm": 1.9748363331869687, + "learning_rate": 4.521538366380818e-06, + "loss": 0.7284, + "step": 17769 + }, + { + "epoch": 0.5446242491111929, + "grad_norm": 1.7106081174989, + "learning_rate": 4.521044328523001e-06, + "loss": 0.6409, + "step": 17770 + }, + { + "epoch": 0.544654897633934, + "grad_norm": 1.7192666800639604, + "learning_rate": 4.520550295384457e-06, + "loss": 0.6977, + "step": 17771 + }, + { + "epoch": 0.5446855461566752, + "grad_norm": 1.52093255603134, + "learning_rate": 4.520056266970055e-06, + "loss": 0.6979, + "step": 17772 + }, + { + "epoch": 0.5447161946794165, + "grad_norm": 1.8138973890293582, + "learning_rate": 4.51956224328466e-06, + "loss": 0.6484, + "step": 17773 + }, + { + "epoch": 0.5447468432021576, + "grad_norm": 1.7222890585247963, + "learning_rate": 4.519068224333141e-06, + "loss": 0.6722, + "step": 17774 + }, + { + "epoch": 0.5447774917248989, + "grad_norm": 0.9588630905175688, + "learning_rate": 4.518574210120366e-06, + "loss": 0.45, + "step": 17775 + }, + { + "epoch": 0.54480814024764, 
+ "grad_norm": 1.9787086371838394, + "learning_rate": 4.518080200651203e-06, + "loss": 0.6768, + "step": 17776 + }, + { + "epoch": 0.5448387887703813, + "grad_norm": 1.585928838061218, + "learning_rate": 4.517586195930519e-06, + "loss": 0.5581, + "step": 17777 + }, + { + "epoch": 0.5448694372931224, + "grad_norm": 1.5993050589734183, + "learning_rate": 4.517092195963182e-06, + "loss": 0.5388, + "step": 17778 + }, + { + "epoch": 0.5449000858158637, + "grad_norm": 1.7324081396686866, + "learning_rate": 4.516598200754057e-06, + "loss": 0.645, + "step": 17779 + }, + { + "epoch": 0.5449307343386048, + "grad_norm": 1.6586288117655505, + "learning_rate": 4.5161042103080156e-06, + "loss": 0.7008, + "step": 17780 + }, + { + "epoch": 0.5449613828613461, + "grad_norm": 2.1349196353926043, + "learning_rate": 4.515610224629924e-06, + "loss": 0.626, + "step": 17781 + }, + { + "epoch": 0.5449920313840872, + "grad_norm": 1.6598988423105183, + "learning_rate": 4.515116243724646e-06, + "loss": 0.6273, + "step": 17782 + }, + { + "epoch": 0.5450226799068285, + "grad_norm": 0.792466104162375, + "learning_rate": 4.514622267597054e-06, + "loss": 0.4421, + "step": 17783 + }, + { + "epoch": 0.5450533284295697, + "grad_norm": 1.638488619230122, + "learning_rate": 4.514128296252013e-06, + "loss": 0.5989, + "step": 17784 + }, + { + "epoch": 0.5450839769523109, + "grad_norm": 1.883723562592468, + "learning_rate": 4.513634329694389e-06, + "loss": 0.678, + "step": 17785 + }, + { + "epoch": 0.5451146254750521, + "grad_norm": 1.6536608790528744, + "learning_rate": 4.513140367929052e-06, + "loss": 0.5596, + "step": 17786 + }, + { + "epoch": 0.5451452739977933, + "grad_norm": 1.5713940430097284, + "learning_rate": 4.512646410960865e-06, + "loss": 0.6412, + "step": 17787 + }, + { + "epoch": 0.5451759225205345, + "grad_norm": 1.520120924458014, + "learning_rate": 4.5121524587947e-06, + "loss": 0.5611, + "step": 17788 + }, + { + "epoch": 0.5452065710432757, + "grad_norm": 1.6786102963478278, + "learning_rate": 4.51165851143542e-06, + "loss": 0.6464, + "step": 17789 + }, + { + "epoch": 0.5452372195660169, + "grad_norm": 0.8393061520720417, + "learning_rate": 4.511164568887894e-06, + "loss": 0.4279, + "step": 17790 + }, + { + "epoch": 0.5452678680887582, + "grad_norm": 1.7376312706554387, + "learning_rate": 4.510670631156989e-06, + "loss": 0.5397, + "step": 17791 + }, + { + "epoch": 0.5452985166114993, + "grad_norm": 1.7196695256253878, + "learning_rate": 4.510176698247573e-06, + "loss": 0.6921, + "step": 17792 + }, + { + "epoch": 0.5453291651342406, + "grad_norm": 1.8963361590777985, + "learning_rate": 4.509682770164508e-06, + "loss": 0.6587, + "step": 17793 + }, + { + "epoch": 0.5453598136569817, + "grad_norm": 1.7559280386015312, + "learning_rate": 4.509188846912667e-06, + "loss": 0.5807, + "step": 17794 + }, + { + "epoch": 0.545390462179723, + "grad_norm": 1.7047729732024286, + "learning_rate": 4.508694928496911e-06, + "loss": 0.604, + "step": 17795 + }, + { + "epoch": 0.5454211107024641, + "grad_norm": 1.6499704791601377, + "learning_rate": 4.508201014922113e-06, + "loss": 0.5512, + "step": 17796 + }, + { + "epoch": 0.5454517592252054, + "grad_norm": 1.8426198776638434, + "learning_rate": 4.507707106193135e-06, + "loss": 0.6435, + "step": 17797 + }, + { + "epoch": 0.5454824077479465, + "grad_norm": 1.8041826385284072, + "learning_rate": 4.507213202314843e-06, + "loss": 0.62, + "step": 17798 + }, + { + "epoch": 0.5455130562706878, + "grad_norm": 1.9304506822107634, + "learning_rate": 4.506719303292107e-06, + "loss": 0.6956, 
+ "step": 17799 + }, + { + "epoch": 0.545543704793429, + "grad_norm": 0.7899803220013999, + "learning_rate": 4.506225409129792e-06, + "loss": 0.4404, + "step": 17800 + }, + { + "epoch": 0.5455743533161702, + "grad_norm": 1.8774465921993697, + "learning_rate": 4.505731519832763e-06, + "loss": 0.6453, + "step": 17801 + }, + { + "epoch": 0.5456050018389114, + "grad_norm": 1.843167074273254, + "learning_rate": 4.50523763540589e-06, + "loss": 0.6312, + "step": 17802 + }, + { + "epoch": 0.5456356503616525, + "grad_norm": 1.556755528762679, + "learning_rate": 4.504743755854037e-06, + "loss": 0.5902, + "step": 17803 + }, + { + "epoch": 0.5456662988843938, + "grad_norm": 1.6558341723294365, + "learning_rate": 4.504249881182067e-06, + "loss": 0.5711, + "step": 17804 + }, + { + "epoch": 0.5456969474071349, + "grad_norm": 1.8199634059012322, + "learning_rate": 4.503756011394854e-06, + "loss": 0.649, + "step": 17805 + }, + { + "epoch": 0.5457275959298762, + "grad_norm": 1.6788278261141358, + "learning_rate": 4.503262146497256e-06, + "loss": 0.6004, + "step": 17806 + }, + { + "epoch": 0.5457582444526173, + "grad_norm": 1.718230920848199, + "learning_rate": 4.502768286494148e-06, + "loss": 0.5875, + "step": 17807 + }, + { + "epoch": 0.5457888929753586, + "grad_norm": 2.0646065786848506, + "learning_rate": 4.502274431390388e-06, + "loss": 0.6791, + "step": 17808 + }, + { + "epoch": 0.5458195414980997, + "grad_norm": 1.7162308505913537, + "learning_rate": 4.501780581190845e-06, + "loss": 0.7025, + "step": 17809 + }, + { + "epoch": 0.545850190020841, + "grad_norm": 1.6481990511929037, + "learning_rate": 4.501286735900388e-06, + "loss": 0.5562, + "step": 17810 + }, + { + "epoch": 0.5458808385435822, + "grad_norm": 1.7137096964365028, + "learning_rate": 4.500792895523879e-06, + "loss": 0.5609, + "step": 17811 + }, + { + "epoch": 0.5459114870663234, + "grad_norm": 2.046077292934159, + "learning_rate": 4.500299060066184e-06, + "loss": 0.625, + "step": 17812 + }, + { + "epoch": 0.5459421355890646, + "grad_norm": 1.8844937717103665, + "learning_rate": 4.499805229532172e-06, + "loss": 0.6788, + "step": 17813 + }, + { + "epoch": 0.5459727841118058, + "grad_norm": 0.7971268122699962, + "learning_rate": 4.499311403926705e-06, + "loss": 0.4233, + "step": 17814 + }, + { + "epoch": 0.546003432634547, + "grad_norm": 1.7985990667085219, + "learning_rate": 4.498817583254654e-06, + "loss": 0.6505, + "step": 17815 + }, + { + "epoch": 0.5460340811572882, + "grad_norm": 0.7738222276951698, + "learning_rate": 4.4983237675208805e-06, + "loss": 0.4302, + "step": 17816 + }, + { + "epoch": 0.5460647296800294, + "grad_norm": 1.5314339840387026, + "learning_rate": 4.49782995673025e-06, + "loss": 0.6691, + "step": 17817 + }, + { + "epoch": 0.5460953782027707, + "grad_norm": 0.8185506416133536, + "learning_rate": 4.4973361508876306e-06, + "loss": 0.4375, + "step": 17818 + }, + { + "epoch": 0.5461260267255118, + "grad_norm": 1.563202698727265, + "learning_rate": 4.496842349997886e-06, + "loss": 0.5837, + "step": 17819 + }, + { + "epoch": 0.5461566752482531, + "grad_norm": 2.0697781366766104, + "learning_rate": 4.4963485540658824e-06, + "loss": 0.6246, + "step": 17820 + }, + { + "epoch": 0.5461873237709942, + "grad_norm": 1.5716404066724656, + "learning_rate": 4.495854763096486e-06, + "loss": 0.5764, + "step": 17821 + }, + { + "epoch": 0.5462179722937355, + "grad_norm": 1.6603846723249014, + "learning_rate": 4.495360977094561e-06, + "loss": 0.7, + "step": 17822 + }, + { + "epoch": 0.5462486208164766, + "grad_norm": 1.7043724543402061, + 
"learning_rate": 4.494867196064973e-06, + "loss": 0.6035, + "step": 17823 + }, + { + "epoch": 0.5462792693392179, + "grad_norm": 1.9045721755173792, + "learning_rate": 4.49437342001259e-06, + "loss": 0.757, + "step": 17824 + }, + { + "epoch": 0.546309917861959, + "grad_norm": 1.6738454730814825, + "learning_rate": 4.493879648942272e-06, + "loss": 0.5089, + "step": 17825 + }, + { + "epoch": 0.5463405663847003, + "grad_norm": 0.8066208582912912, + "learning_rate": 4.49338588285889e-06, + "loss": 0.4115, + "step": 17826 + }, + { + "epoch": 0.5463712149074414, + "grad_norm": 1.7475544469454962, + "learning_rate": 4.4928921217673055e-06, + "loss": 0.6331, + "step": 17827 + }, + { + "epoch": 0.5464018634301827, + "grad_norm": 1.8922510957770566, + "learning_rate": 4.492398365672384e-06, + "loss": 0.5959, + "step": 17828 + }, + { + "epoch": 0.5464325119529239, + "grad_norm": 1.7284573028638384, + "learning_rate": 4.491904614578992e-06, + "loss": 0.6331, + "step": 17829 + }, + { + "epoch": 0.5464631604756651, + "grad_norm": 1.7425657130760417, + "learning_rate": 4.491410868491994e-06, + "loss": 0.6266, + "step": 17830 + }, + { + "epoch": 0.5464938089984063, + "grad_norm": 0.8246620483131503, + "learning_rate": 4.490917127416254e-06, + "loss": 0.4132, + "step": 17831 + }, + { + "epoch": 0.5465244575211475, + "grad_norm": 1.6058445650680966, + "learning_rate": 4.490423391356638e-06, + "loss": 0.6671, + "step": 17832 + }, + { + "epoch": 0.5465551060438887, + "grad_norm": 1.6128298158684966, + "learning_rate": 4.4899296603180105e-06, + "loss": 0.6562, + "step": 17833 + }, + { + "epoch": 0.5465857545666298, + "grad_norm": 0.7846272436596664, + "learning_rate": 4.4894359343052375e-06, + "loss": 0.4255, + "step": 17834 + }, + { + "epoch": 0.5466164030893711, + "grad_norm": 1.9389186257845177, + "learning_rate": 4.488942213323184e-06, + "loss": 0.7027, + "step": 17835 + }, + { + "epoch": 0.5466470516121122, + "grad_norm": 1.708837872729404, + "learning_rate": 4.48844849737671e-06, + "loss": 0.6528, + "step": 17836 + }, + { + "epoch": 0.5466777001348535, + "grad_norm": 1.6327624859255359, + "learning_rate": 4.487954786470687e-06, + "loss": 0.5702, + "step": 17837 + }, + { + "epoch": 0.5467083486575947, + "grad_norm": 2.189999013555777, + "learning_rate": 4.487461080609976e-06, + "loss": 0.6397, + "step": 17838 + }, + { + "epoch": 0.5467389971803359, + "grad_norm": 1.7796481109698037, + "learning_rate": 4.486967379799441e-06, + "loss": 0.6446, + "step": 17839 + }, + { + "epoch": 0.5467696457030771, + "grad_norm": 1.5908358732993875, + "learning_rate": 4.486473684043948e-06, + "loss": 0.6457, + "step": 17840 + }, + { + "epoch": 0.5468002942258183, + "grad_norm": 1.885135002154278, + "learning_rate": 4.485979993348361e-06, + "loss": 0.7336, + "step": 17841 + }, + { + "epoch": 0.5468309427485595, + "grad_norm": 1.8496966558033114, + "learning_rate": 4.4854863077175445e-06, + "loss": 0.6195, + "step": 17842 + }, + { + "epoch": 0.5468615912713007, + "grad_norm": 1.6874327143379266, + "learning_rate": 4.484992627156365e-06, + "loss": 0.6848, + "step": 17843 + }, + { + "epoch": 0.5468922397940419, + "grad_norm": 1.8300914121600682, + "learning_rate": 4.484498951669682e-06, + "loss": 0.6536, + "step": 17844 + }, + { + "epoch": 0.5469228883167832, + "grad_norm": 1.7563303431481032, + "learning_rate": 4.484005281262364e-06, + "loss": 0.5966, + "step": 17845 + }, + { + "epoch": 0.5469535368395243, + "grad_norm": 1.6102132734577737, + "learning_rate": 4.483511615939276e-06, + "loss": 0.5412, + "step": 17846 + }, + { + 
"epoch": 0.5469841853622656, + "grad_norm": 2.01614117431148, + "learning_rate": 4.483017955705277e-06, + "loss": 0.6162, + "step": 17847 + }, + { + "epoch": 0.5470148338850067, + "grad_norm": 0.8127636918586179, + "learning_rate": 4.4825243005652364e-06, + "loss": 0.4214, + "step": 17848 + }, + { + "epoch": 0.547045482407748, + "grad_norm": 1.4111184101943224, + "learning_rate": 4.4820306505240166e-06, + "loss": 0.5528, + "step": 17849 + }, + { + "epoch": 0.5470761309304891, + "grad_norm": 1.8856124068695215, + "learning_rate": 4.4815370055864785e-06, + "loss": 0.6527, + "step": 17850 + }, + { + "epoch": 0.5471067794532304, + "grad_norm": 0.8032053263521748, + "learning_rate": 4.481043365757492e-06, + "loss": 0.427, + "step": 17851 + }, + { + "epoch": 0.5471374279759715, + "grad_norm": 1.8065046008129875, + "learning_rate": 4.480549731041915e-06, + "loss": 0.5741, + "step": 17852 + }, + { + "epoch": 0.5471680764987128, + "grad_norm": 0.784273967732316, + "learning_rate": 4.480056101444617e-06, + "loss": 0.4265, + "step": 17853 + }, + { + "epoch": 0.547198725021454, + "grad_norm": 1.7527015202958853, + "learning_rate": 4.4795624769704585e-06, + "loss": 0.6465, + "step": 17854 + }, + { + "epoch": 0.5472293735441952, + "grad_norm": 1.7607068417288256, + "learning_rate": 4.479068857624304e-06, + "loss": 0.5861, + "step": 17855 + }, + { + "epoch": 0.5472600220669364, + "grad_norm": 1.7873425650318986, + "learning_rate": 4.478575243411017e-06, + "loss": 0.5862, + "step": 17856 + }, + { + "epoch": 0.5472906705896776, + "grad_norm": 2.2480173859460133, + "learning_rate": 4.478081634335464e-06, + "loss": 0.6617, + "step": 17857 + }, + { + "epoch": 0.5473213191124188, + "grad_norm": 1.6889112357432807, + "learning_rate": 4.477588030402502e-06, + "loss": 0.6377, + "step": 17858 + }, + { + "epoch": 0.54735196763516, + "grad_norm": 1.7847914650002121, + "learning_rate": 4.477094431617001e-06, + "loss": 0.532, + "step": 17859 + }, + { + "epoch": 0.5473826161579012, + "grad_norm": 1.7356697194923485, + "learning_rate": 4.476600837983821e-06, + "loss": 0.628, + "step": 17860 + }, + { + "epoch": 0.5474132646806424, + "grad_norm": 0.7674764893558116, + "learning_rate": 4.47610724950783e-06, + "loss": 0.43, + "step": 17861 + }, + { + "epoch": 0.5474439132033836, + "grad_norm": 0.7877247278023424, + "learning_rate": 4.475613666193887e-06, + "loss": 0.431, + "step": 17862 + }, + { + "epoch": 0.5474745617261249, + "grad_norm": 1.8103976862148379, + "learning_rate": 4.475120088046855e-06, + "loss": 0.6253, + "step": 17863 + }, + { + "epoch": 0.547505210248866, + "grad_norm": 1.748705988605216, + "learning_rate": 4.4746265150716e-06, + "loss": 0.6096, + "step": 17864 + }, + { + "epoch": 0.5475358587716072, + "grad_norm": 0.7939035826177573, + "learning_rate": 4.474132947272985e-06, + "loss": 0.4074, + "step": 17865 + }, + { + "epoch": 0.5475665072943484, + "grad_norm": 2.029956462943008, + "learning_rate": 4.4736393846558716e-06, + "loss": 0.7549, + "step": 17866 + }, + { + "epoch": 0.5475971558170896, + "grad_norm": 1.824093275024584, + "learning_rate": 4.473145827225125e-06, + "loss": 0.6962, + "step": 17867 + }, + { + "epoch": 0.5476278043398308, + "grad_norm": 1.6387549624112348, + "learning_rate": 4.472652274985606e-06, + "loss": 0.6775, + "step": 17868 + }, + { + "epoch": 0.547658452862572, + "grad_norm": 1.8242994368844856, + "learning_rate": 4.472158727942181e-06, + "loss": 0.5506, + "step": 17869 + }, + { + "epoch": 0.5476891013853132, + "grad_norm": 1.754087859845289, + "learning_rate": 
4.471665186099711e-06, + "loss": 0.7038, + "step": 17870 + }, + { + "epoch": 0.5477197499080544, + "grad_norm": 1.6111727454990283, + "learning_rate": 4.471171649463057e-06, + "loss": 0.5891, + "step": 17871 + }, + { + "epoch": 0.5477503984307956, + "grad_norm": 1.7773609599496902, + "learning_rate": 4.470678118037086e-06, + "loss": 0.7114, + "step": 17872 + }, + { + "epoch": 0.5477810469535368, + "grad_norm": 1.7650060888927661, + "learning_rate": 4.470184591826658e-06, + "loss": 0.6471, + "step": 17873 + }, + { + "epoch": 0.5478116954762781, + "grad_norm": 1.947347196821568, + "learning_rate": 4.469691070836637e-06, + "loss": 0.6679, + "step": 17874 + }, + { + "epoch": 0.5478423439990192, + "grad_norm": 1.9249979282500886, + "learning_rate": 4.469197555071886e-06, + "loss": 0.7288, + "step": 17875 + }, + { + "epoch": 0.5478729925217605, + "grad_norm": 1.8420801691553095, + "learning_rate": 4.468704044537268e-06, + "loss": 0.6742, + "step": 17876 + }, + { + "epoch": 0.5479036410445016, + "grad_norm": 1.6461112444628527, + "learning_rate": 4.468210539237642e-06, + "loss": 0.7129, + "step": 17877 + }, + { + "epoch": 0.5479342895672429, + "grad_norm": 1.6523324314234955, + "learning_rate": 4.467717039177877e-06, + "loss": 0.545, + "step": 17878 + }, + { + "epoch": 0.547964938089984, + "grad_norm": 1.5956710944390393, + "learning_rate": 4.467223544362828e-06, + "loss": 0.638, + "step": 17879 + }, + { + "epoch": 0.5479955866127253, + "grad_norm": 1.517677650059064, + "learning_rate": 4.466730054797366e-06, + "loss": 0.6115, + "step": 17880 + }, + { + "epoch": 0.5480262351354664, + "grad_norm": 1.5275687942146734, + "learning_rate": 4.466236570486348e-06, + "loss": 0.5852, + "step": 17881 + }, + { + "epoch": 0.5480568836582077, + "grad_norm": 1.541282286455491, + "learning_rate": 4.465743091434636e-06, + "loss": 0.6306, + "step": 17882 + }, + { + "epoch": 0.5480875321809489, + "grad_norm": 1.7095639985979048, + "learning_rate": 4.465249617647096e-06, + "loss": 0.6139, + "step": 17883 + }, + { + "epoch": 0.5481181807036901, + "grad_norm": 0.8914079842924535, + "learning_rate": 4.464756149128588e-06, + "loss": 0.4372, + "step": 17884 + }, + { + "epoch": 0.5481488292264313, + "grad_norm": 1.9410554783449945, + "learning_rate": 4.4642626858839736e-06, + "loss": 0.6619, + "step": 17885 + }, + { + "epoch": 0.5481794777491725, + "grad_norm": 1.80716400351385, + "learning_rate": 4.4637692279181174e-06, + "loss": 0.7176, + "step": 17886 + }, + { + "epoch": 0.5482101262719137, + "grad_norm": 1.6801142762166401, + "learning_rate": 4.463275775235878e-06, + "loss": 0.6111, + "step": 17887 + }, + { + "epoch": 0.5482407747946549, + "grad_norm": 0.8101868359495488, + "learning_rate": 4.462782327842122e-06, + "loss": 0.4506, + "step": 17888 + }, + { + "epoch": 0.5482714233173961, + "grad_norm": 1.832106005988081, + "learning_rate": 4.462288885741709e-06, + "loss": 0.6897, + "step": 17889 + }, + { + "epoch": 0.5483020718401374, + "grad_norm": 1.5408846140711068, + "learning_rate": 4.461795448939499e-06, + "loss": 0.5342, + "step": 17890 + }, + { + "epoch": 0.5483327203628785, + "grad_norm": 0.7928786686470796, + "learning_rate": 4.46130201744036e-06, + "loss": 0.4445, + "step": 17891 + }, + { + "epoch": 0.5483633688856198, + "grad_norm": 2.0761456495653743, + "learning_rate": 4.460808591249147e-06, + "loss": 0.6532, + "step": 17892 + }, + { + "epoch": 0.5483940174083609, + "grad_norm": 0.7595034527694549, + "learning_rate": 4.460315170370725e-06, + "loss": 0.4403, + "step": 17893 + }, + { + "epoch": 
0.5484246659311022, + "grad_norm": 1.9107449055144206, + "learning_rate": 4.4598217548099575e-06, + "loss": 0.6738, + "step": 17894 + }, + { + "epoch": 0.5484553144538433, + "grad_norm": 1.6814722428805917, + "learning_rate": 4.459328344571702e-06, + "loss": 0.6943, + "step": 17895 + }, + { + "epoch": 0.5484859629765845, + "grad_norm": 1.6028619500384829, + "learning_rate": 4.458834939660824e-06, + "loss": 0.6123, + "step": 17896 + }, + { + "epoch": 0.5485166114993257, + "grad_norm": 1.730769431936031, + "learning_rate": 4.458341540082185e-06, + "loss": 0.6896, + "step": 17897 + }, + { + "epoch": 0.5485472600220669, + "grad_norm": 0.871068221148961, + "learning_rate": 4.4578481458406425e-06, + "loss": 0.4465, + "step": 17898 + }, + { + "epoch": 0.5485779085448081, + "grad_norm": 2.061006708421774, + "learning_rate": 4.4573547569410634e-06, + "loss": 0.6171, + "step": 17899 + }, + { + "epoch": 0.5486085570675493, + "grad_norm": 1.6041904005462333, + "learning_rate": 4.456861373388307e-06, + "loss": 0.5534, + "step": 17900 + }, + { + "epoch": 0.5486392055902906, + "grad_norm": 1.7904341919791467, + "learning_rate": 4.456367995187231e-06, + "loss": 0.5812, + "step": 17901 + }, + { + "epoch": 0.5486698541130317, + "grad_norm": 1.7921675199292513, + "learning_rate": 4.455874622342705e-06, + "loss": 0.6327, + "step": 17902 + }, + { + "epoch": 0.548700502635773, + "grad_norm": 1.7619497942639952, + "learning_rate": 4.455381254859583e-06, + "loss": 0.6731, + "step": 17903 + }, + { + "epoch": 0.5487311511585141, + "grad_norm": 1.5899091754905792, + "learning_rate": 4.454887892742728e-06, + "loss": 0.6085, + "step": 17904 + }, + { + "epoch": 0.5487617996812554, + "grad_norm": 1.6662831381441112, + "learning_rate": 4.454394535997004e-06, + "loss": 0.6064, + "step": 17905 + }, + { + "epoch": 0.5487924482039965, + "grad_norm": 1.6880894101128707, + "learning_rate": 4.4539011846272684e-06, + "loss": 0.5804, + "step": 17906 + }, + { + "epoch": 0.5488230967267378, + "grad_norm": 1.6794937572946822, + "learning_rate": 4.453407838638385e-06, + "loss": 0.6493, + "step": 17907 + }, + { + "epoch": 0.5488537452494789, + "grad_norm": 1.7382315700837023, + "learning_rate": 4.452914498035215e-06, + "loss": 0.6727, + "step": 17908 + }, + { + "epoch": 0.5488843937722202, + "grad_norm": 1.6228061556528894, + "learning_rate": 4.452421162822616e-06, + "loss": 0.565, + "step": 17909 + }, + { + "epoch": 0.5489150422949614, + "grad_norm": 1.933514962391402, + "learning_rate": 4.451927833005453e-06, + "loss": 0.6188, + "step": 17910 + }, + { + "epoch": 0.5489456908177026, + "grad_norm": 0.8176756018300521, + "learning_rate": 4.451434508588587e-06, + "loss": 0.4544, + "step": 17911 + }, + { + "epoch": 0.5489763393404438, + "grad_norm": 1.9292227481170303, + "learning_rate": 4.450941189576874e-06, + "loss": 0.6311, + "step": 17912 + }, + { + "epoch": 0.549006987863185, + "grad_norm": 1.7401583856863663, + "learning_rate": 4.4504478759751805e-06, + "loss": 0.5476, + "step": 17913 + }, + { + "epoch": 0.5490376363859262, + "grad_norm": 1.5458374340573073, + "learning_rate": 4.449954567788363e-06, + "loss": 0.5411, + "step": 17914 + }, + { + "epoch": 0.5490682849086674, + "grad_norm": 1.6174063489459078, + "learning_rate": 4.449461265021284e-06, + "loss": 0.6367, + "step": 17915 + }, + { + "epoch": 0.5490989334314086, + "grad_norm": 0.7773053635079572, + "learning_rate": 4.448967967678805e-06, + "loss": 0.4224, + "step": 17916 + }, + { + "epoch": 0.5491295819541498, + "grad_norm": 1.8093254975460031, + "learning_rate": 
4.448474675765783e-06, + "loss": 0.6044, + "step": 17917 + }, + { + "epoch": 0.549160230476891, + "grad_norm": 1.7765536577156686, + "learning_rate": 4.447981389287085e-06, + "loss": 0.5397, + "step": 17918 + }, + { + "epoch": 0.5491908789996323, + "grad_norm": 1.7535048347456137, + "learning_rate": 4.4474881082475655e-06, + "loss": 0.7285, + "step": 17919 + }, + { + "epoch": 0.5492215275223734, + "grad_norm": 1.6250347624048456, + "learning_rate": 4.4469948326520865e-06, + "loss": 0.6206, + "step": 17920 + }, + { + "epoch": 0.5492521760451147, + "grad_norm": 1.6170420499550548, + "learning_rate": 4.446501562505511e-06, + "loss": 0.6691, + "step": 17921 + }, + { + "epoch": 0.5492828245678558, + "grad_norm": 1.830960953090353, + "learning_rate": 4.446008297812694e-06, + "loss": 0.6462, + "step": 17922 + }, + { + "epoch": 0.5493134730905971, + "grad_norm": 1.5835732682236707, + "learning_rate": 4.445515038578502e-06, + "loss": 0.6175, + "step": 17923 + }, + { + "epoch": 0.5493441216133382, + "grad_norm": 1.742995883705481, + "learning_rate": 4.445021784807792e-06, + "loss": 0.6532, + "step": 17924 + }, + { + "epoch": 0.5493747701360795, + "grad_norm": 1.721503880598056, + "learning_rate": 4.444528536505423e-06, + "loss": 0.5906, + "step": 17925 + }, + { + "epoch": 0.5494054186588206, + "grad_norm": 1.5803502712076893, + "learning_rate": 4.444035293676257e-06, + "loss": 0.6232, + "step": 17926 + }, + { + "epoch": 0.5494360671815618, + "grad_norm": 1.6907443237955497, + "learning_rate": 4.443542056325153e-06, + "loss": 0.5325, + "step": 17927 + }, + { + "epoch": 0.549466715704303, + "grad_norm": 0.775566496574197, + "learning_rate": 4.4430488244569715e-06, + "loss": 0.4237, + "step": 17928 + }, + { + "epoch": 0.5494973642270442, + "grad_norm": 1.7863018153339614, + "learning_rate": 4.442555598076573e-06, + "loss": 0.7076, + "step": 17929 + }, + { + "epoch": 0.5495280127497855, + "grad_norm": 1.7369191102113102, + "learning_rate": 4.442062377188818e-06, + "loss": 0.5986, + "step": 17930 + }, + { + "epoch": 0.5495586612725266, + "grad_norm": 1.5257958600038661, + "learning_rate": 4.441569161798562e-06, + "loss": 0.6482, + "step": 17931 + }, + { + "epoch": 0.5495893097952679, + "grad_norm": 1.66350009473373, + "learning_rate": 4.441075951910671e-06, + "loss": 0.5939, + "step": 17932 + }, + { + "epoch": 0.549619958318009, + "grad_norm": 2.0973095807914217, + "learning_rate": 4.440582747529998e-06, + "loss": 0.73, + "step": 17933 + }, + { + "epoch": 0.5496506068407503, + "grad_norm": 1.6904568613873237, + "learning_rate": 4.440089548661411e-06, + "loss": 0.5942, + "step": 17934 + }, + { + "epoch": 0.5496812553634914, + "grad_norm": 1.7227055282533055, + "learning_rate": 4.439596355309763e-06, + "loss": 0.6357, + "step": 17935 + }, + { + "epoch": 0.5497119038862327, + "grad_norm": 0.7665167650083545, + "learning_rate": 4.439103167479913e-06, + "loss": 0.4199, + "step": 17936 + }, + { + "epoch": 0.5497425524089739, + "grad_norm": 1.8154904757560197, + "learning_rate": 4.438609985176726e-06, + "loss": 0.6934, + "step": 17937 + }, + { + "epoch": 0.5497732009317151, + "grad_norm": 1.736306858434117, + "learning_rate": 4.438116808405058e-06, + "loss": 0.6587, + "step": 17938 + }, + { + "epoch": 0.5498038494544563, + "grad_norm": 1.8695445313694035, + "learning_rate": 4.437623637169768e-06, + "loss": 0.6584, + "step": 17939 + }, + { + "epoch": 0.5498344979771975, + "grad_norm": 1.667168476879837, + "learning_rate": 4.437130471475717e-06, + "loss": 0.5953, + "step": 17940 + }, + { + "epoch": 
0.5498651464999387, + "grad_norm": 2.094331581074104, + "learning_rate": 4.4366373113277615e-06, + "loss": 0.6577, + "step": 17941 + }, + { + "epoch": 0.5498957950226799, + "grad_norm": 1.7383464565863287, + "learning_rate": 4.436144156730765e-06, + "loss": 0.5599, + "step": 17942 + }, + { + "epoch": 0.5499264435454211, + "grad_norm": 1.7261381705588017, + "learning_rate": 4.435651007689585e-06, + "loss": 0.6614, + "step": 17943 + }, + { + "epoch": 0.5499570920681623, + "grad_norm": 0.7632268291695954, + "learning_rate": 4.435157864209077e-06, + "loss": 0.4267, + "step": 17944 + }, + { + "epoch": 0.5499877405909035, + "grad_norm": 1.6552176846444178, + "learning_rate": 4.434664726294106e-06, + "loss": 0.6745, + "step": 17945 + }, + { + "epoch": 0.5500183891136448, + "grad_norm": 0.810717681722344, + "learning_rate": 4.434171593949527e-06, + "loss": 0.4477, + "step": 17946 + }, + { + "epoch": 0.5500490376363859, + "grad_norm": 1.6538426921989047, + "learning_rate": 4.4336784671802e-06, + "loss": 0.6403, + "step": 17947 + }, + { + "epoch": 0.5500796861591272, + "grad_norm": 1.8119856218444022, + "learning_rate": 4.433185345990984e-06, + "loss": 0.6496, + "step": 17948 + }, + { + "epoch": 0.5501103346818683, + "grad_norm": 1.893737439649935, + "learning_rate": 4.432692230386737e-06, + "loss": 0.5811, + "step": 17949 + }, + { + "epoch": 0.5501409832046096, + "grad_norm": 1.6677034408437406, + "learning_rate": 4.432199120372319e-06, + "loss": 0.6372, + "step": 17950 + }, + { + "epoch": 0.5501716317273507, + "grad_norm": 3.062897137890888, + "learning_rate": 4.431706015952589e-06, + "loss": 0.5388, + "step": 17951 + }, + { + "epoch": 0.550202280250092, + "grad_norm": 1.8949702049869308, + "learning_rate": 4.431212917132404e-06, + "loss": 0.6394, + "step": 17952 + }, + { + "epoch": 0.5502329287728331, + "grad_norm": 1.6160305375486936, + "learning_rate": 4.4307198239166245e-06, + "loss": 0.6076, + "step": 17953 + }, + { + "epoch": 0.5502635772955744, + "grad_norm": 1.6447587039202087, + "learning_rate": 4.43022673631011e-06, + "loss": 0.6596, + "step": 17954 + }, + { + "epoch": 0.5502942258183156, + "grad_norm": 1.7953644087158582, + "learning_rate": 4.429733654317713e-06, + "loss": 0.5973, + "step": 17955 + }, + { + "epoch": 0.5503248743410568, + "grad_norm": 1.7172043858301307, + "learning_rate": 4.4292405779443e-06, + "loss": 0.5914, + "step": 17956 + }, + { + "epoch": 0.550355522863798, + "grad_norm": 0.7917629537346013, + "learning_rate": 4.428747507194725e-06, + "loss": 0.4334, + "step": 17957 + }, + { + "epoch": 0.5503861713865391, + "grad_norm": 1.7590232112704611, + "learning_rate": 4.428254442073845e-06, + "loss": 0.6044, + "step": 17958 + }, + { + "epoch": 0.5504168199092804, + "grad_norm": 2.045179691527631, + "learning_rate": 4.427761382586522e-06, + "loss": 0.6742, + "step": 17959 + }, + { + "epoch": 0.5504474684320215, + "grad_norm": 0.7668518293413231, + "learning_rate": 4.427268328737611e-06, + "loss": 0.4258, + "step": 17960 + }, + { + "epoch": 0.5504781169547628, + "grad_norm": 1.7919385931050853, + "learning_rate": 4.426775280531973e-06, + "loss": 0.6729, + "step": 17961 + }, + { + "epoch": 0.5505087654775039, + "grad_norm": 0.7747451471348215, + "learning_rate": 4.426282237974465e-06, + "loss": 0.4307, + "step": 17962 + }, + { + "epoch": 0.5505394140002452, + "grad_norm": 1.503238269286537, + "learning_rate": 4.425789201069943e-06, + "loss": 0.5962, + "step": 17963 + }, + { + "epoch": 0.5505700625229863, + "grad_norm": 1.6099341191253966, + "learning_rate": 
4.425296169823269e-06, + "loss": 0.6195, + "step": 17964 + }, + { + "epoch": 0.5506007110457276, + "grad_norm": 1.6536215858578365, + "learning_rate": 4.4248031442392995e-06, + "loss": 0.5709, + "step": 17965 + }, + { + "epoch": 0.5506313595684688, + "grad_norm": 1.7665659011894783, + "learning_rate": 4.42431012432289e-06, + "loss": 0.6079, + "step": 17966 + }, + { + "epoch": 0.55066200809121, + "grad_norm": 1.705545722065179, + "learning_rate": 4.423817110078901e-06, + "loss": 0.587, + "step": 17967 + }, + { + "epoch": 0.5506926566139512, + "grad_norm": 1.7391169750906967, + "learning_rate": 4.423324101512188e-06, + "loss": 0.5973, + "step": 17968 + }, + { + "epoch": 0.5507233051366924, + "grad_norm": 1.8100239213853566, + "learning_rate": 4.422831098627611e-06, + "loss": 0.5913, + "step": 17969 + }, + { + "epoch": 0.5507539536594336, + "grad_norm": 1.5345631807668128, + "learning_rate": 4.4223381014300285e-06, + "loss": 0.5308, + "step": 17970 + }, + { + "epoch": 0.5507846021821748, + "grad_norm": 1.9743950061266524, + "learning_rate": 4.421845109924294e-06, + "loss": 0.6289, + "step": 17971 + }, + { + "epoch": 0.550815250704916, + "grad_norm": 1.7530439838331588, + "learning_rate": 4.421352124115269e-06, + "loss": 0.7094, + "step": 17972 + }, + { + "epoch": 0.5508458992276573, + "grad_norm": 1.5996374440959775, + "learning_rate": 4.42085914400781e-06, + "loss": 0.6524, + "step": 17973 + }, + { + "epoch": 0.5508765477503984, + "grad_norm": 1.7234343735298727, + "learning_rate": 4.420366169606772e-06, + "loss": 0.6187, + "step": 17974 + }, + { + "epoch": 0.5509071962731397, + "grad_norm": 1.7312656558146842, + "learning_rate": 4.4198732009170165e-06, + "loss": 0.6762, + "step": 17975 + }, + { + "epoch": 0.5509378447958808, + "grad_norm": 1.7599712022924139, + "learning_rate": 4.419380237943396e-06, + "loss": 0.6639, + "step": 17976 + }, + { + "epoch": 0.5509684933186221, + "grad_norm": 1.9338838382969554, + "learning_rate": 4.418887280690774e-06, + "loss": 0.636, + "step": 17977 + }, + { + "epoch": 0.5509991418413632, + "grad_norm": 1.657899166877422, + "learning_rate": 4.418394329164003e-06, + "loss": 0.6347, + "step": 17978 + }, + { + "epoch": 0.5510297903641045, + "grad_norm": 1.5651064762341609, + "learning_rate": 4.417901383367941e-06, + "loss": 0.5915, + "step": 17979 + }, + { + "epoch": 0.5510604388868456, + "grad_norm": 1.696680732579792, + "learning_rate": 4.417408443307446e-06, + "loss": 0.5418, + "step": 17980 + }, + { + "epoch": 0.5510910874095869, + "grad_norm": 1.823770305539363, + "learning_rate": 4.416915508987375e-06, + "loss": 0.6965, + "step": 17981 + }, + { + "epoch": 0.551121735932328, + "grad_norm": 1.5014123208306576, + "learning_rate": 4.416422580412584e-06, + "loss": 0.6119, + "step": 17982 + }, + { + "epoch": 0.5511523844550693, + "grad_norm": 1.592546707919526, + "learning_rate": 4.4159296575879305e-06, + "loss": 0.5995, + "step": 17983 + }, + { + "epoch": 0.5511830329778105, + "grad_norm": 1.58159457247724, + "learning_rate": 4.415436740518273e-06, + "loss": 0.6197, + "step": 17984 + }, + { + "epoch": 0.5512136815005517, + "grad_norm": 1.8993923696311081, + "learning_rate": 4.4149438292084645e-06, + "loss": 0.6804, + "step": 17985 + }, + { + "epoch": 0.5512443300232929, + "grad_norm": 1.7428593215538044, + "learning_rate": 4.414450923663367e-06, + "loss": 0.7027, + "step": 17986 + }, + { + "epoch": 0.5512749785460341, + "grad_norm": 1.6108311633498023, + "learning_rate": 4.413958023887831e-06, + "loss": 0.6502, + "step": 17987 + }, + { + "epoch": 
0.5513056270687753, + "grad_norm": 1.6721022464863176, + "learning_rate": 4.413465129886719e-06, + "loss": 0.6698, + "step": 17988 + }, + { + "epoch": 0.5513362755915164, + "grad_norm": 1.654019288554884, + "learning_rate": 4.412972241664885e-06, + "loss": 0.6587, + "step": 17989 + }, + { + "epoch": 0.5513669241142577, + "grad_norm": 1.8632958038778995, + "learning_rate": 4.412479359227185e-06, + "loss": 0.6116, + "step": 17990 + }, + { + "epoch": 0.5513975726369988, + "grad_norm": 0.9663066404037907, + "learning_rate": 4.411986482578476e-06, + "loss": 0.4126, + "step": 17991 + }, + { + "epoch": 0.5514282211597401, + "grad_norm": 1.5513547750556518, + "learning_rate": 4.411493611723616e-06, + "loss": 0.6187, + "step": 17992 + }, + { + "epoch": 0.5514588696824813, + "grad_norm": 1.7251309557132093, + "learning_rate": 4.4110007466674575e-06, + "loss": 0.6697, + "step": 17993 + }, + { + "epoch": 0.5514895182052225, + "grad_norm": 1.8775803914169507, + "learning_rate": 4.410507887414861e-06, + "loss": 0.611, + "step": 17994 + }, + { + "epoch": 0.5515201667279637, + "grad_norm": 0.8208928876614926, + "learning_rate": 4.410015033970681e-06, + "loss": 0.4339, + "step": 17995 + }, + { + "epoch": 0.5515508152507049, + "grad_norm": 1.6767736610126909, + "learning_rate": 4.409522186339774e-06, + "loss": 0.6677, + "step": 17996 + }, + { + "epoch": 0.5515814637734461, + "grad_norm": 1.8067475857965467, + "learning_rate": 4.409029344526997e-06, + "loss": 0.6328, + "step": 17997 + }, + { + "epoch": 0.5516121122961873, + "grad_norm": 0.7513976192911566, + "learning_rate": 4.408536508537202e-06, + "loss": 0.401, + "step": 17998 + }, + { + "epoch": 0.5516427608189285, + "grad_norm": 0.8612918029009913, + "learning_rate": 4.408043678375251e-06, + "loss": 0.4345, + "step": 17999 + }, + { + "epoch": 0.5516734093416698, + "grad_norm": 1.9276025915496127, + "learning_rate": 4.407550854045996e-06, + "loss": 0.5788, + "step": 18000 + }, + { + "epoch": 0.5517040578644109, + "grad_norm": 1.6801933634720045, + "learning_rate": 4.407058035554294e-06, + "loss": 0.661, + "step": 18001 + }, + { + "epoch": 0.5517347063871522, + "grad_norm": 1.805471441901074, + "learning_rate": 4.406565222905002e-06, + "loss": 0.6257, + "step": 18002 + }, + { + "epoch": 0.5517653549098933, + "grad_norm": 1.9090455761575689, + "learning_rate": 4.406072416102974e-06, + "loss": 0.6695, + "step": 18003 + }, + { + "epoch": 0.5517960034326346, + "grad_norm": 2.0397858928527572, + "learning_rate": 4.405579615153065e-06, + "loss": 0.6809, + "step": 18004 + }, + { + "epoch": 0.5518266519553757, + "grad_norm": 1.7748289695219508, + "learning_rate": 4.405086820060133e-06, + "loss": 0.682, + "step": 18005 + }, + { + "epoch": 0.551857300478117, + "grad_norm": 1.8038516255738837, + "learning_rate": 4.4045940308290325e-06, + "loss": 0.5446, + "step": 18006 + }, + { + "epoch": 0.5518879490008581, + "grad_norm": 1.8315118262110084, + "learning_rate": 4.40410124746462e-06, + "loss": 0.7629, + "step": 18007 + }, + { + "epoch": 0.5519185975235994, + "grad_norm": 1.9878614193948, + "learning_rate": 4.4036084699717515e-06, + "loss": 0.6042, + "step": 18008 + }, + { + "epoch": 0.5519492460463405, + "grad_norm": 0.7668585229805419, + "learning_rate": 4.40311569835528e-06, + "loss": 0.4228, + "step": 18009 + }, + { + "epoch": 0.5519798945690818, + "grad_norm": 0.7996556012526216, + "learning_rate": 4.402622932620063e-06, + "loss": 0.4347, + "step": 18010 + }, + { + "epoch": 0.552010543091823, + "grad_norm": 1.7878549262931607, + "learning_rate": 
4.4021301727709545e-06, + "loss": 0.5536, + "step": 18011 + }, + { + "epoch": 0.5520411916145642, + "grad_norm": 1.625266848576819, + "learning_rate": 4.401637418812809e-06, + "loss": 0.6324, + "step": 18012 + }, + { + "epoch": 0.5520718401373054, + "grad_norm": 1.7654767335581616, + "learning_rate": 4.401144670750485e-06, + "loss": 0.7159, + "step": 18013 + }, + { + "epoch": 0.5521024886600466, + "grad_norm": 0.7896605566900536, + "learning_rate": 4.400651928588835e-06, + "loss": 0.4233, + "step": 18014 + }, + { + "epoch": 0.5521331371827878, + "grad_norm": 1.849599224143912, + "learning_rate": 4.4001591923327146e-06, + "loss": 0.7093, + "step": 18015 + }, + { + "epoch": 0.552163785705529, + "grad_norm": 0.7933532786160507, + "learning_rate": 4.399666461986982e-06, + "loss": 0.4429, + "step": 18016 + }, + { + "epoch": 0.5521944342282702, + "grad_norm": 1.787778089589185, + "learning_rate": 4.399173737556485e-06, + "loss": 0.6131, + "step": 18017 + }, + { + "epoch": 0.5522250827510115, + "grad_norm": 1.828272274971419, + "learning_rate": 4.398681019046086e-06, + "loss": 0.7102, + "step": 18018 + }, + { + "epoch": 0.5522557312737526, + "grad_norm": 1.6682273670469403, + "learning_rate": 4.398188306460635e-06, + "loss": 0.6434, + "step": 18019 + }, + { + "epoch": 0.5522863797964938, + "grad_norm": 1.7123293077675756, + "learning_rate": 4.39769559980499e-06, + "loss": 0.5658, + "step": 18020 + }, + { + "epoch": 0.552317028319235, + "grad_norm": 1.8122104467792393, + "learning_rate": 4.397202899084003e-06, + "loss": 0.5789, + "step": 18021 + }, + { + "epoch": 0.5523476768419762, + "grad_norm": 1.7669543471237563, + "learning_rate": 4.3967102043025305e-06, + "loss": 0.642, + "step": 18022 + }, + { + "epoch": 0.5523783253647174, + "grad_norm": 1.863540030232622, + "learning_rate": 4.3962175154654264e-06, + "loss": 0.6232, + "step": 18023 + }, + { + "epoch": 0.5524089738874586, + "grad_norm": 1.7653061195916748, + "learning_rate": 4.395724832577547e-06, + "loss": 0.6642, + "step": 18024 + }, + { + "epoch": 0.5524396224101998, + "grad_norm": 0.7975834346817162, + "learning_rate": 4.395232155643744e-06, + "loss": 0.4127, + "step": 18025 + }, + { + "epoch": 0.552470270932941, + "grad_norm": 1.7202929624992025, + "learning_rate": 4.394739484668874e-06, + "loss": 0.57, + "step": 18026 + }, + { + "epoch": 0.5525009194556822, + "grad_norm": 0.7825363527352863, + "learning_rate": 4.394246819657792e-06, + "loss": 0.4223, + "step": 18027 + }, + { + "epoch": 0.5525315679784234, + "grad_norm": 1.826671713864539, + "learning_rate": 4.393754160615348e-06, + "loss": 0.6568, + "step": 18028 + }, + { + "epoch": 0.5525622165011647, + "grad_norm": 1.6445335932604634, + "learning_rate": 4.3932615075464025e-06, + "loss": 0.6509, + "step": 18029 + }, + { + "epoch": 0.5525928650239058, + "grad_norm": 0.7864245415868775, + "learning_rate": 4.392768860455805e-06, + "loss": 0.4104, + "step": 18030 + }, + { + "epoch": 0.5526235135466471, + "grad_norm": 1.5000907330233428, + "learning_rate": 4.392276219348411e-06, + "loss": 0.6281, + "step": 18031 + }, + { + "epoch": 0.5526541620693882, + "grad_norm": 1.6587463394625528, + "learning_rate": 4.391783584229076e-06, + "loss": 0.6247, + "step": 18032 + }, + { + "epoch": 0.5526848105921295, + "grad_norm": 1.6835374580821036, + "learning_rate": 4.391290955102651e-06, + "loss": 0.5978, + "step": 18033 + }, + { + "epoch": 0.5527154591148706, + "grad_norm": 0.7434822951524487, + "learning_rate": 4.390798331973994e-06, + "loss": 0.4057, + "step": 18034 + }, + { + "epoch": 
0.5527461076376119, + "grad_norm": 1.7545742924771277, + "learning_rate": 4.390305714847956e-06, + "loss": 0.6082, + "step": 18035 + }, + { + "epoch": 0.552776756160353, + "grad_norm": 1.8237654996833168, + "learning_rate": 4.389813103729392e-06, + "loss": 0.675, + "step": 18036 + }, + { + "epoch": 0.5528074046830943, + "grad_norm": 1.882119125027646, + "learning_rate": 4.3893204986231554e-06, + "loss": 0.5387, + "step": 18037 + }, + { + "epoch": 0.5528380532058355, + "grad_norm": 1.7274238984868069, + "learning_rate": 4.388827899534102e-06, + "loss": 0.6411, + "step": 18038 + }, + { + "epoch": 0.5528687017285767, + "grad_norm": 1.7298672876520855, + "learning_rate": 4.388335306467079e-06, + "loss": 0.5552, + "step": 18039 + }, + { + "epoch": 0.5528993502513179, + "grad_norm": 1.7181919675883437, + "learning_rate": 4.3878427194269506e-06, + "loss": 0.728, + "step": 18040 + }, + { + "epoch": 0.5529299987740591, + "grad_norm": 1.858744281653967, + "learning_rate": 4.387350138418559e-06, + "loss": 0.594, + "step": 18041 + }, + { + "epoch": 0.5529606472968003, + "grad_norm": 1.886103263800503, + "learning_rate": 4.386857563446767e-06, + "loss": 0.6911, + "step": 18042 + }, + { + "epoch": 0.5529912958195415, + "grad_norm": 0.8990880355088942, + "learning_rate": 4.386364994516424e-06, + "loss": 0.4308, + "step": 18043 + }, + { + "epoch": 0.5530219443422827, + "grad_norm": 0.8178086510483132, + "learning_rate": 4.385872431632382e-06, + "loss": 0.4219, + "step": 18044 + }, + { + "epoch": 0.553052592865024, + "grad_norm": 1.8507939128514528, + "learning_rate": 4.3853798747994975e-06, + "loss": 0.6415, + "step": 18045 + }, + { + "epoch": 0.5530832413877651, + "grad_norm": 1.719383127394268, + "learning_rate": 4.384887324022622e-06, + "loss": 0.5919, + "step": 18046 + }, + { + "epoch": 0.5531138899105064, + "grad_norm": 0.7677352185276447, + "learning_rate": 4.384394779306609e-06, + "loss": 0.4163, + "step": 18047 + }, + { + "epoch": 0.5531445384332475, + "grad_norm": 1.6283042723945553, + "learning_rate": 4.383902240656312e-06, + "loss": 0.588, + "step": 18048 + }, + { + "epoch": 0.5531751869559888, + "grad_norm": 1.8692666829007294, + "learning_rate": 4.383409708076582e-06, + "loss": 0.6343, + "step": 18049 + }, + { + "epoch": 0.5532058354787299, + "grad_norm": 0.7505561889301743, + "learning_rate": 4.382917181572276e-06, + "loss": 0.4128, + "step": 18050 + }, + { + "epoch": 0.5532364840014711, + "grad_norm": 1.9011337009898306, + "learning_rate": 4.382424661148245e-06, + "loss": 0.6289, + "step": 18051 + }, + { + "epoch": 0.5532671325242123, + "grad_norm": 1.6346756962162046, + "learning_rate": 4.38193214680934e-06, + "loss": 0.5981, + "step": 18052 + }, + { + "epoch": 0.5532977810469535, + "grad_norm": 1.6718677711913295, + "learning_rate": 4.381439638560418e-06, + "loss": 0.7384, + "step": 18053 + }, + { + "epoch": 0.5533284295696947, + "grad_norm": 1.8155973795264426, + "learning_rate": 4.380947136406329e-06, + "loss": 0.5678, + "step": 18054 + }, + { + "epoch": 0.5533590780924359, + "grad_norm": 1.9164873579150337, + "learning_rate": 4.380454640351924e-06, + "loss": 0.6377, + "step": 18055 + }, + { + "epoch": 0.5533897266151772, + "grad_norm": 1.6345254119082497, + "learning_rate": 4.379962150402061e-06, + "loss": 0.5429, + "step": 18056 + }, + { + "epoch": 0.5534203751379183, + "grad_norm": 1.9099799254359608, + "learning_rate": 4.379469666561588e-06, + "loss": 0.5899, + "step": 18057 + }, + { + "epoch": 0.5534510236606596, + "grad_norm": 1.8139201457501735, + "learning_rate": 
4.378977188835358e-06, + "loss": 0.6161, + "step": 18058 + }, + { + "epoch": 0.5534816721834007, + "grad_norm": 1.8221850718801176, + "learning_rate": 4.378484717228226e-06, + "loss": 0.6674, + "step": 18059 + }, + { + "epoch": 0.553512320706142, + "grad_norm": 0.7882828333446491, + "learning_rate": 4.377992251745043e-06, + "loss": 0.4065, + "step": 18060 + }, + { + "epoch": 0.5535429692288831, + "grad_norm": 1.5841515262456538, + "learning_rate": 4.377499792390663e-06, + "loss": 0.6431, + "step": 18061 + }, + { + "epoch": 0.5535736177516244, + "grad_norm": 1.9112532256358334, + "learning_rate": 4.377007339169935e-06, + "loss": 0.5539, + "step": 18062 + }, + { + "epoch": 0.5536042662743655, + "grad_norm": 1.8379531310915123, + "learning_rate": 4.376514892087713e-06, + "loss": 0.632, + "step": 18063 + }, + { + "epoch": 0.5536349147971068, + "grad_norm": 0.7953353759079034, + "learning_rate": 4.37602245114885e-06, + "loss": 0.449, + "step": 18064 + }, + { + "epoch": 0.553665563319848, + "grad_norm": 1.7163255966177933, + "learning_rate": 4.375530016358198e-06, + "loss": 0.6164, + "step": 18065 + }, + { + "epoch": 0.5536962118425892, + "grad_norm": 1.9745769999609288, + "learning_rate": 4.375037587720606e-06, + "loss": 0.7054, + "step": 18066 + }, + { + "epoch": 0.5537268603653304, + "grad_norm": 1.8966467234334916, + "learning_rate": 4.374545165240931e-06, + "loss": 0.6441, + "step": 18067 + }, + { + "epoch": 0.5537575088880716, + "grad_norm": 1.84587622301921, + "learning_rate": 4.374052748924022e-06, + "loss": 0.701, + "step": 18068 + }, + { + "epoch": 0.5537881574108128, + "grad_norm": 0.7549580015137799, + "learning_rate": 4.373560338774731e-06, + "loss": 0.4273, + "step": 18069 + }, + { + "epoch": 0.553818805933554, + "grad_norm": 1.7100156983607064, + "learning_rate": 4.3730679347979114e-06, + "loss": 0.6885, + "step": 18070 + }, + { + "epoch": 0.5538494544562952, + "grad_norm": 1.6773546339858123, + "learning_rate": 4.372575536998411e-06, + "loss": 0.5297, + "step": 18071 + }, + { + "epoch": 0.5538801029790364, + "grad_norm": 0.7834298510216331, + "learning_rate": 4.372083145381087e-06, + "loss": 0.3887, + "step": 18072 + }, + { + "epoch": 0.5539107515017776, + "grad_norm": 1.7606381157613862, + "learning_rate": 4.371590759950789e-06, + "loss": 0.5686, + "step": 18073 + }, + { + "epoch": 0.5539414000245189, + "grad_norm": 1.7633683365296868, + "learning_rate": 4.371098380712366e-06, + "loss": 0.677, + "step": 18074 + }, + { + "epoch": 0.55397204854726, + "grad_norm": 0.8349658155451529, + "learning_rate": 4.370606007670673e-06, + "loss": 0.4181, + "step": 18075 + }, + { + "epoch": 0.5540026970700013, + "grad_norm": 2.534582390268363, + "learning_rate": 4.3701136408305575e-06, + "loss": 0.6421, + "step": 18076 + }, + { + "epoch": 0.5540333455927424, + "grad_norm": 1.7667417207283613, + "learning_rate": 4.3696212801968765e-06, + "loss": 0.6332, + "step": 18077 + }, + { + "epoch": 0.5540639941154837, + "grad_norm": 1.5325313511336653, + "learning_rate": 4.369128925774477e-06, + "loss": 0.5609, + "step": 18078 + }, + { + "epoch": 0.5540946426382248, + "grad_norm": 1.577530026319836, + "learning_rate": 4.368636577568211e-06, + "loss": 0.5807, + "step": 18079 + }, + { + "epoch": 0.5541252911609661, + "grad_norm": 1.8258934301853833, + "learning_rate": 4.368144235582931e-06, + "loss": 0.6346, + "step": 18080 + }, + { + "epoch": 0.5541559396837072, + "grad_norm": 1.8828498988183946, + "learning_rate": 4.367651899823489e-06, + "loss": 0.6726, + "step": 18081 + }, + { + "epoch": 
0.5541865882064484, + "grad_norm": 1.8236501461808488, + "learning_rate": 4.367159570294731e-06, + "loss": 0.6599, + "step": 18082 + }, + { + "epoch": 0.5542172367291897, + "grad_norm": 1.7675868185141503, + "learning_rate": 4.366667247001516e-06, + "loss": 0.6626, + "step": 18083 + }, + { + "epoch": 0.5542478852519308, + "grad_norm": 1.7423401674383647, + "learning_rate": 4.3661749299486886e-06, + "loss": 0.6471, + "step": 18084 + }, + { + "epoch": 0.5542785337746721, + "grad_norm": 1.954617924408033, + "learning_rate": 4.3656826191411014e-06, + "loss": 0.6396, + "step": 18085 + }, + { + "epoch": 0.5543091822974132, + "grad_norm": 0.8755825224813867, + "learning_rate": 4.365190314583606e-06, + "loss": 0.4246, + "step": 18086 + }, + { + "epoch": 0.5543398308201545, + "grad_norm": 1.8685813533454363, + "learning_rate": 4.364698016281052e-06, + "loss": 0.6717, + "step": 18087 + }, + { + "epoch": 0.5543704793428956, + "grad_norm": 1.5820127171551148, + "learning_rate": 4.364205724238292e-06, + "loss": 0.6209, + "step": 18088 + }, + { + "epoch": 0.5544011278656369, + "grad_norm": 0.7954903293509484, + "learning_rate": 4.363713438460176e-06, + "loss": 0.4192, + "step": 18089 + }, + { + "epoch": 0.554431776388378, + "grad_norm": 1.6596025875048788, + "learning_rate": 4.3632211589515525e-06, + "loss": 0.6012, + "step": 18090 + }, + { + "epoch": 0.5544624249111193, + "grad_norm": 1.6338846190801706, + "learning_rate": 4.362728885717275e-06, + "loss": 0.6528, + "step": 18091 + }, + { + "epoch": 0.5544930734338605, + "grad_norm": 0.7647129505111321, + "learning_rate": 4.362236618762195e-06, + "loss": 0.4145, + "step": 18092 + }, + { + "epoch": 0.5545237219566017, + "grad_norm": 1.6993550352875257, + "learning_rate": 4.361744358091157e-06, + "loss": 0.6644, + "step": 18093 + }, + { + "epoch": 0.5545543704793429, + "grad_norm": 1.7616654523396866, + "learning_rate": 4.361252103709018e-06, + "loss": 0.5864, + "step": 18094 + }, + { + "epoch": 0.5545850190020841, + "grad_norm": 1.7805207286816098, + "learning_rate": 4.360759855620622e-06, + "loss": 0.6406, + "step": 18095 + }, + { + "epoch": 0.5546156675248253, + "grad_norm": 1.7093329015526082, + "learning_rate": 4.360267613830827e-06, + "loss": 0.6812, + "step": 18096 + }, + { + "epoch": 0.5546463160475665, + "grad_norm": 1.9683971635114847, + "learning_rate": 4.359775378344478e-06, + "loss": 0.6052, + "step": 18097 + }, + { + "epoch": 0.5546769645703077, + "grad_norm": 1.7462694649723836, + "learning_rate": 4.359283149166423e-06, + "loss": 0.6878, + "step": 18098 + }, + { + "epoch": 0.554707613093049, + "grad_norm": 1.665311672069378, + "learning_rate": 4.358790926301518e-06, + "loss": 0.5876, + "step": 18099 + }, + { + "epoch": 0.5547382616157901, + "grad_norm": 1.7031086629349712, + "learning_rate": 4.3582987097546095e-06, + "loss": 0.5431, + "step": 18100 + }, + { + "epoch": 0.5547689101385314, + "grad_norm": 1.527675950771673, + "learning_rate": 4.357806499530547e-06, + "loss": 0.5893, + "step": 18101 + }, + { + "epoch": 0.5547995586612725, + "grad_norm": 1.8519594536980188, + "learning_rate": 4.357314295634182e-06, + "loss": 0.632, + "step": 18102 + }, + { + "epoch": 0.5548302071840138, + "grad_norm": 1.9164449846410951, + "learning_rate": 4.356822098070362e-06, + "loss": 0.7052, + "step": 18103 + }, + { + "epoch": 0.5548608557067549, + "grad_norm": 1.595291742289142, + "learning_rate": 4.356329906843941e-06, + "loss": 0.6369, + "step": 18104 + }, + { + "epoch": 0.5548915042294962, + "grad_norm": 0.9126585788476878, + "learning_rate": 
4.355837721959766e-06, + "loss": 0.4582, + "step": 18105 + }, + { + "epoch": 0.5549221527522373, + "grad_norm": 1.5875263551039132, + "learning_rate": 4.355345543422686e-06, + "loss": 0.6327, + "step": 18106 + }, + { + "epoch": 0.5549528012749786, + "grad_norm": 1.6889426060132848, + "learning_rate": 4.354853371237551e-06, + "loss": 0.5422, + "step": 18107 + }, + { + "epoch": 0.5549834497977197, + "grad_norm": 0.8501194803078754, + "learning_rate": 4.354361205409212e-06, + "loss": 0.4219, + "step": 18108 + }, + { + "epoch": 0.555014098320461, + "grad_norm": 0.8278790076650129, + "learning_rate": 4.353869045942515e-06, + "loss": 0.4316, + "step": 18109 + }, + { + "epoch": 0.5550447468432022, + "grad_norm": 0.8625919780056834, + "learning_rate": 4.353376892842313e-06, + "loss": 0.4159, + "step": 18110 + }, + { + "epoch": 0.5550753953659434, + "grad_norm": 1.6617109031653352, + "learning_rate": 4.352884746113454e-06, + "loss": 0.5862, + "step": 18111 + }, + { + "epoch": 0.5551060438886846, + "grad_norm": 1.7639755096767407, + "learning_rate": 4.3523926057607866e-06, + "loss": 0.622, + "step": 18112 + }, + { + "epoch": 0.5551366924114257, + "grad_norm": 0.7775630592063527, + "learning_rate": 4.351900471789162e-06, + "loss": 0.4173, + "step": 18113 + }, + { + "epoch": 0.555167340934167, + "grad_norm": 1.5117165626324283, + "learning_rate": 4.351408344203425e-06, + "loss": 0.5276, + "step": 18114 + }, + { + "epoch": 0.5551979894569081, + "grad_norm": 1.7507839893413182, + "learning_rate": 4.350916223008431e-06, + "loss": 0.6397, + "step": 18115 + }, + { + "epoch": 0.5552286379796494, + "grad_norm": 1.6148981776286087, + "learning_rate": 4.350424108209024e-06, + "loss": 0.4938, + "step": 18116 + }, + { + "epoch": 0.5552592865023905, + "grad_norm": 1.6077777073952555, + "learning_rate": 4.349931999810053e-06, + "loss": 0.4928, + "step": 18117 + }, + { + "epoch": 0.5552899350251318, + "grad_norm": 1.739826296399786, + "learning_rate": 4.349439897816371e-06, + "loss": 0.715, + "step": 18118 + }, + { + "epoch": 0.555320583547873, + "grad_norm": 2.149103311111994, + "learning_rate": 4.348947802232823e-06, + "loss": 0.6383, + "step": 18119 + }, + { + "epoch": 0.5553512320706142, + "grad_norm": 1.7725033271960777, + "learning_rate": 4.348455713064257e-06, + "loss": 0.7183, + "step": 18120 + }, + { + "epoch": 0.5553818805933554, + "grad_norm": 1.8054733196568442, + "learning_rate": 4.347963630315526e-06, + "loss": 0.6566, + "step": 18121 + }, + { + "epoch": 0.5554125291160966, + "grad_norm": 1.76861585507201, + "learning_rate": 4.347471553991475e-06, + "loss": 0.5892, + "step": 18122 + }, + { + "epoch": 0.5554431776388378, + "grad_norm": 1.7210083485877332, + "learning_rate": 4.346979484096954e-06, + "loss": 0.6238, + "step": 18123 + }, + { + "epoch": 0.555473826161579, + "grad_norm": 1.7528677419677954, + "learning_rate": 4.346487420636812e-06, + "loss": 0.552, + "step": 18124 + }, + { + "epoch": 0.5555044746843202, + "grad_norm": 0.8567810874509721, + "learning_rate": 4.345995363615894e-06, + "loss": 0.4346, + "step": 18125 + }, + { + "epoch": 0.5555351232070614, + "grad_norm": 1.6089224643446387, + "learning_rate": 4.345503313039056e-06, + "loss": 0.5546, + "step": 18126 + }, + { + "epoch": 0.5555657717298026, + "grad_norm": 1.7586590636397301, + "learning_rate": 4.345011268911138e-06, + "loss": 0.5978, + "step": 18127 + }, + { + "epoch": 0.5555964202525439, + "grad_norm": 0.8225388257050933, + "learning_rate": 4.344519231236991e-06, + "loss": 0.4444, + "step": 18128 + }, + { + "epoch": 
0.555627068775285, + "grad_norm": 0.8223994936163471, + "learning_rate": 4.344027200021465e-06, + "loss": 0.4372, + "step": 18129 + }, + { + "epoch": 0.5556577172980263, + "grad_norm": 1.7580994602189852, + "learning_rate": 4.343535175269406e-06, + "loss": 0.6089, + "step": 18130 + }, + { + "epoch": 0.5556883658207674, + "grad_norm": 1.674267680031877, + "learning_rate": 4.3430431569856635e-06, + "loss": 0.5568, + "step": 18131 + }, + { + "epoch": 0.5557190143435087, + "grad_norm": 1.8113383958607614, + "learning_rate": 4.342551145175085e-06, + "loss": 0.5911, + "step": 18132 + }, + { + "epoch": 0.5557496628662498, + "grad_norm": 1.7727048992146854, + "learning_rate": 4.342059139842517e-06, + "loss": 0.5721, + "step": 18133 + }, + { + "epoch": 0.5557803113889911, + "grad_norm": 1.7470013266095339, + "learning_rate": 4.34156714099281e-06, + "loss": 0.6891, + "step": 18134 + }, + { + "epoch": 0.5558109599117322, + "grad_norm": 1.758341039690562, + "learning_rate": 4.3410751486308116e-06, + "loss": 0.6154, + "step": 18135 + }, + { + "epoch": 0.5558416084344735, + "grad_norm": 1.6982860208229766, + "learning_rate": 4.340583162761365e-06, + "loss": 0.5978, + "step": 18136 + }, + { + "epoch": 0.5558722569572146, + "grad_norm": 1.790847954967096, + "learning_rate": 4.340091183389324e-06, + "loss": 0.6347, + "step": 18137 + }, + { + "epoch": 0.5559029054799559, + "grad_norm": 1.8884436431926144, + "learning_rate": 4.339599210519533e-06, + "loss": 0.7412, + "step": 18138 + }, + { + "epoch": 0.5559335540026971, + "grad_norm": 1.8478512684629964, + "learning_rate": 4.3391072441568384e-06, + "loss": 0.6703, + "step": 18139 + }, + { + "epoch": 0.5559642025254383, + "grad_norm": 1.7337975430574442, + "learning_rate": 4.338615284306091e-06, + "loss": 0.6565, + "step": 18140 + }, + { + "epoch": 0.5559948510481795, + "grad_norm": 0.8082860417345223, + "learning_rate": 4.338123330972135e-06, + "loss": 0.4125, + "step": 18141 + }, + { + "epoch": 0.5560254995709207, + "grad_norm": 1.6202321855368647, + "learning_rate": 4.337631384159819e-06, + "loss": 0.5057, + "step": 18142 + }, + { + "epoch": 0.5560561480936619, + "grad_norm": 1.7424562641648897, + "learning_rate": 4.337139443873992e-06, + "loss": 0.5769, + "step": 18143 + }, + { + "epoch": 0.556086796616403, + "grad_norm": 1.6144002704154936, + "learning_rate": 4.3366475101194984e-06, + "loss": 0.6759, + "step": 18144 + }, + { + "epoch": 0.5561174451391443, + "grad_norm": 1.427549515785848, + "learning_rate": 4.336155582901187e-06, + "loss": 0.5024, + "step": 18145 + }, + { + "epoch": 0.5561480936618854, + "grad_norm": 2.010285227350116, + "learning_rate": 4.335663662223907e-06, + "loss": 0.7088, + "step": 18146 + }, + { + "epoch": 0.5561787421846267, + "grad_norm": 0.8071548549393178, + "learning_rate": 4.3351717480924986e-06, + "loss": 0.46, + "step": 18147 + }, + { + "epoch": 0.5562093907073679, + "grad_norm": 1.7403211295488743, + "learning_rate": 4.334679840511816e-06, + "loss": 0.6233, + "step": 18148 + }, + { + "epoch": 0.5562400392301091, + "grad_norm": 1.8231852389195702, + "learning_rate": 4.3341879394867e-06, + "loss": 0.7125, + "step": 18149 + }, + { + "epoch": 0.5562706877528503, + "grad_norm": 0.7663845736425469, + "learning_rate": 4.333696045022005e-06, + "loss": 0.4161, + "step": 18150 + }, + { + "epoch": 0.5563013362755915, + "grad_norm": 1.8138411238089014, + "learning_rate": 4.333204157122571e-06, + "loss": 0.6013, + "step": 18151 + }, + { + "epoch": 0.5563319847983327, + "grad_norm": 1.6646924315766454, + "learning_rate": 
4.332712275793246e-06, + "loss": 0.5179, + "step": 18152 + }, + { + "epoch": 0.5563626333210739, + "grad_norm": 1.7113492234833838, + "learning_rate": 4.332220401038879e-06, + "loss": 0.6069, + "step": 18153 + }, + { + "epoch": 0.5563932818438151, + "grad_norm": 1.658254247162296, + "learning_rate": 4.331728532864315e-06, + "loss": 0.621, + "step": 18154 + }, + { + "epoch": 0.5564239303665564, + "grad_norm": 1.6754812347224382, + "learning_rate": 4.3312366712744e-06, + "loss": 0.5305, + "step": 18155 + }, + { + "epoch": 0.5564545788892975, + "grad_norm": 1.9784871407978897, + "learning_rate": 4.330744816273983e-06, + "loss": 0.7179, + "step": 18156 + }, + { + "epoch": 0.5564852274120388, + "grad_norm": 1.6563595741720578, + "learning_rate": 4.330252967867906e-06, + "loss": 0.6427, + "step": 18157 + }, + { + "epoch": 0.5565158759347799, + "grad_norm": 1.8773662394931567, + "learning_rate": 4.329761126061021e-06, + "loss": 0.6373, + "step": 18158 + }, + { + "epoch": 0.5565465244575212, + "grad_norm": 0.7993402488394595, + "learning_rate": 4.329269290858171e-06, + "loss": 0.4455, + "step": 18159 + }, + { + "epoch": 0.5565771729802623, + "grad_norm": 1.6658616253021101, + "learning_rate": 4.3287774622641995e-06, + "loss": 0.6239, + "step": 18160 + }, + { + "epoch": 0.5566078215030036, + "grad_norm": 1.5092601041921054, + "learning_rate": 4.328285640283957e-06, + "loss": 0.5584, + "step": 18161 + }, + { + "epoch": 0.5566384700257447, + "grad_norm": 1.981855055807902, + "learning_rate": 4.327793824922288e-06, + "loss": 0.6427, + "step": 18162 + }, + { + "epoch": 0.556669118548486, + "grad_norm": 0.7815716599096074, + "learning_rate": 4.327302016184037e-06, + "loss": 0.4393, + "step": 18163 + }, + { + "epoch": 0.5566997670712271, + "grad_norm": 1.8067743017458604, + "learning_rate": 4.326810214074053e-06, + "loss": 0.6449, + "step": 18164 + }, + { + "epoch": 0.5567304155939684, + "grad_norm": 1.9077435767622228, + "learning_rate": 4.326318418597181e-06, + "loss": 0.6603, + "step": 18165 + }, + { + "epoch": 0.5567610641167096, + "grad_norm": 1.638484502732098, + "learning_rate": 4.325826629758263e-06, + "loss": 0.674, + "step": 18166 + }, + { + "epoch": 0.5567917126394508, + "grad_norm": 1.9489895751238193, + "learning_rate": 4.325334847562151e-06, + "loss": 0.634, + "step": 18167 + }, + { + "epoch": 0.556822361162192, + "grad_norm": 1.6006391806565843, + "learning_rate": 4.324843072013684e-06, + "loss": 0.5814, + "step": 18168 + }, + { + "epoch": 0.5568530096849332, + "grad_norm": 1.9295230581824923, + "learning_rate": 4.324351303117714e-06, + "loss": 0.6021, + "step": 18169 + }, + { + "epoch": 0.5568836582076744, + "grad_norm": 0.7632752239142216, + "learning_rate": 4.3238595408790825e-06, + "loss": 0.4241, + "step": 18170 + }, + { + "epoch": 0.5569143067304156, + "grad_norm": 0.7975467334515438, + "learning_rate": 4.323367785302634e-06, + "loss": 0.4335, + "step": 18171 + }, + { + "epoch": 0.5569449552531568, + "grad_norm": 1.681949263864094, + "learning_rate": 4.3228760363932186e-06, + "loss": 0.5432, + "step": 18172 + }, + { + "epoch": 0.556975603775898, + "grad_norm": 1.740128325630817, + "learning_rate": 4.322384294155678e-06, + "loss": 0.6554, + "step": 18173 + }, + { + "epoch": 0.5570062522986392, + "grad_norm": 0.7324400560136118, + "learning_rate": 4.321892558594857e-06, + "loss": 0.4206, + "step": 18174 + }, + { + "epoch": 0.5570369008213804, + "grad_norm": 2.053925304574529, + "learning_rate": 4.321400829715604e-06, + "loss": 0.7348, + "step": 18175 + }, + { + "epoch": 
0.5570675493441216, + "grad_norm": 1.8391529647037108, + "learning_rate": 4.3209091075227605e-06, + "loss": 0.7132, + "step": 18176 + }, + { + "epoch": 0.5570981978668628, + "grad_norm": 1.5477755053115254, + "learning_rate": 4.320417392021175e-06, + "loss": 0.5756, + "step": 18177 + }, + { + "epoch": 0.557128846389604, + "grad_norm": 1.7122409475321987, + "learning_rate": 4.319925683215691e-06, + "loss": 0.6593, + "step": 18178 + }, + { + "epoch": 0.5571594949123452, + "grad_norm": 1.7102954731286215, + "learning_rate": 4.319433981111151e-06, + "loss": 0.6393, + "step": 18179 + }, + { + "epoch": 0.5571901434350864, + "grad_norm": 1.7253658147816584, + "learning_rate": 4.318942285712404e-06, + "loss": 0.6336, + "step": 18180 + }, + { + "epoch": 0.5572207919578276, + "grad_norm": 1.5947748614819008, + "learning_rate": 4.3184505970242926e-06, + "loss": 0.6229, + "step": 18181 + }, + { + "epoch": 0.5572514404805688, + "grad_norm": 1.7518764737595398, + "learning_rate": 4.317958915051661e-06, + "loss": 0.6887, + "step": 18182 + }, + { + "epoch": 0.55728208900331, + "grad_norm": 1.6464466868903223, + "learning_rate": 4.317467239799355e-06, + "loss": 0.6171, + "step": 18183 + }, + { + "epoch": 0.5573127375260513, + "grad_norm": 1.742090916112131, + "learning_rate": 4.31697557127222e-06, + "loss": 0.6794, + "step": 18184 + }, + { + "epoch": 0.5573433860487924, + "grad_norm": 1.6563087738355686, + "learning_rate": 4.316483909475098e-06, + "loss": 0.6299, + "step": 18185 + }, + { + "epoch": 0.5573740345715337, + "grad_norm": 1.5837049334527946, + "learning_rate": 4.315992254412836e-06, + "loss": 0.5742, + "step": 18186 + }, + { + "epoch": 0.5574046830942748, + "grad_norm": 1.5328613764090715, + "learning_rate": 4.315500606090276e-06, + "loss": 0.6272, + "step": 18187 + }, + { + "epoch": 0.5574353316170161, + "grad_norm": 0.8334696181623037, + "learning_rate": 4.315008964512265e-06, + "loss": 0.4171, + "step": 18188 + }, + { + "epoch": 0.5574659801397572, + "grad_norm": 0.8616883923097378, + "learning_rate": 4.3145173296836475e-06, + "loss": 0.4321, + "step": 18189 + }, + { + "epoch": 0.5574966286624985, + "grad_norm": 0.8141938935811389, + "learning_rate": 4.314025701609262e-06, + "loss": 0.425, + "step": 18190 + }, + { + "epoch": 0.5575272771852396, + "grad_norm": 1.841581076079749, + "learning_rate": 4.3135340802939605e-06, + "loss": 0.6292, + "step": 18191 + }, + { + "epoch": 0.5575579257079809, + "grad_norm": 0.7604096238462, + "learning_rate": 4.313042465742582e-06, + "loss": 0.4081, + "step": 18192 + }, + { + "epoch": 0.5575885742307221, + "grad_norm": 1.8461274251865876, + "learning_rate": 4.31255085795997e-06, + "loss": 0.6542, + "step": 18193 + }, + { + "epoch": 0.5576192227534633, + "grad_norm": 0.8657464940140652, + "learning_rate": 4.312059256950973e-06, + "loss": 0.4214, + "step": 18194 + }, + { + "epoch": 0.5576498712762045, + "grad_norm": 1.6323000524634934, + "learning_rate": 4.3115676627204305e-06, + "loss": 0.6454, + "step": 18195 + }, + { + "epoch": 0.5576805197989457, + "grad_norm": 2.0212797740494715, + "learning_rate": 4.311076075273189e-06, + "loss": 0.6005, + "step": 18196 + }, + { + "epoch": 0.5577111683216869, + "grad_norm": 1.7594960783100821, + "learning_rate": 4.310584494614091e-06, + "loss": 0.6954, + "step": 18197 + }, + { + "epoch": 0.5577418168444281, + "grad_norm": 1.7572071145933317, + "learning_rate": 4.310092920747979e-06, + "loss": 0.6691, + "step": 18198 + }, + { + "epoch": 0.5577724653671693, + "grad_norm": 1.9137425297984312, + "learning_rate": 
4.3096013536797e-06, + "loss": 0.5264, + "step": 18199 + }, + { + "epoch": 0.5578031138899106, + "grad_norm": 1.6687860524205502, + "learning_rate": 4.309109793414096e-06, + "loss": 0.6282, + "step": 18200 + }, + { + "epoch": 0.5578337624126517, + "grad_norm": 1.7561524143409744, + "learning_rate": 4.308618239956006e-06, + "loss": 0.7043, + "step": 18201 + }, + { + "epoch": 0.557864410935393, + "grad_norm": 0.827190322844107, + "learning_rate": 4.308126693310281e-06, + "loss": 0.421, + "step": 18202 + }, + { + "epoch": 0.5578950594581341, + "grad_norm": 1.802653164367574, + "learning_rate": 4.307635153481759e-06, + "loss": 0.6978, + "step": 18203 + }, + { + "epoch": 0.5579257079808754, + "grad_norm": 1.7110174402916845, + "learning_rate": 4.307143620475287e-06, + "loss": 0.6341, + "step": 18204 + }, + { + "epoch": 0.5579563565036165, + "grad_norm": 1.8155268095183494, + "learning_rate": 4.306652094295705e-06, + "loss": 0.6548, + "step": 18205 + }, + { + "epoch": 0.5579870050263577, + "grad_norm": 0.8624619116237059, + "learning_rate": 4.306160574947856e-06, + "loss": 0.4512, + "step": 18206 + }, + { + "epoch": 0.5580176535490989, + "grad_norm": 1.7673158504777466, + "learning_rate": 4.305669062436586e-06, + "loss": 0.6209, + "step": 18207 + }, + { + "epoch": 0.5580483020718401, + "grad_norm": 1.9166957551965549, + "learning_rate": 4.305177556766736e-06, + "loss": 0.6357, + "step": 18208 + }, + { + "epoch": 0.5580789505945813, + "grad_norm": 1.6273459166187048, + "learning_rate": 4.3046860579431485e-06, + "loss": 0.6834, + "step": 18209 + }, + { + "epoch": 0.5581095991173225, + "grad_norm": 1.854297253691532, + "learning_rate": 4.30419456597067e-06, + "loss": 0.6488, + "step": 18210 + }, + { + "epoch": 0.5581402476400638, + "grad_norm": 2.177098064441358, + "learning_rate": 4.303703080854138e-06, + "loss": 0.6078, + "step": 18211 + }, + { + "epoch": 0.5581708961628049, + "grad_norm": 1.808427810625711, + "learning_rate": 4.3032116025983975e-06, + "loss": 0.6365, + "step": 18212 + }, + { + "epoch": 0.5582015446855462, + "grad_norm": 1.5772504058565677, + "learning_rate": 4.302720131208292e-06, + "loss": 0.5218, + "step": 18213 + }, + { + "epoch": 0.5582321932082873, + "grad_norm": 0.8102777136019015, + "learning_rate": 4.302228666688663e-06, + "loss": 0.4325, + "step": 18214 + }, + { + "epoch": 0.5582628417310286, + "grad_norm": 1.916863421921085, + "learning_rate": 4.3017372090443545e-06, + "loss": 0.6115, + "step": 18215 + }, + { + "epoch": 0.5582934902537697, + "grad_norm": 1.6716891403149299, + "learning_rate": 4.3012457582802076e-06, + "loss": 0.472, + "step": 18216 + }, + { + "epoch": 0.558324138776511, + "grad_norm": 0.8000982991022793, + "learning_rate": 4.300754314401064e-06, + "loss": 0.4226, + "step": 18217 + }, + { + "epoch": 0.5583547872992521, + "grad_norm": 1.532066073612832, + "learning_rate": 4.300262877411767e-06, + "loss": 0.5629, + "step": 18218 + }, + { + "epoch": 0.5583854358219934, + "grad_norm": 1.6469285108284124, + "learning_rate": 4.299771447317162e-06, + "loss": 0.6048, + "step": 18219 + }, + { + "epoch": 0.5584160843447346, + "grad_norm": 1.6697007840103992, + "learning_rate": 4.299280024122084e-06, + "loss": 0.6451, + "step": 18220 + }, + { + "epoch": 0.5584467328674758, + "grad_norm": 1.7735965092590515, + "learning_rate": 4.298788607831382e-06, + "loss": 0.6544, + "step": 18221 + }, + { + "epoch": 0.558477381390217, + "grad_norm": 1.7580214915442545, + "learning_rate": 4.2982971984498924e-06, + "loss": 0.6313, + "step": 18222 + }, + { + "epoch": 
0.5585080299129582, + "grad_norm": 1.7955074827093684, + "learning_rate": 4.297805795982464e-06, + "loss": 0.5953, + "step": 18223 + }, + { + "epoch": 0.5585386784356994, + "grad_norm": 1.7687288577730584, + "learning_rate": 4.2973144004339325e-06, + "loss": 0.6711, + "step": 18224 + }, + { + "epoch": 0.5585693269584406, + "grad_norm": 1.7134020542985393, + "learning_rate": 4.296823011809142e-06, + "loss": 0.7034, + "step": 18225 + }, + { + "epoch": 0.5585999754811818, + "grad_norm": 1.763558065979225, + "learning_rate": 4.2963316301129345e-06, + "loss": 0.661, + "step": 18226 + }, + { + "epoch": 0.558630624003923, + "grad_norm": 1.6098691138213215, + "learning_rate": 4.295840255350151e-06, + "loss": 0.6282, + "step": 18227 + }, + { + "epoch": 0.5586612725266642, + "grad_norm": 1.819467883840178, + "learning_rate": 4.295348887525633e-06, + "loss": 0.6691, + "step": 18228 + }, + { + "epoch": 0.5586919210494055, + "grad_norm": 1.7232561463964167, + "learning_rate": 4.294857526644225e-06, + "loss": 0.58, + "step": 18229 + }, + { + "epoch": 0.5587225695721466, + "grad_norm": 1.973755419411058, + "learning_rate": 4.294366172710764e-06, + "loss": 0.6818, + "step": 18230 + }, + { + "epoch": 0.5587532180948879, + "grad_norm": 1.91378868013843, + "learning_rate": 4.293874825730095e-06, + "loss": 0.5916, + "step": 18231 + }, + { + "epoch": 0.558783866617629, + "grad_norm": 1.7754878855544414, + "learning_rate": 4.293383485707059e-06, + "loss": 0.5812, + "step": 18232 + }, + { + "epoch": 0.5588145151403703, + "grad_norm": 2.206454463870113, + "learning_rate": 4.292892152646493e-06, + "loss": 0.7403, + "step": 18233 + }, + { + "epoch": 0.5588451636631114, + "grad_norm": 1.7273086799634774, + "learning_rate": 4.292400826553245e-06, + "loss": 0.5779, + "step": 18234 + }, + { + "epoch": 0.5588758121858527, + "grad_norm": 1.6012889286971348, + "learning_rate": 4.291909507432151e-06, + "loss": 0.6467, + "step": 18235 + }, + { + "epoch": 0.5589064607085938, + "grad_norm": 2.2101341521943736, + "learning_rate": 4.291418195288053e-06, + "loss": 0.6936, + "step": 18236 + }, + { + "epoch": 0.558937109231335, + "grad_norm": 1.8532168012200856, + "learning_rate": 4.290926890125794e-06, + "loss": 0.7509, + "step": 18237 + }, + { + "epoch": 0.5589677577540763, + "grad_norm": 1.6060339270547654, + "learning_rate": 4.290435591950215e-06, + "loss": 0.5984, + "step": 18238 + }, + { + "epoch": 0.5589984062768174, + "grad_norm": 1.7708824173788507, + "learning_rate": 4.289944300766153e-06, + "loss": 0.6675, + "step": 18239 + }, + { + "epoch": 0.5590290547995587, + "grad_norm": 1.739239857525158, + "learning_rate": 4.289453016578453e-06, + "loss": 0.5807, + "step": 18240 + }, + { + "epoch": 0.5590597033222998, + "grad_norm": 1.6639312317794535, + "learning_rate": 4.288961739391953e-06, + "loss": 0.5859, + "step": 18241 + }, + { + "epoch": 0.5590903518450411, + "grad_norm": 0.8425177875965819, + "learning_rate": 4.2884704692114965e-06, + "loss": 0.4321, + "step": 18242 + }, + { + "epoch": 0.5591210003677822, + "grad_norm": 1.8566325308227511, + "learning_rate": 4.287979206041923e-06, + "loss": 0.6469, + "step": 18243 + }, + { + "epoch": 0.5591516488905235, + "grad_norm": 0.830291333683026, + "learning_rate": 4.287487949888069e-06, + "loss": 0.4158, + "step": 18244 + }, + { + "epoch": 0.5591822974132646, + "grad_norm": 1.8209368977757487, + "learning_rate": 4.286996700754783e-06, + "loss": 0.6447, + "step": 18245 + }, + { + "epoch": 0.5592129459360059, + "grad_norm": 1.8115211974341345, + "learning_rate": 
4.286505458646899e-06, + "loss": 0.6304, + "step": 18246 + }, + { + "epoch": 0.559243594458747, + "grad_norm": 1.7447624757025266, + "learning_rate": 4.286014223569258e-06, + "loss": 0.6714, + "step": 18247 + }, + { + "epoch": 0.5592742429814883, + "grad_norm": 1.5792811317166957, + "learning_rate": 4.285522995526703e-06, + "loss": 0.6478, + "step": 18248 + }, + { + "epoch": 0.5593048915042295, + "grad_norm": 0.7888279172824352, + "learning_rate": 4.285031774524072e-06, + "loss": 0.4253, + "step": 18249 + }, + { + "epoch": 0.5593355400269707, + "grad_norm": 1.7523924685181238, + "learning_rate": 4.284540560566207e-06, + "loss": 0.5784, + "step": 18250 + }, + { + "epoch": 0.5593661885497119, + "grad_norm": 1.7011559146679114, + "learning_rate": 4.284049353657946e-06, + "loss": 0.6307, + "step": 18251 + }, + { + "epoch": 0.5593968370724531, + "grad_norm": 1.5613858212079106, + "learning_rate": 4.28355815380413e-06, + "loss": 0.542, + "step": 18252 + }, + { + "epoch": 0.5594274855951943, + "grad_norm": 1.7633779496527795, + "learning_rate": 4.283066961009599e-06, + "loss": 0.6991, + "step": 18253 + }, + { + "epoch": 0.5594581341179355, + "grad_norm": 1.5985714362733818, + "learning_rate": 4.282575775279194e-06, + "loss": 0.5875, + "step": 18254 + }, + { + "epoch": 0.5594887826406767, + "grad_norm": 1.9756623288757447, + "learning_rate": 4.282084596617752e-06, + "loss": 0.602, + "step": 18255 + }, + { + "epoch": 0.559519431163418, + "grad_norm": 1.7726845840603358, + "learning_rate": 4.281593425030114e-06, + "loss": 0.6773, + "step": 18256 + }, + { + "epoch": 0.5595500796861591, + "grad_norm": 1.7280410336334477, + "learning_rate": 4.281102260521119e-06, + "loss": 0.5273, + "step": 18257 + }, + { + "epoch": 0.5595807282089004, + "grad_norm": 1.9195354088216923, + "learning_rate": 4.280611103095609e-06, + "loss": 0.6859, + "step": 18258 + }, + { + "epoch": 0.5596113767316415, + "grad_norm": 1.8163723203618833, + "learning_rate": 4.280119952758422e-06, + "loss": 0.6519, + "step": 18259 + }, + { + "epoch": 0.5596420252543828, + "grad_norm": 1.643940080491509, + "learning_rate": 4.279628809514395e-06, + "loss": 0.584, + "step": 18260 + }, + { + "epoch": 0.5596726737771239, + "grad_norm": 1.7126262810693318, + "learning_rate": 4.279137673368371e-06, + "loss": 0.5575, + "step": 18261 + }, + { + "epoch": 0.5597033222998652, + "grad_norm": 1.7448847030204635, + "learning_rate": 4.27864654432519e-06, + "loss": 0.639, + "step": 18262 + }, + { + "epoch": 0.5597339708226063, + "grad_norm": 2.0485779553683736, + "learning_rate": 4.278155422389685e-06, + "loss": 0.6412, + "step": 18263 + }, + { + "epoch": 0.5597646193453476, + "grad_norm": 1.557340546068751, + "learning_rate": 4.277664307566703e-06, + "loss": 0.6657, + "step": 18264 + }, + { + "epoch": 0.5597952678680888, + "grad_norm": 1.8992871336818722, + "learning_rate": 4.277173199861079e-06, + "loss": 0.6608, + "step": 18265 + }, + { + "epoch": 0.55982591639083, + "grad_norm": 1.814599466872143, + "learning_rate": 4.27668209927765e-06, + "loss": 0.6781, + "step": 18266 + }, + { + "epoch": 0.5598565649135712, + "grad_norm": 1.6782371920294197, + "learning_rate": 4.2761910058212595e-06, + "loss": 0.6115, + "step": 18267 + }, + { + "epoch": 0.5598872134363123, + "grad_norm": 1.7468173937283942, + "learning_rate": 4.275699919496742e-06, + "loss": 0.6713, + "step": 18268 + }, + { + "epoch": 0.5599178619590536, + "grad_norm": 1.7966261829743484, + "learning_rate": 4.275208840308941e-06, + "loss": 0.713, + "step": 18269 + }, + { + "epoch": 
0.5599485104817947, + "grad_norm": 0.8152900317103857, + "learning_rate": 4.274717768262692e-06, + "loss": 0.4506, + "step": 18270 + }, + { + "epoch": 0.559979159004536, + "grad_norm": 0.798134758673108, + "learning_rate": 4.274226703362833e-06, + "loss": 0.4128, + "step": 18271 + }, + { + "epoch": 0.5600098075272771, + "grad_norm": 1.7174773311001468, + "learning_rate": 4.273735645614206e-06, + "loss": 0.6506, + "step": 18272 + }, + { + "epoch": 0.5600404560500184, + "grad_norm": 1.7488535544618395, + "learning_rate": 4.273244595021648e-06, + "loss": 0.6847, + "step": 18273 + }, + { + "epoch": 0.5600711045727595, + "grad_norm": 1.5757435554003856, + "learning_rate": 4.272753551589993e-06, + "loss": 0.6457, + "step": 18274 + }, + { + "epoch": 0.5601017530955008, + "grad_norm": 1.7831378897501755, + "learning_rate": 4.272262515324088e-06, + "loss": 0.6406, + "step": 18275 + }, + { + "epoch": 0.560132401618242, + "grad_norm": 1.4767467725064283, + "learning_rate": 4.271771486228762e-06, + "loss": 0.5448, + "step": 18276 + }, + { + "epoch": 0.5601630501409832, + "grad_norm": 0.7767069492215278, + "learning_rate": 4.2712804643088625e-06, + "loss": 0.4235, + "step": 18277 + }, + { + "epoch": 0.5601936986637244, + "grad_norm": 1.7750347698856626, + "learning_rate": 4.2707894495692205e-06, + "loss": 0.61, + "step": 18278 + }, + { + "epoch": 0.5602243471864656, + "grad_norm": 1.7041010515159773, + "learning_rate": 4.270298442014677e-06, + "loss": 0.6517, + "step": 18279 + }, + { + "epoch": 0.5602549957092068, + "grad_norm": 1.6641810930488696, + "learning_rate": 4.26980744165007e-06, + "loss": 0.5669, + "step": 18280 + }, + { + "epoch": 0.560285644231948, + "grad_norm": 1.6521742597252653, + "learning_rate": 4.269316448480237e-06, + "loss": 0.6964, + "step": 18281 + }, + { + "epoch": 0.5603162927546892, + "grad_norm": 1.907828125691995, + "learning_rate": 4.268825462510015e-06, + "loss": 0.6514, + "step": 18282 + }, + { + "epoch": 0.5603469412774305, + "grad_norm": 1.8256853077921529, + "learning_rate": 4.268334483744244e-06, + "loss": 0.6585, + "step": 18283 + }, + { + "epoch": 0.5603775898001716, + "grad_norm": 1.8383346989959168, + "learning_rate": 4.26784351218776e-06, + "loss": 0.6709, + "step": 18284 + }, + { + "epoch": 0.5604082383229129, + "grad_norm": 1.778974982773008, + "learning_rate": 4.267352547845401e-06, + "loss": 0.6546, + "step": 18285 + }, + { + "epoch": 0.560438886845654, + "grad_norm": 1.9785379977423698, + "learning_rate": 4.266861590722007e-06, + "loss": 0.6544, + "step": 18286 + }, + { + "epoch": 0.5604695353683953, + "grad_norm": 1.7473932235111507, + "learning_rate": 4.2663706408224094e-06, + "loss": 0.6929, + "step": 18287 + }, + { + "epoch": 0.5605001838911364, + "grad_norm": 1.815715681516648, + "learning_rate": 4.265879698151453e-06, + "loss": 0.6158, + "step": 18288 + }, + { + "epoch": 0.5605308324138777, + "grad_norm": 1.8345003830614477, + "learning_rate": 4.26538876271397e-06, + "loss": 0.6276, + "step": 18289 + }, + { + "epoch": 0.5605614809366188, + "grad_norm": 1.7864029405649786, + "learning_rate": 4.2648978345147995e-06, + "loss": 0.6614, + "step": 18290 + }, + { + "epoch": 0.5605921294593601, + "grad_norm": 1.5427565006141695, + "learning_rate": 4.264406913558779e-06, + "loss": 0.6137, + "step": 18291 + }, + { + "epoch": 0.5606227779821013, + "grad_norm": 0.7955752315978383, + "learning_rate": 4.263915999850746e-06, + "loss": 0.4119, + "step": 18292 + }, + { + "epoch": 0.5606534265048425, + "grad_norm": 0.8100853174172666, + "learning_rate": 
4.263425093395536e-06, + "loss": 0.4154, + "step": 18293 + }, + { + "epoch": 0.5606840750275837, + "grad_norm": 0.8112030628372098, + "learning_rate": 4.2629341941979885e-06, + "loss": 0.4176, + "step": 18294 + }, + { + "epoch": 0.5607147235503249, + "grad_norm": 1.7341120334110602, + "learning_rate": 4.262443302262937e-06, + "loss": 0.665, + "step": 18295 + }, + { + "epoch": 0.5607453720730661, + "grad_norm": 1.766433211826748, + "learning_rate": 4.261952417595222e-06, + "loss": 0.6642, + "step": 18296 + }, + { + "epoch": 0.5607760205958073, + "grad_norm": 1.8986710329526992, + "learning_rate": 4.261461540199679e-06, + "loss": 0.6402, + "step": 18297 + }, + { + "epoch": 0.5608066691185485, + "grad_norm": 1.629868764721668, + "learning_rate": 4.2609706700811424e-06, + "loss": 0.5279, + "step": 18298 + }, + { + "epoch": 0.5608373176412896, + "grad_norm": 1.5347551054007962, + "learning_rate": 4.260479807244452e-06, + "loss": 0.5903, + "step": 18299 + }, + { + "epoch": 0.5608679661640309, + "grad_norm": 0.8163657944195541, + "learning_rate": 4.2599889516944435e-06, + "loss": 0.4199, + "step": 18300 + }, + { + "epoch": 0.560898614686772, + "grad_norm": 1.6678117835362352, + "learning_rate": 4.259498103435953e-06, + "loss": 0.5814, + "step": 18301 + }, + { + "epoch": 0.5609292632095133, + "grad_norm": 1.7837791416539743, + "learning_rate": 4.259007262473817e-06, + "loss": 0.5747, + "step": 18302 + }, + { + "epoch": 0.5609599117322545, + "grad_norm": 0.8087422538848582, + "learning_rate": 4.258516428812871e-06, + "loss": 0.4248, + "step": 18303 + }, + { + "epoch": 0.5609905602549957, + "grad_norm": 1.5851559423491854, + "learning_rate": 4.258025602457954e-06, + "loss": 0.6361, + "step": 18304 + }, + { + "epoch": 0.5610212087777369, + "grad_norm": 1.6889585136547536, + "learning_rate": 4.2575347834139e-06, + "loss": 0.5812, + "step": 18305 + }, + { + "epoch": 0.5610518573004781, + "grad_norm": 1.5562937770232357, + "learning_rate": 4.257043971685545e-06, + "loss": 0.5547, + "step": 18306 + }, + { + "epoch": 0.5610825058232193, + "grad_norm": 1.9950247491129558, + "learning_rate": 4.256553167277729e-06, + "loss": 0.6392, + "step": 18307 + }, + { + "epoch": 0.5611131543459605, + "grad_norm": 1.8521890301954071, + "learning_rate": 4.256062370195282e-06, + "loss": 0.6618, + "step": 18308 + }, + { + "epoch": 0.5611438028687017, + "grad_norm": 1.6375715019889767, + "learning_rate": 4.2555715804430425e-06, + "loss": 0.5987, + "step": 18309 + }, + { + "epoch": 0.561174451391443, + "grad_norm": 1.7825671500997222, + "learning_rate": 4.255080798025848e-06, + "loss": 0.6443, + "step": 18310 + }, + { + "epoch": 0.5612050999141841, + "grad_norm": 0.807678633767612, + "learning_rate": 4.2545900229485315e-06, + "loss": 0.4519, + "step": 18311 + }, + { + "epoch": 0.5612357484369254, + "grad_norm": 1.853140868200417, + "learning_rate": 4.254099255215931e-06, + "loss": 0.6366, + "step": 18312 + }, + { + "epoch": 0.5612663969596665, + "grad_norm": 1.6466999442976455, + "learning_rate": 4.253608494832882e-06, + "loss": 0.534, + "step": 18313 + }, + { + "epoch": 0.5612970454824078, + "grad_norm": 1.7274825115040355, + "learning_rate": 4.253117741804219e-06, + "loss": 0.6094, + "step": 18314 + }, + { + "epoch": 0.5613276940051489, + "grad_norm": 1.6994993032903898, + "learning_rate": 4.252626996134778e-06, + "loss": 0.6106, + "step": 18315 + }, + { + "epoch": 0.5613583425278902, + "grad_norm": 1.7205394393192148, + "learning_rate": 4.252136257829396e-06, + "loss": 0.6803, + "step": 18316 + }, + { + "epoch": 
0.5613889910506313, + "grad_norm": 0.847567827739303, + "learning_rate": 4.251645526892903e-06, + "loss": 0.4426, + "step": 18317 + }, + { + "epoch": 0.5614196395733726, + "grad_norm": 0.8258509121309311, + "learning_rate": 4.251154803330142e-06, + "loss": 0.4274, + "step": 18318 + }, + { + "epoch": 0.5614502880961137, + "grad_norm": 1.8098150516902793, + "learning_rate": 4.250664087145943e-06, + "loss": 0.622, + "step": 18319 + }, + { + "epoch": 0.561480936618855, + "grad_norm": 1.6859722671224044, + "learning_rate": 4.250173378345141e-06, + "loss": 0.6373, + "step": 18320 + }, + { + "epoch": 0.5615115851415962, + "grad_norm": 1.7852446581828165, + "learning_rate": 4.2496826769325735e-06, + "loss": 0.5931, + "step": 18321 + }, + { + "epoch": 0.5615422336643374, + "grad_norm": 1.741119211311145, + "learning_rate": 4.249191982913074e-06, + "loss": 0.6224, + "step": 18322 + }, + { + "epoch": 0.5615728821870786, + "grad_norm": 1.7257595426255816, + "learning_rate": 4.248701296291479e-06, + "loss": 0.6651, + "step": 18323 + }, + { + "epoch": 0.5616035307098198, + "grad_norm": 1.5868547417518595, + "learning_rate": 4.248210617072623e-06, + "loss": 0.5883, + "step": 18324 + }, + { + "epoch": 0.561634179232561, + "grad_norm": 1.6165287995839959, + "learning_rate": 4.247719945261338e-06, + "loss": 0.6549, + "step": 18325 + }, + { + "epoch": 0.5616648277553022, + "grad_norm": 1.7116372783762877, + "learning_rate": 4.247229280862463e-06, + "loss": 0.5985, + "step": 18326 + }, + { + "epoch": 0.5616954762780434, + "grad_norm": 1.9344143123907522, + "learning_rate": 4.246738623880831e-06, + "loss": 0.6746, + "step": 18327 + }, + { + "epoch": 0.5617261248007847, + "grad_norm": 1.8217815630188265, + "learning_rate": 4.246247974321273e-06, + "loss": 0.7045, + "step": 18328 + }, + { + "epoch": 0.5617567733235258, + "grad_norm": 1.9821401934363105, + "learning_rate": 4.245757332188629e-06, + "loss": 0.6229, + "step": 18329 + }, + { + "epoch": 0.561787421846267, + "grad_norm": 1.947583137378476, + "learning_rate": 4.245266697487729e-06, + "loss": 0.6714, + "step": 18330 + }, + { + "epoch": 0.5618180703690082, + "grad_norm": 2.219514319732754, + "learning_rate": 4.244776070223412e-06, + "loss": 0.6932, + "step": 18331 + }, + { + "epoch": 0.5618487188917494, + "grad_norm": 1.8322268775372945, + "learning_rate": 4.244285450400508e-06, + "loss": 0.704, + "step": 18332 + }, + { + "epoch": 0.5618793674144906, + "grad_norm": 1.7677161654782334, + "learning_rate": 4.2437948380238525e-06, + "loss": 0.6283, + "step": 18333 + }, + { + "epoch": 0.5619100159372318, + "grad_norm": 0.8774018805203285, + "learning_rate": 4.2433042330982805e-06, + "loss": 0.4312, + "step": 18334 + }, + { + "epoch": 0.561940664459973, + "grad_norm": 1.6604494131378522, + "learning_rate": 4.242813635628626e-06, + "loss": 0.5265, + "step": 18335 + }, + { + "epoch": 0.5619713129827142, + "grad_norm": 0.809984554177966, + "learning_rate": 4.242323045619721e-06, + "loss": 0.4256, + "step": 18336 + }, + { + "epoch": 0.5620019615054554, + "grad_norm": 1.6954889760771024, + "learning_rate": 4.241832463076402e-06, + "loss": 0.6437, + "step": 18337 + }, + { + "epoch": 0.5620326100281966, + "grad_norm": 1.9864574780690778, + "learning_rate": 4.241341888003501e-06, + "loss": 0.6729, + "step": 18338 + }, + { + "epoch": 0.5620632585509379, + "grad_norm": 1.8152693975452208, + "learning_rate": 4.240851320405853e-06, + "loss": 0.6283, + "step": 18339 + }, + { + "epoch": 0.562093907073679, + "grad_norm": 1.5713599885520109, + "learning_rate": 
4.240360760288293e-06, + "loss": 0.5351, + "step": 18340 + }, + { + "epoch": 0.5621245555964203, + "grad_norm": 1.727970596414256, + "learning_rate": 4.239870207655648e-06, + "loss": 0.6249, + "step": 18341 + }, + { + "epoch": 0.5621552041191614, + "grad_norm": 1.7553934193827883, + "learning_rate": 4.239379662512761e-06, + "loss": 0.5954, + "step": 18342 + }, + { + "epoch": 0.5621858526419027, + "grad_norm": 1.7683614199777598, + "learning_rate": 4.238889124864461e-06, + "loss": 0.6463, + "step": 18343 + }, + { + "epoch": 0.5622165011646438, + "grad_norm": 1.9552537407125008, + "learning_rate": 4.238398594715577e-06, + "loss": 0.7527, + "step": 18344 + }, + { + "epoch": 0.5622471496873851, + "grad_norm": 1.879937567908243, + "learning_rate": 4.23790807207095e-06, + "loss": 0.6279, + "step": 18345 + }, + { + "epoch": 0.5622777982101262, + "grad_norm": 1.7153084996628756, + "learning_rate": 4.237417556935409e-06, + "loss": 0.5969, + "step": 18346 + }, + { + "epoch": 0.5623084467328675, + "grad_norm": 1.741222893170807, + "learning_rate": 4.236927049313786e-06, + "loss": 0.5689, + "step": 18347 + }, + { + "epoch": 0.5623390952556087, + "grad_norm": 1.7472109661661885, + "learning_rate": 4.236436549210918e-06, + "loss": 0.5417, + "step": 18348 + }, + { + "epoch": 0.5623697437783499, + "grad_norm": 1.6976859473772754, + "learning_rate": 4.235946056631635e-06, + "loss": 0.6397, + "step": 18349 + }, + { + "epoch": 0.5624003923010911, + "grad_norm": 1.6397755913925942, + "learning_rate": 4.2354555715807735e-06, + "loss": 0.6092, + "step": 18350 + }, + { + "epoch": 0.5624310408238323, + "grad_norm": 1.930934148540748, + "learning_rate": 4.2349650940631615e-06, + "loss": 0.6327, + "step": 18351 + }, + { + "epoch": 0.5624616893465735, + "grad_norm": 1.5407398804617871, + "learning_rate": 4.2344746240836345e-06, + "loss": 0.6316, + "step": 18352 + }, + { + "epoch": 0.5624923378693147, + "grad_norm": 1.687765350765832, + "learning_rate": 4.233984161647025e-06, + "loss": 0.6444, + "step": 18353 + }, + { + "epoch": 0.5625229863920559, + "grad_norm": 1.7487674157437285, + "learning_rate": 4.233493706758166e-06, + "loss": 0.6435, + "step": 18354 + }, + { + "epoch": 0.5625536349147972, + "grad_norm": 0.8909633665164679, + "learning_rate": 4.2330032594218885e-06, + "loss": 0.4416, + "step": 18355 + }, + { + "epoch": 0.5625842834375383, + "grad_norm": 1.968967697149699, + "learning_rate": 4.2325128196430265e-06, + "loss": 0.6611, + "step": 18356 + }, + { + "epoch": 0.5626149319602796, + "grad_norm": 1.7302859109414206, + "learning_rate": 4.232022387426412e-06, + "loss": 0.6364, + "step": 18357 + }, + { + "epoch": 0.5626455804830207, + "grad_norm": 1.737331494033916, + "learning_rate": 4.231531962776878e-06, + "loss": 0.5933, + "step": 18358 + }, + { + "epoch": 0.562676229005762, + "grad_norm": 1.6259121232004805, + "learning_rate": 4.231041545699257e-06, + "loss": 0.5748, + "step": 18359 + }, + { + "epoch": 0.5627068775285031, + "grad_norm": 1.7533595253699736, + "learning_rate": 4.230551136198377e-06, + "loss": 0.6605, + "step": 18360 + }, + { + "epoch": 0.5627375260512444, + "grad_norm": 1.9296325537076333, + "learning_rate": 4.230060734279078e-06, + "loss": 0.6184, + "step": 18361 + }, + { + "epoch": 0.5627681745739855, + "grad_norm": 1.5887432339366023, + "learning_rate": 4.229570339946186e-06, + "loss": 0.6695, + "step": 18362 + }, + { + "epoch": 0.5627988230967267, + "grad_norm": 1.6320226335029584, + "learning_rate": 4.229079953204533e-06, + "loss": 0.6055, + "step": 18363 + }, + { + "epoch": 
0.562829471619468, + "grad_norm": 1.754263435042526, + "learning_rate": 4.228589574058954e-06, + "loss": 0.7074, + "step": 18364 + }, + { + "epoch": 0.5628601201422091, + "grad_norm": 0.8202697492370459, + "learning_rate": 4.228099202514279e-06, + "loss": 0.4023, + "step": 18365 + }, + { + "epoch": 0.5628907686649504, + "grad_norm": 1.7114050623941004, + "learning_rate": 4.2276088385753396e-06, + "loss": 0.6612, + "step": 18366 + }, + { + "epoch": 0.5629214171876915, + "grad_norm": 1.8944007158540186, + "learning_rate": 4.227118482246968e-06, + "loss": 0.6147, + "step": 18367 + }, + { + "epoch": 0.5629520657104328, + "grad_norm": 1.5985358785527777, + "learning_rate": 4.226628133533996e-06, + "loss": 0.6284, + "step": 18368 + }, + { + "epoch": 0.5629827142331739, + "grad_norm": 1.40353294889712, + "learning_rate": 4.226137792441254e-06, + "loss": 0.6082, + "step": 18369 + }, + { + "epoch": 0.5630133627559152, + "grad_norm": 1.5342929094417475, + "learning_rate": 4.225647458973578e-06, + "loss": 0.6634, + "step": 18370 + }, + { + "epoch": 0.5630440112786563, + "grad_norm": 1.5920800111875328, + "learning_rate": 4.22515713313579e-06, + "loss": 0.5863, + "step": 18371 + }, + { + "epoch": 0.5630746598013976, + "grad_norm": 1.8256536666871517, + "learning_rate": 4.224666814932731e-06, + "loss": 0.6889, + "step": 18372 + }, + { + "epoch": 0.5631053083241387, + "grad_norm": 2.0145398974698288, + "learning_rate": 4.224176504369228e-06, + "loss": 0.6183, + "step": 18373 + }, + { + "epoch": 0.56313595684688, + "grad_norm": 1.7770669831601027, + "learning_rate": 4.223686201450111e-06, + "loss": 0.6362, + "step": 18374 + }, + { + "epoch": 0.5631666053696212, + "grad_norm": 1.6041706418589101, + "learning_rate": 4.223195906180213e-06, + "loss": 0.6275, + "step": 18375 + }, + { + "epoch": 0.5631972538923624, + "grad_norm": 1.8378334080628465, + "learning_rate": 4.222705618564364e-06, + "loss": 0.6602, + "step": 18376 + }, + { + "epoch": 0.5632279024151036, + "grad_norm": 1.7448477784513514, + "learning_rate": 4.222215338607396e-06, + "loss": 0.6082, + "step": 18377 + }, + { + "epoch": 0.5632585509378448, + "grad_norm": 1.976925924189923, + "learning_rate": 4.22172506631414e-06, + "loss": 0.6639, + "step": 18378 + }, + { + "epoch": 0.563289199460586, + "grad_norm": 1.9183160121011018, + "learning_rate": 4.221234801689424e-06, + "loss": 0.6884, + "step": 18379 + }, + { + "epoch": 0.5633198479833272, + "grad_norm": 1.5209154154140958, + "learning_rate": 4.220744544738082e-06, + "loss": 0.5845, + "step": 18380 + }, + { + "epoch": 0.5633504965060684, + "grad_norm": 1.9925436359424313, + "learning_rate": 4.220254295464945e-06, + "loss": 0.7535, + "step": 18381 + }, + { + "epoch": 0.5633811450288096, + "grad_norm": 0.8097446467611503, + "learning_rate": 4.219764053874838e-06, + "loss": 0.444, + "step": 18382 + }, + { + "epoch": 0.5634117935515508, + "grad_norm": 1.5735352555189237, + "learning_rate": 4.2192738199726e-06, + "loss": 0.5818, + "step": 18383 + }, + { + "epoch": 0.5634424420742921, + "grad_norm": 1.730392135392516, + "learning_rate": 4.2187835937630524e-06, + "loss": 0.6492, + "step": 18384 + }, + { + "epoch": 0.5634730905970332, + "grad_norm": 1.6342946399289462, + "learning_rate": 4.218293375251034e-06, + "loss": 0.5965, + "step": 18385 + }, + { + "epoch": 0.5635037391197745, + "grad_norm": 1.731234903604976, + "learning_rate": 4.217803164441369e-06, + "loss": 0.6138, + "step": 18386 + }, + { + "epoch": 0.5635343876425156, + "grad_norm": 1.7779233853065517, + "learning_rate": 
4.217312961338889e-06, + "loss": 0.54, + "step": 18387 + }, + { + "epoch": 0.5635650361652569, + "grad_norm": 0.8146274892814486, + "learning_rate": 4.216822765948425e-06, + "loss": 0.42, + "step": 18388 + }, + { + "epoch": 0.563595684687998, + "grad_norm": 1.9945741259707648, + "learning_rate": 4.216332578274808e-06, + "loss": 0.716, + "step": 18389 + }, + { + "epoch": 0.5636263332107393, + "grad_norm": 0.7694779526600913, + "learning_rate": 4.215842398322865e-06, + "loss": 0.425, + "step": 18390 + }, + { + "epoch": 0.5636569817334804, + "grad_norm": 0.7666726886408015, + "learning_rate": 4.215352226097428e-06, + "loss": 0.419, + "step": 18391 + }, + { + "epoch": 0.5636876302562217, + "grad_norm": 1.5422678748060343, + "learning_rate": 4.214862061603328e-06, + "loss": 0.6614, + "step": 18392 + }, + { + "epoch": 0.5637182787789629, + "grad_norm": 1.7363121821384788, + "learning_rate": 4.214371904845389e-06, + "loss": 0.6224, + "step": 18393 + }, + { + "epoch": 0.563748927301704, + "grad_norm": 1.7145836337896838, + "learning_rate": 4.213881755828449e-06, + "loss": 0.591, + "step": 18394 + }, + { + "epoch": 0.5637795758244453, + "grad_norm": 1.6594662365229849, + "learning_rate": 4.2133916145573295e-06, + "loss": 0.5301, + "step": 18395 + }, + { + "epoch": 0.5638102243471864, + "grad_norm": 1.541784685854649, + "learning_rate": 4.212901481036866e-06, + "loss": 0.6139, + "step": 18396 + }, + { + "epoch": 0.5638408728699277, + "grad_norm": 1.719826162418137, + "learning_rate": 4.212411355271885e-06, + "loss": 0.631, + "step": 18397 + }, + { + "epoch": 0.5638715213926688, + "grad_norm": 1.7197241459511066, + "learning_rate": 4.211921237267216e-06, + "loss": 0.6726, + "step": 18398 + }, + { + "epoch": 0.5639021699154101, + "grad_norm": 1.7710479848804186, + "learning_rate": 4.2114311270276895e-06, + "loss": 0.6324, + "step": 18399 + }, + { + "epoch": 0.5639328184381512, + "grad_norm": 1.8188538929913012, + "learning_rate": 4.210941024558133e-06, + "loss": 0.5729, + "step": 18400 + }, + { + "epoch": 0.5639634669608925, + "grad_norm": 1.4960231688487038, + "learning_rate": 4.210450929863376e-06, + "loss": 0.7019, + "step": 18401 + }, + { + "epoch": 0.5639941154836337, + "grad_norm": 1.743877312929451, + "learning_rate": 4.20996084294825e-06, + "loss": 0.6821, + "step": 18402 + }, + { + "epoch": 0.5640247640063749, + "grad_norm": 1.758052025433625, + "learning_rate": 4.20947076381758e-06, + "loss": 0.5739, + "step": 18403 + }, + { + "epoch": 0.5640554125291161, + "grad_norm": 1.6748529620470525, + "learning_rate": 4.208980692476199e-06, + "loss": 0.5324, + "step": 18404 + }, + { + "epoch": 0.5640860610518573, + "grad_norm": 0.8302257733201689, + "learning_rate": 4.2084906289289325e-06, + "loss": 0.4225, + "step": 18405 + }, + { + "epoch": 0.5641167095745985, + "grad_norm": 1.8026213901407007, + "learning_rate": 4.20800057318061e-06, + "loss": 0.6254, + "step": 18406 + }, + { + "epoch": 0.5641473580973397, + "grad_norm": 1.61834849071561, + "learning_rate": 4.20751052523606e-06, + "loss": 0.595, + "step": 18407 + }, + { + "epoch": 0.5641780066200809, + "grad_norm": 1.6935964764358835, + "learning_rate": 4.207020485100113e-06, + "loss": 0.6539, + "step": 18408 + }, + { + "epoch": 0.5642086551428221, + "grad_norm": 1.8258016242979105, + "learning_rate": 4.206530452777594e-06, + "loss": 0.559, + "step": 18409 + }, + { + "epoch": 0.5642393036655633, + "grad_norm": 1.6674887418066886, + "learning_rate": 4.206040428273336e-06, + "loss": 0.5923, + "step": 18410 + }, + { + "epoch": 0.5642699521883046, + 
"grad_norm": 0.8285356001638499, + "learning_rate": 4.205550411592162e-06, + "loss": 0.4237, + "step": 18411 + }, + { + "epoch": 0.5643006007110457, + "grad_norm": 1.7127354859503237, + "learning_rate": 4.205060402738905e-06, + "loss": 0.6078, + "step": 18412 + }, + { + "epoch": 0.564331249233787, + "grad_norm": 1.943075944742401, + "learning_rate": 4.204570401718392e-06, + "loss": 0.662, + "step": 18413 + }, + { + "epoch": 0.5643618977565281, + "grad_norm": 1.5807499824789475, + "learning_rate": 4.204080408535448e-06, + "loss": 0.6166, + "step": 18414 + }, + { + "epoch": 0.5643925462792694, + "grad_norm": 1.7459323423609217, + "learning_rate": 4.203590423194905e-06, + "loss": 0.5884, + "step": 18415 + }, + { + "epoch": 0.5644231948020105, + "grad_norm": 1.6338490268346122, + "learning_rate": 4.20310044570159e-06, + "loss": 0.6072, + "step": 18416 + }, + { + "epoch": 0.5644538433247518, + "grad_norm": 1.7442989721690587, + "learning_rate": 4.202610476060328e-06, + "loss": 0.5801, + "step": 18417 + }, + { + "epoch": 0.5644844918474929, + "grad_norm": 1.586391665838328, + "learning_rate": 4.202120514275951e-06, + "loss": 0.6152, + "step": 18418 + }, + { + "epoch": 0.5645151403702342, + "grad_norm": 0.8294097290579179, + "learning_rate": 4.2016305603532835e-06, + "loss": 0.4405, + "step": 18419 + }, + { + "epoch": 0.5645457888929754, + "grad_norm": 1.7212691255513188, + "learning_rate": 4.201140614297155e-06, + "loss": 0.6868, + "step": 18420 + }, + { + "epoch": 0.5645764374157166, + "grad_norm": 0.7958992882745027, + "learning_rate": 4.200650676112392e-06, + "loss": 0.4145, + "step": 18421 + }, + { + "epoch": 0.5646070859384578, + "grad_norm": 1.754076476163791, + "learning_rate": 4.200160745803821e-06, + "loss": 0.697, + "step": 18422 + }, + { + "epoch": 0.564637734461199, + "grad_norm": 0.7776735044189058, + "learning_rate": 4.199670823376273e-06, + "loss": 0.4043, + "step": 18423 + }, + { + "epoch": 0.5646683829839402, + "grad_norm": 1.6764987886102634, + "learning_rate": 4.199180908834573e-06, + "loss": 0.4869, + "step": 18424 + }, + { + "epoch": 0.5646990315066813, + "grad_norm": 1.57335217661521, + "learning_rate": 4.198691002183547e-06, + "loss": 0.5672, + "step": 18425 + }, + { + "epoch": 0.5647296800294226, + "grad_norm": 1.894165131811522, + "learning_rate": 4.198201103428025e-06, + "loss": 0.6869, + "step": 18426 + }, + { + "epoch": 0.5647603285521637, + "grad_norm": 1.8706128894033212, + "learning_rate": 4.197711212572834e-06, + "loss": 0.5509, + "step": 18427 + }, + { + "epoch": 0.564790977074905, + "grad_norm": 1.627812860936387, + "learning_rate": 4.197221329622796e-06, + "loss": 0.5373, + "step": 18428 + }, + { + "epoch": 0.5648216255976461, + "grad_norm": 1.7000140950812457, + "learning_rate": 4.196731454582744e-06, + "loss": 0.5982, + "step": 18429 + }, + { + "epoch": 0.5648522741203874, + "grad_norm": 2.3543958952124275, + "learning_rate": 4.196241587457501e-06, + "loss": 0.6059, + "step": 18430 + }, + { + "epoch": 0.5648829226431286, + "grad_norm": 1.842540907196089, + "learning_rate": 4.1957517282518965e-06, + "loss": 0.6341, + "step": 18431 + }, + { + "epoch": 0.5649135711658698, + "grad_norm": 1.7499442415847635, + "learning_rate": 4.195261876970756e-06, + "loss": 0.6531, + "step": 18432 + }, + { + "epoch": 0.564944219688611, + "grad_norm": 1.8851209733413312, + "learning_rate": 4.1947720336189055e-06, + "loss": 0.6771, + "step": 18433 + }, + { + "epoch": 0.5649748682113522, + "grad_norm": 1.8517393888170457, + "learning_rate": 4.1942821982011735e-06, + "loss": 0.6691, 
+ "step": 18434 + }, + { + "epoch": 0.5650055167340934, + "grad_norm": 1.640188167385473, + "learning_rate": 4.193792370722386e-06, + "loss": 0.576, + "step": 18435 + }, + { + "epoch": 0.5650361652568346, + "grad_norm": 1.707768763746691, + "learning_rate": 4.193302551187364e-06, + "loss": 0.5964, + "step": 18436 + }, + { + "epoch": 0.5650668137795758, + "grad_norm": 1.7597817691814746, + "learning_rate": 4.192812739600942e-06, + "loss": 0.6198, + "step": 18437 + }, + { + "epoch": 0.565097462302317, + "grad_norm": 2.0051647271942454, + "learning_rate": 4.1923229359679405e-06, + "loss": 0.6175, + "step": 18438 + }, + { + "epoch": 0.5651281108250582, + "grad_norm": 1.9891887152693313, + "learning_rate": 4.191833140293191e-06, + "loss": 0.7114, + "step": 18439 + }, + { + "epoch": 0.5651587593477995, + "grad_norm": 1.6400863802892522, + "learning_rate": 4.191343352581514e-06, + "loss": 0.6274, + "step": 18440 + }, + { + "epoch": 0.5651894078705406, + "grad_norm": 2.099707245183428, + "learning_rate": 4.190853572837737e-06, + "loss": 0.7407, + "step": 18441 + }, + { + "epoch": 0.5652200563932819, + "grad_norm": 1.8814029615568622, + "learning_rate": 4.1903638010666895e-06, + "loss": 0.5954, + "step": 18442 + }, + { + "epoch": 0.565250704916023, + "grad_norm": 1.7074280097054055, + "learning_rate": 4.189874037273193e-06, + "loss": 0.6191, + "step": 18443 + }, + { + "epoch": 0.5652813534387643, + "grad_norm": 1.859470077413823, + "learning_rate": 4.189384281462074e-06, + "loss": 0.7267, + "step": 18444 + }, + { + "epoch": 0.5653120019615054, + "grad_norm": 1.6449990707060338, + "learning_rate": 4.188894533638161e-06, + "loss": 0.5999, + "step": 18445 + }, + { + "epoch": 0.5653426504842467, + "grad_norm": 1.6249168535895762, + "learning_rate": 4.1884047938062774e-06, + "loss": 0.5733, + "step": 18446 + }, + { + "epoch": 0.5653732990069879, + "grad_norm": 0.841678488054031, + "learning_rate": 4.187915061971248e-06, + "loss": 0.4189, + "step": 18447 + }, + { + "epoch": 0.5654039475297291, + "grad_norm": 1.8311260333966801, + "learning_rate": 4.1874253381379e-06, + "loss": 0.6456, + "step": 18448 + }, + { + "epoch": 0.5654345960524703, + "grad_norm": 1.673110920438966, + "learning_rate": 4.186935622311057e-06, + "loss": 0.6836, + "step": 18449 + }, + { + "epoch": 0.5654652445752115, + "grad_norm": 2.0507955116461503, + "learning_rate": 4.186445914495546e-06, + "loss": 0.6307, + "step": 18450 + }, + { + "epoch": 0.5654958930979527, + "grad_norm": 1.7322790751182353, + "learning_rate": 4.1859562146961925e-06, + "loss": 0.677, + "step": 18451 + }, + { + "epoch": 0.5655265416206939, + "grad_norm": 1.6255794366065521, + "learning_rate": 4.185466522917819e-06, + "loss": 0.4794, + "step": 18452 + }, + { + "epoch": 0.5655571901434351, + "grad_norm": 1.6308463157864492, + "learning_rate": 4.184976839165254e-06, + "loss": 0.6603, + "step": 18453 + }, + { + "epoch": 0.5655878386661763, + "grad_norm": 1.966379789684128, + "learning_rate": 4.1844871634433206e-06, + "loss": 0.6873, + "step": 18454 + }, + { + "epoch": 0.5656184871889175, + "grad_norm": 1.4071739931848384, + "learning_rate": 4.183997495756841e-06, + "loss": 0.4775, + "step": 18455 + }, + { + "epoch": 0.5656491357116586, + "grad_norm": 1.7593794802008296, + "learning_rate": 4.183507836110646e-06, + "loss": 0.6342, + "step": 18456 + }, + { + "epoch": 0.5656797842343999, + "grad_norm": 1.9323084184453339, + "learning_rate": 4.183018184509555e-06, + "loss": 0.6262, + "step": 18457 + }, + { + "epoch": 0.5657104327571411, + "grad_norm": 
1.8614571943256075, + "learning_rate": 4.182528540958397e-06, + "loss": 0.6755, + "step": 18458 + }, + { + "epoch": 0.5657410812798823, + "grad_norm": 1.7572579862783384, + "learning_rate": 4.182038905461994e-06, + "loss": 0.529, + "step": 18459 + }, + { + "epoch": 0.5657717298026235, + "grad_norm": 1.6737158998784052, + "learning_rate": 4.1815492780251695e-06, + "loss": 0.7136, + "step": 18460 + }, + { + "epoch": 0.5658023783253647, + "grad_norm": 1.6300658964681936, + "learning_rate": 4.181059658652751e-06, + "loss": 0.5364, + "step": 18461 + }, + { + "epoch": 0.5658330268481059, + "grad_norm": 1.7879368679892287, + "learning_rate": 4.18057004734956e-06, + "loss": 0.5705, + "step": 18462 + }, + { + "epoch": 0.5658636753708471, + "grad_norm": 1.6681289976694293, + "learning_rate": 4.180080444120422e-06, + "loss": 0.6059, + "step": 18463 + }, + { + "epoch": 0.5658943238935883, + "grad_norm": 1.915159440979533, + "learning_rate": 4.179590848970162e-06, + "loss": 0.6808, + "step": 18464 + }, + { + "epoch": 0.5659249724163296, + "grad_norm": 1.6457967826779791, + "learning_rate": 4.179101261903602e-06, + "loss": 0.5251, + "step": 18465 + }, + { + "epoch": 0.5659556209390707, + "grad_norm": 1.7016254788622758, + "learning_rate": 4.178611682925569e-06, + "loss": 0.5288, + "step": 18466 + }, + { + "epoch": 0.565986269461812, + "grad_norm": 1.536991133801106, + "learning_rate": 4.178122112040886e-06, + "loss": 0.564, + "step": 18467 + }, + { + "epoch": 0.5660169179845531, + "grad_norm": 1.5950893575140144, + "learning_rate": 4.177632549254372e-06, + "loss": 0.7046, + "step": 18468 + }, + { + "epoch": 0.5660475665072944, + "grad_norm": 1.7205904701073194, + "learning_rate": 4.177142994570859e-06, + "loss": 0.5907, + "step": 18469 + }, + { + "epoch": 0.5660782150300355, + "grad_norm": 1.7913386985603188, + "learning_rate": 4.176653447995165e-06, + "loss": 0.7187, + "step": 18470 + }, + { + "epoch": 0.5661088635527768, + "grad_norm": 1.6232426349204512, + "learning_rate": 4.176163909532115e-06, + "loss": 0.6301, + "step": 18471 + }, + { + "epoch": 0.5661395120755179, + "grad_norm": 1.6489718919664818, + "learning_rate": 4.175674379186534e-06, + "loss": 0.6173, + "step": 18472 + }, + { + "epoch": 0.5661701605982592, + "grad_norm": 1.5790888836542822, + "learning_rate": 4.175184856963243e-06, + "loss": 0.5785, + "step": 18473 + }, + { + "epoch": 0.5662008091210003, + "grad_norm": 1.9708751381366716, + "learning_rate": 4.174695342867066e-06, + "loss": 0.6404, + "step": 18474 + }, + { + "epoch": 0.5662314576437416, + "grad_norm": 0.8612561357129158, + "learning_rate": 4.174205836902828e-06, + "loss": 0.441, + "step": 18475 + }, + { + "epoch": 0.5662621061664828, + "grad_norm": 1.9299447143911408, + "learning_rate": 4.173716339075351e-06, + "loss": 0.79, + "step": 18476 + }, + { + "epoch": 0.566292754689224, + "grad_norm": 1.7430642724101202, + "learning_rate": 4.1732268493894586e-06, + "loss": 0.6535, + "step": 18477 + }, + { + "epoch": 0.5663234032119652, + "grad_norm": 1.6892321888361925, + "learning_rate": 4.172737367849975e-06, + "loss": 0.6155, + "step": 18478 + }, + { + "epoch": 0.5663540517347064, + "grad_norm": 1.8988707954412156, + "learning_rate": 4.1722478944617184e-06, + "loss": 0.6182, + "step": 18479 + }, + { + "epoch": 0.5663847002574476, + "grad_norm": 0.8055932728243945, + "learning_rate": 4.171758429229518e-06, + "loss": 0.4329, + "step": 18480 + }, + { + "epoch": 0.5664153487801888, + "grad_norm": 1.79862045584604, + "learning_rate": 4.171268972158193e-06, + "loss": 0.6701, + "step": 
18481 + }, + { + "epoch": 0.56644599730293, + "grad_norm": 1.7833032647530247, + "learning_rate": 4.170779523252565e-06, + "loss": 0.6524, + "step": 18482 + }, + { + "epoch": 0.5664766458256713, + "grad_norm": 0.7826807382953315, + "learning_rate": 4.170290082517461e-06, + "loss": 0.435, + "step": 18483 + }, + { + "epoch": 0.5665072943484124, + "grad_norm": 0.795142724874591, + "learning_rate": 4.169800649957699e-06, + "loss": 0.4381, + "step": 18484 + }, + { + "epoch": 0.5665379428711537, + "grad_norm": 1.47939314718036, + "learning_rate": 4.1693112255781055e-06, + "loss": 0.5619, + "step": 18485 + }, + { + "epoch": 0.5665685913938948, + "grad_norm": 0.7850037713915324, + "learning_rate": 4.1688218093835005e-06, + "loss": 0.4273, + "step": 18486 + }, + { + "epoch": 0.566599239916636, + "grad_norm": 1.763864923833146, + "learning_rate": 4.1683324013787056e-06, + "loss": 0.6555, + "step": 18487 + }, + { + "epoch": 0.5666298884393772, + "grad_norm": 1.7224811111705958, + "learning_rate": 4.167843001568545e-06, + "loss": 0.6526, + "step": 18488 + }, + { + "epoch": 0.5666605369621184, + "grad_norm": 2.0029322965225074, + "learning_rate": 4.1673536099578425e-06, + "loss": 0.6537, + "step": 18489 + }, + { + "epoch": 0.5666911854848596, + "grad_norm": 1.6351287753103911, + "learning_rate": 4.1668642265514145e-06, + "loss": 0.5934, + "step": 18490 + }, + { + "epoch": 0.5667218340076008, + "grad_norm": 1.959065068867312, + "learning_rate": 4.166374851354089e-06, + "loss": 0.6696, + "step": 18491 + }, + { + "epoch": 0.566752482530342, + "grad_norm": 1.8173079019062586, + "learning_rate": 4.165885484370684e-06, + "loss": 0.7317, + "step": 18492 + }, + { + "epoch": 0.5667831310530832, + "grad_norm": 1.6528681226982944, + "learning_rate": 4.1653961256060235e-06, + "loss": 0.556, + "step": 18493 + }, + { + "epoch": 0.5668137795758245, + "grad_norm": 1.8383760035285661, + "learning_rate": 4.164906775064929e-06, + "loss": 0.6216, + "step": 18494 + }, + { + "epoch": 0.5668444280985656, + "grad_norm": 1.556404190771897, + "learning_rate": 4.1644174327522195e-06, + "loss": 0.5722, + "step": 18495 + }, + { + "epoch": 0.5668750766213069, + "grad_norm": 1.6807317735027123, + "learning_rate": 4.1639280986727205e-06, + "loss": 0.6275, + "step": 18496 + }, + { + "epoch": 0.566905725144048, + "grad_norm": 1.7136572790579383, + "learning_rate": 4.163438772831251e-06, + "loss": 0.646, + "step": 18497 + }, + { + "epoch": 0.5669363736667893, + "grad_norm": 0.8777459403878255, + "learning_rate": 4.162949455232634e-06, + "loss": 0.4326, + "step": 18498 + }, + { + "epoch": 0.5669670221895304, + "grad_norm": 0.8317451026158588, + "learning_rate": 4.162460145881691e-06, + "loss": 0.4418, + "step": 18499 + }, + { + "epoch": 0.5669976707122717, + "grad_norm": 1.772135779796484, + "learning_rate": 4.161970844783242e-06, + "loss": 0.6717, + "step": 18500 + }, + { + "epoch": 0.5670283192350128, + "grad_norm": 1.670808214526764, + "learning_rate": 4.161481551942107e-06, + "loss": 0.6156, + "step": 18501 + }, + { + "epoch": 0.5670589677577541, + "grad_norm": 0.7854945406419863, + "learning_rate": 4.16099226736311e-06, + "loss": 0.4404, + "step": 18502 + }, + { + "epoch": 0.5670896162804953, + "grad_norm": 1.960556566771591, + "learning_rate": 4.160502991051071e-06, + "loss": 0.652, + "step": 18503 + }, + { + "epoch": 0.5671202648032365, + "grad_norm": 1.5218104028521324, + "learning_rate": 4.1600137230108106e-06, + "loss": 0.5856, + "step": 18504 + }, + { + "epoch": 0.5671509133259777, + "grad_norm": 1.69055046279009, + 
"learning_rate": 4.159524463247151e-06, + "loss": 0.5834, + "step": 18505 + }, + { + "epoch": 0.5671815618487189, + "grad_norm": 2.1819311270979767, + "learning_rate": 4.159035211764909e-06, + "loss": 0.5491, + "step": 18506 + }, + { + "epoch": 0.5672122103714601, + "grad_norm": 1.610704516858351, + "learning_rate": 4.1585459685689105e-06, + "loss": 0.7265, + "step": 18507 + }, + { + "epoch": 0.5672428588942013, + "grad_norm": 1.6400043682196894, + "learning_rate": 4.158056733663975e-06, + "loss": 0.5646, + "step": 18508 + }, + { + "epoch": 0.5672735074169425, + "grad_norm": 1.5524654039900425, + "learning_rate": 4.157567507054919e-06, + "loss": 0.6029, + "step": 18509 + }, + { + "epoch": 0.5673041559396838, + "grad_norm": 1.8256811827512271, + "learning_rate": 4.1570782887465685e-06, + "loss": 0.703, + "step": 18510 + }, + { + "epoch": 0.5673348044624249, + "grad_norm": 1.9196417577641625, + "learning_rate": 4.156589078743738e-06, + "loss": 0.7266, + "step": 18511 + }, + { + "epoch": 0.5673654529851662, + "grad_norm": 1.626910248693126, + "learning_rate": 4.156099877051254e-06, + "loss": 0.5774, + "step": 18512 + }, + { + "epoch": 0.5673961015079073, + "grad_norm": 1.7244990745830426, + "learning_rate": 4.155610683673934e-06, + "loss": 0.5817, + "step": 18513 + }, + { + "epoch": 0.5674267500306486, + "grad_norm": 1.5964214517690258, + "learning_rate": 4.155121498616596e-06, + "loss": 0.5869, + "step": 18514 + }, + { + "epoch": 0.5674573985533897, + "grad_norm": 0.7979922432096651, + "learning_rate": 4.154632321884063e-06, + "loss": 0.4279, + "step": 18515 + }, + { + "epoch": 0.567488047076131, + "grad_norm": 1.5978680698585392, + "learning_rate": 4.154143153481155e-06, + "loss": 0.5737, + "step": 18516 + }, + { + "epoch": 0.5675186955988721, + "grad_norm": 1.6534092889052256, + "learning_rate": 4.15365399341269e-06, + "loss": 0.5481, + "step": 18517 + }, + { + "epoch": 0.5675493441216133, + "grad_norm": 1.696266706997799, + "learning_rate": 4.153164841683488e-06, + "loss": 0.573, + "step": 18518 + }, + { + "epoch": 0.5675799926443545, + "grad_norm": 1.6155355952790942, + "learning_rate": 4.152675698298371e-06, + "loss": 0.6975, + "step": 18519 + }, + { + "epoch": 0.5676106411670957, + "grad_norm": 1.8312039782377982, + "learning_rate": 4.152186563262155e-06, + "loss": 0.5857, + "step": 18520 + }, + { + "epoch": 0.567641289689837, + "grad_norm": 1.6960501013468383, + "learning_rate": 4.1516974365796645e-06, + "loss": 0.6012, + "step": 18521 + }, + { + "epoch": 0.5676719382125781, + "grad_norm": 1.5728422895886023, + "learning_rate": 4.151208318255713e-06, + "loss": 0.575, + "step": 18522 + }, + { + "epoch": 0.5677025867353194, + "grad_norm": 1.9531013509605983, + "learning_rate": 4.150719208295127e-06, + "loss": 0.6006, + "step": 18523 + }, + { + "epoch": 0.5677332352580605, + "grad_norm": 1.7403120524300126, + "learning_rate": 4.15023010670272e-06, + "loss": 0.5986, + "step": 18524 + }, + { + "epoch": 0.5677638837808018, + "grad_norm": 0.8187195570980701, + "learning_rate": 4.149741013483312e-06, + "loss": 0.4215, + "step": 18525 + }, + { + "epoch": 0.5677945323035429, + "grad_norm": 1.6141463290650244, + "learning_rate": 4.149251928641725e-06, + "loss": 0.561, + "step": 18526 + }, + { + "epoch": 0.5678251808262842, + "grad_norm": 1.8743547773306866, + "learning_rate": 4.1487628521827765e-06, + "loss": 0.6412, + "step": 18527 + }, + { + "epoch": 0.5678558293490253, + "grad_norm": 1.5163433366859398, + "learning_rate": 4.1482737841112835e-06, + "loss": 0.6368, + "step": 18528 + }, + { + 
"epoch": 0.5678864778717666, + "grad_norm": 1.8706740867836473, + "learning_rate": 4.1477847244320685e-06, + "loss": 0.6678, + "step": 18529 + }, + { + "epoch": 0.5679171263945078, + "grad_norm": 0.7657609576284323, + "learning_rate": 4.147295673149947e-06, + "loss": 0.4221, + "step": 18530 + }, + { + "epoch": 0.567947774917249, + "grad_norm": 1.9405321628464185, + "learning_rate": 4.146806630269741e-06, + "loss": 0.6303, + "step": 18531 + }, + { + "epoch": 0.5679784234399902, + "grad_norm": 1.8658070338414858, + "learning_rate": 4.1463175957962686e-06, + "loss": 0.7404, + "step": 18532 + }, + { + "epoch": 0.5680090719627314, + "grad_norm": 2.6750980859651436, + "learning_rate": 4.1458285697343445e-06, + "loss": 0.6486, + "step": 18533 + }, + { + "epoch": 0.5680397204854726, + "grad_norm": 1.875733472545887, + "learning_rate": 4.145339552088793e-06, + "loss": 0.6627, + "step": 18534 + }, + { + "epoch": 0.5680703690082138, + "grad_norm": 0.8052280985029562, + "learning_rate": 4.144850542864428e-06, + "loss": 0.4248, + "step": 18535 + }, + { + "epoch": 0.568101017530955, + "grad_norm": 1.631039791668866, + "learning_rate": 4.144361542066069e-06, + "loss": 0.5846, + "step": 18536 + }, + { + "epoch": 0.5681316660536962, + "grad_norm": 1.626255662442959, + "learning_rate": 4.143872549698535e-06, + "loss": 0.581, + "step": 18537 + }, + { + "epoch": 0.5681623145764374, + "grad_norm": 1.5857248378994728, + "learning_rate": 4.143383565766643e-06, + "loss": 0.6814, + "step": 18538 + }, + { + "epoch": 0.5681929630991787, + "grad_norm": 1.6430800571016468, + "learning_rate": 4.1428945902752135e-06, + "loss": 0.4875, + "step": 18539 + }, + { + "epoch": 0.5682236116219198, + "grad_norm": 1.63680735153756, + "learning_rate": 4.142405623229062e-06, + "loss": 0.606, + "step": 18540 + }, + { + "epoch": 0.5682542601446611, + "grad_norm": 1.8588366473636262, + "learning_rate": 4.141916664633008e-06, + "loss": 0.6959, + "step": 18541 + }, + { + "epoch": 0.5682849086674022, + "grad_norm": 1.6557987075615828, + "learning_rate": 4.141427714491868e-06, + "loss": 0.5911, + "step": 18542 + }, + { + "epoch": 0.5683155571901435, + "grad_norm": 1.8119721505337743, + "learning_rate": 4.1409387728104615e-06, + "loss": 0.5486, + "step": 18543 + }, + { + "epoch": 0.5683462057128846, + "grad_norm": 1.7067931095152462, + "learning_rate": 4.1404498395936035e-06, + "loss": 0.6134, + "step": 18544 + }, + { + "epoch": 0.5683768542356259, + "grad_norm": 1.665432315394693, + "learning_rate": 4.1399609148461135e-06, + "loss": 0.6571, + "step": 18545 + }, + { + "epoch": 0.568407502758367, + "grad_norm": 1.7937394507062123, + "learning_rate": 4.13947199857281e-06, + "loss": 0.6118, + "step": 18546 + }, + { + "epoch": 0.5684381512811083, + "grad_norm": 1.4833073987338619, + "learning_rate": 4.138983090778507e-06, + "loss": 0.6402, + "step": 18547 + }, + { + "epoch": 0.5684687998038495, + "grad_norm": 1.5767976356004394, + "learning_rate": 4.1384941914680256e-06, + "loss": 0.5544, + "step": 18548 + }, + { + "epoch": 0.5684994483265906, + "grad_norm": 0.8263073217916601, + "learning_rate": 4.1380053006461804e-06, + "loss": 0.4119, + "step": 18549 + }, + { + "epoch": 0.5685300968493319, + "grad_norm": 1.6746732764028334, + "learning_rate": 4.13751641831779e-06, + "loss": 0.7232, + "step": 18550 + }, + { + "epoch": 0.568560745372073, + "grad_norm": 1.728151579484166, + "learning_rate": 4.137027544487672e-06, + "loss": 0.6724, + "step": 18551 + }, + { + "epoch": 0.5685913938948143, + "grad_norm": 1.701039492968913, + "learning_rate": 
4.136538679160639e-06, + "loss": 0.6587, + "step": 18552 + }, + { + "epoch": 0.5686220424175554, + "grad_norm": 1.667351787076564, + "learning_rate": 4.136049822341516e-06, + "loss": 0.616, + "step": 18553 + }, + { + "epoch": 0.5686526909402967, + "grad_norm": 1.6351765194127996, + "learning_rate": 4.135560974035112e-06, + "loss": 0.5343, + "step": 18554 + }, + { + "epoch": 0.5686833394630378, + "grad_norm": 1.91047516241696, + "learning_rate": 4.135072134246247e-06, + "loss": 0.6285, + "step": 18555 + }, + { + "epoch": 0.5687139879857791, + "grad_norm": 1.748361631644768, + "learning_rate": 4.134583302979739e-06, + "loss": 0.6049, + "step": 18556 + }, + { + "epoch": 0.5687446365085203, + "grad_norm": 0.838022722913728, + "learning_rate": 4.134094480240402e-06, + "loss": 0.4175, + "step": 18557 + }, + { + "epoch": 0.5687752850312615, + "grad_norm": 1.7781341482993427, + "learning_rate": 4.1336056660330535e-06, + "loss": 0.6843, + "step": 18558 + }, + { + "epoch": 0.5688059335540027, + "grad_norm": 0.7715621361189842, + "learning_rate": 4.133116860362511e-06, + "loss": 0.4435, + "step": 18559 + }, + { + "epoch": 0.5688365820767439, + "grad_norm": 1.6765307097666806, + "learning_rate": 4.132628063233589e-06, + "loss": 0.5958, + "step": 18560 + }, + { + "epoch": 0.5688672305994851, + "grad_norm": 0.818048583348214, + "learning_rate": 4.132139274651105e-06, + "loss": 0.4177, + "step": 18561 + }, + { + "epoch": 0.5688978791222263, + "grad_norm": 1.829509009813729, + "learning_rate": 4.131650494619876e-06, + "loss": 0.6773, + "step": 18562 + }, + { + "epoch": 0.5689285276449675, + "grad_norm": 1.7596078386033398, + "learning_rate": 4.1311617231447136e-06, + "loss": 0.6441, + "step": 18563 + }, + { + "epoch": 0.5689591761677087, + "grad_norm": 0.776021902730361, + "learning_rate": 4.130672960230441e-06, + "loss": 0.4473, + "step": 18564 + }, + { + "epoch": 0.5689898246904499, + "grad_norm": 0.7359641450716095, + "learning_rate": 4.130184205881866e-06, + "loss": 0.3974, + "step": 18565 + }, + { + "epoch": 0.5690204732131912, + "grad_norm": 1.7890931613419367, + "learning_rate": 4.129695460103813e-06, + "loss": 0.6144, + "step": 18566 + }, + { + "epoch": 0.5690511217359323, + "grad_norm": 1.733657080471215, + "learning_rate": 4.12920672290109e-06, + "loss": 0.6194, + "step": 18567 + }, + { + "epoch": 0.5690817702586736, + "grad_norm": 1.9138777577306758, + "learning_rate": 4.128717994278517e-06, + "loss": 0.5919, + "step": 18568 + }, + { + "epoch": 0.5691124187814147, + "grad_norm": 1.66805825968978, + "learning_rate": 4.1282292742409095e-06, + "loss": 0.5614, + "step": 18569 + }, + { + "epoch": 0.569143067304156, + "grad_norm": 1.9660722249852072, + "learning_rate": 4.127740562793081e-06, + "loss": 0.5864, + "step": 18570 + }, + { + "epoch": 0.5691737158268971, + "grad_norm": 1.8295431662133623, + "learning_rate": 4.127251859939847e-06, + "loss": 0.6276, + "step": 18571 + }, + { + "epoch": 0.5692043643496384, + "grad_norm": 1.7612093734412313, + "learning_rate": 4.126763165686025e-06, + "loss": 0.6991, + "step": 18572 + }, + { + "epoch": 0.5692350128723795, + "grad_norm": 1.6254338130125883, + "learning_rate": 4.12627448003643e-06, + "loss": 0.616, + "step": 18573 + }, + { + "epoch": 0.5692656613951208, + "grad_norm": 0.7874544628574089, + "learning_rate": 4.125785802995874e-06, + "loss": 0.3953, + "step": 18574 + }, + { + "epoch": 0.569296309917862, + "grad_norm": 1.9380386968049983, + "learning_rate": 4.125297134569176e-06, + "loss": 0.6929, + "step": 18575 + }, + { + "epoch": 
0.5693269584406032, + "grad_norm": 1.8052037531052851, + "learning_rate": 4.124808474761146e-06, + "loss": 0.6557, + "step": 18576 + }, + { + "epoch": 0.5693576069633444, + "grad_norm": 1.8933916102225576, + "learning_rate": 4.124319823576606e-06, + "loss": 0.6769, + "step": 18577 + }, + { + "epoch": 0.5693882554860856, + "grad_norm": 1.7807648779726006, + "learning_rate": 4.123831181020365e-06, + "loss": 0.5826, + "step": 18578 + }, + { + "epoch": 0.5694189040088268, + "grad_norm": 1.755162590584831, + "learning_rate": 4.123342547097239e-06, + "loss": 0.6187, + "step": 18579 + }, + { + "epoch": 0.5694495525315679, + "grad_norm": 1.7205669199864377, + "learning_rate": 4.122853921812044e-06, + "loss": 0.5946, + "step": 18580 + }, + { + "epoch": 0.5694802010543092, + "grad_norm": 2.1175355210299576, + "learning_rate": 4.122365305169593e-06, + "loss": 0.6302, + "step": 18581 + }, + { + "epoch": 0.5695108495770503, + "grad_norm": 1.6604523279031627, + "learning_rate": 4.121876697174701e-06, + "loss": 0.5674, + "step": 18582 + }, + { + "epoch": 0.5695414980997916, + "grad_norm": 1.7289202308237535, + "learning_rate": 4.121388097832184e-06, + "loss": 0.6102, + "step": 18583 + }, + { + "epoch": 0.5695721466225327, + "grad_norm": 1.857535242446626, + "learning_rate": 4.120899507146853e-06, + "loss": 0.7218, + "step": 18584 + }, + { + "epoch": 0.569602795145274, + "grad_norm": 1.663717800714552, + "learning_rate": 4.1204109251235255e-06, + "loss": 0.5194, + "step": 18585 + }, + { + "epoch": 0.5696334436680152, + "grad_norm": 1.7729555974613824, + "learning_rate": 4.119922351767016e-06, + "loss": 0.6252, + "step": 18586 + }, + { + "epoch": 0.5696640921907564, + "grad_norm": 0.7803993481270006, + "learning_rate": 4.119433787082133e-06, + "loss": 0.4126, + "step": 18587 + }, + { + "epoch": 0.5696947407134976, + "grad_norm": 1.899876151089557, + "learning_rate": 4.118945231073697e-06, + "loss": 0.6341, + "step": 18588 + }, + { + "epoch": 0.5697253892362388, + "grad_norm": 1.844957168173005, + "learning_rate": 4.118456683746518e-06, + "loss": 0.6667, + "step": 18589 + }, + { + "epoch": 0.56975603775898, + "grad_norm": 1.8535975697667118, + "learning_rate": 4.11796814510541e-06, + "loss": 0.6055, + "step": 18590 + }, + { + "epoch": 0.5697866862817212, + "grad_norm": 1.6463371562402958, + "learning_rate": 4.1174796151551885e-06, + "loss": 0.6859, + "step": 18591 + }, + { + "epoch": 0.5698173348044624, + "grad_norm": 1.5099879240142295, + "learning_rate": 4.116991093900665e-06, + "loss": 0.5933, + "step": 18592 + }, + { + "epoch": 0.5698479833272037, + "grad_norm": 1.8364099202741495, + "learning_rate": 4.116502581346655e-06, + "loss": 0.5906, + "step": 18593 + }, + { + "epoch": 0.5698786318499448, + "grad_norm": 1.6739175278561387, + "learning_rate": 4.116014077497972e-06, + "loss": 0.7129, + "step": 18594 + }, + { + "epoch": 0.5699092803726861, + "grad_norm": 1.585219100490649, + "learning_rate": 4.115525582359427e-06, + "loss": 0.5781, + "step": 18595 + }, + { + "epoch": 0.5699399288954272, + "grad_norm": 1.883351914602563, + "learning_rate": 4.115037095935837e-06, + "loss": 0.6836, + "step": 18596 + }, + { + "epoch": 0.5699705774181685, + "grad_norm": 1.5471005745779802, + "learning_rate": 4.114548618232012e-06, + "loss": 0.7564, + "step": 18597 + }, + { + "epoch": 0.5700012259409096, + "grad_norm": 1.7887748329763793, + "learning_rate": 4.114060149252764e-06, + "loss": 0.6309, + "step": 18598 + }, + { + "epoch": 0.5700318744636509, + "grad_norm": 1.7452256461823232, + "learning_rate": 
4.1135716890029096e-06, + "loss": 0.656, + "step": 18599 + }, + { + "epoch": 0.570062522986392, + "grad_norm": 1.4824993461192022, + "learning_rate": 4.1130832374872605e-06, + "loss": 0.5737, + "step": 18600 + }, + { + "epoch": 0.5700931715091333, + "grad_norm": 1.7796466929820551, + "learning_rate": 4.112594794710628e-06, + "loss": 0.5936, + "step": 18601 + }, + { + "epoch": 0.5701238200318745, + "grad_norm": 1.6610481744387382, + "learning_rate": 4.1121063606778264e-06, + "loss": 0.5607, + "step": 18602 + }, + { + "epoch": 0.5701544685546157, + "grad_norm": 1.8073390852819766, + "learning_rate": 4.111617935393668e-06, + "loss": 0.4776, + "step": 18603 + }, + { + "epoch": 0.5701851170773569, + "grad_norm": 1.918119811963361, + "learning_rate": 4.1111295188629665e-06, + "loss": 0.6625, + "step": 18604 + }, + { + "epoch": 0.5702157656000981, + "grad_norm": 1.5492108708765895, + "learning_rate": 4.110641111090533e-06, + "loss": 0.5309, + "step": 18605 + }, + { + "epoch": 0.5702464141228393, + "grad_norm": 1.6563041946594248, + "learning_rate": 4.110152712081178e-06, + "loss": 0.6097, + "step": 18606 + }, + { + "epoch": 0.5702770626455805, + "grad_norm": 1.8388635560654674, + "learning_rate": 4.109664321839719e-06, + "loss": 0.6601, + "step": 18607 + }, + { + "epoch": 0.5703077111683217, + "grad_norm": 1.7908717910128769, + "learning_rate": 4.109175940370965e-06, + "loss": 0.6642, + "step": 18608 + }, + { + "epoch": 0.570338359691063, + "grad_norm": 0.787129178002681, + "learning_rate": 4.108687567679726e-06, + "loss": 0.4357, + "step": 18609 + }, + { + "epoch": 0.5703690082138041, + "grad_norm": 1.7171707487331567, + "learning_rate": 4.108199203770818e-06, + "loss": 0.6621, + "step": 18610 + }, + { + "epoch": 0.5703996567365452, + "grad_norm": 1.5615793123092196, + "learning_rate": 4.10771084864905e-06, + "loss": 0.6691, + "step": 18611 + }, + { + "epoch": 0.5704303052592865, + "grad_norm": 0.7710149079625077, + "learning_rate": 4.107222502319237e-06, + "loss": 0.4068, + "step": 18612 + }, + { + "epoch": 0.5704609537820277, + "grad_norm": 2.0728022069479235, + "learning_rate": 4.106734164786189e-06, + "loss": 0.6536, + "step": 18613 + }, + { + "epoch": 0.5704916023047689, + "grad_norm": 1.7605756815069533, + "learning_rate": 4.106245836054717e-06, + "loss": 0.6534, + "step": 18614 + }, + { + "epoch": 0.5705222508275101, + "grad_norm": 1.8005183445996882, + "learning_rate": 4.1057575161296335e-06, + "loss": 0.6426, + "step": 18615 + }, + { + "epoch": 0.5705528993502513, + "grad_norm": 1.6360424276610577, + "learning_rate": 4.105269205015753e-06, + "loss": 0.5845, + "step": 18616 + }, + { + "epoch": 0.5705835478729925, + "grad_norm": 1.5891007162516206, + "learning_rate": 4.104780902717879e-06, + "loss": 0.657, + "step": 18617 + }, + { + "epoch": 0.5706141963957337, + "grad_norm": 1.5696590714035852, + "learning_rate": 4.104292609240831e-06, + "loss": 0.4967, + "step": 18618 + }, + { + "epoch": 0.5706448449184749, + "grad_norm": 1.8416077247698197, + "learning_rate": 4.103804324589414e-06, + "loss": 0.5771, + "step": 18619 + }, + { + "epoch": 0.5706754934412162, + "grad_norm": 1.7630024889468823, + "learning_rate": 4.103316048768447e-06, + "loss": 0.5799, + "step": 18620 + }, + { + "epoch": 0.5707061419639573, + "grad_norm": 0.8234059670603251, + "learning_rate": 4.102827781782734e-06, + "loss": 0.4322, + "step": 18621 + }, + { + "epoch": 0.5707367904866986, + "grad_norm": 1.70367152746702, + "learning_rate": 4.102339523637087e-06, + "loss": 0.6603, + "step": 18622 + }, + { + "epoch": 
0.5707674390094397, + "grad_norm": 1.8554888964062604, + "learning_rate": 4.10185127433632e-06, + "loss": 0.6151, + "step": 18623 + }, + { + "epoch": 0.570798087532181, + "grad_norm": 1.744213840562847, + "learning_rate": 4.101363033885242e-06, + "loss": 0.5655, + "step": 18624 + }, + { + "epoch": 0.5708287360549221, + "grad_norm": 1.889698752458755, + "learning_rate": 4.100874802288664e-06, + "loss": 0.6652, + "step": 18625 + }, + { + "epoch": 0.5708593845776634, + "grad_norm": 1.7596080005162658, + "learning_rate": 4.100386579551397e-06, + "loss": 0.6123, + "step": 18626 + }, + { + "epoch": 0.5708900331004045, + "grad_norm": 0.8249509422807069, + "learning_rate": 4.099898365678252e-06, + "loss": 0.4164, + "step": 18627 + }, + { + "epoch": 0.5709206816231458, + "grad_norm": 2.02962750178488, + "learning_rate": 4.099410160674035e-06, + "loss": 0.6648, + "step": 18628 + }, + { + "epoch": 0.570951330145887, + "grad_norm": 1.5251375610682025, + "learning_rate": 4.098921964543563e-06, + "loss": 0.5153, + "step": 18629 + }, + { + "epoch": 0.5709819786686282, + "grad_norm": 1.6563885446690039, + "learning_rate": 4.0984337772916415e-06, + "loss": 0.5666, + "step": 18630 + }, + { + "epoch": 0.5710126271913694, + "grad_norm": 1.8936918153650193, + "learning_rate": 4.097945598923085e-06, + "loss": 0.6674, + "step": 18631 + }, + { + "epoch": 0.5710432757141106, + "grad_norm": 1.752757047843011, + "learning_rate": 4.0974574294427016e-06, + "loss": 0.6802, + "step": 18632 + }, + { + "epoch": 0.5710739242368518, + "grad_norm": 0.7502620426223233, + "learning_rate": 4.096969268855299e-06, + "loss": 0.4062, + "step": 18633 + }, + { + "epoch": 0.571104572759593, + "grad_norm": 1.6453804793594082, + "learning_rate": 4.09648111716569e-06, + "loss": 0.6119, + "step": 18634 + }, + { + "epoch": 0.5711352212823342, + "grad_norm": 1.579005788587555, + "learning_rate": 4.095992974378684e-06, + "loss": 0.6547, + "step": 18635 + }, + { + "epoch": 0.5711658698050754, + "grad_norm": 1.7043241206562918, + "learning_rate": 4.09550484049909e-06, + "loss": 0.6575, + "step": 18636 + }, + { + "epoch": 0.5711965183278166, + "grad_norm": 1.6655313213192817, + "learning_rate": 4.0950167155317185e-06, + "loss": 0.5153, + "step": 18637 + }, + { + "epoch": 0.5712271668505579, + "grad_norm": 1.5641606636873036, + "learning_rate": 4.094528599481379e-06, + "loss": 0.5836, + "step": 18638 + }, + { + "epoch": 0.571257815373299, + "grad_norm": 1.6841573829859378, + "learning_rate": 4.0940404923528824e-06, + "loss": 0.5688, + "step": 18639 + }, + { + "epoch": 0.5712884638960403, + "grad_norm": 1.8300882506256957, + "learning_rate": 4.093552394151034e-06, + "loss": 0.6288, + "step": 18640 + }, + { + "epoch": 0.5713191124187814, + "grad_norm": 1.8363298629448734, + "learning_rate": 4.0930643048806465e-06, + "loss": 0.6501, + "step": 18641 + }, + { + "epoch": 0.5713497609415226, + "grad_norm": 1.699812879177453, + "learning_rate": 4.092576224546529e-06, + "loss": 0.5223, + "step": 18642 + }, + { + "epoch": 0.5713804094642638, + "grad_norm": 1.5344616696820645, + "learning_rate": 4.092088153153491e-06, + "loss": 0.54, + "step": 18643 + }, + { + "epoch": 0.571411057987005, + "grad_norm": 1.6906095383612127, + "learning_rate": 4.091600090706338e-06, + "loss": 0.58, + "step": 18644 + }, + { + "epoch": 0.5714417065097462, + "grad_norm": 1.5646967319711846, + "learning_rate": 4.091112037209885e-06, + "loss": 0.5844, + "step": 18645 + }, + { + "epoch": 0.5714723550324874, + "grad_norm": 1.8110991869603046, + "learning_rate": 
4.090623992668934e-06, + "loss": 0.5549, + "step": 18646 + }, + { + "epoch": 0.5715030035552287, + "grad_norm": 1.7390293259357401, + "learning_rate": 4.0901359570883006e-06, + "loss": 0.6386, + "step": 18647 + }, + { + "epoch": 0.5715336520779698, + "grad_norm": 1.6445043668223127, + "learning_rate": 4.089647930472791e-06, + "loss": 0.5685, + "step": 18648 + }, + { + "epoch": 0.5715643006007111, + "grad_norm": 1.529435637947826, + "learning_rate": 4.089159912827209e-06, + "loss": 0.5815, + "step": 18649 + }, + { + "epoch": 0.5715949491234522, + "grad_norm": 1.5848806900928287, + "learning_rate": 4.088671904156372e-06, + "loss": 0.5754, + "step": 18650 + }, + { + "epoch": 0.5716255976461935, + "grad_norm": 1.8439035970609934, + "learning_rate": 4.088183904465081e-06, + "loss": 0.7485, + "step": 18651 + }, + { + "epoch": 0.5716562461689346, + "grad_norm": 1.8723362703957536, + "learning_rate": 4.0876959137581484e-06, + "loss": 0.6014, + "step": 18652 + }, + { + "epoch": 0.5716868946916759, + "grad_norm": 1.7246858548403594, + "learning_rate": 4.0872079320403814e-06, + "loss": 0.6221, + "step": 18653 + }, + { + "epoch": 0.571717543214417, + "grad_norm": 0.8334995663366631, + "learning_rate": 4.086719959316588e-06, + "loss": 0.423, + "step": 18654 + }, + { + "epoch": 0.5717481917371583, + "grad_norm": 1.709454355407097, + "learning_rate": 4.0862319955915755e-06, + "loss": 0.6693, + "step": 18655 + }, + { + "epoch": 0.5717788402598994, + "grad_norm": 0.7896206068414731, + "learning_rate": 4.085744040870155e-06, + "loss": 0.425, + "step": 18656 + }, + { + "epoch": 0.5718094887826407, + "grad_norm": 0.7881313996489739, + "learning_rate": 4.08525609515713e-06, + "loss": 0.3937, + "step": 18657 + }, + { + "epoch": 0.5718401373053819, + "grad_norm": 1.8687240013053477, + "learning_rate": 4.084768158457313e-06, + "loss": 0.5956, + "step": 18658 + }, + { + "epoch": 0.5718707858281231, + "grad_norm": 2.1115516383793977, + "learning_rate": 4.08428023077551e-06, + "loss": 0.6692, + "step": 18659 + }, + { + "epoch": 0.5719014343508643, + "grad_norm": 1.5318405145424496, + "learning_rate": 4.0837923121165245e-06, + "loss": 0.5791, + "step": 18660 + }, + { + "epoch": 0.5719320828736055, + "grad_norm": 0.7743359141488005, + "learning_rate": 4.083304402485171e-06, + "loss": 0.4346, + "step": 18661 + }, + { + "epoch": 0.5719627313963467, + "grad_norm": 1.8553516080382448, + "learning_rate": 4.082816501886254e-06, + "loss": 0.5723, + "step": 18662 + }, + { + "epoch": 0.5719933799190879, + "grad_norm": 2.2166757359041833, + "learning_rate": 4.082328610324579e-06, + "loss": 0.7438, + "step": 18663 + }, + { + "epoch": 0.5720240284418291, + "grad_norm": 1.723288761716145, + "learning_rate": 4.0818407278049545e-06, + "loss": 0.5696, + "step": 18664 + }, + { + "epoch": 0.5720546769645704, + "grad_norm": 1.7641122616585758, + "learning_rate": 4.081352854332189e-06, + "loss": 0.6614, + "step": 18665 + }, + { + "epoch": 0.5720853254873115, + "grad_norm": 1.741745902178236, + "learning_rate": 4.08086498991109e-06, + "loss": 0.7036, + "step": 18666 + }, + { + "epoch": 0.5721159740100528, + "grad_norm": 1.7719677363233668, + "learning_rate": 4.080377134546462e-06, + "loss": 0.6758, + "step": 18667 + }, + { + "epoch": 0.5721466225327939, + "grad_norm": 1.7122085921035786, + "learning_rate": 4.0798892882431135e-06, + "loss": 0.6315, + "step": 18668 + }, + { + "epoch": 0.5721772710555352, + "grad_norm": 2.1529752825124824, + "learning_rate": 4.079401451005852e-06, + "loss": 0.6625, + "step": 18669 + }, + { + "epoch": 
0.5722079195782763, + "grad_norm": 1.877386372821195, + "learning_rate": 4.078913622839485e-06, + "loss": 0.6403, + "step": 18670 + }, + { + "epoch": 0.5722385681010176, + "grad_norm": 1.8752050640801847, + "learning_rate": 4.078425803748813e-06, + "loss": 0.668, + "step": 18671 + }, + { + "epoch": 0.5722692166237587, + "grad_norm": 0.7896589399158996, + "learning_rate": 4.077937993738652e-06, + "loss": 0.4327, + "step": 18672 + }, + { + "epoch": 0.5722998651464999, + "grad_norm": 1.6862927302293986, + "learning_rate": 4.0774501928138005e-06, + "loss": 0.6176, + "step": 18673 + }, + { + "epoch": 0.5723305136692411, + "grad_norm": 1.846129811321577, + "learning_rate": 4.076962400979071e-06, + "loss": 0.6899, + "step": 18674 + }, + { + "epoch": 0.5723611621919823, + "grad_norm": 0.825350706263423, + "learning_rate": 4.076474618239266e-06, + "loss": 0.4397, + "step": 18675 + }, + { + "epoch": 0.5723918107147236, + "grad_norm": 1.9763691252990974, + "learning_rate": 4.0759868445991925e-06, + "loss": 0.5824, + "step": 18676 + }, + { + "epoch": 0.5724224592374647, + "grad_norm": 1.8205705228603557, + "learning_rate": 4.075499080063658e-06, + "loss": 0.6471, + "step": 18677 + }, + { + "epoch": 0.572453107760206, + "grad_norm": 1.873774748932699, + "learning_rate": 4.075011324637468e-06, + "loss": 0.7267, + "step": 18678 + }, + { + "epoch": 0.5724837562829471, + "grad_norm": 1.7269222559711754, + "learning_rate": 4.074523578325426e-06, + "loss": 0.5923, + "step": 18679 + }, + { + "epoch": 0.5725144048056884, + "grad_norm": 1.7647163658627654, + "learning_rate": 4.0740358411323415e-06, + "loss": 0.6634, + "step": 18680 + }, + { + "epoch": 0.5725450533284295, + "grad_norm": 1.8929572939783104, + "learning_rate": 4.07354811306302e-06, + "loss": 0.615, + "step": 18681 + }, + { + "epoch": 0.5725757018511708, + "grad_norm": 1.662468894449907, + "learning_rate": 4.0730603941222626e-06, + "loss": 0.5828, + "step": 18682 + }, + { + "epoch": 0.5726063503739119, + "grad_norm": 1.7960994581248517, + "learning_rate": 4.072572684314881e-06, + "loss": 0.6025, + "step": 18683 + }, + { + "epoch": 0.5726369988966532, + "grad_norm": 0.7905620902658769, + "learning_rate": 4.072084983645677e-06, + "loss": 0.4223, + "step": 18684 + }, + { + "epoch": 0.5726676474193944, + "grad_norm": 1.5483437969425105, + "learning_rate": 4.071597292119457e-06, + "loss": 0.4813, + "step": 18685 + }, + { + "epoch": 0.5726982959421356, + "grad_norm": 1.8351772795937435, + "learning_rate": 4.071109609741027e-06, + "loss": 0.6328, + "step": 18686 + }, + { + "epoch": 0.5727289444648768, + "grad_norm": 1.763433429969457, + "learning_rate": 4.070621936515191e-06, + "loss": 0.6433, + "step": 18687 + }, + { + "epoch": 0.572759592987618, + "grad_norm": 1.6552416974485247, + "learning_rate": 4.070134272446755e-06, + "loss": 0.633, + "step": 18688 + }, + { + "epoch": 0.5727902415103592, + "grad_norm": 1.7640577498541117, + "learning_rate": 4.069646617540525e-06, + "loss": 0.5807, + "step": 18689 + }, + { + "epoch": 0.5728208900331004, + "grad_norm": 0.8290137194711901, + "learning_rate": 4.069158971801304e-06, + "loss": 0.4168, + "step": 18690 + }, + { + "epoch": 0.5728515385558416, + "grad_norm": 1.8189824191557986, + "learning_rate": 4.068671335233898e-06, + "loss": 0.6635, + "step": 18691 + }, + { + "epoch": 0.5728821870785828, + "grad_norm": 0.76617566572778, + "learning_rate": 4.06818370784311e-06, + "loss": 0.3894, + "step": 18692 + }, + { + "epoch": 0.572912835601324, + "grad_norm": 1.9836323443483623, + "learning_rate": 
4.067696089633749e-06, + "loss": 0.7082, + "step": 18693 + }, + { + "epoch": 0.5729434841240653, + "grad_norm": 1.6827927765563768, + "learning_rate": 4.067208480610617e-06, + "loss": 0.6034, + "step": 18694 + }, + { + "epoch": 0.5729741326468064, + "grad_norm": 1.824871723623736, + "learning_rate": 4.066720880778516e-06, + "loss": 0.7104, + "step": 18695 + }, + { + "epoch": 0.5730047811695477, + "grad_norm": 0.8025110399544803, + "learning_rate": 4.0662332901422545e-06, + "loss": 0.4216, + "step": 18696 + }, + { + "epoch": 0.5730354296922888, + "grad_norm": 1.7504411735451002, + "learning_rate": 4.065745708706636e-06, + "loss": 0.7122, + "step": 18697 + }, + { + "epoch": 0.5730660782150301, + "grad_norm": 1.6446683151174097, + "learning_rate": 4.065258136476462e-06, + "loss": 0.6054, + "step": 18698 + }, + { + "epoch": 0.5730967267377712, + "grad_norm": 1.5285239231822807, + "learning_rate": 4.06477057345654e-06, + "loss": 0.5402, + "step": 18699 + }, + { + "epoch": 0.5731273752605125, + "grad_norm": 1.5963737321509834, + "learning_rate": 4.064283019651674e-06, + "loss": 0.6218, + "step": 18700 + }, + { + "epoch": 0.5731580237832536, + "grad_norm": 0.805323681258647, + "learning_rate": 4.063795475066664e-06, + "loss": 0.4093, + "step": 18701 + }, + { + "epoch": 0.5731886723059949, + "grad_norm": 2.008287026060275, + "learning_rate": 4.063307939706319e-06, + "loss": 0.6506, + "step": 18702 + }, + { + "epoch": 0.5732193208287361, + "grad_norm": 1.910081852401554, + "learning_rate": 4.062820413575438e-06, + "loss": 0.5956, + "step": 18703 + }, + { + "epoch": 0.5732499693514772, + "grad_norm": 1.8366462791804528, + "learning_rate": 4.062332896678831e-06, + "loss": 0.5587, + "step": 18704 + }, + { + "epoch": 0.5732806178742185, + "grad_norm": 1.7222125536571526, + "learning_rate": 4.061845389021296e-06, + "loss": 0.5732, + "step": 18705 + }, + { + "epoch": 0.5733112663969596, + "grad_norm": 0.8157828891369032, + "learning_rate": 4.061357890607638e-06, + "loss": 0.4208, + "step": 18706 + }, + { + "epoch": 0.5733419149197009, + "grad_norm": 0.8118762383375977, + "learning_rate": 4.060870401442661e-06, + "loss": 0.4168, + "step": 18707 + }, + { + "epoch": 0.573372563442442, + "grad_norm": 0.7806788446472324, + "learning_rate": 4.060382921531169e-06, + "loss": 0.4102, + "step": 18708 + }, + { + "epoch": 0.5734032119651833, + "grad_norm": 1.7949516178656049, + "learning_rate": 4.059895450877963e-06, + "loss": 0.608, + "step": 18709 + }, + { + "epoch": 0.5734338604879244, + "grad_norm": 1.8024467108659505, + "learning_rate": 4.059407989487849e-06, + "loss": 0.5743, + "step": 18710 + }, + { + "epoch": 0.5734645090106657, + "grad_norm": 1.770031165000167, + "learning_rate": 4.058920537365627e-06, + "loss": 0.5438, + "step": 18711 + }, + { + "epoch": 0.5734951575334069, + "grad_norm": 1.664151686977408, + "learning_rate": 4.058433094516105e-06, + "loss": 0.5768, + "step": 18712 + }, + { + "epoch": 0.5735258060561481, + "grad_norm": 1.5092561226021963, + "learning_rate": 4.057945660944081e-06, + "loss": 0.6035, + "step": 18713 + }, + { + "epoch": 0.5735564545788893, + "grad_norm": 0.789461214641325, + "learning_rate": 4.057458236654358e-06, + "loss": 0.4034, + "step": 18714 + }, + { + "epoch": 0.5735871031016305, + "grad_norm": 1.719734869234221, + "learning_rate": 4.056970821651742e-06, + "loss": 0.6605, + "step": 18715 + }, + { + "epoch": 0.5736177516243717, + "grad_norm": 1.7408256004847011, + "learning_rate": 4.056483415941033e-06, + "loss": 0.5956, + "step": 18716 + }, + { + "epoch": 
0.5736484001471129, + "grad_norm": 1.91402499410698, + "learning_rate": 4.055996019527034e-06, + "loss": 0.6917, + "step": 18717 + }, + { + "epoch": 0.5736790486698541, + "grad_norm": 1.8692938655361395, + "learning_rate": 4.0555086324145484e-06, + "loss": 0.6182, + "step": 18718 + }, + { + "epoch": 0.5737096971925953, + "grad_norm": 1.944004142390059, + "learning_rate": 4.055021254608377e-06, + "loss": 0.6256, + "step": 18719 + }, + { + "epoch": 0.5737403457153365, + "grad_norm": 1.8508757276827237, + "learning_rate": 4.054533886113324e-06, + "loss": 0.7149, + "step": 18720 + }, + { + "epoch": 0.5737709942380778, + "grad_norm": 1.463326392047493, + "learning_rate": 4.05404652693419e-06, + "loss": 0.6424, + "step": 18721 + }, + { + "epoch": 0.5738016427608189, + "grad_norm": 1.8278241237495534, + "learning_rate": 4.053559177075777e-06, + "loss": 0.6918, + "step": 18722 + }, + { + "epoch": 0.5738322912835602, + "grad_norm": 1.5976505126184313, + "learning_rate": 4.053071836542889e-06, + "loss": 0.621, + "step": 18723 + }, + { + "epoch": 0.5738629398063013, + "grad_norm": 1.7113964489546594, + "learning_rate": 4.052584505340327e-06, + "loss": 0.5422, + "step": 18724 + }, + { + "epoch": 0.5738935883290426, + "grad_norm": 1.6084583343236036, + "learning_rate": 4.052097183472889e-06, + "loss": 0.5619, + "step": 18725 + }, + { + "epoch": 0.5739242368517837, + "grad_norm": 1.6519187816288259, + "learning_rate": 4.0516098709453835e-06, + "loss": 0.5796, + "step": 18726 + }, + { + "epoch": 0.573954885374525, + "grad_norm": 1.5577615271906118, + "learning_rate": 4.051122567762608e-06, + "loss": 0.6027, + "step": 18727 + }, + { + "epoch": 0.5739855338972661, + "grad_norm": 1.6240326087361183, + "learning_rate": 4.050635273929362e-06, + "loss": 0.5856, + "step": 18728 + }, + { + "epoch": 0.5740161824200074, + "grad_norm": 1.7286066950158263, + "learning_rate": 4.050147989450452e-06, + "loss": 0.6409, + "step": 18729 + }, + { + "epoch": 0.5740468309427486, + "grad_norm": 1.8708992044496071, + "learning_rate": 4.049660714330676e-06, + "loss": 0.6234, + "step": 18730 + }, + { + "epoch": 0.5740774794654898, + "grad_norm": 1.6255318091932165, + "learning_rate": 4.049173448574836e-06, + "loss": 0.566, + "step": 18731 + }, + { + "epoch": 0.574108127988231, + "grad_norm": 1.6011466140225143, + "learning_rate": 4.0486861921877345e-06, + "loss": 0.5991, + "step": 18732 + }, + { + "epoch": 0.5741387765109722, + "grad_norm": 0.839412958917161, + "learning_rate": 4.048198945174169e-06, + "loss": 0.4358, + "step": 18733 + }, + { + "epoch": 0.5741694250337134, + "grad_norm": 1.792624200330832, + "learning_rate": 4.047711707538945e-06, + "loss": 0.6701, + "step": 18734 + }, + { + "epoch": 0.5742000735564545, + "grad_norm": 1.6185640228234337, + "learning_rate": 4.047224479286862e-06, + "loss": 0.6302, + "step": 18735 + }, + { + "epoch": 0.5742307220791958, + "grad_norm": 1.70191482038724, + "learning_rate": 4.0467372604227175e-06, + "loss": 0.5748, + "step": 18736 + }, + { + "epoch": 0.5742613706019369, + "grad_norm": 1.7842790524802312, + "learning_rate": 4.046250050951316e-06, + "loss": 0.7107, + "step": 18737 + }, + { + "epoch": 0.5742920191246782, + "grad_norm": 1.760155953957919, + "learning_rate": 4.045762850877456e-06, + "loss": 0.596, + "step": 18738 + }, + { + "epoch": 0.5743226676474193, + "grad_norm": 1.934191369022422, + "learning_rate": 4.045275660205939e-06, + "loss": 0.6476, + "step": 18739 + }, + { + "epoch": 0.5743533161701606, + "grad_norm": 1.7679356064250196, + "learning_rate": 
4.044788478941566e-06, + "loss": 0.6807, + "step": 18740 + }, + { + "epoch": 0.5743839646929018, + "grad_norm": 1.8478204458742797, + "learning_rate": 4.044301307089134e-06, + "loss": 0.6081, + "step": 18741 + }, + { + "epoch": 0.574414613215643, + "grad_norm": 1.5936610524891521, + "learning_rate": 4.043814144653449e-06, + "loss": 0.5206, + "step": 18742 + }, + { + "epoch": 0.5744452617383842, + "grad_norm": 0.7680211368828929, + "learning_rate": 4.043326991639308e-06, + "loss": 0.4138, + "step": 18743 + }, + { + "epoch": 0.5744759102611254, + "grad_norm": 0.7575317221481799, + "learning_rate": 4.0428398480515074e-06, + "loss": 0.4107, + "step": 18744 + }, + { + "epoch": 0.5745065587838666, + "grad_norm": 1.8225002594596313, + "learning_rate": 4.042352713894854e-06, + "loss": 0.6941, + "step": 18745 + }, + { + "epoch": 0.5745372073066078, + "grad_norm": 0.778559543753558, + "learning_rate": 4.041865589174141e-06, + "loss": 0.429, + "step": 18746 + }, + { + "epoch": 0.574567855829349, + "grad_norm": 1.8025600115027338, + "learning_rate": 4.0413784738941755e-06, + "loss": 0.65, + "step": 18747 + }, + { + "epoch": 0.5745985043520903, + "grad_norm": 1.8281054738909857, + "learning_rate": 4.040891368059752e-06, + "loss": 0.6594, + "step": 18748 + }, + { + "epoch": 0.5746291528748314, + "grad_norm": 1.8624541359479954, + "learning_rate": 4.040404271675669e-06, + "loss": 0.6109, + "step": 18749 + }, + { + "epoch": 0.5746598013975727, + "grad_norm": 1.8007735188241254, + "learning_rate": 4.03991718474673e-06, + "loss": 0.5501, + "step": 18750 + }, + { + "epoch": 0.5746904499203138, + "grad_norm": 1.6816636673543641, + "learning_rate": 4.0394301072777335e-06, + "loss": 0.5733, + "step": 18751 + }, + { + "epoch": 0.5747210984430551, + "grad_norm": 1.7242987611445153, + "learning_rate": 4.038943039273476e-06, + "loss": 0.5732, + "step": 18752 + }, + { + "epoch": 0.5747517469657962, + "grad_norm": 0.816958333956805, + "learning_rate": 4.038455980738759e-06, + "loss": 0.4384, + "step": 18753 + }, + { + "epoch": 0.5747823954885375, + "grad_norm": 1.8898301581921482, + "learning_rate": 4.037968931678383e-06, + "loss": 0.6561, + "step": 18754 + }, + { + "epoch": 0.5748130440112786, + "grad_norm": 1.8417120218517797, + "learning_rate": 4.037481892097143e-06, + "loss": 0.5978, + "step": 18755 + }, + { + "epoch": 0.5748436925340199, + "grad_norm": 0.8246687738439876, + "learning_rate": 4.036994861999842e-06, + "loss": 0.4206, + "step": 18756 + }, + { + "epoch": 0.574874341056761, + "grad_norm": 1.5998239886914687, + "learning_rate": 4.036507841391274e-06, + "loss": 0.5926, + "step": 18757 + }, + { + "epoch": 0.5749049895795023, + "grad_norm": 1.6785739108807767, + "learning_rate": 4.036020830276245e-06, + "loss": 0.6634, + "step": 18758 + }, + { + "epoch": 0.5749356381022435, + "grad_norm": 1.6341034921333364, + "learning_rate": 4.0355338286595465e-06, + "loss": 0.5834, + "step": 18759 + }, + { + "epoch": 0.5749662866249847, + "grad_norm": 1.7601490288499255, + "learning_rate": 4.035046836545981e-06, + "loss": 0.6196, + "step": 18760 + }, + { + "epoch": 0.5749969351477259, + "grad_norm": 0.8186845874273554, + "learning_rate": 4.034559853940346e-06, + "loss": 0.4279, + "step": 18761 + }, + { + "epoch": 0.5750275836704671, + "grad_norm": 1.95427398047557, + "learning_rate": 4.0340728808474395e-06, + "loss": 0.6562, + "step": 18762 + }, + { + "epoch": 0.5750582321932083, + "grad_norm": 1.918999520398539, + "learning_rate": 4.03358591727206e-06, + "loss": 0.6375, + "step": 18763 + }, + { + "epoch": 
0.5750888807159495, + "grad_norm": 1.6760534671228133, + "learning_rate": 4.033098963219006e-06, + "loss": 0.6231, + "step": 18764 + }, + { + "epoch": 0.5751195292386907, + "grad_norm": 1.8502662041340296, + "learning_rate": 4.032612018693073e-06, + "loss": 0.6401, + "step": 18765 + }, + { + "epoch": 0.5751501777614318, + "grad_norm": 1.7878819080559125, + "learning_rate": 4.032125083699064e-06, + "loss": 0.6196, + "step": 18766 + }, + { + "epoch": 0.5751808262841731, + "grad_norm": 1.685749397027347, + "learning_rate": 4.031638158241775e-06, + "loss": 0.6336, + "step": 18767 + }, + { + "epoch": 0.5752114748069143, + "grad_norm": 1.9657401338420843, + "learning_rate": 4.031151242326e-06, + "loss": 0.656, + "step": 18768 + }, + { + "epoch": 0.5752421233296555, + "grad_norm": 1.8286723694854807, + "learning_rate": 4.0306643359565426e-06, + "loss": 0.6199, + "step": 18769 + }, + { + "epoch": 0.5752727718523967, + "grad_norm": 1.74555995960298, + "learning_rate": 4.030177439138197e-06, + "loss": 0.6352, + "step": 18770 + }, + { + "epoch": 0.5753034203751379, + "grad_norm": 0.8167388316389214, + "learning_rate": 4.029690551875759e-06, + "loss": 0.4395, + "step": 18771 + }, + { + "epoch": 0.5753340688978791, + "grad_norm": 1.8871980408871425, + "learning_rate": 4.02920367417403e-06, + "loss": 0.5942, + "step": 18772 + }, + { + "epoch": 0.5753647174206203, + "grad_norm": 1.9286754220988105, + "learning_rate": 4.028716806037804e-06, + "loss": 0.6458, + "step": 18773 + }, + { + "epoch": 0.5753953659433615, + "grad_norm": 1.69275046537425, + "learning_rate": 4.028229947471881e-06, + "loss": 0.5918, + "step": 18774 + }, + { + "epoch": 0.5754260144661028, + "grad_norm": 0.7934624682021001, + "learning_rate": 4.027743098481058e-06, + "loss": 0.4121, + "step": 18775 + }, + { + "epoch": 0.5754566629888439, + "grad_norm": 1.7968130844188737, + "learning_rate": 4.0272562590701295e-06, + "loss": 0.5899, + "step": 18776 + }, + { + "epoch": 0.5754873115115852, + "grad_norm": 1.7590298871566967, + "learning_rate": 4.026769429243894e-06, + "loss": 0.6318, + "step": 18777 + }, + { + "epoch": 0.5755179600343263, + "grad_norm": 1.6711442992132148, + "learning_rate": 4.0262826090071505e-06, + "loss": 0.5662, + "step": 18778 + }, + { + "epoch": 0.5755486085570676, + "grad_norm": 1.6458043419976494, + "learning_rate": 4.02579579836469e-06, + "loss": 0.6601, + "step": 18779 + }, + { + "epoch": 0.5755792570798087, + "grad_norm": 1.9642507292801148, + "learning_rate": 4.025308997321316e-06, + "loss": 0.6147, + "step": 18780 + }, + { + "epoch": 0.57560990560255, + "grad_norm": 1.6639832140944404, + "learning_rate": 4.0248222058818206e-06, + "loss": 0.6106, + "step": 18781 + }, + { + "epoch": 0.5756405541252911, + "grad_norm": 1.7162774626176582, + "learning_rate": 4.024335424051001e-06, + "loss": 0.5519, + "step": 18782 + }, + { + "epoch": 0.5756712026480324, + "grad_norm": 2.0304138897400645, + "learning_rate": 4.023848651833655e-06, + "loss": 0.6895, + "step": 18783 + }, + { + "epoch": 0.5757018511707735, + "grad_norm": 2.0240378740826346, + "learning_rate": 4.023361889234576e-06, + "loss": 0.5485, + "step": 18784 + }, + { + "epoch": 0.5757324996935148, + "grad_norm": 1.8006528397561425, + "learning_rate": 4.022875136258564e-06, + "loss": 0.5835, + "step": 18785 + }, + { + "epoch": 0.575763148216256, + "grad_norm": 1.7657548579209716, + "learning_rate": 4.022388392910413e-06, + "loss": 0.6598, + "step": 18786 + }, + { + "epoch": 0.5757937967389972, + "grad_norm": 1.744336304415083, + "learning_rate": 
4.021901659194919e-06, + "loss": 0.6202, + "step": 18787 + }, + { + "epoch": 0.5758244452617384, + "grad_norm": 1.938512717158902, + "learning_rate": 4.02141493511688e-06, + "loss": 0.698, + "step": 18788 + }, + { + "epoch": 0.5758550937844796, + "grad_norm": 1.926026765789517, + "learning_rate": 4.020928220681089e-06, + "loss": 0.4908, + "step": 18789 + }, + { + "epoch": 0.5758857423072208, + "grad_norm": 1.858311152211975, + "learning_rate": 4.020441515892341e-06, + "loss": 0.5935, + "step": 18790 + }, + { + "epoch": 0.575916390829962, + "grad_norm": 2.0067330397502112, + "learning_rate": 4.019954820755435e-06, + "loss": 0.7382, + "step": 18791 + }, + { + "epoch": 0.5759470393527032, + "grad_norm": 1.9005732015624506, + "learning_rate": 4.019468135275164e-06, + "loss": 0.5518, + "step": 18792 + }, + { + "epoch": 0.5759776878754445, + "grad_norm": 1.908529439903129, + "learning_rate": 4.018981459456325e-06, + "loss": 0.6467, + "step": 18793 + }, + { + "epoch": 0.5760083363981856, + "grad_norm": 1.7868835495999924, + "learning_rate": 4.018494793303714e-06, + "loss": 0.7088, + "step": 18794 + }, + { + "epoch": 0.5760389849209269, + "grad_norm": 1.8134203033102543, + "learning_rate": 4.018008136822122e-06, + "loss": 0.6108, + "step": 18795 + }, + { + "epoch": 0.576069633443668, + "grad_norm": 1.7862059004319037, + "learning_rate": 4.0175214900163485e-06, + "loss": 0.5909, + "step": 18796 + }, + { + "epoch": 0.5761002819664092, + "grad_norm": 0.7506017518537921, + "learning_rate": 4.017034852891189e-06, + "loss": 0.4052, + "step": 18797 + }, + { + "epoch": 0.5761309304891504, + "grad_norm": 1.9117699493480926, + "learning_rate": 4.0165482254514325e-06, + "loss": 0.6803, + "step": 18798 + }, + { + "epoch": 0.5761615790118916, + "grad_norm": 2.22428111040531, + "learning_rate": 4.0160616077018826e-06, + "loss": 0.6788, + "step": 18799 + }, + { + "epoch": 0.5761922275346328, + "grad_norm": 1.831250623647471, + "learning_rate": 4.015574999647324e-06, + "loss": 0.5643, + "step": 18800 + }, + { + "epoch": 0.576222876057374, + "grad_norm": 1.614724546030611, + "learning_rate": 4.0150884012925614e-06, + "loss": 0.5678, + "step": 18801 + }, + { + "epoch": 0.5762535245801153, + "grad_norm": 1.5548051948200137, + "learning_rate": 4.014601812642384e-06, + "loss": 0.628, + "step": 18802 + }, + { + "epoch": 0.5762841731028564, + "grad_norm": 1.6913456982298878, + "learning_rate": 4.0141152337015854e-06, + "loss": 0.587, + "step": 18803 + }, + { + "epoch": 0.5763148216255977, + "grad_norm": 1.7298713897036564, + "learning_rate": 4.013628664474963e-06, + "loss": 0.6554, + "step": 18804 + }, + { + "epoch": 0.5763454701483388, + "grad_norm": 1.6408702410170206, + "learning_rate": 4.01314210496731e-06, + "loss": 0.6008, + "step": 18805 + }, + { + "epoch": 0.5763761186710801, + "grad_norm": 1.8415176889363312, + "learning_rate": 4.012655555183419e-06, + "loss": 0.6207, + "step": 18806 + }, + { + "epoch": 0.5764067671938212, + "grad_norm": 1.6881946945799429, + "learning_rate": 4.012169015128086e-06, + "loss": 0.6138, + "step": 18807 + }, + { + "epoch": 0.5764374157165625, + "grad_norm": 1.6548358707230286, + "learning_rate": 4.0116824848061065e-06, + "loss": 0.5996, + "step": 18808 + }, + { + "epoch": 0.5764680642393036, + "grad_norm": 1.6635133969882183, + "learning_rate": 4.011195964222268e-06, + "loss": 0.5054, + "step": 18809 + }, + { + "epoch": 0.5764987127620449, + "grad_norm": 1.7226462558609414, + "learning_rate": 4.010709453381373e-06, + "loss": 0.5462, + "step": 18810 + }, + { + "epoch": 
0.576529361284786, + "grad_norm": 1.707283636991415, + "learning_rate": 4.010222952288207e-06, + "loss": 0.5481, + "step": 18811 + }, + { + "epoch": 0.5765600098075273, + "grad_norm": 2.038061896093011, + "learning_rate": 4.009736460947571e-06, + "loss": 0.6471, + "step": 18812 + }, + { + "epoch": 0.5765906583302685, + "grad_norm": 1.7452004714577414, + "learning_rate": 4.009249979364254e-06, + "loss": 0.5374, + "step": 18813 + }, + { + "epoch": 0.5766213068530097, + "grad_norm": 1.7193234071964985, + "learning_rate": 4.008763507543048e-06, + "loss": 0.5899, + "step": 18814 + }, + { + "epoch": 0.5766519553757509, + "grad_norm": 1.8686455101828245, + "learning_rate": 4.0082770454887514e-06, + "loss": 0.6112, + "step": 18815 + }, + { + "epoch": 0.5766826038984921, + "grad_norm": 2.5717038872496736, + "learning_rate": 4.007790593206154e-06, + "loss": 0.6429, + "step": 18816 + }, + { + "epoch": 0.5767132524212333, + "grad_norm": 1.8247478114906734, + "learning_rate": 4.00730415070005e-06, + "loss": 0.6853, + "step": 18817 + }, + { + "epoch": 0.5767439009439745, + "grad_norm": 1.5935680865399757, + "learning_rate": 4.006817717975232e-06, + "loss": 0.6644, + "step": 18818 + }, + { + "epoch": 0.5767745494667157, + "grad_norm": 1.863094555058133, + "learning_rate": 4.0063312950364925e-06, + "loss": 0.6506, + "step": 18819 + }, + { + "epoch": 0.576805197989457, + "grad_norm": 1.6753770166934348, + "learning_rate": 4.005844881888626e-06, + "loss": 0.6238, + "step": 18820 + }, + { + "epoch": 0.5768358465121981, + "grad_norm": 1.626497222418724, + "learning_rate": 4.005358478536425e-06, + "loss": 0.5629, + "step": 18821 + }, + { + "epoch": 0.5768664950349394, + "grad_norm": 1.826012440472572, + "learning_rate": 4.004872084984679e-06, + "loss": 0.6147, + "step": 18822 + }, + { + "epoch": 0.5768971435576805, + "grad_norm": 1.723789799331234, + "learning_rate": 4.0043857012381855e-06, + "loss": 0.6067, + "step": 18823 + }, + { + "epoch": 0.5769277920804218, + "grad_norm": 1.7058360509704875, + "learning_rate": 4.003899327301733e-06, + "loss": 0.6887, + "step": 18824 + }, + { + "epoch": 0.5769584406031629, + "grad_norm": 1.754389458807407, + "learning_rate": 4.003412963180115e-06, + "loss": 0.599, + "step": 18825 + }, + { + "epoch": 0.5769890891259042, + "grad_norm": 1.6704626756797678, + "learning_rate": 4.002926608878125e-06, + "loss": 0.5116, + "step": 18826 + }, + { + "epoch": 0.5770197376486453, + "grad_norm": 1.8724305655084976, + "learning_rate": 4.002440264400553e-06, + "loss": 0.6719, + "step": 18827 + }, + { + "epoch": 0.5770503861713865, + "grad_norm": 1.9402414587902919, + "learning_rate": 4.001953929752193e-06, + "loss": 0.6608, + "step": 18828 + }, + { + "epoch": 0.5770810346941277, + "grad_norm": 0.8118793650515453, + "learning_rate": 4.001467604937837e-06, + "loss": 0.4166, + "step": 18829 + }, + { + "epoch": 0.5771116832168689, + "grad_norm": 1.6498627345178734, + "learning_rate": 4.000981289962275e-06, + "loss": 0.5923, + "step": 18830 + }, + { + "epoch": 0.5771423317396102, + "grad_norm": 1.7718235283609307, + "learning_rate": 4.000494984830301e-06, + "loss": 0.6244, + "step": 18831 + }, + { + "epoch": 0.5771729802623513, + "grad_norm": 1.503583252731151, + "learning_rate": 4.000008689546707e-06, + "loss": 0.6064, + "step": 18832 + }, + { + "epoch": 0.5772036287850926, + "grad_norm": 1.7910299731948298, + "learning_rate": 3.99952240411628e-06, + "loss": 0.6763, + "step": 18833 + }, + { + "epoch": 0.5772342773078337, + "grad_norm": 1.7890077840071472, + "learning_rate": 
3.999036128543817e-06, + "loss": 0.687, + "step": 18834 + }, + { + "epoch": 0.577264925830575, + "grad_norm": 1.8138604410574317, + "learning_rate": 3.998549862834106e-06, + "loss": 0.6705, + "step": 18835 + }, + { + "epoch": 0.5772955743533161, + "grad_norm": 1.5088078262215032, + "learning_rate": 3.998063606991939e-06, + "loss": 0.5461, + "step": 18836 + }, + { + "epoch": 0.5773262228760574, + "grad_norm": 1.8097723071655516, + "learning_rate": 3.997577361022109e-06, + "loss": 0.6104, + "step": 18837 + }, + { + "epoch": 0.5773568713987985, + "grad_norm": 1.7884042007503766, + "learning_rate": 3.997091124929404e-06, + "loss": 0.6262, + "step": 18838 + }, + { + "epoch": 0.5773875199215398, + "grad_norm": 1.66992991684294, + "learning_rate": 3.996604898718618e-06, + "loss": 0.6604, + "step": 18839 + }, + { + "epoch": 0.577418168444281, + "grad_norm": 1.6110986127004108, + "learning_rate": 3.996118682394542e-06, + "loss": 0.5528, + "step": 18840 + }, + { + "epoch": 0.5774488169670222, + "grad_norm": 1.681586078645647, + "learning_rate": 3.995632475961962e-06, + "loss": 0.5821, + "step": 18841 + }, + { + "epoch": 0.5774794654897634, + "grad_norm": 1.8359156453868568, + "learning_rate": 3.995146279425676e-06, + "loss": 0.6672, + "step": 18842 + }, + { + "epoch": 0.5775101140125046, + "grad_norm": 1.8895487247597575, + "learning_rate": 3.9946600927904695e-06, + "loss": 0.7191, + "step": 18843 + }, + { + "epoch": 0.5775407625352458, + "grad_norm": 1.6965280514636176, + "learning_rate": 3.994173916061133e-06, + "loss": 0.6819, + "step": 18844 + }, + { + "epoch": 0.577571411057987, + "grad_norm": 1.6507067122827022, + "learning_rate": 3.993687749242459e-06, + "loss": 0.64, + "step": 18845 + }, + { + "epoch": 0.5776020595807282, + "grad_norm": 1.7351365952598463, + "learning_rate": 3.993201592339237e-06, + "loss": 0.6322, + "step": 18846 + }, + { + "epoch": 0.5776327081034694, + "grad_norm": 1.7650906851710664, + "learning_rate": 3.992715445356258e-06, + "loss": 0.6278, + "step": 18847 + }, + { + "epoch": 0.5776633566262106, + "grad_norm": 1.9237215912710173, + "learning_rate": 3.992229308298311e-06, + "loss": 0.5953, + "step": 18848 + }, + { + "epoch": 0.5776940051489519, + "grad_norm": 1.7092703038003898, + "learning_rate": 3.991743181170186e-06, + "loss": 0.6331, + "step": 18849 + }, + { + "epoch": 0.577724653671693, + "grad_norm": 2.104430457021574, + "learning_rate": 3.991257063976673e-06, + "loss": 0.6567, + "step": 18850 + }, + { + "epoch": 0.5777553021944343, + "grad_norm": 1.6883585614620156, + "learning_rate": 3.990770956722565e-06, + "loss": 0.529, + "step": 18851 + }, + { + "epoch": 0.5777859507171754, + "grad_norm": 1.7531566251966921, + "learning_rate": 3.990284859412646e-06, + "loss": 0.6282, + "step": 18852 + }, + { + "epoch": 0.5778165992399167, + "grad_norm": 1.7358590795945619, + "learning_rate": 3.989798772051711e-06, + "loss": 0.6198, + "step": 18853 + }, + { + "epoch": 0.5778472477626578, + "grad_norm": 1.523766674195304, + "learning_rate": 3.9893126946445435e-06, + "loss": 0.5816, + "step": 18854 + }, + { + "epoch": 0.5778778962853991, + "grad_norm": 0.8588223162208546, + "learning_rate": 3.98882662719594e-06, + "loss": 0.4466, + "step": 18855 + }, + { + "epoch": 0.5779085448081402, + "grad_norm": 1.978976516697107, + "learning_rate": 3.988340569710686e-06, + "loss": 0.7348, + "step": 18856 + }, + { + "epoch": 0.5779391933308815, + "grad_norm": 1.7450573137124346, + "learning_rate": 3.98785452219357e-06, + "loss": 0.6239, + "step": 18857 + }, + { + "epoch": 
0.5779698418536227, + "grad_norm": 1.82460750607765, + "learning_rate": 3.9873684846493835e-06, + "loss": 0.541, + "step": 18858 + }, + { + "epoch": 0.5780004903763638, + "grad_norm": 1.861186087931137, + "learning_rate": 3.986882457082914e-06, + "loss": 0.6032, + "step": 18859 + }, + { + "epoch": 0.5780311388991051, + "grad_norm": 1.5671644231177526, + "learning_rate": 3.98639643949895e-06, + "loss": 0.5927, + "step": 18860 + }, + { + "epoch": 0.5780617874218462, + "grad_norm": 2.168612657449596, + "learning_rate": 3.985910431902282e-06, + "loss": 0.5801, + "step": 18861 + }, + { + "epoch": 0.5780924359445875, + "grad_norm": 0.7861206352383585, + "learning_rate": 3.985424434297699e-06, + "loss": 0.3928, + "step": 18862 + }, + { + "epoch": 0.5781230844673286, + "grad_norm": 1.8660160058221575, + "learning_rate": 3.984938446689987e-06, + "loss": 0.5626, + "step": 18863 + }, + { + "epoch": 0.5781537329900699, + "grad_norm": 2.066246335215465, + "learning_rate": 3.9844524690839376e-06, + "loss": 0.6742, + "step": 18864 + }, + { + "epoch": 0.578184381512811, + "grad_norm": 1.597497419857301, + "learning_rate": 3.983966501484336e-06, + "loss": 0.6327, + "step": 18865 + }, + { + "epoch": 0.5782150300355523, + "grad_norm": 1.7757359138480784, + "learning_rate": 3.983480543895974e-06, + "loss": 0.6239, + "step": 18866 + }, + { + "epoch": 0.5782456785582935, + "grad_norm": 1.69086767862164, + "learning_rate": 3.982994596323638e-06, + "loss": 0.5705, + "step": 18867 + }, + { + "epoch": 0.5782763270810347, + "grad_norm": 1.9365794261065634, + "learning_rate": 3.982508658772116e-06, + "loss": 0.6799, + "step": 18868 + }, + { + "epoch": 0.5783069756037759, + "grad_norm": 1.7964754393901103, + "learning_rate": 3.982022731246197e-06, + "loss": 0.5969, + "step": 18869 + }, + { + "epoch": 0.5783376241265171, + "grad_norm": 1.9462620461075495, + "learning_rate": 3.981536813750668e-06, + "loss": 0.7289, + "step": 18870 + }, + { + "epoch": 0.5783682726492583, + "grad_norm": 0.7765631044105008, + "learning_rate": 3.981050906290317e-06, + "loss": 0.4487, + "step": 18871 + }, + { + "epoch": 0.5783989211719995, + "grad_norm": 1.6333190079465705, + "learning_rate": 3.980565008869933e-06, + "loss": 0.6476, + "step": 18872 + }, + { + "epoch": 0.5784295696947407, + "grad_norm": 1.671719752634486, + "learning_rate": 3.9800791214943015e-06, + "loss": 0.534, + "step": 18873 + }, + { + "epoch": 0.578460218217482, + "grad_norm": 1.89647043579649, + "learning_rate": 3.979593244168214e-06, + "loss": 0.6003, + "step": 18874 + }, + { + "epoch": 0.5784908667402231, + "grad_norm": 1.709818679140625, + "learning_rate": 3.979107376896454e-06, + "loss": 0.6732, + "step": 18875 + }, + { + "epoch": 0.5785215152629644, + "grad_norm": 1.832021172193617, + "learning_rate": 3.978621519683808e-06, + "loss": 0.5495, + "step": 18876 + }, + { + "epoch": 0.5785521637857055, + "grad_norm": 1.6312292175190104, + "learning_rate": 3.97813567253507e-06, + "loss": 0.5674, + "step": 18877 + }, + { + "epoch": 0.5785828123084468, + "grad_norm": 1.6846159321521135, + "learning_rate": 3.97764983545502e-06, + "loss": 0.5808, + "step": 18878 + }, + { + "epoch": 0.5786134608311879, + "grad_norm": 1.6274705613228861, + "learning_rate": 3.977164008448447e-06, + "loss": 0.5952, + "step": 18879 + }, + { + "epoch": 0.5786441093539292, + "grad_norm": 1.549273642572994, + "learning_rate": 3.976678191520141e-06, + "loss": 0.6127, + "step": 18880 + }, + { + "epoch": 0.5786747578766703, + "grad_norm": 1.7278249059770732, + "learning_rate": 3.976192384674884e-06, + 
"loss": 0.5924, + "step": 18881 + }, + { + "epoch": 0.5787054063994116, + "grad_norm": 1.7711056986323643, + "learning_rate": 3.9757065879174665e-06, + "loss": 0.7084, + "step": 18882 + }, + { + "epoch": 0.5787360549221527, + "grad_norm": 1.5230219464319181, + "learning_rate": 3.975220801252674e-06, + "loss": 0.5534, + "step": 18883 + }, + { + "epoch": 0.578766703444894, + "grad_norm": 1.7532240799666567, + "learning_rate": 3.974735024685293e-06, + "loss": 0.7277, + "step": 18884 + }, + { + "epoch": 0.5787973519676352, + "grad_norm": 1.814303048979287, + "learning_rate": 3.974249258220112e-06, + "loss": 0.6046, + "step": 18885 + }, + { + "epoch": 0.5788280004903764, + "grad_norm": 1.7130448615705853, + "learning_rate": 3.973763501861914e-06, + "loss": 0.5473, + "step": 18886 + }, + { + "epoch": 0.5788586490131176, + "grad_norm": 1.9178930600722335, + "learning_rate": 3.973277755615486e-06, + "loss": 0.5864, + "step": 18887 + }, + { + "epoch": 0.5788892975358588, + "grad_norm": 1.756196373356356, + "learning_rate": 3.972792019485616e-06, + "loss": 0.5726, + "step": 18888 + }, + { + "epoch": 0.5789199460586, + "grad_norm": 1.7857119890954702, + "learning_rate": 3.9723062934770895e-06, + "loss": 0.5216, + "step": 18889 + }, + { + "epoch": 0.5789505945813411, + "grad_norm": 1.9364107395076746, + "learning_rate": 3.97182057759469e-06, + "loss": 0.6251, + "step": 18890 + }, + { + "epoch": 0.5789812431040824, + "grad_norm": 1.6829742582751677, + "learning_rate": 3.971334871843207e-06, + "loss": 0.6935, + "step": 18891 + }, + { + "epoch": 0.5790118916268235, + "grad_norm": 1.9221703675047463, + "learning_rate": 3.970849176227424e-06, + "loss": 0.6706, + "step": 18892 + }, + { + "epoch": 0.5790425401495648, + "grad_norm": 1.9098125261278227, + "learning_rate": 3.9703634907521285e-06, + "loss": 0.6693, + "step": 18893 + }, + { + "epoch": 0.579073188672306, + "grad_norm": 1.7750040151755104, + "learning_rate": 3.969877815422106e-06, + "loss": 0.6533, + "step": 18894 + }, + { + "epoch": 0.5791038371950472, + "grad_norm": 1.9093406480955073, + "learning_rate": 3.969392150242136e-06, + "loss": 0.6081, + "step": 18895 + }, + { + "epoch": 0.5791344857177884, + "grad_norm": 1.712471603843466, + "learning_rate": 3.968906495217014e-06, + "loss": 0.6249, + "step": 18896 + }, + { + "epoch": 0.5791651342405296, + "grad_norm": 1.5864554960967796, + "learning_rate": 3.968420850351519e-06, + "loss": 0.6333, + "step": 18897 + }, + { + "epoch": 0.5791957827632708, + "grad_norm": 0.7966617739676499, + "learning_rate": 3.967935215650436e-06, + "loss": 0.4107, + "step": 18898 + }, + { + "epoch": 0.579226431286012, + "grad_norm": 0.7948834689095741, + "learning_rate": 3.967449591118552e-06, + "loss": 0.4324, + "step": 18899 + }, + { + "epoch": 0.5792570798087532, + "grad_norm": 1.7839555100794902, + "learning_rate": 3.966963976760651e-06, + "loss": 0.6108, + "step": 18900 + }, + { + "epoch": 0.5792877283314944, + "grad_norm": 1.824351835644095, + "learning_rate": 3.966478372581518e-06, + "loss": 0.5777, + "step": 18901 + }, + { + "epoch": 0.5793183768542356, + "grad_norm": 1.6951830624341737, + "learning_rate": 3.965992778585939e-06, + "loss": 0.6029, + "step": 18902 + }, + { + "epoch": 0.5793490253769769, + "grad_norm": 1.829333066310494, + "learning_rate": 3.965507194778697e-06, + "loss": 0.6547, + "step": 18903 + }, + { + "epoch": 0.579379673899718, + "grad_norm": 1.4148570494587638, + "learning_rate": 3.965021621164577e-06, + "loss": 0.5545, + "step": 18904 + }, + { + "epoch": 0.5794103224224593, + "grad_norm": 
1.5533061642102701, + "learning_rate": 3.964536057748366e-06, + "loss": 0.5486, + "step": 18905 + }, + { + "epoch": 0.5794409709452004, + "grad_norm": 1.6479168279452092, + "learning_rate": 3.964050504534844e-06, + "loss": 0.5253, + "step": 18906 + }, + { + "epoch": 0.5794716194679417, + "grad_norm": 1.7071731270405726, + "learning_rate": 3.963564961528798e-06, + "loss": 0.5753, + "step": 18907 + }, + { + "epoch": 0.5795022679906828, + "grad_norm": 1.6923949672333296, + "learning_rate": 3.9630794287350126e-06, + "loss": 0.6171, + "step": 18908 + }, + { + "epoch": 0.5795329165134241, + "grad_norm": 1.491922534649056, + "learning_rate": 3.962593906158269e-06, + "loss": 0.5758, + "step": 18909 + }, + { + "epoch": 0.5795635650361652, + "grad_norm": 1.7405652207790416, + "learning_rate": 3.962108393803354e-06, + "loss": 0.6504, + "step": 18910 + }, + { + "epoch": 0.5795942135589065, + "grad_norm": 1.6488155415742385, + "learning_rate": 3.96162289167505e-06, + "loss": 0.5667, + "step": 18911 + }, + { + "epoch": 0.5796248620816477, + "grad_norm": 1.7993934709429706, + "learning_rate": 3.961137399778142e-06, + "loss": 0.5666, + "step": 18912 + }, + { + "epoch": 0.5796555106043889, + "grad_norm": 1.6354525310688761, + "learning_rate": 3.960651918117413e-06, + "loss": 0.7352, + "step": 18913 + }, + { + "epoch": 0.5796861591271301, + "grad_norm": 1.7510168881817234, + "learning_rate": 3.960166446697645e-06, + "loss": 0.6283, + "step": 18914 + }, + { + "epoch": 0.5797168076498713, + "grad_norm": 1.8861553120080479, + "learning_rate": 3.959680985523625e-06, + "loss": 0.5966, + "step": 18915 + }, + { + "epoch": 0.5797474561726125, + "grad_norm": 1.821716876216917, + "learning_rate": 3.959195534600136e-06, + "loss": 0.6673, + "step": 18916 + }, + { + "epoch": 0.5797781046953537, + "grad_norm": 1.7661404972048478, + "learning_rate": 3.958710093931956e-06, + "loss": 0.7014, + "step": 18917 + }, + { + "epoch": 0.5798087532180949, + "grad_norm": 1.8021924343413203, + "learning_rate": 3.9582246635238745e-06, + "loss": 0.6557, + "step": 18918 + }, + { + "epoch": 0.5798394017408361, + "grad_norm": 1.9903088641900617, + "learning_rate": 3.957739243380669e-06, + "loss": 0.6835, + "step": 18919 + }, + { + "epoch": 0.5798700502635773, + "grad_norm": 1.8509626663336216, + "learning_rate": 3.957253833507129e-06, + "loss": 0.6396, + "step": 18920 + }, + { + "epoch": 0.5799006987863184, + "grad_norm": 2.1411508186682555, + "learning_rate": 3.956768433908031e-06, + "loss": 0.7176, + "step": 18921 + }, + { + "epoch": 0.5799313473090597, + "grad_norm": 1.6101372409809136, + "learning_rate": 3.9562830445881615e-06, + "loss": 0.6361, + "step": 18922 + }, + { + "epoch": 0.5799619958318009, + "grad_norm": 1.6943193102028438, + "learning_rate": 3.9557976655523025e-06, + "loss": 0.6199, + "step": 18923 + }, + { + "epoch": 0.5799926443545421, + "grad_norm": 1.7423047906985119, + "learning_rate": 3.955312296805237e-06, + "loss": 0.6132, + "step": 18924 + }, + { + "epoch": 0.5800232928772833, + "grad_norm": 1.8318771276553298, + "learning_rate": 3.954826938351745e-06, + "loss": 0.5536, + "step": 18925 + }, + { + "epoch": 0.5800539414000245, + "grad_norm": 1.6038515912091178, + "learning_rate": 3.9543415901966115e-06, + "loss": 0.6306, + "step": 18926 + }, + { + "epoch": 0.5800845899227657, + "grad_norm": 1.8578342056113053, + "learning_rate": 3.953856252344617e-06, + "loss": 0.6762, + "step": 18927 + }, + { + "epoch": 0.5801152384455069, + "grad_norm": 1.8483329862136157, + "learning_rate": 3.953370924800546e-06, + "loss": 0.6801, 
+ "step": 18928 + }, + { + "epoch": 0.5801458869682481, + "grad_norm": 1.6686383767990085, + "learning_rate": 3.952885607569179e-06, + "loss": 0.6678, + "step": 18929 + }, + { + "epoch": 0.5801765354909894, + "grad_norm": 1.7656261804779392, + "learning_rate": 3.952400300655297e-06, + "loss": 0.6875, + "step": 18930 + }, + { + "epoch": 0.5802071840137305, + "grad_norm": 1.5909969105549584, + "learning_rate": 3.951915004063683e-06, + "loss": 0.5331, + "step": 18931 + }, + { + "epoch": 0.5802378325364718, + "grad_norm": 1.5893098571882533, + "learning_rate": 3.95142971779912e-06, + "loss": 0.5348, + "step": 18932 + }, + { + "epoch": 0.5802684810592129, + "grad_norm": 1.7220476894821328, + "learning_rate": 3.950944441866386e-06, + "loss": 0.6324, + "step": 18933 + }, + { + "epoch": 0.5802991295819542, + "grad_norm": 1.8199679306624506, + "learning_rate": 3.950459176270267e-06, + "loss": 0.5975, + "step": 18934 + }, + { + "epoch": 0.5803297781046953, + "grad_norm": 1.7550451596493255, + "learning_rate": 3.9499739210155405e-06, + "loss": 0.5862, + "step": 18935 + }, + { + "epoch": 0.5803604266274366, + "grad_norm": 1.683519084411721, + "learning_rate": 3.94948867610699e-06, + "loss": 0.6315, + "step": 18936 + }, + { + "epoch": 0.5803910751501777, + "grad_norm": 1.6571037683640675, + "learning_rate": 3.949003441549398e-06, + "loss": 0.531, + "step": 18937 + }, + { + "epoch": 0.580421723672919, + "grad_norm": 1.7217206362050412, + "learning_rate": 3.948518217347541e-06, + "loss": 0.6289, + "step": 18938 + }, + { + "epoch": 0.5804523721956601, + "grad_norm": 1.6816871757867495, + "learning_rate": 3.948033003506206e-06, + "loss": 0.6215, + "step": 18939 + }, + { + "epoch": 0.5804830207184014, + "grad_norm": 1.7926007341689418, + "learning_rate": 3.94754780003017e-06, + "loss": 0.697, + "step": 18940 + }, + { + "epoch": 0.5805136692411426, + "grad_norm": 1.7423379082724322, + "learning_rate": 3.9470626069242145e-06, + "loss": 0.6857, + "step": 18941 + }, + { + "epoch": 0.5805443177638838, + "grad_norm": 1.7164636768227348, + "learning_rate": 3.946577424193121e-06, + "loss": 0.6832, + "step": 18942 + }, + { + "epoch": 0.580574966286625, + "grad_norm": 2.0339340560483463, + "learning_rate": 3.94609225184167e-06, + "loss": 0.6545, + "step": 18943 + }, + { + "epoch": 0.5806056148093662, + "grad_norm": 2.08725966643522, + "learning_rate": 3.945607089874639e-06, + "loss": 0.6896, + "step": 18944 + }, + { + "epoch": 0.5806362633321074, + "grad_norm": 1.6059571277138458, + "learning_rate": 3.945121938296814e-06, + "loss": 0.56, + "step": 18945 + }, + { + "epoch": 0.5806669118548486, + "grad_norm": 1.8228131711728284, + "learning_rate": 3.94463679711297e-06, + "loss": 0.7179, + "step": 18946 + }, + { + "epoch": 0.5806975603775898, + "grad_norm": 1.7407176349735027, + "learning_rate": 3.9441516663278925e-06, + "loss": 0.6896, + "step": 18947 + }, + { + "epoch": 0.580728208900331, + "grad_norm": 1.6064493269984337, + "learning_rate": 3.943666545946359e-06, + "loss": 0.5515, + "step": 18948 + }, + { + "epoch": 0.5807588574230722, + "grad_norm": 1.714959072568832, + "learning_rate": 3.9431814359731455e-06, + "loss": 0.5866, + "step": 18949 + }, + { + "epoch": 0.5807895059458135, + "grad_norm": 1.8612277439558915, + "learning_rate": 3.942696336413039e-06, + "loss": 0.5918, + "step": 18950 + }, + { + "epoch": 0.5808201544685546, + "grad_norm": 1.618283113714242, + "learning_rate": 3.942211247270816e-06, + "loss": 0.628, + "step": 18951 + }, + { + "epoch": 0.5808508029912958, + "grad_norm": 0.9296375168979066, + 
"learning_rate": 3.941726168551254e-06, + "loss": 0.4395, + "step": 18952 + }, + { + "epoch": 0.580881451514037, + "grad_norm": 1.7990686166190917, + "learning_rate": 3.941241100259136e-06, + "loss": 0.6429, + "step": 18953 + }, + { + "epoch": 0.5809121000367782, + "grad_norm": 1.7817424002485185, + "learning_rate": 3.9407560423992405e-06, + "loss": 0.5909, + "step": 18954 + }, + { + "epoch": 0.5809427485595194, + "grad_norm": 2.051169567877565, + "learning_rate": 3.940270994976347e-06, + "loss": 0.6747, + "step": 18955 + }, + { + "epoch": 0.5809733970822606, + "grad_norm": 1.712565279869514, + "learning_rate": 3.939785957995234e-06, + "loss": 0.6054, + "step": 18956 + }, + { + "epoch": 0.5810040456050019, + "grad_norm": 1.8112524515372512, + "learning_rate": 3.9393009314606815e-06, + "loss": 0.7009, + "step": 18957 + }, + { + "epoch": 0.581034694127743, + "grad_norm": 1.7676011805528935, + "learning_rate": 3.938815915377468e-06, + "loss": 0.666, + "step": 18958 + }, + { + "epoch": 0.5810653426504843, + "grad_norm": 1.8279322846900108, + "learning_rate": 3.938330909750374e-06, + "loss": 0.617, + "step": 18959 + }, + { + "epoch": 0.5810959911732254, + "grad_norm": 0.7857405557853585, + "learning_rate": 3.937845914584175e-06, + "loss": 0.4322, + "step": 18960 + }, + { + "epoch": 0.5811266396959667, + "grad_norm": 0.8201070028413023, + "learning_rate": 3.937360929883654e-06, + "loss": 0.4347, + "step": 18961 + }, + { + "epoch": 0.5811572882187078, + "grad_norm": 2.0785707160066673, + "learning_rate": 3.936875955653587e-06, + "loss": 0.5944, + "step": 18962 + }, + { + "epoch": 0.5811879367414491, + "grad_norm": 0.7945370341150008, + "learning_rate": 3.936390991898752e-06, + "loss": 0.4396, + "step": 18963 + }, + { + "epoch": 0.5812185852641902, + "grad_norm": 1.8262066902551133, + "learning_rate": 3.93590603862393e-06, + "loss": 0.7126, + "step": 18964 + }, + { + "epoch": 0.5812492337869315, + "grad_norm": 1.6752441368246713, + "learning_rate": 3.935421095833898e-06, + "loss": 0.5776, + "step": 18965 + }, + { + "epoch": 0.5812798823096726, + "grad_norm": 1.6386393377652047, + "learning_rate": 3.934936163533434e-06, + "loss": 0.683, + "step": 18966 + }, + { + "epoch": 0.5813105308324139, + "grad_norm": 0.794950500586596, + "learning_rate": 3.9344512417273165e-06, + "loss": 0.4493, + "step": 18967 + }, + { + "epoch": 0.5813411793551551, + "grad_norm": 0.7534718819887313, + "learning_rate": 3.9339663304203236e-06, + "loss": 0.4041, + "step": 18968 + }, + { + "epoch": 0.5813718278778963, + "grad_norm": 1.907941995819433, + "learning_rate": 3.933481429617233e-06, + "loss": 0.6273, + "step": 18969 + }, + { + "epoch": 0.5814024764006375, + "grad_norm": 1.8078618287458867, + "learning_rate": 3.932996539322825e-06, + "loss": 0.6509, + "step": 18970 + }, + { + "epoch": 0.5814331249233787, + "grad_norm": 1.7710398188428047, + "learning_rate": 3.932511659541871e-06, + "loss": 0.5655, + "step": 18971 + }, + { + "epoch": 0.5814637734461199, + "grad_norm": 1.8163495947917998, + "learning_rate": 3.9320267902791564e-06, + "loss": 0.6023, + "step": 18972 + }, + { + "epoch": 0.5814944219688611, + "grad_norm": 1.7424848751571191, + "learning_rate": 3.9315419315394525e-06, + "loss": 0.5815, + "step": 18973 + }, + { + "epoch": 0.5815250704916023, + "grad_norm": 1.8328089253884483, + "learning_rate": 3.931057083327541e-06, + "loss": 0.687, + "step": 18974 + }, + { + "epoch": 0.5815557190143436, + "grad_norm": 1.8816598796056114, + "learning_rate": 3.930572245648197e-06, + "loss": 0.5768, + "step": 18975 + }, + { + 
"epoch": 0.5815863675370847, + "grad_norm": 1.6417642021728935, + "learning_rate": 3.930087418506198e-06, + "loss": 0.5329, + "step": 18976 + }, + { + "epoch": 0.581617016059826, + "grad_norm": 1.836011381045137, + "learning_rate": 3.929602601906322e-06, + "loss": 0.6696, + "step": 18977 + }, + { + "epoch": 0.5816476645825671, + "grad_norm": 1.7237855257339252, + "learning_rate": 3.929117795853345e-06, + "loss": 0.6409, + "step": 18978 + }, + { + "epoch": 0.5816783131053084, + "grad_norm": 1.7592285067940048, + "learning_rate": 3.928633000352043e-06, + "loss": 0.6253, + "step": 18979 + }, + { + "epoch": 0.5817089616280495, + "grad_norm": 1.7756387977951695, + "learning_rate": 3.928148215407197e-06, + "loss": 0.6185, + "step": 18980 + }, + { + "epoch": 0.5817396101507908, + "grad_norm": 1.9670500997492555, + "learning_rate": 3.927663441023578e-06, + "loss": 0.694, + "step": 18981 + }, + { + "epoch": 0.5817702586735319, + "grad_norm": 1.8649931855812394, + "learning_rate": 3.927178677205969e-06, + "loss": 0.5483, + "step": 18982 + }, + { + "epoch": 0.5818009071962731, + "grad_norm": 1.9448310504913866, + "learning_rate": 3.92669392395914e-06, + "loss": 0.6272, + "step": 18983 + }, + { + "epoch": 0.5818315557190143, + "grad_norm": 2.3609351070907945, + "learning_rate": 3.926209181287871e-06, + "loss": 0.6978, + "step": 18984 + }, + { + "epoch": 0.5818622042417555, + "grad_norm": 1.6097655470584809, + "learning_rate": 3.925724449196938e-06, + "loss": 0.7157, + "step": 18985 + }, + { + "epoch": 0.5818928527644968, + "grad_norm": 1.4736734744857567, + "learning_rate": 3.925239727691118e-06, + "loss": 0.6238, + "step": 18986 + }, + { + "epoch": 0.5819235012872379, + "grad_norm": 1.8059927523732147, + "learning_rate": 3.924755016775184e-06, + "loss": 0.7346, + "step": 18987 + }, + { + "epoch": 0.5819541498099792, + "grad_norm": 1.7659653627652325, + "learning_rate": 3.924270316453915e-06, + "loss": 0.6443, + "step": 18988 + }, + { + "epoch": 0.5819847983327203, + "grad_norm": 1.7854434259547105, + "learning_rate": 3.923785626732087e-06, + "loss": 0.665, + "step": 18989 + }, + { + "epoch": 0.5820154468554616, + "grad_norm": 1.8207192080921868, + "learning_rate": 3.923300947614471e-06, + "loss": 0.6332, + "step": 18990 + }, + { + "epoch": 0.5820460953782027, + "grad_norm": 2.0825756933882897, + "learning_rate": 3.92281627910585e-06, + "loss": 0.6218, + "step": 18991 + }, + { + "epoch": 0.582076743900944, + "grad_norm": 0.8988885712481619, + "learning_rate": 3.922331621210992e-06, + "loss": 0.4269, + "step": 18992 + }, + { + "epoch": 0.5821073924236851, + "grad_norm": 1.537076650842733, + "learning_rate": 3.92184697393468e-06, + "loss": 0.5427, + "step": 18993 + }, + { + "epoch": 0.5821380409464264, + "grad_norm": 1.626133623380086, + "learning_rate": 3.9213623372816845e-06, + "loss": 0.6718, + "step": 18994 + }, + { + "epoch": 0.5821686894691676, + "grad_norm": 1.825522415757323, + "learning_rate": 3.920877711256781e-06, + "loss": 0.6508, + "step": 18995 + }, + { + "epoch": 0.5821993379919088, + "grad_norm": 1.6798020491439924, + "learning_rate": 3.920393095864746e-06, + "loss": 0.6266, + "step": 18996 + }, + { + "epoch": 0.58222998651465, + "grad_norm": 1.6960636082625118, + "learning_rate": 3.919908491110354e-06, + "loss": 0.6863, + "step": 18997 + }, + { + "epoch": 0.5822606350373912, + "grad_norm": 1.6693298606472757, + "learning_rate": 3.9194238969983795e-06, + "loss": 0.5614, + "step": 18998 + }, + { + "epoch": 0.5822912835601324, + "grad_norm": 1.659123067181568, + "learning_rate": 
3.918939313533598e-06, + "loss": 0.6417, + "step": 18999 + }, + { + "epoch": 0.5823219320828736, + "grad_norm": 1.7791612635294491, + "learning_rate": 3.918454740720784e-06, + "loss": 0.7293, + "step": 19000 + }, + { + "epoch": 0.5823525806056148, + "grad_norm": 1.8457408233513295, + "learning_rate": 3.917970178564713e-06, + "loss": 0.5991, + "step": 19001 + }, + { + "epoch": 0.582383229128356, + "grad_norm": 1.6043414141971357, + "learning_rate": 3.91748562707016e-06, + "loss": 0.665, + "step": 19002 + }, + { + "epoch": 0.5824138776510972, + "grad_norm": 1.7834367702367142, + "learning_rate": 3.917001086241895e-06, + "loss": 0.6219, + "step": 19003 + }, + { + "epoch": 0.5824445261738385, + "grad_norm": 1.8269862528275989, + "learning_rate": 3.916516556084697e-06, + "loss": 0.633, + "step": 19004 + }, + { + "epoch": 0.5824751746965796, + "grad_norm": 1.7511560768928653, + "learning_rate": 3.916032036603339e-06, + "loss": 0.6169, + "step": 19005 + }, + { + "epoch": 0.5825058232193209, + "grad_norm": 1.7527580383357315, + "learning_rate": 3.9155475278025935e-06, + "loss": 0.6366, + "step": 19006 + }, + { + "epoch": 0.582536471742062, + "grad_norm": 0.8095203370818067, + "learning_rate": 3.915063029687236e-06, + "loss": 0.4221, + "step": 19007 + }, + { + "epoch": 0.5825671202648033, + "grad_norm": 1.7187474235136098, + "learning_rate": 3.91457854226204e-06, + "loss": 0.6643, + "step": 19008 + }, + { + "epoch": 0.5825977687875444, + "grad_norm": 0.9935878996378328, + "learning_rate": 3.9140940655317795e-06, + "loss": 0.4207, + "step": 19009 + }, + { + "epoch": 0.5826284173102857, + "grad_norm": 1.854675784116305, + "learning_rate": 3.913609599501228e-06, + "loss": 0.668, + "step": 19010 + }, + { + "epoch": 0.5826590658330268, + "grad_norm": 1.749701308996094, + "learning_rate": 3.913125144175159e-06, + "loss": 0.6829, + "step": 19011 + }, + { + "epoch": 0.5826897143557681, + "grad_norm": 1.9476589580454977, + "learning_rate": 3.912640699558346e-06, + "loss": 0.6308, + "step": 19012 + }, + { + "epoch": 0.5827203628785093, + "grad_norm": 1.8732180983831437, + "learning_rate": 3.912156265655564e-06, + "loss": 0.7338, + "step": 19013 + }, + { + "epoch": 0.5827510114012504, + "grad_norm": 0.8211163453143457, + "learning_rate": 3.9116718424715825e-06, + "loss": 0.4486, + "step": 19014 + }, + { + "epoch": 0.5827816599239917, + "grad_norm": 1.7576559109235022, + "learning_rate": 3.9111874300111786e-06, + "loss": 0.5993, + "step": 19015 + }, + { + "epoch": 0.5828123084467328, + "grad_norm": 2.0920108185626773, + "learning_rate": 3.910703028279123e-06, + "loss": 0.7087, + "step": 19016 + }, + { + "epoch": 0.5828429569694741, + "grad_norm": 1.83688019991271, + "learning_rate": 3.9102186372801875e-06, + "loss": 0.6169, + "step": 19017 + }, + { + "epoch": 0.5828736054922152, + "grad_norm": 1.7059458707438926, + "learning_rate": 3.909734257019148e-06, + "loss": 0.5438, + "step": 19018 + }, + { + "epoch": 0.5829042540149565, + "grad_norm": 1.8060720898906983, + "learning_rate": 3.909249887500775e-06, + "loss": 0.6263, + "step": 19019 + }, + { + "epoch": 0.5829349025376976, + "grad_norm": 1.7525056618569008, + "learning_rate": 3.9087655287298435e-06, + "loss": 0.6524, + "step": 19020 + }, + { + "epoch": 0.5829655510604389, + "grad_norm": 1.6773799180300824, + "learning_rate": 3.908281180711123e-06, + "loss": 0.5429, + "step": 19021 + }, + { + "epoch": 0.58299619958318, + "grad_norm": 1.8213896938169312, + "learning_rate": 3.907796843449387e-06, + "loss": 0.6842, + "step": 19022 + }, + { + "epoch": 
0.5830268481059213, + "grad_norm": 1.8541847269601517, + "learning_rate": 3.9073125169494095e-06, + "loss": 0.6462, + "step": 19023 + }, + { + "epoch": 0.5830574966286625, + "grad_norm": 0.8481856433855895, + "learning_rate": 3.906828201215963e-06, + "loss": 0.4649, + "step": 19024 + }, + { + "epoch": 0.5830881451514037, + "grad_norm": 1.639288674910339, + "learning_rate": 3.9063438962538145e-06, + "loss": 0.5977, + "step": 19025 + }, + { + "epoch": 0.5831187936741449, + "grad_norm": 0.7721991095750141, + "learning_rate": 3.9058596020677406e-06, + "loss": 0.4249, + "step": 19026 + }, + { + "epoch": 0.5831494421968861, + "grad_norm": 1.5664261737336251, + "learning_rate": 3.9053753186625114e-06, + "loss": 0.5733, + "step": 19027 + }, + { + "epoch": 0.5831800907196273, + "grad_norm": 1.9018485515298784, + "learning_rate": 3.9048910460429e-06, + "loss": 0.6742, + "step": 19028 + }, + { + "epoch": 0.5832107392423685, + "grad_norm": 1.811478313478255, + "learning_rate": 3.904406784213678e-06, + "loss": 0.549, + "step": 19029 + }, + { + "epoch": 0.5832413877651097, + "grad_norm": 0.7900645692208692, + "learning_rate": 3.9039225331796145e-06, + "loss": 0.4345, + "step": 19030 + }, + { + "epoch": 0.583272036287851, + "grad_norm": 1.4624915784707855, + "learning_rate": 3.903438292945485e-06, + "loss": 0.5723, + "step": 19031 + }, + { + "epoch": 0.5833026848105921, + "grad_norm": 1.8157839565385732, + "learning_rate": 3.902954063516058e-06, + "loss": 0.6238, + "step": 19032 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 1.4289217114135895, + "learning_rate": 3.902469844896103e-06, + "loss": 0.6881, + "step": 19033 + }, + { + "epoch": 0.5833639818560745, + "grad_norm": 1.9218828495130387, + "learning_rate": 3.901985637090397e-06, + "loss": 0.6545, + "step": 19034 + }, + { + "epoch": 0.5833946303788158, + "grad_norm": 1.7958567012438478, + "learning_rate": 3.901501440103706e-06, + "loss": 0.6204, + "step": 19035 + }, + { + "epoch": 0.5834252789015569, + "grad_norm": 1.6598499113064569, + "learning_rate": 3.9010172539408006e-06, + "loss": 0.5682, + "step": 19036 + }, + { + "epoch": 0.5834559274242982, + "grad_norm": 2.0154696544986606, + "learning_rate": 3.9005330786064545e-06, + "loss": 0.6036, + "step": 19037 + }, + { + "epoch": 0.5834865759470393, + "grad_norm": 0.8015038788601558, + "learning_rate": 3.900048914105436e-06, + "loss": 0.4269, + "step": 19038 + }, + { + "epoch": 0.5835172244697806, + "grad_norm": 1.6467464945672057, + "learning_rate": 3.89956476044252e-06, + "loss": 0.5428, + "step": 19039 + }, + { + "epoch": 0.5835478729925218, + "grad_norm": 1.6100500019131039, + "learning_rate": 3.899080617622472e-06, + "loss": 0.6165, + "step": 19040 + }, + { + "epoch": 0.583578521515263, + "grad_norm": 0.8456980573191815, + "learning_rate": 3.898596485650065e-06, + "loss": 0.4305, + "step": 19041 + }, + { + "epoch": 0.5836091700380042, + "grad_norm": 1.7809563573836222, + "learning_rate": 3.898112364530068e-06, + "loss": 0.702, + "step": 19042 + }, + { + "epoch": 0.5836398185607454, + "grad_norm": 1.691913713102105, + "learning_rate": 3.897628254267254e-06, + "loss": 0.6283, + "step": 19043 + }, + { + "epoch": 0.5836704670834866, + "grad_norm": 2.019311329045685, + "learning_rate": 3.897144154866387e-06, + "loss": 0.69, + "step": 19044 + }, + { + "epoch": 0.5837011156062277, + "grad_norm": 1.6920574766592458, + "learning_rate": 3.896660066332244e-06, + "loss": 0.6265, + "step": 19045 + }, + { + "epoch": 0.583731764128969, + "grad_norm": 1.845344096089537, + "learning_rate": 
3.896175988669589e-06, + "loss": 0.6846, + "step": 19046 + }, + { + "epoch": 0.5837624126517101, + "grad_norm": 0.8307499445958542, + "learning_rate": 3.8956919218831975e-06, + "loss": 0.427, + "step": 19047 + }, + { + "epoch": 0.5837930611744514, + "grad_norm": 1.7649861961537905, + "learning_rate": 3.895207865977835e-06, + "loss": 0.4619, + "step": 19048 + }, + { + "epoch": 0.5838237096971925, + "grad_norm": 1.7123353520005238, + "learning_rate": 3.89472382095827e-06, + "loss": 0.6877, + "step": 19049 + }, + { + "epoch": 0.5838543582199338, + "grad_norm": 1.7676386422787558, + "learning_rate": 3.894239786829277e-06, + "loss": 0.6438, + "step": 19050 + }, + { + "epoch": 0.583885006742675, + "grad_norm": 1.8813387597556632, + "learning_rate": 3.8937557635956205e-06, + "loss": 0.5687, + "step": 19051 + }, + { + "epoch": 0.5839156552654162, + "grad_norm": 1.546753817294541, + "learning_rate": 3.893271751262071e-06, + "loss": 0.5217, + "step": 19052 + }, + { + "epoch": 0.5839463037881574, + "grad_norm": 1.6604227416492767, + "learning_rate": 3.8927877498334e-06, + "loss": 0.5965, + "step": 19053 + }, + { + "epoch": 0.5839769523108986, + "grad_norm": 1.6560348549447712, + "learning_rate": 3.892303759314372e-06, + "loss": 0.6195, + "step": 19054 + }, + { + "epoch": 0.5840076008336398, + "grad_norm": 1.7276616348288223, + "learning_rate": 3.891819779709761e-06, + "loss": 0.5992, + "step": 19055 + }, + { + "epoch": 0.584038249356381, + "grad_norm": 1.4422897427053138, + "learning_rate": 3.8913358110243335e-06, + "loss": 0.6791, + "step": 19056 + }, + { + "epoch": 0.5840688978791222, + "grad_norm": 1.5549036185935667, + "learning_rate": 3.890851853262855e-06, + "loss": 0.6247, + "step": 19057 + }, + { + "epoch": 0.5840995464018635, + "grad_norm": 1.7635789406537519, + "learning_rate": 3.8903679064301e-06, + "loss": 0.7603, + "step": 19058 + }, + { + "epoch": 0.5841301949246046, + "grad_norm": 0.8224265813680806, + "learning_rate": 3.889883970530833e-06, + "loss": 0.4312, + "step": 19059 + }, + { + "epoch": 0.5841608434473459, + "grad_norm": 1.5632312103732697, + "learning_rate": 3.889400045569822e-06, + "loss": 0.6244, + "step": 19060 + }, + { + "epoch": 0.584191491970087, + "grad_norm": 1.7892167032538062, + "learning_rate": 3.888916131551837e-06, + "loss": 0.7539, + "step": 19061 + }, + { + "epoch": 0.5842221404928283, + "grad_norm": 1.6801003633815743, + "learning_rate": 3.888432228481647e-06, + "loss": 0.6073, + "step": 19062 + }, + { + "epoch": 0.5842527890155694, + "grad_norm": 1.7624568449311695, + "learning_rate": 3.887948336364017e-06, + "loss": 0.6405, + "step": 19063 + }, + { + "epoch": 0.5842834375383107, + "grad_norm": 1.7515592919225325, + "learning_rate": 3.887464455203717e-06, + "loss": 0.6961, + "step": 19064 + }, + { + "epoch": 0.5843140860610518, + "grad_norm": 1.7278621714664857, + "learning_rate": 3.886980585005515e-06, + "loss": 0.6297, + "step": 19065 + }, + { + "epoch": 0.5843447345837931, + "grad_norm": 1.7489856707670306, + "learning_rate": 3.886496725774178e-06, + "loss": 0.5934, + "step": 19066 + }, + { + "epoch": 0.5843753831065343, + "grad_norm": 1.6286708768840386, + "learning_rate": 3.886012877514475e-06, + "loss": 0.6383, + "step": 19067 + }, + { + "epoch": 0.5844060316292755, + "grad_norm": 1.7874983187320745, + "learning_rate": 3.885529040231168e-06, + "loss": 0.6658, + "step": 19068 + }, + { + "epoch": 0.5844366801520167, + "grad_norm": 1.7331434547760391, + "learning_rate": 3.885045213929032e-06, + "loss": 0.5992, + "step": 19069 + }, + { + "epoch": 
0.5844673286747579, + "grad_norm": 1.6439281012212776, + "learning_rate": 3.884561398612831e-06, + "loss": 0.6323, + "step": 19070 + }, + { + "epoch": 0.5844979771974991, + "grad_norm": 0.81396529884095, + "learning_rate": 3.88407759428733e-06, + "loss": 0.4328, + "step": 19071 + }, + { + "epoch": 0.5845286257202403, + "grad_norm": 1.8062007306998118, + "learning_rate": 3.883593800957299e-06, + "loss": 0.6817, + "step": 19072 + }, + { + "epoch": 0.5845592742429815, + "grad_norm": 1.8999660125954059, + "learning_rate": 3.883110018627503e-06, + "loss": 0.6133, + "step": 19073 + }, + { + "epoch": 0.5845899227657227, + "grad_norm": 1.8639478673698895, + "learning_rate": 3.88262624730271e-06, + "loss": 0.6106, + "step": 19074 + }, + { + "epoch": 0.5846205712884639, + "grad_norm": 1.7417376046447892, + "learning_rate": 3.882142486987688e-06, + "loss": 0.6495, + "step": 19075 + }, + { + "epoch": 0.584651219811205, + "grad_norm": 1.6471156881472413, + "learning_rate": 3.8816587376872e-06, + "loss": 0.5887, + "step": 19076 + }, + { + "epoch": 0.5846818683339463, + "grad_norm": 0.7776041669778733, + "learning_rate": 3.881174999406017e-06, + "loss": 0.427, + "step": 19077 + }, + { + "epoch": 0.5847125168566875, + "grad_norm": 1.9150302634393517, + "learning_rate": 3.880691272148902e-06, + "loss": 0.6187, + "step": 19078 + }, + { + "epoch": 0.5847431653794287, + "grad_norm": 1.6445013562119843, + "learning_rate": 3.880207555920621e-06, + "loss": 0.5908, + "step": 19079 + }, + { + "epoch": 0.5847738139021699, + "grad_norm": 1.669280587909245, + "learning_rate": 3.879723850725943e-06, + "loss": 0.6264, + "step": 19080 + }, + { + "epoch": 0.5848044624249111, + "grad_norm": 1.6733845594063874, + "learning_rate": 3.879240156569631e-06, + "loss": 0.5664, + "step": 19081 + }, + { + "epoch": 0.5848351109476523, + "grad_norm": 1.674183260791591, + "learning_rate": 3.878756473456453e-06, + "loss": 0.646, + "step": 19082 + }, + { + "epoch": 0.5848657594703935, + "grad_norm": 2.021749954527491, + "learning_rate": 3.878272801391176e-06, + "loss": 0.597, + "step": 19083 + }, + { + "epoch": 0.5848964079931347, + "grad_norm": 1.8119265722470828, + "learning_rate": 3.877789140378561e-06, + "loss": 0.6454, + "step": 19084 + }, + { + "epoch": 0.584927056515876, + "grad_norm": 1.8473042119590966, + "learning_rate": 3.87730549042338e-06, + "loss": 0.6554, + "step": 19085 + }, + { + "epoch": 0.5849577050386171, + "grad_norm": 1.8167708276683314, + "learning_rate": 3.876821851530395e-06, + "loss": 0.6061, + "step": 19086 + }, + { + "epoch": 0.5849883535613584, + "grad_norm": 1.7134489163717652, + "learning_rate": 3.876338223704368e-06, + "loss": 0.6264, + "step": 19087 + }, + { + "epoch": 0.5850190020840995, + "grad_norm": 1.5649914237003548, + "learning_rate": 3.875854606950072e-06, + "loss": 0.6327, + "step": 19088 + }, + { + "epoch": 0.5850496506068408, + "grad_norm": 1.7763513422342634, + "learning_rate": 3.875371001272266e-06, + "loss": 0.6194, + "step": 19089 + }, + { + "epoch": 0.5850802991295819, + "grad_norm": 0.8190171725391355, + "learning_rate": 3.874887406675718e-06, + "loss": 0.4468, + "step": 19090 + }, + { + "epoch": 0.5851109476523232, + "grad_norm": 1.7740418214422442, + "learning_rate": 3.874403823165192e-06, + "loss": 0.6368, + "step": 19091 + }, + { + "epoch": 0.5851415961750643, + "grad_norm": 1.5230206112596942, + "learning_rate": 3.873920250745453e-06, + "loss": 0.6166, + "step": 19092 + }, + { + "epoch": 0.5851722446978056, + "grad_norm": 1.7912074639989188, + "learning_rate": 3.873436689421266e-06, 
+ "loss": 0.6161, + "step": 19093 + }, + { + "epoch": 0.5852028932205467, + "grad_norm": 1.697716544092059, + "learning_rate": 3.872953139197397e-06, + "loss": 0.634, + "step": 19094 + }, + { + "epoch": 0.585233541743288, + "grad_norm": 0.7623605590652176, + "learning_rate": 3.872469600078607e-06, + "loss": 0.4231, + "step": 19095 + }, + { + "epoch": 0.5852641902660292, + "grad_norm": 1.7086980629645572, + "learning_rate": 3.871986072069663e-06, + "loss": 0.5972, + "step": 19096 + }, + { + "epoch": 0.5852948387887704, + "grad_norm": 1.7146673137192567, + "learning_rate": 3.871502555175331e-06, + "loss": 0.696, + "step": 19097 + }, + { + "epoch": 0.5853254873115116, + "grad_norm": 1.771668645286199, + "learning_rate": 3.8710190494003694e-06, + "loss": 0.574, + "step": 19098 + }, + { + "epoch": 0.5853561358342528, + "grad_norm": 1.4964500682602553, + "learning_rate": 3.870535554749549e-06, + "loss": 0.511, + "step": 19099 + }, + { + "epoch": 0.585386784356994, + "grad_norm": 1.8332933641819644, + "learning_rate": 3.870052071227628e-06, + "loss": 0.6096, + "step": 19100 + }, + { + "epoch": 0.5854174328797352, + "grad_norm": 1.8285737822460708, + "learning_rate": 3.869568598839376e-06, + "loss": 0.676, + "step": 19101 + }, + { + "epoch": 0.5854480814024764, + "grad_norm": 0.7952217825554627, + "learning_rate": 3.869085137589552e-06, + "loss": 0.4339, + "step": 19102 + }, + { + "epoch": 0.5854787299252177, + "grad_norm": 1.568715078482158, + "learning_rate": 3.868601687482922e-06, + "loss": 0.6105, + "step": 19103 + }, + { + "epoch": 0.5855093784479588, + "grad_norm": 1.775236569168049, + "learning_rate": 3.8681182485242494e-06, + "loss": 0.5676, + "step": 19104 + }, + { + "epoch": 0.5855400269707001, + "grad_norm": 1.8550882434163534, + "learning_rate": 3.867634820718297e-06, + "loss": 0.5922, + "step": 19105 + }, + { + "epoch": 0.5855706754934412, + "grad_norm": 1.8562101818500507, + "learning_rate": 3.867151404069828e-06, + "loss": 0.6054, + "step": 19106 + }, + { + "epoch": 0.5856013240161824, + "grad_norm": 0.756883882774923, + "learning_rate": 3.8666679985836065e-06, + "loss": 0.4214, + "step": 19107 + }, + { + "epoch": 0.5856319725389236, + "grad_norm": 1.8522675691517272, + "learning_rate": 3.8661846042643945e-06, + "loss": 0.7204, + "step": 19108 + }, + { + "epoch": 0.5856626210616648, + "grad_norm": 1.8360134652803761, + "learning_rate": 3.865701221116957e-06, + "loss": 0.6231, + "step": 19109 + }, + { + "epoch": 0.585693269584406, + "grad_norm": 1.6897051701933128, + "learning_rate": 3.865217849146055e-06, + "loss": 0.6994, + "step": 19110 + }, + { + "epoch": 0.5857239181071472, + "grad_norm": 1.7384315790664069, + "learning_rate": 3.864734488356451e-06, + "loss": 0.7432, + "step": 19111 + }, + { + "epoch": 0.5857545666298885, + "grad_norm": 1.764278070595234, + "learning_rate": 3.864251138752911e-06, + "loss": 0.6602, + "step": 19112 + }, + { + "epoch": 0.5857852151526296, + "grad_norm": 1.8181906527122362, + "learning_rate": 3.863767800340193e-06, + "loss": 0.5451, + "step": 19113 + }, + { + "epoch": 0.5858158636753709, + "grad_norm": 1.6341925502295223, + "learning_rate": 3.863284473123061e-06, + "loss": 0.6311, + "step": 19114 + }, + { + "epoch": 0.585846512198112, + "grad_norm": 1.8287190633279193, + "learning_rate": 3.862801157106279e-06, + "loss": 0.5787, + "step": 19115 + }, + { + "epoch": 0.5858771607208533, + "grad_norm": 1.6647402734935657, + "learning_rate": 3.862317852294609e-06, + "loss": 0.5776, + "step": 19116 + }, + { + "epoch": 0.5859078092435944, + "grad_norm": 
1.4942520466946005, + "learning_rate": 3.8618345586928105e-06, + "loss": 0.5638, + "step": 19117 + }, + { + "epoch": 0.5859384577663357, + "grad_norm": 1.6788064492763333, + "learning_rate": 3.861351276305649e-06, + "loss": 0.6552, + "step": 19118 + }, + { + "epoch": 0.5859691062890768, + "grad_norm": 1.7206301175107825, + "learning_rate": 3.860868005137883e-06, + "loss": 0.6775, + "step": 19119 + }, + { + "epoch": 0.5859997548118181, + "grad_norm": 1.7710894604000424, + "learning_rate": 3.8603847451942776e-06, + "loss": 0.5751, + "step": 19120 + }, + { + "epoch": 0.5860304033345592, + "grad_norm": 1.7863364435399944, + "learning_rate": 3.859901496479593e-06, + "loss": 0.6481, + "step": 19121 + }, + { + "epoch": 0.5860610518573005, + "grad_norm": 1.7915044231318877, + "learning_rate": 3.85941825899859e-06, + "loss": 0.5876, + "step": 19122 + }, + { + "epoch": 0.5860917003800417, + "grad_norm": 1.6457726894491667, + "learning_rate": 3.858935032756031e-06, + "loss": 0.5992, + "step": 19123 + }, + { + "epoch": 0.5861223489027829, + "grad_norm": 1.832298497724581, + "learning_rate": 3.858451817756676e-06, + "loss": 0.634, + "step": 19124 + }, + { + "epoch": 0.5861529974255241, + "grad_norm": 1.7490878040510722, + "learning_rate": 3.857968614005287e-06, + "loss": 0.5795, + "step": 19125 + }, + { + "epoch": 0.5861836459482653, + "grad_norm": 1.6334321211976417, + "learning_rate": 3.857485421506627e-06, + "loss": 0.5642, + "step": 19126 + }, + { + "epoch": 0.5862142944710065, + "grad_norm": 1.4749136967457244, + "learning_rate": 3.857002240265454e-06, + "loss": 0.5572, + "step": 19127 + }, + { + "epoch": 0.5862449429937477, + "grad_norm": 1.674553909403125, + "learning_rate": 3.856519070286532e-06, + "loss": 0.5766, + "step": 19128 + }, + { + "epoch": 0.5862755915164889, + "grad_norm": 1.5522338214537141, + "learning_rate": 3.856035911574621e-06, + "loss": 0.5404, + "step": 19129 + }, + { + "epoch": 0.5863062400392302, + "grad_norm": 1.7786722644460038, + "learning_rate": 3.855552764134478e-06, + "loss": 0.5711, + "step": 19130 + }, + { + "epoch": 0.5863368885619713, + "grad_norm": 1.698700843464874, + "learning_rate": 3.855069627970869e-06, + "loss": 0.5912, + "step": 19131 + }, + { + "epoch": 0.5863675370847126, + "grad_norm": 1.7052046603717774, + "learning_rate": 3.854586503088551e-06, + "loss": 0.6144, + "step": 19132 + }, + { + "epoch": 0.5863981856074537, + "grad_norm": 1.8785138710268667, + "learning_rate": 3.854103389492283e-06, + "loss": 0.5612, + "step": 19133 + }, + { + "epoch": 0.586428834130195, + "grad_norm": 1.5348415591679327, + "learning_rate": 3.85362028718683e-06, + "loss": 0.6261, + "step": 19134 + }, + { + "epoch": 0.5864594826529361, + "grad_norm": 0.8041708408978139, + "learning_rate": 3.853137196176949e-06, + "loss": 0.4158, + "step": 19135 + }, + { + "epoch": 0.5864901311756774, + "grad_norm": 1.7899597632339217, + "learning_rate": 3.852654116467401e-06, + "loss": 0.5412, + "step": 19136 + }, + { + "epoch": 0.5865207796984185, + "grad_norm": 2.394897756203924, + "learning_rate": 3.852171048062945e-06, + "loss": 0.6536, + "step": 19137 + }, + { + "epoch": 0.5865514282211597, + "grad_norm": 1.726235178533683, + "learning_rate": 3.851687990968341e-06, + "loss": 0.5971, + "step": 19138 + }, + { + "epoch": 0.586582076743901, + "grad_norm": 1.7568877108551684, + "learning_rate": 3.85120494518835e-06, + "loss": 0.6107, + "step": 19139 + }, + { + "epoch": 0.5866127252666421, + "grad_norm": 1.8145162522819223, + "learning_rate": 3.850721910727731e-06, + "loss": 0.6103, + "step": 
19140 + }, + { + "epoch": 0.5866433737893834, + "grad_norm": 1.6356384016009309, + "learning_rate": 3.850238887591241e-06, + "loss": 0.6525, + "step": 19141 + }, + { + "epoch": 0.5866740223121245, + "grad_norm": 1.6406526975731375, + "learning_rate": 3.849755875783644e-06, + "loss": 0.4726, + "step": 19142 + }, + { + "epoch": 0.5867046708348658, + "grad_norm": 1.969992246558081, + "learning_rate": 3.849272875309696e-06, + "loss": 0.6225, + "step": 19143 + }, + { + "epoch": 0.5867353193576069, + "grad_norm": 1.6365057375621452, + "learning_rate": 3.848789886174155e-06, + "loss": 0.6343, + "step": 19144 + }, + { + "epoch": 0.5867659678803482, + "grad_norm": 0.7870364927738104, + "learning_rate": 3.848306908381783e-06, + "loss": 0.4522, + "step": 19145 + }, + { + "epoch": 0.5867966164030893, + "grad_norm": 1.8305593830726477, + "learning_rate": 3.847823941937338e-06, + "loss": 0.5927, + "step": 19146 + }, + { + "epoch": 0.5868272649258306, + "grad_norm": 1.7514509541099836, + "learning_rate": 3.847340986845578e-06, + "loss": 0.6356, + "step": 19147 + }, + { + "epoch": 0.5868579134485717, + "grad_norm": 1.5663609380071752, + "learning_rate": 3.846858043111262e-06, + "loss": 0.5778, + "step": 19148 + }, + { + "epoch": 0.586888561971313, + "grad_norm": 1.7659075349387523, + "learning_rate": 3.846375110739149e-06, + "loss": 0.6372, + "step": 19149 + }, + { + "epoch": 0.5869192104940542, + "grad_norm": 1.9386851826822193, + "learning_rate": 3.8458921897339975e-06, + "loss": 0.6343, + "step": 19150 + }, + { + "epoch": 0.5869498590167954, + "grad_norm": 1.7955868399887223, + "learning_rate": 3.845409280100567e-06, + "loss": 0.5336, + "step": 19151 + }, + { + "epoch": 0.5869805075395366, + "grad_norm": 1.7018786401660488, + "learning_rate": 3.84492638184361e-06, + "loss": 0.614, + "step": 19152 + }, + { + "epoch": 0.5870111560622778, + "grad_norm": 1.6070395164967406, + "learning_rate": 3.844443494967893e-06, + "loss": 0.5546, + "step": 19153 + }, + { + "epoch": 0.587041804585019, + "grad_norm": 1.9197329970976846, + "learning_rate": 3.8439606194781665e-06, + "loss": 0.6523, + "step": 19154 + }, + { + "epoch": 0.5870724531077602, + "grad_norm": 1.7247012705491618, + "learning_rate": 3.843477755379195e-06, + "loss": 0.6766, + "step": 19155 + }, + { + "epoch": 0.5871031016305014, + "grad_norm": 1.6940221315563682, + "learning_rate": 3.842994902675732e-06, + "loss": 0.6801, + "step": 19156 + }, + { + "epoch": 0.5871337501532427, + "grad_norm": 1.8760604593293495, + "learning_rate": 3.842512061372535e-06, + "loss": 0.6555, + "step": 19157 + }, + { + "epoch": 0.5871643986759838, + "grad_norm": 1.9028789159680057, + "learning_rate": 3.842029231474364e-06, + "loss": 0.6366, + "step": 19158 + }, + { + "epoch": 0.5871950471987251, + "grad_norm": 1.5770980752667134, + "learning_rate": 3.841546412985977e-06, + "loss": 0.5139, + "step": 19159 + }, + { + "epoch": 0.5872256957214662, + "grad_norm": 1.8135998632213115, + "learning_rate": 3.841063605912126e-06, + "loss": 0.6612, + "step": 19160 + }, + { + "epoch": 0.5872563442442075, + "grad_norm": 1.7062891524452322, + "learning_rate": 3.840580810257574e-06, + "loss": 0.6195, + "step": 19161 + }, + { + "epoch": 0.5872869927669486, + "grad_norm": 1.79242697627435, + "learning_rate": 3.840098026027075e-06, + "loss": 0.6192, + "step": 19162 + }, + { + "epoch": 0.5873176412896899, + "grad_norm": 1.6502364017103843, + "learning_rate": 3.839615253225387e-06, + "loss": 0.683, + "step": 19163 + }, + { + "epoch": 0.587348289812431, + "grad_norm": 1.6355330736525606, + 
"learning_rate": 3.839132491857269e-06, + "loss": 0.6144, + "step": 19164 + }, + { + "epoch": 0.5873789383351723, + "grad_norm": 1.9106151403686291, + "learning_rate": 3.838649741927472e-06, + "loss": 0.6704, + "step": 19165 + }, + { + "epoch": 0.5874095868579134, + "grad_norm": 1.5820238557188144, + "learning_rate": 3.838167003440759e-06, + "loss": 0.5644, + "step": 19166 + }, + { + "epoch": 0.5874402353806547, + "grad_norm": 1.8604056600949115, + "learning_rate": 3.837684276401883e-06, + "loss": 0.7016, + "step": 19167 + }, + { + "epoch": 0.5874708839033959, + "grad_norm": 1.7465627406506812, + "learning_rate": 3.837201560815601e-06, + "loss": 0.5973, + "step": 19168 + }, + { + "epoch": 0.587501532426137, + "grad_norm": 1.7310997063907019, + "learning_rate": 3.83671885668667e-06, + "loss": 0.6409, + "step": 19169 + }, + { + "epoch": 0.5875321809488783, + "grad_norm": 1.7478707468495018, + "learning_rate": 3.836236164019845e-06, + "loss": 0.6659, + "step": 19170 + }, + { + "epoch": 0.5875628294716194, + "grad_norm": 1.683318510521181, + "learning_rate": 3.835753482819883e-06, + "loss": 0.5195, + "step": 19171 + }, + { + "epoch": 0.5875934779943607, + "grad_norm": 1.5804235620767042, + "learning_rate": 3.835270813091539e-06, + "loss": 0.6156, + "step": 19172 + }, + { + "epoch": 0.5876241265171018, + "grad_norm": 1.6942944439478813, + "learning_rate": 3.834788154839571e-06, + "loss": 0.6643, + "step": 19173 + }, + { + "epoch": 0.5876547750398431, + "grad_norm": 1.7481711769973183, + "learning_rate": 3.834305508068734e-06, + "loss": 0.5871, + "step": 19174 + }, + { + "epoch": 0.5876854235625842, + "grad_norm": 0.7724318993133843, + "learning_rate": 3.833822872783782e-06, + "loss": 0.4267, + "step": 19175 + }, + { + "epoch": 0.5877160720853255, + "grad_norm": 1.6147674339110811, + "learning_rate": 3.833340248989471e-06, + "loss": 0.5415, + "step": 19176 + }, + { + "epoch": 0.5877467206080667, + "grad_norm": 1.670044882870649, + "learning_rate": 3.832857636690559e-06, + "loss": 0.5686, + "step": 19177 + }, + { + "epoch": 0.5877773691308079, + "grad_norm": 1.7245139920432466, + "learning_rate": 3.832375035891798e-06, + "loss": 0.5351, + "step": 19178 + }, + { + "epoch": 0.5878080176535491, + "grad_norm": 1.7373456888214471, + "learning_rate": 3.831892446597944e-06, + "loss": 0.5936, + "step": 19179 + }, + { + "epoch": 0.5878386661762903, + "grad_norm": 1.7571247002347703, + "learning_rate": 3.831409868813754e-06, + "loss": 0.6877, + "step": 19180 + }, + { + "epoch": 0.5878693146990315, + "grad_norm": 1.6079852047212344, + "learning_rate": 3.83092730254398e-06, + "loss": 0.5345, + "step": 19181 + }, + { + "epoch": 0.5878999632217727, + "grad_norm": 1.7976244048581507, + "learning_rate": 3.830444747793379e-06, + "loss": 0.5878, + "step": 19182 + }, + { + "epoch": 0.5879306117445139, + "grad_norm": 1.6716959330152026, + "learning_rate": 3.829962204566707e-06, + "loss": 0.5949, + "step": 19183 + }, + { + "epoch": 0.5879612602672551, + "grad_norm": 1.6705227714800213, + "learning_rate": 3.829479672868713e-06, + "loss": 0.6587, + "step": 19184 + }, + { + "epoch": 0.5879919087899963, + "grad_norm": 0.7437144468353714, + "learning_rate": 3.828997152704159e-06, + "loss": 0.4121, + "step": 19185 + }, + { + "epoch": 0.5880225573127376, + "grad_norm": 1.546784026531024, + "learning_rate": 3.828514644077794e-06, + "loss": 0.5354, + "step": 19186 + }, + { + "epoch": 0.5880532058354787, + "grad_norm": 1.6466094875456745, + "learning_rate": 3.8280321469943734e-06, + "loss": 0.5709, + "step": 19187 + }, + { + 
"epoch": 0.58808385435822, + "grad_norm": 1.7173784105349361, + "learning_rate": 3.827549661458653e-06, + "loss": 0.6406, + "step": 19188 + }, + { + "epoch": 0.5881145028809611, + "grad_norm": 2.0145399615214603, + "learning_rate": 3.827067187475384e-06, + "loss": 0.6542, + "step": 19189 + }, + { + "epoch": 0.5881451514037024, + "grad_norm": 1.6564341221529688, + "learning_rate": 3.826584725049325e-06, + "loss": 0.6831, + "step": 19190 + }, + { + "epoch": 0.5881757999264435, + "grad_norm": 1.7267736352675929, + "learning_rate": 3.826102274185225e-06, + "loss": 0.6182, + "step": 19191 + }, + { + "epoch": 0.5882064484491848, + "grad_norm": 1.4661872327898864, + "learning_rate": 3.82561983488784e-06, + "loss": 0.6057, + "step": 19192 + }, + { + "epoch": 0.5882370969719259, + "grad_norm": 1.6713080736256387, + "learning_rate": 3.825137407161923e-06, + "loss": 0.6522, + "step": 19193 + }, + { + "epoch": 0.5882677454946672, + "grad_norm": 1.762250342554031, + "learning_rate": 3.8246549910122285e-06, + "loss": 0.6458, + "step": 19194 + }, + { + "epoch": 0.5882983940174084, + "grad_norm": 1.9784959213698168, + "learning_rate": 3.824172586443507e-06, + "loss": 0.6247, + "step": 19195 + }, + { + "epoch": 0.5883290425401496, + "grad_norm": 1.6877664138648223, + "learning_rate": 3.823690193460517e-06, + "loss": 0.6868, + "step": 19196 + }, + { + "epoch": 0.5883596910628908, + "grad_norm": 1.8392498742756516, + "learning_rate": 3.8232078120680075e-06, + "loss": 0.6802, + "step": 19197 + }, + { + "epoch": 0.588390339585632, + "grad_norm": 1.8004975872071303, + "learning_rate": 3.822725442270731e-06, + "loss": 0.6759, + "step": 19198 + }, + { + "epoch": 0.5884209881083732, + "grad_norm": 1.5564284751396973, + "learning_rate": 3.822243084073443e-06, + "loss": 0.577, + "step": 19199 + }, + { + "epoch": 0.5884516366311143, + "grad_norm": 1.7133076238945129, + "learning_rate": 3.821760737480894e-06, + "loss": 0.5505, + "step": 19200 + }, + { + "epoch": 0.5884822851538556, + "grad_norm": 1.5915766913181026, + "learning_rate": 3.82127840249784e-06, + "loss": 0.6214, + "step": 19201 + }, + { + "epoch": 0.5885129336765967, + "grad_norm": 1.7479006723881105, + "learning_rate": 3.820796079129031e-06, + "loss": 0.6186, + "step": 19202 + }, + { + "epoch": 0.588543582199338, + "grad_norm": 0.8465284852510335, + "learning_rate": 3.8203137673792185e-06, + "loss": 0.4107, + "step": 19203 + }, + { + "epoch": 0.5885742307220792, + "grad_norm": 0.809494313444861, + "learning_rate": 3.819831467253158e-06, + "loss": 0.4188, + "step": 19204 + }, + { + "epoch": 0.5886048792448204, + "grad_norm": 1.8074179163475164, + "learning_rate": 3.8193491787556e-06, + "loss": 0.5992, + "step": 19205 + }, + { + "epoch": 0.5886355277675616, + "grad_norm": 0.8214352548258065, + "learning_rate": 3.818866901891295e-06, + "loss": 0.4218, + "step": 19206 + }, + { + "epoch": 0.5886661762903028, + "grad_norm": 1.7525447908489378, + "learning_rate": 3.818384636664998e-06, + "loss": 0.6702, + "step": 19207 + }, + { + "epoch": 0.588696824813044, + "grad_norm": 1.7068013942128997, + "learning_rate": 3.817902383081458e-06, + "loss": 0.5974, + "step": 19208 + }, + { + "epoch": 0.5887274733357852, + "grad_norm": 1.8920189960509226, + "learning_rate": 3.817420141145431e-06, + "loss": 0.6431, + "step": 19209 + }, + { + "epoch": 0.5887581218585264, + "grad_norm": 1.810218061968455, + "learning_rate": 3.816937910861663e-06, + "loss": 0.6224, + "step": 19210 + }, + { + "epoch": 0.5887887703812676, + "grad_norm": 1.764277862527994, + "learning_rate": 
3.81645569223491e-06, + "loss": 0.524, + "step": 19211 + }, + { + "epoch": 0.5888194189040088, + "grad_norm": 1.6723143365002306, + "learning_rate": 3.815973485269921e-06, + "loss": 0.5564, + "step": 19212 + }, + { + "epoch": 0.5888500674267501, + "grad_norm": 1.9305639999989765, + "learning_rate": 3.815491289971449e-06, + "loss": 0.6959, + "step": 19213 + }, + { + "epoch": 0.5888807159494912, + "grad_norm": 1.707229601795532, + "learning_rate": 3.815009106344244e-06, + "loss": 0.5881, + "step": 19214 + }, + { + "epoch": 0.5889113644722325, + "grad_norm": 1.6921483054524116, + "learning_rate": 3.814526934393058e-06, + "loss": 0.6079, + "step": 19215 + }, + { + "epoch": 0.5889420129949736, + "grad_norm": 2.035955880336247, + "learning_rate": 3.814044774122642e-06, + "loss": 0.5578, + "step": 19216 + }, + { + "epoch": 0.5889726615177149, + "grad_norm": 0.884655457157649, + "learning_rate": 3.813562625537743e-06, + "loss": 0.4127, + "step": 19217 + }, + { + "epoch": 0.589003310040456, + "grad_norm": 1.7852122554843906, + "learning_rate": 3.8130804886431194e-06, + "loss": 0.5246, + "step": 19218 + }, + { + "epoch": 0.5890339585631973, + "grad_norm": 0.8440823440971991, + "learning_rate": 3.8125983634435147e-06, + "loss": 0.436, + "step": 19219 + }, + { + "epoch": 0.5890646070859384, + "grad_norm": 1.5504353291214854, + "learning_rate": 3.812116249943683e-06, + "loss": 0.6655, + "step": 19220 + }, + { + "epoch": 0.5890952556086797, + "grad_norm": 1.7734956358554517, + "learning_rate": 3.8116341481483738e-06, + "loss": 0.5821, + "step": 19221 + }, + { + "epoch": 0.5891259041314209, + "grad_norm": 1.8072046992161348, + "learning_rate": 3.811152058062337e-06, + "loss": 0.675, + "step": 19222 + }, + { + "epoch": 0.5891565526541621, + "grad_norm": 1.805563805186923, + "learning_rate": 3.8106699796903236e-06, + "loss": 0.6351, + "step": 19223 + }, + { + "epoch": 0.5891872011769033, + "grad_norm": 1.9164790798092002, + "learning_rate": 3.8101879130370827e-06, + "loss": 0.5389, + "step": 19224 + }, + { + "epoch": 0.5892178496996445, + "grad_norm": 1.5833594248075558, + "learning_rate": 3.8097058581073644e-06, + "loss": 0.5935, + "step": 19225 + }, + { + "epoch": 0.5892484982223857, + "grad_norm": 1.7837927411789058, + "learning_rate": 3.809223814905921e-06, + "loss": 0.5192, + "step": 19226 + }, + { + "epoch": 0.5892791467451269, + "grad_norm": 1.6354745257193832, + "learning_rate": 3.8087417834374964e-06, + "loss": 0.5368, + "step": 19227 + }, + { + "epoch": 0.5893097952678681, + "grad_norm": 1.8377480548053153, + "learning_rate": 3.8082597637068476e-06, + "loss": 0.6091, + "step": 19228 + }, + { + "epoch": 0.5893404437906093, + "grad_norm": 1.852083960733439, + "learning_rate": 3.8077777557187185e-06, + "loss": 0.5605, + "step": 19229 + }, + { + "epoch": 0.5893710923133505, + "grad_norm": 1.7462097212665793, + "learning_rate": 3.807295759477859e-06, + "loss": 0.711, + "step": 19230 + }, + { + "epoch": 0.5894017408360916, + "grad_norm": 1.5722762007291458, + "learning_rate": 3.8068137749890214e-06, + "loss": 0.6355, + "step": 19231 + }, + { + "epoch": 0.5894323893588329, + "grad_norm": 0.9239295770217948, + "learning_rate": 3.8063318022569528e-06, + "loss": 0.4289, + "step": 19232 + }, + { + "epoch": 0.5894630378815741, + "grad_norm": 1.861344996269647, + "learning_rate": 3.8058498412864016e-06, + "loss": 0.609, + "step": 19233 + }, + { + "epoch": 0.5894936864043153, + "grad_norm": 1.6919236051642517, + "learning_rate": 3.805367892082118e-06, + "loss": 0.589, + "step": 19234 + }, + { + "epoch": 
0.5895243349270565, + "grad_norm": 1.7933136358393396, + "learning_rate": 3.804885954648849e-06, + "loss": 0.6154, + "step": 19235 + }, + { + "epoch": 0.5895549834497977, + "grad_norm": 1.702271013334686, + "learning_rate": 3.804404028991346e-06, + "loss": 0.5539, + "step": 19236 + }, + { + "epoch": 0.5895856319725389, + "grad_norm": 1.6701835961142972, + "learning_rate": 3.8039221151143566e-06, + "loss": 0.6142, + "step": 19237 + }, + { + "epoch": 0.5896162804952801, + "grad_norm": 0.773194519144322, + "learning_rate": 3.8034402130226255e-06, + "loss": 0.4199, + "step": 19238 + }, + { + "epoch": 0.5896469290180213, + "grad_norm": 1.632948971633312, + "learning_rate": 3.8029583227209077e-06, + "loss": 0.6342, + "step": 19239 + }, + { + "epoch": 0.5896775775407626, + "grad_norm": 1.836530085900821, + "learning_rate": 3.8024764442139467e-06, + "loss": 0.6964, + "step": 19240 + }, + { + "epoch": 0.5897082260635037, + "grad_norm": 1.4025449988424492, + "learning_rate": 3.8019945775064904e-06, + "loss": 0.4277, + "step": 19241 + }, + { + "epoch": 0.589738874586245, + "grad_norm": 1.6241315519234012, + "learning_rate": 3.8015127226032888e-06, + "loss": 0.4926, + "step": 19242 + }, + { + "epoch": 0.5897695231089861, + "grad_norm": 1.7996946571903494, + "learning_rate": 3.80103087950909e-06, + "loss": 0.5817, + "step": 19243 + }, + { + "epoch": 0.5898001716317274, + "grad_norm": 0.8056153480907179, + "learning_rate": 3.800549048228639e-06, + "loss": 0.4387, + "step": 19244 + }, + { + "epoch": 0.5898308201544685, + "grad_norm": 1.7016472489504906, + "learning_rate": 3.8000672287666863e-06, + "loss": 0.6181, + "step": 19245 + }, + { + "epoch": 0.5898614686772098, + "grad_norm": 1.6732594353056012, + "learning_rate": 3.799585421127977e-06, + "loss": 0.6222, + "step": 19246 + }, + { + "epoch": 0.5898921171999509, + "grad_norm": 1.7481890109648268, + "learning_rate": 3.799103625317261e-06, + "loss": 0.5269, + "step": 19247 + }, + { + "epoch": 0.5899227657226922, + "grad_norm": 1.5675949927198205, + "learning_rate": 3.7986218413392844e-06, + "loss": 0.5873, + "step": 19248 + }, + { + "epoch": 0.5899534142454333, + "grad_norm": 1.5821587781290785, + "learning_rate": 3.798140069198792e-06, + "loss": 0.601, + "step": 19249 + }, + { + "epoch": 0.5899840627681746, + "grad_norm": 1.8932619288717354, + "learning_rate": 3.797658308900536e-06, + "loss": 0.5311, + "step": 19250 + }, + { + "epoch": 0.5900147112909158, + "grad_norm": 1.7702128128196843, + "learning_rate": 3.797176560449259e-06, + "loss": 0.6083, + "step": 19251 + }, + { + "epoch": 0.590045359813657, + "grad_norm": 1.8524269597807672, + "learning_rate": 3.7966948238497083e-06, + "loss": 0.6268, + "step": 19252 + }, + { + "epoch": 0.5900760083363982, + "grad_norm": 1.663403908567589, + "learning_rate": 3.7962130991066325e-06, + "loss": 0.6454, + "step": 19253 + }, + { + "epoch": 0.5901066568591394, + "grad_norm": 1.658106714469388, + "learning_rate": 3.795731386224776e-06, + "loss": 0.6794, + "step": 19254 + }, + { + "epoch": 0.5901373053818806, + "grad_norm": 1.550876678937309, + "learning_rate": 3.795249685208887e-06, + "loss": 0.574, + "step": 19255 + }, + { + "epoch": 0.5901679539046218, + "grad_norm": 1.8457656698677054, + "learning_rate": 3.7947679960637113e-06, + "loss": 0.6299, + "step": 19256 + }, + { + "epoch": 0.590198602427363, + "grad_norm": 1.7900508483949396, + "learning_rate": 3.794286318793994e-06, + "loss": 0.632, + "step": 19257 + }, + { + "epoch": 0.5902292509501043, + "grad_norm": 1.8987444074365871, + "learning_rate": 
3.7938046534044826e-06, + "loss": 0.6476, + "step": 19258 + }, + { + "epoch": 0.5902598994728454, + "grad_norm": 1.9131607995723765, + "learning_rate": 3.7933229998999237e-06, + "loss": 0.6465, + "step": 19259 + }, + { + "epoch": 0.5902905479955867, + "grad_norm": 1.5533202541970472, + "learning_rate": 3.7928413582850594e-06, + "loss": 0.5161, + "step": 19260 + }, + { + "epoch": 0.5903211965183278, + "grad_norm": 1.578724493444264, + "learning_rate": 3.7923597285646406e-06, + "loss": 0.599, + "step": 19261 + }, + { + "epoch": 0.590351845041069, + "grad_norm": 1.785887125729285, + "learning_rate": 3.7918781107434087e-06, + "loss": 0.5546, + "step": 19262 + }, + { + "epoch": 0.5903824935638102, + "grad_norm": 1.9540282275935486, + "learning_rate": 3.7913965048261123e-06, + "loss": 0.5946, + "step": 19263 + }, + { + "epoch": 0.5904131420865514, + "grad_norm": 0.848288582456909, + "learning_rate": 3.790914910817495e-06, + "loss": 0.4381, + "step": 19264 + }, + { + "epoch": 0.5904437906092926, + "grad_norm": 2.454595591325256, + "learning_rate": 3.790433328722301e-06, + "loss": 0.5345, + "step": 19265 + }, + { + "epoch": 0.5904744391320338, + "grad_norm": 1.684118091062498, + "learning_rate": 3.789951758545278e-06, + "loss": 0.6734, + "step": 19266 + }, + { + "epoch": 0.590505087654775, + "grad_norm": 1.6053535040942368, + "learning_rate": 3.789470200291171e-06, + "loss": 0.5865, + "step": 19267 + }, + { + "epoch": 0.5905357361775162, + "grad_norm": 0.8190132635894997, + "learning_rate": 3.788988653964722e-06, + "loss": 0.4423, + "step": 19268 + }, + { + "epoch": 0.5905663847002575, + "grad_norm": 1.5237906929095897, + "learning_rate": 3.7885071195706786e-06, + "loss": 0.4836, + "step": 19269 + }, + { + "epoch": 0.5905970332229986, + "grad_norm": 1.6269639583829303, + "learning_rate": 3.7880255971137857e-06, + "loss": 0.5964, + "step": 19270 + }, + { + "epoch": 0.5906276817457399, + "grad_norm": 1.7149035010805669, + "learning_rate": 3.7875440865987843e-06, + "loss": 0.5809, + "step": 19271 + }, + { + "epoch": 0.590658330268481, + "grad_norm": 1.8273815534531945, + "learning_rate": 3.787062588030423e-06, + "loss": 0.5962, + "step": 19272 + }, + { + "epoch": 0.5906889787912223, + "grad_norm": 1.697807618646677, + "learning_rate": 3.7865811014134425e-06, + "loss": 0.6083, + "step": 19273 + }, + { + "epoch": 0.5907196273139634, + "grad_norm": 1.8207980540851834, + "learning_rate": 3.7860996267525906e-06, + "loss": 0.6376, + "step": 19274 + }, + { + "epoch": 0.5907502758367047, + "grad_norm": 1.6412640093755666, + "learning_rate": 3.7856181640526093e-06, + "loss": 0.5362, + "step": 19275 + }, + { + "epoch": 0.5907809243594458, + "grad_norm": 1.4717838810879358, + "learning_rate": 3.7851367133182414e-06, + "loss": 0.5401, + "step": 19276 + }, + { + "epoch": 0.5908115728821871, + "grad_norm": 1.7212963735120994, + "learning_rate": 3.784655274554234e-06, + "loss": 0.6654, + "step": 19277 + }, + { + "epoch": 0.5908422214049283, + "grad_norm": 1.7401482756275612, + "learning_rate": 3.7841738477653305e-06, + "loss": 0.5793, + "step": 19278 + }, + { + "epoch": 0.5908728699276695, + "grad_norm": 1.7722229957525155, + "learning_rate": 3.7836924329562697e-06, + "loss": 0.6603, + "step": 19279 + }, + { + "epoch": 0.5909035184504107, + "grad_norm": 1.69440572298294, + "learning_rate": 3.7832110301318013e-06, + "loss": 0.5266, + "step": 19280 + }, + { + "epoch": 0.5909341669731519, + "grad_norm": 0.776926241480539, + "learning_rate": 3.7827296392966634e-06, + "loss": 0.4119, + "step": 19281 + }, + { + "epoch": 
0.5909648154958931, + "grad_norm": 1.8075047673668887, + "learning_rate": 3.7822482604556043e-06, + "loss": 0.5946, + "step": 19282 + }, + { + "epoch": 0.5909954640186343, + "grad_norm": 1.9677055037146216, + "learning_rate": 3.7817668936133645e-06, + "loss": 0.5767, + "step": 19283 + }, + { + "epoch": 0.5910261125413755, + "grad_norm": 1.7769968953795177, + "learning_rate": 3.7812855387746857e-06, + "loss": 0.5686, + "step": 19284 + }, + { + "epoch": 0.5910567610641168, + "grad_norm": 1.8094909468982705, + "learning_rate": 3.780804195944313e-06, + "loss": 0.613, + "step": 19285 + }, + { + "epoch": 0.5910874095868579, + "grad_norm": 0.7892763747065392, + "learning_rate": 3.7803228651269887e-06, + "loss": 0.4416, + "step": 19286 + }, + { + "epoch": 0.5911180581095992, + "grad_norm": 1.7989939068169367, + "learning_rate": 3.7798415463274544e-06, + "loss": 0.5936, + "step": 19287 + }, + { + "epoch": 0.5911487066323403, + "grad_norm": 1.6175008957120345, + "learning_rate": 3.7793602395504546e-06, + "loss": 0.546, + "step": 19288 + }, + { + "epoch": 0.5911793551550816, + "grad_norm": 1.8992274751345488, + "learning_rate": 3.7788789448007297e-06, + "loss": 0.7286, + "step": 19289 + }, + { + "epoch": 0.5912100036778227, + "grad_norm": 0.7624708252529574, + "learning_rate": 3.7783976620830235e-06, + "loss": 0.445, + "step": 19290 + }, + { + "epoch": 0.591240652200564, + "grad_norm": 1.9844785438405874, + "learning_rate": 3.7779163914020795e-06, + "loss": 0.5092, + "step": 19291 + }, + { + "epoch": 0.5912713007233051, + "grad_norm": 0.7721129910915299, + "learning_rate": 3.777435132762634e-06, + "loss": 0.4235, + "step": 19292 + }, + { + "epoch": 0.5913019492460463, + "grad_norm": 1.8263512182346473, + "learning_rate": 3.7769538861694365e-06, + "loss": 0.6714, + "step": 19293 + }, + { + "epoch": 0.5913325977687875, + "grad_norm": 1.7579447992697321, + "learning_rate": 3.7764726516272243e-06, + "loss": 0.6017, + "step": 19294 + }, + { + "epoch": 0.5913632462915287, + "grad_norm": 1.6525320429849422, + "learning_rate": 3.7759914291407397e-06, + "loss": 0.5456, + "step": 19295 + }, + { + "epoch": 0.59139389481427, + "grad_norm": 1.926871633746984, + "learning_rate": 3.775510218714725e-06, + "loss": 0.7048, + "step": 19296 + }, + { + "epoch": 0.5914245433370111, + "grad_norm": 1.7471088878273509, + "learning_rate": 3.7750290203539214e-06, + "loss": 0.6486, + "step": 19297 + }, + { + "epoch": 0.5914551918597524, + "grad_norm": 1.8800451585177342, + "learning_rate": 3.7745478340630693e-06, + "loss": 0.7215, + "step": 19298 + }, + { + "epoch": 0.5914858403824935, + "grad_norm": 1.8569609730921108, + "learning_rate": 3.774066659846912e-06, + "loss": 0.6151, + "step": 19299 + }, + { + "epoch": 0.5915164889052348, + "grad_norm": 1.7552892061626746, + "learning_rate": 3.773585497710189e-06, + "loss": 0.6337, + "step": 19300 + }, + { + "epoch": 0.5915471374279759, + "grad_norm": 1.6587120150738968, + "learning_rate": 3.7731043476576424e-06, + "loss": 0.5538, + "step": 19301 + }, + { + "epoch": 0.5915777859507172, + "grad_norm": 1.6954593076869762, + "learning_rate": 3.7726232096940134e-06, + "loss": 0.6072, + "step": 19302 + }, + { + "epoch": 0.5916084344734583, + "grad_norm": 1.7902258231288177, + "learning_rate": 3.772142083824039e-06, + "loss": 0.6602, + "step": 19303 + }, + { + "epoch": 0.5916390829961996, + "grad_norm": 1.8282908558153073, + "learning_rate": 3.7716609700524664e-06, + "loss": 0.6088, + "step": 19304 + }, + { + "epoch": 0.5916697315189408, + "grad_norm": 1.88532329091873, + "learning_rate": 
3.771179868384031e-06, + "loss": 0.5927, + "step": 19305 + }, + { + "epoch": 0.591700380041682, + "grad_norm": 1.723757119414137, + "learning_rate": 3.7706987788234738e-06, + "loss": 0.5775, + "step": 19306 + }, + { + "epoch": 0.5917310285644232, + "grad_norm": 0.7217395486321727, + "learning_rate": 3.7702177013755376e-06, + "loss": 0.3987, + "step": 19307 + }, + { + "epoch": 0.5917616770871644, + "grad_norm": 0.8079100948673555, + "learning_rate": 3.7697366360449592e-06, + "loss": 0.4239, + "step": 19308 + }, + { + "epoch": 0.5917923256099056, + "grad_norm": 1.7191446641340182, + "learning_rate": 3.7692555828364824e-06, + "loss": 0.6427, + "step": 19309 + }, + { + "epoch": 0.5918229741326468, + "grad_norm": 1.6240939514585588, + "learning_rate": 3.768774541754845e-06, + "loss": 0.6506, + "step": 19310 + }, + { + "epoch": 0.591853622655388, + "grad_norm": 1.481736695843373, + "learning_rate": 3.768293512804786e-06, + "loss": 0.5673, + "step": 19311 + }, + { + "epoch": 0.5918842711781293, + "grad_norm": 1.7608880977350723, + "learning_rate": 3.7678124959910466e-06, + "loss": 0.6254, + "step": 19312 + }, + { + "epoch": 0.5919149197008704, + "grad_norm": 1.781407659665013, + "learning_rate": 3.767331491318368e-06, + "loss": 0.5946, + "step": 19313 + }, + { + "epoch": 0.5919455682236117, + "grad_norm": 1.7942990490190633, + "learning_rate": 3.7668504987914846e-06, + "loss": 0.637, + "step": 19314 + }, + { + "epoch": 0.5919762167463528, + "grad_norm": 1.7230860692185042, + "learning_rate": 3.76636951841514e-06, + "loss": 0.5762, + "step": 19315 + }, + { + "epoch": 0.5920068652690941, + "grad_norm": 1.6542784025094648, + "learning_rate": 3.7658885501940713e-06, + "loss": 0.5482, + "step": 19316 + }, + { + "epoch": 0.5920375137918352, + "grad_norm": 1.8660337035414605, + "learning_rate": 3.765407594133019e-06, + "loss": 0.5539, + "step": 19317 + }, + { + "epoch": 0.5920681623145765, + "grad_norm": 1.8964288632353512, + "learning_rate": 3.7649266502367225e-06, + "loss": 0.6949, + "step": 19318 + }, + { + "epoch": 0.5920988108373176, + "grad_norm": 1.7686465020834166, + "learning_rate": 3.764445718509918e-06, + "loss": 0.6542, + "step": 19319 + }, + { + "epoch": 0.5921294593600589, + "grad_norm": 1.8637510753383888, + "learning_rate": 3.7639647989573474e-06, + "loss": 0.6141, + "step": 19320 + }, + { + "epoch": 0.5921601078828, + "grad_norm": 1.9681420480579368, + "learning_rate": 3.7634838915837477e-06, + "loss": 0.6311, + "step": 19321 + }, + { + "epoch": 0.5921907564055413, + "grad_norm": 1.418054115370853, + "learning_rate": 3.763002996393857e-06, + "loss": 0.5569, + "step": 19322 + }, + { + "epoch": 0.5922214049282825, + "grad_norm": 1.8475238659878117, + "learning_rate": 3.7625221133924156e-06, + "loss": 0.6977, + "step": 19323 + }, + { + "epoch": 0.5922520534510236, + "grad_norm": 0.8396167123114756, + "learning_rate": 3.76204124258416e-06, + "loss": 0.4012, + "step": 19324 + }, + { + "epoch": 0.5922827019737649, + "grad_norm": 0.8273984185073905, + "learning_rate": 3.7615603839738275e-06, + "loss": 0.4124, + "step": 19325 + }, + { + "epoch": 0.592313350496506, + "grad_norm": 2.230522784708072, + "learning_rate": 3.761079537566158e-06, + "loss": 0.7452, + "step": 19326 + }, + { + "epoch": 0.5923439990192473, + "grad_norm": 1.8125470676414863, + "learning_rate": 3.7605987033658887e-06, + "loss": 0.5719, + "step": 19327 + }, + { + "epoch": 0.5923746475419884, + "grad_norm": 1.8502552301101496, + "learning_rate": 3.760117881377758e-06, + "loss": 0.5903, + "step": 19328 + }, + { + "epoch": 
0.5924052960647297, + "grad_norm": 1.5152701214683977, + "learning_rate": 3.759637071606503e-06, + "loss": 0.5803, + "step": 19329 + }, + { + "epoch": 0.5924359445874708, + "grad_norm": 1.7033682780801118, + "learning_rate": 3.75915627405686e-06, + "loss": 0.6397, + "step": 19330 + }, + { + "epoch": 0.5924665931102121, + "grad_norm": 1.9777494811503555, + "learning_rate": 3.758675488733569e-06, + "loss": 0.6413, + "step": 19331 + }, + { + "epoch": 0.5924972416329533, + "grad_norm": 1.827118534559176, + "learning_rate": 3.7581947156413673e-06, + "loss": 0.6295, + "step": 19332 + }, + { + "epoch": 0.5925278901556945, + "grad_norm": 1.7459903718491236, + "learning_rate": 3.757713954784988e-06, + "loss": 0.6641, + "step": 19333 + }, + { + "epoch": 0.5925585386784357, + "grad_norm": 1.888208391467662, + "learning_rate": 3.757233206169173e-06, + "loss": 0.6502, + "step": 19334 + }, + { + "epoch": 0.5925891872011769, + "grad_norm": 1.8161135290443542, + "learning_rate": 3.7567524697986547e-06, + "loss": 0.6937, + "step": 19335 + }, + { + "epoch": 0.5926198357239181, + "grad_norm": 0.8024376924127635, + "learning_rate": 3.7562717456781755e-06, + "loss": 0.3964, + "step": 19336 + }, + { + "epoch": 0.5926504842466593, + "grad_norm": 1.8356070736744934, + "learning_rate": 3.755791033812468e-06, + "loss": 0.5483, + "step": 19337 + }, + { + "epoch": 0.5926811327694005, + "grad_norm": 1.5732967301058283, + "learning_rate": 3.755310334206269e-06, + "loss": 0.6309, + "step": 19338 + }, + { + "epoch": 0.5927117812921417, + "grad_norm": 1.8974191427326113, + "learning_rate": 3.7548296468643164e-06, + "loss": 0.5729, + "step": 19339 + }, + { + "epoch": 0.5927424298148829, + "grad_norm": 1.5950943291447364, + "learning_rate": 3.754348971791346e-06, + "loss": 0.6036, + "step": 19340 + }, + { + "epoch": 0.5927730783376242, + "grad_norm": 1.7499844162801683, + "learning_rate": 3.753868308992093e-06, + "loss": 0.5051, + "step": 19341 + }, + { + "epoch": 0.5928037268603653, + "grad_norm": 1.7983131893688238, + "learning_rate": 3.7533876584712953e-06, + "loss": 0.5952, + "step": 19342 + }, + { + "epoch": 0.5928343753831066, + "grad_norm": 1.8746713896706733, + "learning_rate": 3.7529070202336864e-06, + "loss": 0.602, + "step": 19343 + }, + { + "epoch": 0.5928650239058477, + "grad_norm": 1.9551245033078324, + "learning_rate": 3.7524263942840056e-06, + "loss": 0.6971, + "step": 19344 + }, + { + "epoch": 0.592895672428589, + "grad_norm": 1.6730090827865616, + "learning_rate": 3.751945780626988e-06, + "loss": 0.5672, + "step": 19345 + }, + { + "epoch": 0.5929263209513301, + "grad_norm": 1.843146505362776, + "learning_rate": 3.7514651792673634e-06, + "loss": 0.6011, + "step": 19346 + }, + { + "epoch": 0.5929569694740714, + "grad_norm": 1.832999666097708, + "learning_rate": 3.750984590209876e-06, + "loss": 0.6645, + "step": 19347 + }, + { + "epoch": 0.5929876179968125, + "grad_norm": 1.6956394438086133, + "learning_rate": 3.7505040134592557e-06, + "loss": 0.5464, + "step": 19348 + }, + { + "epoch": 0.5930182665195538, + "grad_norm": 1.7118570907881323, + "learning_rate": 3.750023449020238e-06, + "loss": 0.5887, + "step": 19349 + }, + { + "epoch": 0.593048915042295, + "grad_norm": 0.7836040145788165, + "learning_rate": 3.7495428968975606e-06, + "loss": 0.3987, + "step": 19350 + }, + { + "epoch": 0.5930795635650362, + "grad_norm": 1.9763238536457002, + "learning_rate": 3.749062357095956e-06, + "loss": 0.707, + "step": 19351 + }, + { + "epoch": 0.5931102120877774, + "grad_norm": 1.6697100091281718, + "learning_rate": 
3.7485818296201603e-06, + "loss": 0.5873, + "step": 19352 + }, + { + "epoch": 0.5931408606105186, + "grad_norm": 1.8778698722424563, + "learning_rate": 3.7481013144749077e-06, + "loss": 0.5503, + "step": 19353 + }, + { + "epoch": 0.5931715091332598, + "grad_norm": 1.8745097449561412, + "learning_rate": 3.7476208116649333e-06, + "loss": 0.6326, + "step": 19354 + }, + { + "epoch": 0.5932021576560009, + "grad_norm": 1.8097311437745653, + "learning_rate": 3.747140321194972e-06, + "loss": 0.6401, + "step": 19355 + }, + { + "epoch": 0.5932328061787422, + "grad_norm": 1.7572867066654647, + "learning_rate": 3.746659843069759e-06, + "loss": 0.6642, + "step": 19356 + }, + { + "epoch": 0.5932634547014833, + "grad_norm": 1.7216867423844742, + "learning_rate": 3.7461793772940236e-06, + "loss": 0.6534, + "step": 19357 + }, + { + "epoch": 0.5932941032242246, + "grad_norm": 0.8008896668513972, + "learning_rate": 3.745698923872507e-06, + "loss": 0.4011, + "step": 19358 + }, + { + "epoch": 0.5933247517469658, + "grad_norm": 1.8170370605620316, + "learning_rate": 3.7452184828099385e-06, + "loss": 0.605, + "step": 19359 + }, + { + "epoch": 0.593355400269707, + "grad_norm": 1.8260952471019474, + "learning_rate": 3.744738054111053e-06, + "loss": 0.6581, + "step": 19360 + }, + { + "epoch": 0.5933860487924482, + "grad_norm": 1.8318294818812224, + "learning_rate": 3.744257637780585e-06, + "loss": 0.5356, + "step": 19361 + }, + { + "epoch": 0.5934166973151894, + "grad_norm": 1.7769069967607285, + "learning_rate": 3.743777233823267e-06, + "loss": 0.6411, + "step": 19362 + }, + { + "epoch": 0.5934473458379306, + "grad_norm": 0.7705117429934734, + "learning_rate": 3.743296842243834e-06, + "loss": 0.4151, + "step": 19363 + }, + { + "epoch": 0.5934779943606718, + "grad_norm": 1.8157352143947092, + "learning_rate": 3.7428164630470193e-06, + "loss": 0.559, + "step": 19364 + }, + { + "epoch": 0.593508642883413, + "grad_norm": 1.687371817386489, + "learning_rate": 3.7423360962375544e-06, + "loss": 0.5867, + "step": 19365 + }, + { + "epoch": 0.5935392914061542, + "grad_norm": 1.7919091804291543, + "learning_rate": 3.741855741820176e-06, + "loss": 0.6809, + "step": 19366 + }, + { + "epoch": 0.5935699399288954, + "grad_norm": 1.725722126091299, + "learning_rate": 3.741375399799614e-06, + "loss": 0.6293, + "step": 19367 + }, + { + "epoch": 0.5936005884516367, + "grad_norm": 1.6051631065283842, + "learning_rate": 3.7408950701806003e-06, + "loss": 0.5845, + "step": 19368 + }, + { + "epoch": 0.5936312369743778, + "grad_norm": 1.8023051760285338, + "learning_rate": 3.7404147529678715e-06, + "loss": 0.5706, + "step": 19369 + }, + { + "epoch": 0.5936618854971191, + "grad_norm": 1.5060717457510935, + "learning_rate": 3.7399344481661582e-06, + "loss": 0.5372, + "step": 19370 + }, + { + "epoch": 0.5936925340198602, + "grad_norm": 0.7841029628609454, + "learning_rate": 3.739454155780192e-06, + "loss": 0.4347, + "step": 19371 + }, + { + "epoch": 0.5937231825426015, + "grad_norm": 0.8552655890969834, + "learning_rate": 3.7389738758147075e-06, + "loss": 0.4223, + "step": 19372 + }, + { + "epoch": 0.5937538310653426, + "grad_norm": 1.8480646093888857, + "learning_rate": 3.738493608274435e-06, + "loss": 0.609, + "step": 19373 + }, + { + "epoch": 0.5937844795880839, + "grad_norm": 1.5261503658996365, + "learning_rate": 3.7380133531641093e-06, + "loss": 0.6111, + "step": 19374 + }, + { + "epoch": 0.593815128110825, + "grad_norm": 1.631241427810733, + "learning_rate": 3.7375331104884617e-06, + "loss": 0.5379, + "step": 19375 + }, + { + "epoch": 
0.5938457766335663, + "grad_norm": 1.820955194440108, + "learning_rate": 3.73705288025222e-06, + "loss": 0.5741, + "step": 19376 + }, + { + "epoch": 0.5938764251563075, + "grad_norm": 0.7935306693491364, + "learning_rate": 3.7365726624601228e-06, + "loss": 0.4106, + "step": 19377 + }, + { + "epoch": 0.5939070736790487, + "grad_norm": 1.9973481031953129, + "learning_rate": 3.736092457116897e-06, + "loss": 0.6775, + "step": 19378 + }, + { + "epoch": 0.5939377222017899, + "grad_norm": 1.8005425830629411, + "learning_rate": 3.7356122642272753e-06, + "loss": 0.7311, + "step": 19379 + }, + { + "epoch": 0.5939683707245311, + "grad_norm": 2.070999871211758, + "learning_rate": 3.73513208379599e-06, + "loss": 0.6081, + "step": 19380 + }, + { + "epoch": 0.5939990192472723, + "grad_norm": 0.819191194570169, + "learning_rate": 3.7346519158277707e-06, + "loss": 0.443, + "step": 19381 + }, + { + "epoch": 0.5940296677700135, + "grad_norm": 1.908613687415709, + "learning_rate": 3.734171760327351e-06, + "loss": 0.6209, + "step": 19382 + }, + { + "epoch": 0.5940603162927547, + "grad_norm": 2.2167309374099915, + "learning_rate": 3.7336916172994608e-06, + "loss": 0.5588, + "step": 19383 + }, + { + "epoch": 0.594090964815496, + "grad_norm": 2.057910563131458, + "learning_rate": 3.73321148674883e-06, + "loss": 0.7374, + "step": 19384 + }, + { + "epoch": 0.5941216133382371, + "grad_norm": 1.8071951918836335, + "learning_rate": 3.7327313686801926e-06, + "loss": 0.6053, + "step": 19385 + }, + { + "epoch": 0.5941522618609782, + "grad_norm": 1.8053160061845401, + "learning_rate": 3.732251263098277e-06, + "loss": 0.6148, + "step": 19386 + }, + { + "epoch": 0.5941829103837195, + "grad_norm": 0.8117673477941186, + "learning_rate": 3.731771170007811e-06, + "loss": 0.4076, + "step": 19387 + }, + { + "epoch": 0.5942135589064607, + "grad_norm": 0.7836032324033416, + "learning_rate": 3.7312910894135324e-06, + "loss": 0.4294, + "step": 19388 + }, + { + "epoch": 0.5942442074292019, + "grad_norm": 1.76382860794999, + "learning_rate": 3.730811021320163e-06, + "loss": 0.5777, + "step": 19389 + }, + { + "epoch": 0.5942748559519431, + "grad_norm": 1.5181504659690654, + "learning_rate": 3.730330965732441e-06, + "loss": 0.6416, + "step": 19390 + }, + { + "epoch": 0.5943055044746843, + "grad_norm": 2.0177259844469013, + "learning_rate": 3.7298509226550916e-06, + "loss": 0.6698, + "step": 19391 + }, + { + "epoch": 0.5943361529974255, + "grad_norm": 1.620306849371045, + "learning_rate": 3.729370892092845e-06, + "loss": 0.5848, + "step": 19392 + }, + { + "epoch": 0.5943668015201667, + "grad_norm": 0.7421363855234615, + "learning_rate": 3.7288908740504337e-06, + "loss": 0.4169, + "step": 19393 + }, + { + "epoch": 0.5943974500429079, + "grad_norm": 0.7534641313524038, + "learning_rate": 3.7284108685325853e-06, + "loss": 0.4282, + "step": 19394 + }, + { + "epoch": 0.5944280985656492, + "grad_norm": 1.8385580861410649, + "learning_rate": 3.727930875544029e-06, + "loss": 0.6634, + "step": 19395 + }, + { + "epoch": 0.5944587470883903, + "grad_norm": 1.8041364767526773, + "learning_rate": 3.727450895089497e-06, + "loss": 0.513, + "step": 19396 + }, + { + "epoch": 0.5944893956111316, + "grad_norm": 0.7782866442961898, + "learning_rate": 3.726970927173717e-06, + "loss": 0.4228, + "step": 19397 + }, + { + "epoch": 0.5945200441338727, + "grad_norm": 1.782413210321628, + "learning_rate": 3.7264909718014153e-06, + "loss": 0.57, + "step": 19398 + }, + { + "epoch": 0.594550692656614, + "grad_norm": 1.8377092204227632, + "learning_rate": 
3.726011028977327e-06, + "loss": 0.6644, + "step": 19399 + }, + { + "epoch": 0.5945813411793551, + "grad_norm": 1.9534148909677505, + "learning_rate": 3.725531098706175e-06, + "loss": 0.5995, + "step": 19400 + }, + { + "epoch": 0.5946119897020964, + "grad_norm": 1.8891419536417584, + "learning_rate": 3.7250511809926943e-06, + "loss": 0.5725, + "step": 19401 + }, + { + "epoch": 0.5946426382248375, + "grad_norm": 1.6717008204412636, + "learning_rate": 3.72457127584161e-06, + "loss": 0.6331, + "step": 19402 + }, + { + "epoch": 0.5946732867475788, + "grad_norm": 1.7667644315208826, + "learning_rate": 3.724091383257649e-06, + "loss": 0.6444, + "step": 19403 + }, + { + "epoch": 0.59470393527032, + "grad_norm": 2.1373672617124595, + "learning_rate": 3.723611503245544e-06, + "loss": 0.6007, + "step": 19404 + }, + { + "epoch": 0.5947345837930612, + "grad_norm": 1.7051149416163498, + "learning_rate": 3.723131635810021e-06, + "loss": 0.5272, + "step": 19405 + }, + { + "epoch": 0.5947652323158024, + "grad_norm": 1.6218339976575555, + "learning_rate": 3.7226517809558084e-06, + "loss": 0.544, + "step": 19406 + }, + { + "epoch": 0.5947958808385436, + "grad_norm": 1.5292079220526238, + "learning_rate": 3.7221719386876342e-06, + "loss": 0.602, + "step": 19407 + }, + { + "epoch": 0.5948265293612848, + "grad_norm": 1.664706140565794, + "learning_rate": 3.721692109010227e-06, + "loss": 0.5765, + "step": 19408 + }, + { + "epoch": 0.594857177884026, + "grad_norm": 1.526975526933511, + "learning_rate": 3.7212122919283158e-06, + "loss": 0.459, + "step": 19409 + }, + { + "epoch": 0.5948878264067672, + "grad_norm": 3.6418643585220027, + "learning_rate": 3.7207324874466274e-06, + "loss": 0.6214, + "step": 19410 + }, + { + "epoch": 0.5949184749295084, + "grad_norm": 1.6686931902659647, + "learning_rate": 3.720252695569887e-06, + "loss": 0.5282, + "step": 19411 + }, + { + "epoch": 0.5949491234522496, + "grad_norm": 1.7042865623880097, + "learning_rate": 3.7197729163028252e-06, + "loss": 0.6308, + "step": 19412 + }, + { + "epoch": 0.5949797719749909, + "grad_norm": 1.917769646979922, + "learning_rate": 3.7192931496501687e-06, + "loss": 0.5404, + "step": 19413 + }, + { + "epoch": 0.595010420497732, + "grad_norm": 0.8122155017455199, + "learning_rate": 3.718813395616644e-06, + "loss": 0.4283, + "step": 19414 + }, + { + "epoch": 0.5950410690204733, + "grad_norm": 1.7046604774520293, + "learning_rate": 3.7183336542069792e-06, + "loss": 0.5771, + "step": 19415 + }, + { + "epoch": 0.5950717175432144, + "grad_norm": 1.7658111283686082, + "learning_rate": 3.7178539254258992e-06, + "loss": 0.617, + "step": 19416 + }, + { + "epoch": 0.5951023660659556, + "grad_norm": 1.7874528154857434, + "learning_rate": 3.7173742092781344e-06, + "loss": 0.5829, + "step": 19417 + }, + { + "epoch": 0.5951330145886968, + "grad_norm": 0.8112184925374429, + "learning_rate": 3.7168945057684103e-06, + "loss": 0.4552, + "step": 19418 + }, + { + "epoch": 0.595163663111438, + "grad_norm": 1.9038215708262005, + "learning_rate": 3.71641481490145e-06, + "loss": 0.6643, + "step": 19419 + }, + { + "epoch": 0.5951943116341792, + "grad_norm": 1.5043965325069202, + "learning_rate": 3.7159351366819863e-06, + "loss": 0.5182, + "step": 19420 + }, + { + "epoch": 0.5952249601569204, + "grad_norm": 1.6273479114148375, + "learning_rate": 3.7154554711147405e-06, + "loss": 0.7179, + "step": 19421 + }, + { + "epoch": 0.5952556086796617, + "grad_norm": 1.7862785444070457, + "learning_rate": 3.7149758182044405e-06, + "loss": 0.6406, + "step": 19422 + }, + { + "epoch": 
0.5952862572024028, + "grad_norm": 1.6721178996194845, + "learning_rate": 3.714496177955813e-06, + "loss": 0.6202, + "step": 19423 + }, + { + "epoch": 0.5953169057251441, + "grad_norm": 1.768901043089718, + "learning_rate": 3.7140165503735835e-06, + "loss": 0.6349, + "step": 19424 + }, + { + "epoch": 0.5953475542478852, + "grad_norm": 1.5628105777610821, + "learning_rate": 3.7135369354624774e-06, + "loss": 0.6224, + "step": 19425 + }, + { + "epoch": 0.5953782027706265, + "grad_norm": 1.8544160331942359, + "learning_rate": 3.713057333227222e-06, + "loss": 0.6862, + "step": 19426 + }, + { + "epoch": 0.5954088512933676, + "grad_norm": 1.7442491992121831, + "learning_rate": 3.712577743672541e-06, + "loss": 0.5895, + "step": 19427 + }, + { + "epoch": 0.5954394998161089, + "grad_norm": 1.4886151455266854, + "learning_rate": 3.7120981668031608e-06, + "loss": 0.5852, + "step": 19428 + }, + { + "epoch": 0.59547014833885, + "grad_norm": 2.0302363302089517, + "learning_rate": 3.7116186026238094e-06, + "loss": 0.7409, + "step": 19429 + }, + { + "epoch": 0.5955007968615913, + "grad_norm": 1.9806887551572343, + "learning_rate": 3.7111390511392054e-06, + "loss": 0.6978, + "step": 19430 + }, + { + "epoch": 0.5955314453843324, + "grad_norm": 0.8456187834786735, + "learning_rate": 3.7106595123540818e-06, + "loss": 0.4278, + "step": 19431 + }, + { + "epoch": 0.5955620939070737, + "grad_norm": 1.786422802779214, + "learning_rate": 3.710179986273159e-06, + "loss": 0.6404, + "step": 19432 + }, + { + "epoch": 0.5955927424298149, + "grad_norm": 1.7013507155696659, + "learning_rate": 3.709700472901161e-06, + "loss": 0.691, + "step": 19433 + }, + { + "epoch": 0.5956233909525561, + "grad_norm": 1.678471159621965, + "learning_rate": 3.7092209722428162e-06, + "loss": 0.5585, + "step": 19434 + }, + { + "epoch": 0.5956540394752973, + "grad_norm": 1.691674893549127, + "learning_rate": 3.708741484302846e-06, + "loss": 0.5621, + "step": 19435 + }, + { + "epoch": 0.5956846879980385, + "grad_norm": 1.608581008974358, + "learning_rate": 3.708262009085978e-06, + "loss": 0.6564, + "step": 19436 + }, + { + "epoch": 0.5957153365207797, + "grad_norm": 1.7425218492965013, + "learning_rate": 3.707782546596934e-06, + "loss": 0.594, + "step": 19437 + }, + { + "epoch": 0.5957459850435209, + "grad_norm": 1.9060232474706262, + "learning_rate": 3.7073030968404382e-06, + "loss": 0.6147, + "step": 19438 + }, + { + "epoch": 0.5957766335662621, + "grad_norm": 1.866349455404348, + "learning_rate": 3.706823659821217e-06, + "loss": 0.5815, + "step": 19439 + }, + { + "epoch": 0.5958072820890034, + "grad_norm": 1.7783095765201185, + "learning_rate": 3.706344235543995e-06, + "loss": 0.6041, + "step": 19440 + }, + { + "epoch": 0.5958379306117445, + "grad_norm": 1.8820697689478365, + "learning_rate": 3.7058648240134897e-06, + "loss": 0.6807, + "step": 19441 + }, + { + "epoch": 0.5958685791344858, + "grad_norm": 1.554464412829625, + "learning_rate": 3.7053854252344334e-06, + "loss": 0.6018, + "step": 19442 + }, + { + "epoch": 0.5958992276572269, + "grad_norm": 0.8592748345779473, + "learning_rate": 3.7049060392115425e-06, + "loss": 0.4232, + "step": 19443 + }, + { + "epoch": 0.5959298761799682, + "grad_norm": 1.780573174656462, + "learning_rate": 3.704426665949547e-06, + "loss": 0.6131, + "step": 19444 + }, + { + "epoch": 0.5959605247027093, + "grad_norm": 1.7819848369288476, + "learning_rate": 3.7039473054531662e-06, + "loss": 0.653, + "step": 19445 + }, + { + "epoch": 0.5959911732254506, + "grad_norm": 1.7601011531592523, + "learning_rate": 
3.7034679577271226e-06, + "loss": 0.6437, + "step": 19446 + }, + { + "epoch": 0.5960218217481917, + "grad_norm": 2.0691381707414545, + "learning_rate": 3.7029886227761426e-06, + "loss": 0.6869, + "step": 19447 + }, + { + "epoch": 0.5960524702709329, + "grad_norm": 1.750810475421135, + "learning_rate": 3.7025093006049467e-06, + "loss": 0.6138, + "step": 19448 + }, + { + "epoch": 0.5960831187936741, + "grad_norm": 0.7591748281596538, + "learning_rate": 3.702029991218258e-06, + "loss": 0.4227, + "step": 19449 + }, + { + "epoch": 0.5961137673164153, + "grad_norm": 1.6783361576481786, + "learning_rate": 3.7015506946208014e-06, + "loss": 0.6522, + "step": 19450 + }, + { + "epoch": 0.5961444158391566, + "grad_norm": 1.6555305893112175, + "learning_rate": 3.7010714108172986e-06, + "loss": 0.5764, + "step": 19451 + }, + { + "epoch": 0.5961750643618977, + "grad_norm": 1.812623005159799, + "learning_rate": 3.7005921398124682e-06, + "loss": 0.5665, + "step": 19452 + }, + { + "epoch": 0.596205712884639, + "grad_norm": 1.9565392345865873, + "learning_rate": 3.70011288161104e-06, + "loss": 0.6412, + "step": 19453 + }, + { + "epoch": 0.5962363614073801, + "grad_norm": 1.8244872584997862, + "learning_rate": 3.6996336362177274e-06, + "loss": 0.5913, + "step": 19454 + }, + { + "epoch": 0.5962670099301214, + "grad_norm": 1.8031919360861384, + "learning_rate": 3.699154403637262e-06, + "loss": 0.5958, + "step": 19455 + }, + { + "epoch": 0.5962976584528625, + "grad_norm": 0.7796911206946123, + "learning_rate": 3.6986751838743596e-06, + "loss": 0.4248, + "step": 19456 + }, + { + "epoch": 0.5963283069756038, + "grad_norm": 1.6012192262491383, + "learning_rate": 3.6981959769337423e-06, + "loss": 0.5907, + "step": 19457 + }, + { + "epoch": 0.596358955498345, + "grad_norm": 1.6181999916742387, + "learning_rate": 3.6977167828201344e-06, + "loss": 0.581, + "step": 19458 + }, + { + "epoch": 0.5963896040210862, + "grad_norm": 1.6384525236384193, + "learning_rate": 3.6972376015382563e-06, + "loss": 0.5699, + "step": 19459 + }, + { + "epoch": 0.5964202525438274, + "grad_norm": 1.705114825414781, + "learning_rate": 3.6967584330928274e-06, + "loss": 0.6113, + "step": 19460 + }, + { + "epoch": 0.5964509010665686, + "grad_norm": 2.7065251558441505, + "learning_rate": 3.6962792774885735e-06, + "loss": 0.6705, + "step": 19461 + }, + { + "epoch": 0.5964815495893098, + "grad_norm": 2.0790663195874197, + "learning_rate": 3.695800134730212e-06, + "loss": 0.7524, + "step": 19462 + }, + { + "epoch": 0.596512198112051, + "grad_norm": 1.8523508776729654, + "learning_rate": 3.695321004822467e-06, + "loss": 0.5393, + "step": 19463 + }, + { + "epoch": 0.5965428466347922, + "grad_norm": 0.8131816324943953, + "learning_rate": 3.6948418877700577e-06, + "loss": 0.4334, + "step": 19464 + }, + { + "epoch": 0.5965734951575334, + "grad_norm": 1.798908743559139, + "learning_rate": 3.6943627835777035e-06, + "loss": 0.5658, + "step": 19465 + }, + { + "epoch": 0.5966041436802746, + "grad_norm": 1.9577740113800044, + "learning_rate": 3.693883692250128e-06, + "loss": 0.6452, + "step": 19466 + }, + { + "epoch": 0.5966347922030159, + "grad_norm": 1.719359391815523, + "learning_rate": 3.693404613792051e-06, + "loss": 0.6013, + "step": 19467 + }, + { + "epoch": 0.596665440725757, + "grad_norm": 1.5373970892412774, + "learning_rate": 3.6929255482081916e-06, + "loss": 0.5817, + "step": 19468 + }, + { + "epoch": 0.5966960892484983, + "grad_norm": 1.6355994613514018, + "learning_rate": 3.692446495503272e-06, + "loss": 0.6184, + "step": 19469 + }, + { + "epoch": 
0.5967267377712394, + "grad_norm": 1.7191938464664684, + "learning_rate": 3.6919674556820108e-06, + "loss": 0.6628, + "step": 19470 + }, + { + "epoch": 0.5967573862939807, + "grad_norm": 0.7787966082564265, + "learning_rate": 3.691488428749129e-06, + "loss": 0.4364, + "step": 19471 + }, + { + "epoch": 0.5967880348167218, + "grad_norm": 1.7650690050957292, + "learning_rate": 3.691009414709349e-06, + "loss": 0.5687, + "step": 19472 + }, + { + "epoch": 0.5968186833394631, + "grad_norm": 2.0574091791621028, + "learning_rate": 3.6905304135673848e-06, + "loss": 0.5681, + "step": 19473 + }, + { + "epoch": 0.5968493318622042, + "grad_norm": 1.7431562246944232, + "learning_rate": 3.6900514253279618e-06, + "loss": 0.5635, + "step": 19474 + }, + { + "epoch": 0.5968799803849455, + "grad_norm": 0.7645495283279021, + "learning_rate": 3.689572449995797e-06, + "loss": 0.4248, + "step": 19475 + }, + { + "epoch": 0.5969106289076866, + "grad_norm": 1.7629831014337312, + "learning_rate": 3.6890934875756086e-06, + "loss": 0.6959, + "step": 19476 + }, + { + "epoch": 0.5969412774304279, + "grad_norm": 1.6548735730094917, + "learning_rate": 3.6886145380721182e-06, + "loss": 0.6198, + "step": 19477 + }, + { + "epoch": 0.5969719259531691, + "grad_norm": 2.031714703645662, + "learning_rate": 3.6881356014900447e-06, + "loss": 0.5967, + "step": 19478 + }, + { + "epoch": 0.5970025744759102, + "grad_norm": 1.7785835359824331, + "learning_rate": 3.687656677834106e-06, + "loss": 0.5911, + "step": 19479 + }, + { + "epoch": 0.5970332229986515, + "grad_norm": 1.7859733427552267, + "learning_rate": 3.6871777671090233e-06, + "loss": 0.6085, + "step": 19480 + }, + { + "epoch": 0.5970638715213926, + "grad_norm": 1.682236366499462, + "learning_rate": 3.686698869319512e-06, + "loss": 0.6346, + "step": 19481 + }, + { + "epoch": 0.5970945200441339, + "grad_norm": 1.755455281262762, + "learning_rate": 3.686219984470294e-06, + "loss": 0.6038, + "step": 19482 + }, + { + "epoch": 0.597125168566875, + "grad_norm": 1.7076185970640183, + "learning_rate": 3.685741112566088e-06, + "loss": 0.6226, + "step": 19483 + }, + { + "epoch": 0.5971558170896163, + "grad_norm": 1.85626289685358, + "learning_rate": 3.6852622536116076e-06, + "loss": 0.6722, + "step": 19484 + }, + { + "epoch": 0.5971864656123574, + "grad_norm": 1.652448764447401, + "learning_rate": 3.684783407611578e-06, + "loss": 0.5859, + "step": 19485 + }, + { + "epoch": 0.5972171141350987, + "grad_norm": 0.8036232222378359, + "learning_rate": 3.684304574570713e-06, + "loss": 0.4379, + "step": 19486 + }, + { + "epoch": 0.5972477626578399, + "grad_norm": 1.9756476982062676, + "learning_rate": 3.6838257544937307e-06, + "loss": 0.6608, + "step": 19487 + }, + { + "epoch": 0.5972784111805811, + "grad_norm": 1.8487697617258985, + "learning_rate": 3.68334694738535e-06, + "loss": 0.539, + "step": 19488 + }, + { + "epoch": 0.5973090597033223, + "grad_norm": 1.8364078521192653, + "learning_rate": 3.6828681532502884e-06, + "loss": 0.651, + "step": 19489 + }, + { + "epoch": 0.5973397082260635, + "grad_norm": 1.9982732484609511, + "learning_rate": 3.6823893720932656e-06, + "loss": 0.6475, + "step": 19490 + }, + { + "epoch": 0.5973703567488047, + "grad_norm": 1.5012925291723804, + "learning_rate": 3.6819106039189967e-06, + "loss": 0.691, + "step": 19491 + }, + { + "epoch": 0.5974010052715459, + "grad_norm": 1.7004961905727545, + "learning_rate": 3.681431848732199e-06, + "loss": 0.6925, + "step": 19492 + }, + { + "epoch": 0.5974316537942871, + "grad_norm": 1.7660467958561292, + "learning_rate": 
3.6809531065375914e-06, + "loss": 0.6418, + "step": 19493 + }, + { + "epoch": 0.5974623023170283, + "grad_norm": 1.8235592410006878, + "learning_rate": 3.680474377339892e-06, + "loss": 0.5917, + "step": 19494 + }, + { + "epoch": 0.5974929508397695, + "grad_norm": 1.8350982413131562, + "learning_rate": 3.6799956611438124e-06, + "loss": 0.7205, + "step": 19495 + }, + { + "epoch": 0.5975235993625108, + "grad_norm": 1.7757063558042767, + "learning_rate": 3.679516957954077e-06, + "loss": 0.6261, + "step": 19496 + }, + { + "epoch": 0.5975542478852519, + "grad_norm": 1.8305970836882453, + "learning_rate": 3.6790382677753954e-06, + "loss": 0.6385, + "step": 19497 + }, + { + "epoch": 0.5975848964079932, + "grad_norm": 1.888675426570334, + "learning_rate": 3.6785595906124903e-06, + "loss": 0.6577, + "step": 19498 + }, + { + "epoch": 0.5976155449307343, + "grad_norm": 1.9651496822431553, + "learning_rate": 3.678080926470076e-06, + "loss": 0.5417, + "step": 19499 + }, + { + "epoch": 0.5976461934534756, + "grad_norm": 2.081351126106748, + "learning_rate": 3.6776022753528664e-06, + "loss": 0.5983, + "step": 19500 + }, + { + "epoch": 0.5976768419762167, + "grad_norm": 2.1327691694528617, + "learning_rate": 3.6771236372655817e-06, + "loss": 0.6166, + "step": 19501 + }, + { + "epoch": 0.597707490498958, + "grad_norm": 1.7464270030924123, + "learning_rate": 3.6766450122129355e-06, + "loss": 0.5303, + "step": 19502 + }, + { + "epoch": 0.5977381390216991, + "grad_norm": 1.703364475638287, + "learning_rate": 3.6761664001996437e-06, + "loss": 0.5448, + "step": 19503 + }, + { + "epoch": 0.5977687875444404, + "grad_norm": 1.8072411762760532, + "learning_rate": 3.6756878012304242e-06, + "loss": 0.5971, + "step": 19504 + }, + { + "epoch": 0.5977994360671816, + "grad_norm": 0.7707429145968181, + "learning_rate": 3.6752092153099934e-06, + "loss": 0.4124, + "step": 19505 + }, + { + "epoch": 0.5978300845899228, + "grad_norm": 0.8030316801898522, + "learning_rate": 3.674730642443061e-06, + "loss": 0.4238, + "step": 19506 + }, + { + "epoch": 0.597860733112664, + "grad_norm": 1.8446976492099925, + "learning_rate": 3.6742520826343508e-06, + "loss": 0.6729, + "step": 19507 + }, + { + "epoch": 0.5978913816354052, + "grad_norm": 1.8287484965411216, + "learning_rate": 3.673773535888571e-06, + "loss": 0.6221, + "step": 19508 + }, + { + "epoch": 0.5979220301581464, + "grad_norm": 1.72265603521505, + "learning_rate": 3.673295002210442e-06, + "loss": 0.5973, + "step": 19509 + }, + { + "epoch": 0.5979526786808875, + "grad_norm": 1.6913610371938328, + "learning_rate": 3.672816481604676e-06, + "loss": 0.6202, + "step": 19510 + }, + { + "epoch": 0.5979833272036288, + "grad_norm": 1.7042751093344557, + "learning_rate": 3.672337974075988e-06, + "loss": 0.658, + "step": 19511 + }, + { + "epoch": 0.5980139757263699, + "grad_norm": 1.848400053308879, + "learning_rate": 3.671859479629094e-06, + "loss": 0.7211, + "step": 19512 + }, + { + "epoch": 0.5980446242491112, + "grad_norm": 1.809497881390554, + "learning_rate": 3.671380998268709e-06, + "loss": 0.5527, + "step": 19513 + }, + { + "epoch": 0.5980752727718524, + "grad_norm": 1.6269701761245723, + "learning_rate": 3.670902529999546e-06, + "loss": 0.575, + "step": 19514 + }, + { + "epoch": 0.5981059212945936, + "grad_norm": 1.657278319256192, + "learning_rate": 3.670424074826322e-06, + "loss": 0.5736, + "step": 19515 + }, + { + "epoch": 0.5981365698173348, + "grad_norm": 1.674043187194475, + "learning_rate": 3.6699456327537477e-06, + "loss": 0.6048, + "step": 19516 + }, + { + "epoch": 
0.598167218340076, + "grad_norm": 1.7216000568116996, + "learning_rate": 3.6694672037865416e-06, + "loss": 0.5772, + "step": 19517 + }, + { + "epoch": 0.5981978668628172, + "grad_norm": 1.648134259936896, + "learning_rate": 3.6689887879294146e-06, + "loss": 0.5282, + "step": 19518 + }, + { + "epoch": 0.5982285153855584, + "grad_norm": 1.4602952413313712, + "learning_rate": 3.6685103851870808e-06, + "loss": 0.5335, + "step": 19519 + }, + { + "epoch": 0.5982591639082996, + "grad_norm": 0.8055976255317008, + "learning_rate": 3.6680319955642556e-06, + "loss": 0.4194, + "step": 19520 + }, + { + "epoch": 0.5982898124310408, + "grad_norm": 1.7025371420095752, + "learning_rate": 3.6675536190656525e-06, + "loss": 0.6444, + "step": 19521 + }, + { + "epoch": 0.598320460953782, + "grad_norm": 1.568913491301863, + "learning_rate": 3.6670752556959834e-06, + "loss": 0.5754, + "step": 19522 + }, + { + "epoch": 0.5983511094765233, + "grad_norm": 0.866505143457592, + "learning_rate": 3.6665969054599633e-06, + "loss": 0.4527, + "step": 19523 + }, + { + "epoch": 0.5983817579992644, + "grad_norm": 1.6450686614334529, + "learning_rate": 3.6661185683623047e-06, + "loss": 0.5588, + "step": 19524 + }, + { + "epoch": 0.5984124065220057, + "grad_norm": 2.08273846447409, + "learning_rate": 3.665640244407721e-06, + "loss": 0.5703, + "step": 19525 + }, + { + "epoch": 0.5984430550447468, + "grad_norm": 2.122869203096768, + "learning_rate": 3.6651619336009275e-06, + "loss": 0.6601, + "step": 19526 + }, + { + "epoch": 0.5984737035674881, + "grad_norm": 2.0897015468518805, + "learning_rate": 3.664683635946632e-06, + "loss": 0.6319, + "step": 19527 + }, + { + "epoch": 0.5985043520902292, + "grad_norm": 1.6405153463719035, + "learning_rate": 3.664205351449553e-06, + "loss": 0.5985, + "step": 19528 + }, + { + "epoch": 0.5985350006129705, + "grad_norm": 0.787423106736258, + "learning_rate": 3.663727080114399e-06, + "loss": 0.428, + "step": 19529 + }, + { + "epoch": 0.5985656491357116, + "grad_norm": 0.8079050994611153, + "learning_rate": 3.663248821945884e-06, + "loss": 0.4113, + "step": 19530 + }, + { + "epoch": 0.5985962976584529, + "grad_norm": 0.7840423177979775, + "learning_rate": 3.6627705769487204e-06, + "loss": 0.4408, + "step": 19531 + }, + { + "epoch": 0.598626946181194, + "grad_norm": 1.8143258786794134, + "learning_rate": 3.662292345127621e-06, + "loss": 0.5769, + "step": 19532 + }, + { + "epoch": 0.5986575947039353, + "grad_norm": 1.9005843264111688, + "learning_rate": 3.6618141264872964e-06, + "loss": 0.6577, + "step": 19533 + }, + { + "epoch": 0.5986882432266765, + "grad_norm": 1.6455231698893489, + "learning_rate": 3.6613359210324606e-06, + "loss": 0.6487, + "step": 19534 + }, + { + "epoch": 0.5987188917494177, + "grad_norm": 1.7409078850974344, + "learning_rate": 3.6608577287678226e-06, + "loss": 0.7314, + "step": 19535 + }, + { + "epoch": 0.5987495402721589, + "grad_norm": 1.6828612388117734, + "learning_rate": 3.6603795496980983e-06, + "loss": 0.6, + "step": 19536 + }, + { + "epoch": 0.5987801887949001, + "grad_norm": 1.8354334327660637, + "learning_rate": 3.6599013838279975e-06, + "loss": 0.6134, + "step": 19537 + }, + { + "epoch": 0.5988108373176413, + "grad_norm": 1.7998182289757478, + "learning_rate": 3.659423231162228e-06, + "loss": 0.5779, + "step": 19538 + }, + { + "epoch": 0.5988414858403825, + "grad_norm": 1.7108049856074548, + "learning_rate": 3.658945091705508e-06, + "loss": 0.6352, + "step": 19539 + }, + { + "epoch": 0.5988721343631237, + "grad_norm": 1.874345958673576, + "learning_rate": 
3.6584669654625436e-06, + "loss": 0.5475, + "step": 19540 + }, + { + "epoch": 0.5989027828858648, + "grad_norm": 1.6458485876274462, + "learning_rate": 3.657988852438047e-06, + "loss": 0.642, + "step": 19541 + }, + { + "epoch": 0.5989334314086061, + "grad_norm": 1.773679040678756, + "learning_rate": 3.6575107526367297e-06, + "loss": 0.5773, + "step": 19542 + }, + { + "epoch": 0.5989640799313473, + "grad_norm": 0.8289585431166243, + "learning_rate": 3.657032666063302e-06, + "loss": 0.4247, + "step": 19543 + }, + { + "epoch": 0.5989947284540885, + "grad_norm": 1.5712652485521228, + "learning_rate": 3.6565545927224762e-06, + "loss": 0.5849, + "step": 19544 + }, + { + "epoch": 0.5990253769768297, + "grad_norm": 1.6372123182695812, + "learning_rate": 3.6560765326189617e-06, + "loss": 0.5286, + "step": 19545 + }, + { + "epoch": 0.5990560254995709, + "grad_norm": 2.0167437906520784, + "learning_rate": 3.655598485757468e-06, + "loss": 0.6891, + "step": 19546 + }, + { + "epoch": 0.5990866740223121, + "grad_norm": 1.9298396983667276, + "learning_rate": 3.655120452142707e-06, + "loss": 0.7128, + "step": 19547 + }, + { + "epoch": 0.5991173225450533, + "grad_norm": 1.6270885270333937, + "learning_rate": 3.6546424317793893e-06, + "loss": 0.5847, + "step": 19548 + }, + { + "epoch": 0.5991479710677945, + "grad_norm": 1.8099144535813796, + "learning_rate": 3.6541644246722212e-06, + "loss": 0.5636, + "step": 19549 + }, + { + "epoch": 0.5991786195905358, + "grad_norm": 0.7960160791393857, + "learning_rate": 3.653686430825919e-06, + "loss": 0.419, + "step": 19550 + }, + { + "epoch": 0.5992092681132769, + "grad_norm": 1.7773349340405926, + "learning_rate": 3.653208450245187e-06, + "loss": 0.5771, + "step": 19551 + }, + { + "epoch": 0.5992399166360182, + "grad_norm": 1.643455862697796, + "learning_rate": 3.6527304829347356e-06, + "loss": 0.5729, + "step": 19552 + }, + { + "epoch": 0.5992705651587593, + "grad_norm": 1.7547746432047413, + "learning_rate": 3.652252528899277e-06, + "loss": 0.6167, + "step": 19553 + }, + { + "epoch": 0.5993012136815006, + "grad_norm": 0.7899822226494637, + "learning_rate": 3.651774588143518e-06, + "loss": 0.4254, + "step": 19554 + }, + { + "epoch": 0.5993318622042417, + "grad_norm": 1.6177604018516254, + "learning_rate": 3.65129666067217e-06, + "loss": 0.5882, + "step": 19555 + }, + { + "epoch": 0.599362510726983, + "grad_norm": 0.7955909059518642, + "learning_rate": 3.6508187464899402e-06, + "loss": 0.4195, + "step": 19556 + }, + { + "epoch": 0.5993931592497241, + "grad_norm": 0.7746190086632813, + "learning_rate": 3.650340845601539e-06, + "loss": 0.4099, + "step": 19557 + }, + { + "epoch": 0.5994238077724654, + "grad_norm": 1.8510923429234203, + "learning_rate": 3.6498629580116747e-06, + "loss": 0.535, + "step": 19558 + }, + { + "epoch": 0.5994544562952066, + "grad_norm": 2.1670440649403364, + "learning_rate": 3.6493850837250576e-06, + "loss": 0.698, + "step": 19559 + }, + { + "epoch": 0.5994851048179478, + "grad_norm": 1.684799371402369, + "learning_rate": 3.6489072227463924e-06, + "loss": 0.6665, + "step": 19560 + }, + { + "epoch": 0.599515753340689, + "grad_norm": 1.7402967381373031, + "learning_rate": 3.648429375080391e-06, + "loss": 0.5999, + "step": 19561 + }, + { + "epoch": 0.5995464018634302, + "grad_norm": 1.6810453332560324, + "learning_rate": 3.6479515407317603e-06, + "loss": 0.598, + "step": 19562 + }, + { + "epoch": 0.5995770503861714, + "grad_norm": 1.7666280828258512, + "learning_rate": 3.6474737197052094e-06, + "loss": 0.5282, + "step": 19563 + }, + { + "epoch": 
0.5996076989089126, + "grad_norm": 1.847198567928944, + "learning_rate": 3.6469959120054464e-06, + "loss": 0.6382, + "step": 19564 + }, + { + "epoch": 0.5996383474316538, + "grad_norm": 1.8570566622671876, + "learning_rate": 3.6465181176371777e-06, + "loss": 0.5628, + "step": 19565 + }, + { + "epoch": 0.599668995954395, + "grad_norm": 1.7669637801064924, + "learning_rate": 3.6460403366051132e-06, + "loss": 0.6085, + "step": 19566 + }, + { + "epoch": 0.5996996444771362, + "grad_norm": 0.7956207509739166, + "learning_rate": 3.6455625689139617e-06, + "loss": 0.3935, + "step": 19567 + }, + { + "epoch": 0.5997302929998775, + "grad_norm": 1.90469501010854, + "learning_rate": 3.645084814568425e-06, + "loss": 0.5622, + "step": 19568 + }, + { + "epoch": 0.5997609415226186, + "grad_norm": 1.6617421115628106, + "learning_rate": 3.6446070735732168e-06, + "loss": 0.6589, + "step": 19569 + }, + { + "epoch": 0.5997915900453599, + "grad_norm": 0.7711721112679057, + "learning_rate": 3.6441293459330394e-06, + "loss": 0.4355, + "step": 19570 + }, + { + "epoch": 0.599822238568101, + "grad_norm": 1.8308298511261067, + "learning_rate": 3.6436516316526054e-06, + "loss": 0.6367, + "step": 19571 + }, + { + "epoch": 0.5998528870908422, + "grad_norm": 2.076717111360441, + "learning_rate": 3.643173930736618e-06, + "loss": 0.5904, + "step": 19572 + }, + { + "epoch": 0.5998835356135834, + "grad_norm": 1.7614544766094955, + "learning_rate": 3.642696243189784e-06, + "loss": 0.731, + "step": 19573 + }, + { + "epoch": 0.5999141841363246, + "grad_norm": 0.8084588934913072, + "learning_rate": 3.6422185690168123e-06, + "loss": 0.4502, + "step": 19574 + }, + { + "epoch": 0.5999448326590658, + "grad_norm": 0.7787413366839814, + "learning_rate": 3.641740908222408e-06, + "loss": 0.4248, + "step": 19575 + }, + { + "epoch": 0.599975481181807, + "grad_norm": 1.8799478307629969, + "learning_rate": 3.6412632608112775e-06, + "loss": 0.6834, + "step": 19576 + }, + { + "epoch": 0.6000061297045483, + "grad_norm": 1.9182762048143351, + "learning_rate": 3.6407856267881283e-06, + "loss": 0.7176, + "step": 19577 + }, + { + "epoch": 0.6000367782272894, + "grad_norm": 1.5861728394920016, + "learning_rate": 3.6403080061576677e-06, + "loss": 0.5633, + "step": 19578 + }, + { + "epoch": 0.6000674267500307, + "grad_norm": 1.7955576475153243, + "learning_rate": 3.6398303989245964e-06, + "loss": 0.5766, + "step": 19579 + }, + { + "epoch": 0.6000980752727718, + "grad_norm": 1.7405957470339597, + "learning_rate": 3.6393528050936277e-06, + "loss": 0.629, + "step": 19580 + }, + { + "epoch": 0.6001287237955131, + "grad_norm": 1.7495656882071862, + "learning_rate": 3.6388752246694613e-06, + "loss": 0.632, + "step": 19581 + }, + { + "epoch": 0.6001593723182542, + "grad_norm": 1.7320220172245469, + "learning_rate": 3.638397657656808e-06, + "loss": 0.664, + "step": 19582 + }, + { + "epoch": 0.6001900208409955, + "grad_norm": 1.845071322786074, + "learning_rate": 3.63792010406037e-06, + "loss": 0.6804, + "step": 19583 + }, + { + "epoch": 0.6002206693637366, + "grad_norm": 1.542460954685355, + "learning_rate": 3.637442563884853e-06, + "loss": 0.6021, + "step": 19584 + }, + { + "epoch": 0.6002513178864779, + "grad_norm": 1.5805575069441862, + "learning_rate": 3.636965037134964e-06, + "loss": 0.6409, + "step": 19585 + }, + { + "epoch": 0.600281966409219, + "grad_norm": 1.6942751858752925, + "learning_rate": 3.6364875238154073e-06, + "loss": 0.559, + "step": 19586 + }, + { + "epoch": 0.6003126149319603, + "grad_norm": 1.9480557648325953, + "learning_rate": 
3.6360100239308867e-06, + "loss": 0.6376, + "step": 19587 + }, + { + "epoch": 0.6003432634547015, + "grad_norm": 0.8248888488504038, + "learning_rate": 3.6355325374861096e-06, + "loss": 0.4198, + "step": 19588 + }, + { + "epoch": 0.6003739119774427, + "grad_norm": 1.5479326595644376, + "learning_rate": 3.635055064485778e-06, + "loss": 0.5535, + "step": 19589 + }, + { + "epoch": 0.6004045605001839, + "grad_norm": 1.7944846340659393, + "learning_rate": 3.634577604934599e-06, + "loss": 0.627, + "step": 19590 + }, + { + "epoch": 0.6004352090229251, + "grad_norm": 1.673833495118982, + "learning_rate": 3.634100158837278e-06, + "loss": 0.6868, + "step": 19591 + }, + { + "epoch": 0.6004658575456663, + "grad_norm": 1.9069040566285722, + "learning_rate": 3.633622726198514e-06, + "loss": 0.5816, + "step": 19592 + }, + { + "epoch": 0.6004965060684075, + "grad_norm": 1.7034467134265787, + "learning_rate": 3.6331453070230182e-06, + "loss": 0.6299, + "step": 19593 + }, + { + "epoch": 0.6005271545911487, + "grad_norm": 1.7457711806866332, + "learning_rate": 3.6326679013154904e-06, + "loss": 0.5723, + "step": 19594 + }, + { + "epoch": 0.60055780311389, + "grad_norm": 1.6936568626482589, + "learning_rate": 3.632190509080634e-06, + "loss": 0.6294, + "step": 19595 + }, + { + "epoch": 0.6005884516366311, + "grad_norm": 2.075467545609608, + "learning_rate": 3.631713130323157e-06, + "loss": 0.6153, + "step": 19596 + }, + { + "epoch": 0.6006191001593724, + "grad_norm": 1.9258828368410097, + "learning_rate": 3.631235765047758e-06, + "loss": 0.5651, + "step": 19597 + }, + { + "epoch": 0.6006497486821135, + "grad_norm": 1.772358063776085, + "learning_rate": 3.6307584132591445e-06, + "loss": 0.6663, + "step": 19598 + }, + { + "epoch": 0.6006803972048548, + "grad_norm": 1.653186333610547, + "learning_rate": 3.6302810749620193e-06, + "loss": 0.5338, + "step": 19599 + }, + { + "epoch": 0.6007110457275959, + "grad_norm": 1.6821665753475412, + "learning_rate": 3.629803750161084e-06, + "loss": 0.5992, + "step": 19600 + }, + { + "epoch": 0.6007416942503372, + "grad_norm": 1.8744956004829811, + "learning_rate": 3.629326438861044e-06, + "loss": 0.644, + "step": 19601 + }, + { + "epoch": 0.6007723427730783, + "grad_norm": 1.9252209324384226, + "learning_rate": 3.6288491410666015e-06, + "loss": 0.5404, + "step": 19602 + }, + { + "epoch": 0.6008029912958195, + "grad_norm": 1.6600119357200644, + "learning_rate": 3.6283718567824575e-06, + "loss": 0.5524, + "step": 19603 + }, + { + "epoch": 0.6008336398185607, + "grad_norm": 1.7306373537803077, + "learning_rate": 3.6278945860133184e-06, + "loss": 0.5992, + "step": 19604 + }, + { + "epoch": 0.6008642883413019, + "grad_norm": 1.8638531193386838, + "learning_rate": 3.6274173287638848e-06, + "loss": 0.5868, + "step": 19605 + }, + { + "epoch": 0.6008949368640432, + "grad_norm": 0.7815330151059654, + "learning_rate": 3.626940085038858e-06, + "loss": 0.4285, + "step": 19606 + }, + { + "epoch": 0.6009255853867843, + "grad_norm": 1.9563521674731847, + "learning_rate": 3.6264628548429427e-06, + "loss": 0.6732, + "step": 19607 + }, + { + "epoch": 0.6009562339095256, + "grad_norm": 1.7027522000747135, + "learning_rate": 3.62598563818084e-06, + "loss": 0.5899, + "step": 19608 + }, + { + "epoch": 0.6009868824322667, + "grad_norm": 0.8056973329134985, + "learning_rate": 3.6255084350572523e-06, + "loss": 0.417, + "step": 19609 + }, + { + "epoch": 0.601017530955008, + "grad_norm": 1.671414510477594, + "learning_rate": 3.6250312454768827e-06, + "loss": 0.5602, + "step": 19610 + }, + { + "epoch": 
0.6010481794777491, + "grad_norm": 1.9522190379584807, + "learning_rate": 3.6245540694444303e-06, + "loss": 0.5891, + "step": 19611 + }, + { + "epoch": 0.6010788280004904, + "grad_norm": 0.7714396465408897, + "learning_rate": 3.6240769069646016e-06, + "loss": 0.4448, + "step": 19612 + }, + { + "epoch": 0.6011094765232315, + "grad_norm": 1.9956993302497696, + "learning_rate": 3.6235997580420934e-06, + "loss": 0.6545, + "step": 19613 + }, + { + "epoch": 0.6011401250459728, + "grad_norm": 1.960920080377245, + "learning_rate": 3.623122622681608e-06, + "loss": 0.6317, + "step": 19614 + }, + { + "epoch": 0.601170773568714, + "grad_norm": 1.569374170755168, + "learning_rate": 3.6226455008878486e-06, + "loss": 0.5754, + "step": 19615 + }, + { + "epoch": 0.6012014220914552, + "grad_norm": 2.0023121022055625, + "learning_rate": 3.622168392665515e-06, + "loss": 0.5783, + "step": 19616 + }, + { + "epoch": 0.6012320706141964, + "grad_norm": 1.7341329274634214, + "learning_rate": 3.6216912980193094e-06, + "loss": 0.6756, + "step": 19617 + }, + { + "epoch": 0.6012627191369376, + "grad_norm": 1.7997340053685082, + "learning_rate": 3.621214216953932e-06, + "loss": 0.7087, + "step": 19618 + }, + { + "epoch": 0.6012933676596788, + "grad_norm": 1.4485490513342782, + "learning_rate": 3.620737149474083e-06, + "loss": 0.6256, + "step": 19619 + }, + { + "epoch": 0.60132401618242, + "grad_norm": 1.8734005426140026, + "learning_rate": 3.6202600955844642e-06, + "loss": 0.6498, + "step": 19620 + }, + { + "epoch": 0.6013546647051612, + "grad_norm": 2.0867180354741897, + "learning_rate": 3.6197830552897773e-06, + "loss": 0.5768, + "step": 19621 + }, + { + "epoch": 0.6013853132279025, + "grad_norm": 2.009549733940646, + "learning_rate": 3.619306028594718e-06, + "loss": 0.632, + "step": 19622 + }, + { + "epoch": 0.6014159617506436, + "grad_norm": 1.7249142738305612, + "learning_rate": 3.6188290155039925e-06, + "loss": 0.6356, + "step": 19623 + }, + { + "epoch": 0.6014466102733849, + "grad_norm": 1.8511374959351556, + "learning_rate": 3.618352016022295e-06, + "loss": 0.6337, + "step": 19624 + }, + { + "epoch": 0.601477258796126, + "grad_norm": 1.6646549045212287, + "learning_rate": 3.617875030154332e-06, + "loss": 0.5524, + "step": 19625 + }, + { + "epoch": 0.6015079073188673, + "grad_norm": 1.700687079171754, + "learning_rate": 3.6173980579047984e-06, + "loss": 0.638, + "step": 19626 + }, + { + "epoch": 0.6015385558416084, + "grad_norm": 1.5652721696070728, + "learning_rate": 3.6169210992783948e-06, + "loss": 0.5886, + "step": 19627 + }, + { + "epoch": 0.6015692043643497, + "grad_norm": 2.045229209194322, + "learning_rate": 3.6164441542798225e-06, + "loss": 0.615, + "step": 19628 + }, + { + "epoch": 0.6015998528870908, + "grad_norm": 1.7676069386112998, + "learning_rate": 3.61596722291378e-06, + "loss": 0.5513, + "step": 19629 + }, + { + "epoch": 0.6016305014098321, + "grad_norm": 1.7691584187306078, + "learning_rate": 3.6154903051849656e-06, + "loss": 0.621, + "step": 19630 + }, + { + "epoch": 0.6016611499325732, + "grad_norm": 1.7395905460695908, + "learning_rate": 3.6150134010980796e-06, + "loss": 0.6757, + "step": 19631 + }, + { + "epoch": 0.6016917984553145, + "grad_norm": 1.9850717296980493, + "learning_rate": 3.6145365106578235e-06, + "loss": 0.6181, + "step": 19632 + }, + { + "epoch": 0.6017224469780557, + "grad_norm": 1.649290147300558, + "learning_rate": 3.614059633868889e-06, + "loss": 0.6291, + "step": 19633 + }, + { + "epoch": 0.6017530955007968, + "grad_norm": 1.7480688617885365, + "learning_rate": 
3.613582770735984e-06, + "loss": 0.7485, + "step": 19634 + }, + { + "epoch": 0.6017837440235381, + "grad_norm": 0.73928102655028, + "learning_rate": 3.6131059212637986e-06, + "loss": 0.3929, + "step": 19635 + }, + { + "epoch": 0.6018143925462792, + "grad_norm": 2.0382627152103896, + "learning_rate": 3.612629085457039e-06, + "loss": 0.7335, + "step": 19636 + }, + { + "epoch": 0.6018450410690205, + "grad_norm": 1.6697370837247099, + "learning_rate": 3.6121522633203987e-06, + "loss": 0.5669, + "step": 19637 + }, + { + "epoch": 0.6018756895917616, + "grad_norm": 1.9929944267578121, + "learning_rate": 3.6116754548585765e-06, + "loss": 0.6461, + "step": 19638 + }, + { + "epoch": 0.6019063381145029, + "grad_norm": 1.7684811857141516, + "learning_rate": 3.611198660076273e-06, + "loss": 0.6128, + "step": 19639 + }, + { + "epoch": 0.601936986637244, + "grad_norm": 1.6930938561704758, + "learning_rate": 3.610721878978183e-06, + "loss": 0.6051, + "step": 19640 + }, + { + "epoch": 0.6019676351599853, + "grad_norm": 1.954171092034899, + "learning_rate": 3.610245111569005e-06, + "loss": 0.6289, + "step": 19641 + }, + { + "epoch": 0.6019982836827265, + "grad_norm": 1.7161236388519625, + "learning_rate": 3.609768357853439e-06, + "loss": 0.6841, + "step": 19642 + }, + { + "epoch": 0.6020289322054677, + "grad_norm": 1.7150949070543942, + "learning_rate": 3.6092916178361802e-06, + "loss": 0.626, + "step": 19643 + }, + { + "epoch": 0.6020595807282089, + "grad_norm": 1.6363194522995388, + "learning_rate": 3.6088148915219277e-06, + "loss": 0.6594, + "step": 19644 + }, + { + "epoch": 0.6020902292509501, + "grad_norm": 1.6328333994084387, + "learning_rate": 3.6083381789153792e-06, + "loss": 0.5079, + "step": 19645 + }, + { + "epoch": 0.6021208777736913, + "grad_norm": 0.8264765421511668, + "learning_rate": 3.6078614800212273e-06, + "loss": 0.4481, + "step": 19646 + }, + { + "epoch": 0.6021515262964325, + "grad_norm": 1.9901912932920545, + "learning_rate": 3.6073847948441756e-06, + "loss": 0.5993, + "step": 19647 + }, + { + "epoch": 0.6021821748191737, + "grad_norm": 0.7625659166099723, + "learning_rate": 3.606908123388917e-06, + "loss": 0.419, + "step": 19648 + }, + { + "epoch": 0.602212823341915, + "grad_norm": 1.7094733352446587, + "learning_rate": 3.606431465660148e-06, + "loss": 0.4882, + "step": 19649 + }, + { + "epoch": 0.6022434718646561, + "grad_norm": 0.8004481126678565, + "learning_rate": 3.605954821662567e-06, + "loss": 0.4396, + "step": 19650 + }, + { + "epoch": 0.6022741203873974, + "grad_norm": 1.7912025365337536, + "learning_rate": 3.6054781914008696e-06, + "loss": 0.628, + "step": 19651 + }, + { + "epoch": 0.6023047689101385, + "grad_norm": 1.830373525103728, + "learning_rate": 3.6050015748797528e-06, + "loss": 0.6409, + "step": 19652 + }, + { + "epoch": 0.6023354174328798, + "grad_norm": 1.851816905179635, + "learning_rate": 3.6045249721039122e-06, + "loss": 0.7662, + "step": 19653 + }, + { + "epoch": 0.6023660659556209, + "grad_norm": 1.8412621879328797, + "learning_rate": 3.604048383078044e-06, + "loss": 0.6228, + "step": 19654 + }, + { + "epoch": 0.6023967144783622, + "grad_norm": 1.8676198134668562, + "learning_rate": 3.6035718078068455e-06, + "loss": 0.6661, + "step": 19655 + }, + { + "epoch": 0.6024273630011033, + "grad_norm": 1.7436516989679434, + "learning_rate": 3.6030952462950103e-06, + "loss": 0.5959, + "step": 19656 + }, + { + "epoch": 0.6024580115238446, + "grad_norm": 1.761253937782661, + "learning_rate": 3.6026186985472344e-06, + "loss": 0.6995, + "step": 19657 + }, + { + "epoch": 
0.6024886600465857, + "grad_norm": 1.603096980876257, + "learning_rate": 3.602142164568214e-06, + "loss": 0.5567, + "step": 19658 + }, + { + "epoch": 0.602519308569327, + "grad_norm": 1.6705337840671295, + "learning_rate": 3.6016656443626458e-06, + "loss": 0.6494, + "step": 19659 + }, + { + "epoch": 0.6025499570920682, + "grad_norm": 1.7849609834913847, + "learning_rate": 3.6011891379352224e-06, + "loss": 0.6553, + "step": 19660 + }, + { + "epoch": 0.6025806056148094, + "grad_norm": 1.8705210811612425, + "learning_rate": 3.600712645290641e-06, + "loss": 0.6671, + "step": 19661 + }, + { + "epoch": 0.6026112541375506, + "grad_norm": 1.883292309487976, + "learning_rate": 3.600236166433595e-06, + "loss": 0.6837, + "step": 19662 + }, + { + "epoch": 0.6026419026602918, + "grad_norm": 1.6645800830288469, + "learning_rate": 3.5997597013687813e-06, + "loss": 0.611, + "step": 19663 + }, + { + "epoch": 0.602672551183033, + "grad_norm": 1.7548024938254254, + "learning_rate": 3.5992832501008943e-06, + "loss": 0.7049, + "step": 19664 + }, + { + "epoch": 0.6027031997057741, + "grad_norm": 1.8523913785822592, + "learning_rate": 3.5988068126346254e-06, + "loss": 0.593, + "step": 19665 + }, + { + "epoch": 0.6027338482285154, + "grad_norm": 1.9294933794350957, + "learning_rate": 3.5983303889746745e-06, + "loss": 0.5916, + "step": 19666 + }, + { + "epoch": 0.6027644967512565, + "grad_norm": 0.8573929606070763, + "learning_rate": 3.597853979125732e-06, + "loss": 0.4152, + "step": 19667 + }, + { + "epoch": 0.6027951452739978, + "grad_norm": 1.853916545391658, + "learning_rate": 3.5973775830924907e-06, + "loss": 0.5754, + "step": 19668 + }, + { + "epoch": 0.602825793796739, + "grad_norm": 1.7134820578666983, + "learning_rate": 3.5969012008796487e-06, + "loss": 0.604, + "step": 19669 + }, + { + "epoch": 0.6028564423194802, + "grad_norm": 1.7844801558823686, + "learning_rate": 3.5964248324918977e-06, + "loss": 0.5994, + "step": 19670 + }, + { + "epoch": 0.6028870908422214, + "grad_norm": 1.7283631709441591, + "learning_rate": 3.5959484779339327e-06, + "loss": 0.614, + "step": 19671 + }, + { + "epoch": 0.6029177393649626, + "grad_norm": 1.6395729061005835, + "learning_rate": 3.5954721372104464e-06, + "loss": 0.6211, + "step": 19672 + }, + { + "epoch": 0.6029483878877038, + "grad_norm": 1.7699029366067103, + "learning_rate": 3.594995810326132e-06, + "loss": 0.5768, + "step": 19673 + }, + { + "epoch": 0.602979036410445, + "grad_norm": 1.69385628841741, + "learning_rate": 3.5945194972856834e-06, + "loss": 0.5481, + "step": 19674 + }, + { + "epoch": 0.6030096849331862, + "grad_norm": 1.941221587614188, + "learning_rate": 3.594043198093795e-06, + "loss": 0.5831, + "step": 19675 + }, + { + "epoch": 0.6030403334559274, + "grad_norm": 1.7430650746577319, + "learning_rate": 3.5935669127551566e-06, + "loss": 0.6218, + "step": 19676 + }, + { + "epoch": 0.6030709819786686, + "grad_norm": 1.7697309937535435, + "learning_rate": 3.5930906412744656e-06, + "loss": 0.6347, + "step": 19677 + }, + { + "epoch": 0.6031016305014099, + "grad_norm": 1.7044308342751053, + "learning_rate": 3.5926143836564093e-06, + "loss": 0.7084, + "step": 19678 + }, + { + "epoch": 0.603132279024151, + "grad_norm": 1.919956018956643, + "learning_rate": 3.5921381399056864e-06, + "loss": 0.5979, + "step": 19679 + }, + { + "epoch": 0.6031629275468923, + "grad_norm": 0.7985562526802865, + "learning_rate": 3.591661910026987e-06, + "loss": 0.4166, + "step": 19680 + }, + { + "epoch": 0.6031935760696334, + "grad_norm": 1.8116249013846757, + "learning_rate": 
3.5911856940250006e-06, + "loss": 0.5785, + "step": 19681 + }, + { + "epoch": 0.6032242245923747, + "grad_norm": 1.5694479162412127, + "learning_rate": 3.5907094919044237e-06, + "loss": 0.5137, + "step": 19682 + }, + { + "epoch": 0.6032548731151158, + "grad_norm": 0.7915605716594074, + "learning_rate": 3.5902333036699465e-06, + "loss": 0.4392, + "step": 19683 + }, + { + "epoch": 0.6032855216378571, + "grad_norm": 2.164945667676202, + "learning_rate": 3.58975712932626e-06, + "loss": 0.7328, + "step": 19684 + }, + { + "epoch": 0.6033161701605982, + "grad_norm": 1.6192170494704483, + "learning_rate": 3.5892809688780594e-06, + "loss": 0.5896, + "step": 19685 + }, + { + "epoch": 0.6033468186833395, + "grad_norm": 1.832274972216766, + "learning_rate": 3.5888048223300343e-06, + "loss": 0.6449, + "step": 19686 + }, + { + "epoch": 0.6033774672060807, + "grad_norm": 1.6214387962267789, + "learning_rate": 3.588328689686874e-06, + "loss": 0.5837, + "step": 19687 + }, + { + "epoch": 0.6034081157288219, + "grad_norm": 1.9888915150553061, + "learning_rate": 3.587852570953275e-06, + "loss": 0.6305, + "step": 19688 + }, + { + "epoch": 0.6034387642515631, + "grad_norm": 2.130764950324046, + "learning_rate": 3.587376466133923e-06, + "loss": 0.6334, + "step": 19689 + }, + { + "epoch": 0.6034694127743043, + "grad_norm": 0.7702079263369691, + "learning_rate": 3.5869003752335152e-06, + "loss": 0.4167, + "step": 19690 + }, + { + "epoch": 0.6035000612970455, + "grad_norm": 1.6066529340701368, + "learning_rate": 3.5864242982567386e-06, + "loss": 0.6566, + "step": 19691 + }, + { + "epoch": 0.6035307098197867, + "grad_norm": 0.7454212295118912, + "learning_rate": 3.5859482352082837e-06, + "loss": 0.3884, + "step": 19692 + }, + { + "epoch": 0.6035613583425279, + "grad_norm": 0.80484496084411, + "learning_rate": 3.5854721860928436e-06, + "loss": 0.4272, + "step": 19693 + }, + { + "epoch": 0.6035920068652691, + "grad_norm": 1.800611152124239, + "learning_rate": 3.5849961509151088e-06, + "loss": 0.6631, + "step": 19694 + }, + { + "epoch": 0.6036226553880103, + "grad_norm": 2.0674047000000835, + "learning_rate": 3.584520129679767e-06, + "loss": 0.613, + "step": 19695 + }, + { + "epoch": 0.6036533039107514, + "grad_norm": 2.0572694831803284, + "learning_rate": 3.5840441223915123e-06, + "loss": 0.5585, + "step": 19696 + }, + { + "epoch": 0.6036839524334927, + "grad_norm": 1.7592688454200323, + "learning_rate": 3.5835681290550315e-06, + "loss": 0.6259, + "step": 19697 + }, + { + "epoch": 0.6037146009562339, + "grad_norm": 1.8311508271478292, + "learning_rate": 3.5830921496750178e-06, + "loss": 0.6608, + "step": 19698 + }, + { + "epoch": 0.6037452494789751, + "grad_norm": 1.944811393062771, + "learning_rate": 3.58261618425616e-06, + "loss": 0.7115, + "step": 19699 + }, + { + "epoch": 0.6037758980017163, + "grad_norm": 1.8344648276571833, + "learning_rate": 3.5821402328031463e-06, + "loss": 0.6207, + "step": 19700 + }, + { + "epoch": 0.6038065465244575, + "grad_norm": 1.678154432077264, + "learning_rate": 3.5816642953206686e-06, + "loss": 0.6239, + "step": 19701 + }, + { + "epoch": 0.6038371950471987, + "grad_norm": 1.7808139059724897, + "learning_rate": 3.5811883718134154e-06, + "loss": 0.5785, + "step": 19702 + }, + { + "epoch": 0.6038678435699399, + "grad_norm": 1.77284711201431, + "learning_rate": 3.5807124622860756e-06, + "loss": 0.5817, + "step": 19703 + }, + { + "epoch": 0.6038984920926811, + "grad_norm": 1.6590033817610403, + "learning_rate": 3.580236566743339e-06, + "loss": 0.6337, + "step": 19704 + }, + { + "epoch": 
0.6039291406154224, + "grad_norm": 1.7857973032908236, + "learning_rate": 3.5797606851898946e-06, + "loss": 0.5405, + "step": 19705 + }, + { + "epoch": 0.6039597891381635, + "grad_norm": 0.912939483278073, + "learning_rate": 3.5792848176304323e-06, + "loss": 0.4161, + "step": 19706 + }, + { + "epoch": 0.6039904376609048, + "grad_norm": 0.893345300172173, + "learning_rate": 3.578808964069641e-06, + "loss": 0.4368, + "step": 19707 + }, + { + "epoch": 0.6040210861836459, + "grad_norm": 2.102986606892131, + "learning_rate": 3.578333124512206e-06, + "loss": 0.6088, + "step": 19708 + }, + { + "epoch": 0.6040517347063872, + "grad_norm": 1.768096593862705, + "learning_rate": 3.5778572989628215e-06, + "loss": 0.6784, + "step": 19709 + }, + { + "epoch": 0.6040823832291283, + "grad_norm": 1.5913400690707606, + "learning_rate": 3.5773814874261716e-06, + "loss": 0.691, + "step": 19710 + }, + { + "epoch": 0.6041130317518696, + "grad_norm": 1.8902625324393276, + "learning_rate": 3.5769056899069455e-06, + "loss": 0.5915, + "step": 19711 + }, + { + "epoch": 0.6041436802746107, + "grad_norm": 2.0180781707541713, + "learning_rate": 3.576429906409832e-06, + "loss": 0.5218, + "step": 19712 + }, + { + "epoch": 0.604174328797352, + "grad_norm": 2.8353175073698065, + "learning_rate": 3.575954136939519e-06, + "loss": 0.542, + "step": 19713 + }, + { + "epoch": 0.6042049773200932, + "grad_norm": 0.879145840288188, + "learning_rate": 3.575478381500693e-06, + "loss": 0.4306, + "step": 19714 + }, + { + "epoch": 0.6042356258428344, + "grad_norm": 1.7013775754024953, + "learning_rate": 3.575002640098045e-06, + "loss": 0.5763, + "step": 19715 + }, + { + "epoch": 0.6042662743655756, + "grad_norm": 1.9284662708637716, + "learning_rate": 3.5745269127362584e-06, + "loss": 0.5988, + "step": 19716 + }, + { + "epoch": 0.6042969228883168, + "grad_norm": 1.8104111773123501, + "learning_rate": 3.5740511994200245e-06, + "loss": 0.5752, + "step": 19717 + }, + { + "epoch": 0.604327571411058, + "grad_norm": 0.8165174306997475, + "learning_rate": 3.57357550015403e-06, + "loss": 0.3986, + "step": 19718 + }, + { + "epoch": 0.6043582199337992, + "grad_norm": 1.6881550745162843, + "learning_rate": 3.573099814942958e-06, + "loss": 0.6116, + "step": 19719 + }, + { + "epoch": 0.6043888684565404, + "grad_norm": 1.726233245613569, + "learning_rate": 3.5726241437915014e-06, + "loss": 0.609, + "step": 19720 + }, + { + "epoch": 0.6044195169792816, + "grad_norm": 1.762043002511959, + "learning_rate": 3.572148486704344e-06, + "loss": 0.6694, + "step": 19721 + }, + { + "epoch": 0.6044501655020228, + "grad_norm": 1.5576756247517973, + "learning_rate": 3.5716728436861715e-06, + "loss": 0.5403, + "step": 19722 + }, + { + "epoch": 0.6044808140247641, + "grad_norm": 1.767253667124356, + "learning_rate": 3.5711972147416723e-06, + "loss": 0.5532, + "step": 19723 + }, + { + "epoch": 0.6045114625475052, + "grad_norm": 2.5145380109773146, + "learning_rate": 3.570721599875532e-06, + "loss": 0.6615, + "step": 19724 + }, + { + "epoch": 0.6045421110702465, + "grad_norm": 1.6974497068064607, + "learning_rate": 3.5702459990924386e-06, + "loss": 0.7075, + "step": 19725 + }, + { + "epoch": 0.6045727595929876, + "grad_norm": 1.954627688517818, + "learning_rate": 3.5697704123970767e-06, + "loss": 0.6834, + "step": 19726 + }, + { + "epoch": 0.6046034081157288, + "grad_norm": 1.5229131438441428, + "learning_rate": 3.5692948397941322e-06, + "loss": 0.6271, + "step": 19727 + }, + { + "epoch": 0.60463405663847, + "grad_norm": 1.8103074009760673, + "learning_rate": 
3.5688192812882927e-06, + "loss": 0.6308, + "step": 19728 + }, + { + "epoch": 0.6046647051612112, + "grad_norm": 1.6777768006999532, + "learning_rate": 3.5683437368842444e-06, + "loss": 0.6862, + "step": 19729 + }, + { + "epoch": 0.6046953536839524, + "grad_norm": 1.8346466950238915, + "learning_rate": 3.5678682065866684e-06, + "loss": 0.5888, + "step": 19730 + }, + { + "epoch": 0.6047260022066936, + "grad_norm": 1.6490069344656637, + "learning_rate": 3.567392690400256e-06, + "loss": 0.5898, + "step": 19731 + }, + { + "epoch": 0.6047566507294349, + "grad_norm": 0.7952044747348392, + "learning_rate": 3.5669171883296896e-06, + "loss": 0.4213, + "step": 19732 + }, + { + "epoch": 0.604787299252176, + "grad_norm": 1.9576544307741022, + "learning_rate": 3.5664417003796524e-06, + "loss": 0.6188, + "step": 19733 + }, + { + "epoch": 0.6048179477749173, + "grad_norm": 1.757276837811021, + "learning_rate": 3.5659662265548344e-06, + "loss": 0.6351, + "step": 19734 + }, + { + "epoch": 0.6048485962976584, + "grad_norm": 1.8850898701231413, + "learning_rate": 3.5654907668599165e-06, + "loss": 0.5579, + "step": 19735 + }, + { + "epoch": 0.6048792448203997, + "grad_norm": 1.7552842620962523, + "learning_rate": 3.5650153212995864e-06, + "loss": 0.5773, + "step": 19736 + }, + { + "epoch": 0.6049098933431408, + "grad_norm": 1.7467318186790384, + "learning_rate": 3.564539889878527e-06, + "loss": 0.691, + "step": 19737 + }, + { + "epoch": 0.6049405418658821, + "grad_norm": 1.7875078321772415, + "learning_rate": 3.564064472601423e-06, + "loss": 0.6192, + "step": 19738 + }, + { + "epoch": 0.6049711903886232, + "grad_norm": 1.8299542305428944, + "learning_rate": 3.5635890694729596e-06, + "loss": 0.5967, + "step": 19739 + }, + { + "epoch": 0.6050018389113645, + "grad_norm": 1.962940550451023, + "learning_rate": 3.5631136804978215e-06, + "loss": 0.5702, + "step": 19740 + }, + { + "epoch": 0.6050324874341056, + "grad_norm": 1.6736170586959433, + "learning_rate": 3.5626383056806896e-06, + "loss": 0.6462, + "step": 19741 + }, + { + "epoch": 0.6050631359568469, + "grad_norm": 2.022526637126696, + "learning_rate": 3.562162945026253e-06, + "loss": 0.5562, + "step": 19742 + }, + { + "epoch": 0.6050937844795881, + "grad_norm": 1.653343175174734, + "learning_rate": 3.5616875985391897e-06, + "loss": 0.5983, + "step": 19743 + }, + { + "epoch": 0.6051244330023293, + "grad_norm": 1.8698875764855043, + "learning_rate": 3.5612122662241894e-06, + "loss": 0.6459, + "step": 19744 + }, + { + "epoch": 0.6051550815250705, + "grad_norm": 1.6731307186867608, + "learning_rate": 3.560736948085932e-06, + "loss": 0.5427, + "step": 19745 + }, + { + "epoch": 0.6051857300478117, + "grad_norm": 1.6465462766396004, + "learning_rate": 3.5602616441291003e-06, + "loss": 0.541, + "step": 19746 + }, + { + "epoch": 0.6052163785705529, + "grad_norm": 1.824926649079809, + "learning_rate": 3.55978635435838e-06, + "loss": 0.6581, + "step": 19747 + }, + { + "epoch": 0.6052470270932941, + "grad_norm": 1.705531047844107, + "learning_rate": 3.5593110787784535e-06, + "loss": 0.6805, + "step": 19748 + }, + { + "epoch": 0.6052776756160353, + "grad_norm": 0.8348001995286175, + "learning_rate": 3.558835817394003e-06, + "loss": 0.4363, + "step": 19749 + }, + { + "epoch": 0.6053083241387766, + "grad_norm": 1.9572050379175712, + "learning_rate": 3.5583605702097122e-06, + "loss": 0.5675, + "step": 19750 + }, + { + "epoch": 0.6053389726615177, + "grad_norm": 1.708744452920872, + "learning_rate": 3.557885337230263e-06, + "loss": 0.6234, + "step": 19751 + }, + { + "epoch": 
0.605369621184259, + "grad_norm": 1.7375873229344894, + "learning_rate": 3.5574101184603405e-06, + "loss": 0.6504, + "step": 19752 + }, + { + "epoch": 0.6054002697070001, + "grad_norm": 1.6673162990101302, + "learning_rate": 3.5569349139046237e-06, + "loss": 0.5276, + "step": 19753 + }, + { + "epoch": 0.6054309182297414, + "grad_norm": 1.7565079008520188, + "learning_rate": 3.556459723567796e-06, + "loss": 0.6302, + "step": 19754 + }, + { + "epoch": 0.6054615667524825, + "grad_norm": 2.1349138794473923, + "learning_rate": 3.5559845474545406e-06, + "loss": 0.615, + "step": 19755 + }, + { + "epoch": 0.6054922152752238, + "grad_norm": 1.6911068430126388, + "learning_rate": 3.5555093855695396e-06, + "loss": 0.5895, + "step": 19756 + }, + { + "epoch": 0.6055228637979649, + "grad_norm": 1.7663177010227167, + "learning_rate": 3.5550342379174725e-06, + "loss": 0.6391, + "step": 19757 + }, + { + "epoch": 0.6055535123207061, + "grad_norm": 1.7343150098702893, + "learning_rate": 3.5545591045030238e-06, + "loss": 0.6186, + "step": 19758 + }, + { + "epoch": 0.6055841608434473, + "grad_norm": 1.640610467291216, + "learning_rate": 3.5540839853308754e-06, + "loss": 0.548, + "step": 19759 + }, + { + "epoch": 0.6056148093661885, + "grad_norm": 1.9539177282861817, + "learning_rate": 3.5536088804057044e-06, + "loss": 0.6555, + "step": 19760 + }, + { + "epoch": 0.6056454578889298, + "grad_norm": 1.8980750998235463, + "learning_rate": 3.553133789732198e-06, + "loss": 0.6449, + "step": 19761 + }, + { + "epoch": 0.6056761064116709, + "grad_norm": 1.9047643979966, + "learning_rate": 3.5526587133150314e-06, + "loss": 0.6381, + "step": 19762 + }, + { + "epoch": 0.6057067549344122, + "grad_norm": 1.8067786316735124, + "learning_rate": 3.5521836511588925e-06, + "loss": 0.7082, + "step": 19763 + }, + { + "epoch": 0.6057374034571533, + "grad_norm": 1.7374259523613944, + "learning_rate": 3.5517086032684567e-06, + "loss": 0.5953, + "step": 19764 + }, + { + "epoch": 0.6057680519798946, + "grad_norm": 1.8416491828896075, + "learning_rate": 3.5512335696484064e-06, + "loss": 0.6139, + "step": 19765 + }, + { + "epoch": 0.6057987005026357, + "grad_norm": 1.7571094812633647, + "learning_rate": 3.550758550303423e-06, + "loss": 0.6932, + "step": 19766 + }, + { + "epoch": 0.605829349025377, + "grad_norm": 1.579224234040905, + "learning_rate": 3.5502835452381866e-06, + "loss": 0.6434, + "step": 19767 + }, + { + "epoch": 0.6058599975481181, + "grad_norm": 1.5414871892754651, + "learning_rate": 3.5498085544573755e-06, + "loss": 0.5186, + "step": 19768 + }, + { + "epoch": 0.6058906460708594, + "grad_norm": 1.9590273209791915, + "learning_rate": 3.549333577965674e-06, + "loss": 0.6861, + "step": 19769 + }, + { + "epoch": 0.6059212945936006, + "grad_norm": 0.8373733377750713, + "learning_rate": 3.5488586157677586e-06, + "loss": 0.4363, + "step": 19770 + }, + { + "epoch": 0.6059519431163418, + "grad_norm": 1.7070414677348367, + "learning_rate": 3.5483836678683108e-06, + "loss": 0.5511, + "step": 19771 + }, + { + "epoch": 0.605982591639083, + "grad_norm": 0.7996604936198745, + "learning_rate": 3.547908734272012e-06, + "loss": 0.4273, + "step": 19772 + }, + { + "epoch": 0.6060132401618242, + "grad_norm": 0.7422402720059492, + "learning_rate": 3.5474338149835363e-06, + "loss": 0.3924, + "step": 19773 + }, + { + "epoch": 0.6060438886845654, + "grad_norm": 1.7207095456281958, + "learning_rate": 3.5469589100075707e-06, + "loss": 0.6396, + "step": 19774 + }, + { + "epoch": 0.6060745372073066, + "grad_norm": 1.6842132415631288, + "learning_rate": 
3.546484019348789e-06, + "loss": 0.6, + "step": 19775 + }, + { + "epoch": 0.6061051857300478, + "grad_norm": 1.865491028677414, + "learning_rate": 3.5460091430118714e-06, + "loss": 0.6391, + "step": 19776 + }, + { + "epoch": 0.606135834252789, + "grad_norm": 1.644081362722796, + "learning_rate": 3.5455342810014987e-06, + "loss": 0.5892, + "step": 19777 + }, + { + "epoch": 0.6061664827755302, + "grad_norm": 1.7809942849319176, + "learning_rate": 3.5450594333223476e-06, + "loss": 0.591, + "step": 19778 + }, + { + "epoch": 0.6061971312982715, + "grad_norm": 1.8094945288524, + "learning_rate": 3.5445845999790994e-06, + "loss": 0.6091, + "step": 19779 + }, + { + "epoch": 0.6062277798210126, + "grad_norm": 1.6272258472791605, + "learning_rate": 3.544109780976432e-06, + "loss": 0.6597, + "step": 19780 + }, + { + "epoch": 0.6062584283437539, + "grad_norm": 1.8161833120628241, + "learning_rate": 3.543634976319022e-06, + "loss": 0.5448, + "step": 19781 + }, + { + "epoch": 0.606289076866495, + "grad_norm": 1.9104430737718014, + "learning_rate": 3.54316018601155e-06, + "loss": 0.5799, + "step": 19782 + }, + { + "epoch": 0.6063197253892363, + "grad_norm": 0.864361279310396, + "learning_rate": 3.542685410058695e-06, + "loss": 0.4, + "step": 19783 + }, + { + "epoch": 0.6063503739119774, + "grad_norm": 1.5935203122038029, + "learning_rate": 3.5422106484651297e-06, + "loss": 0.492, + "step": 19784 + }, + { + "epoch": 0.6063810224347187, + "grad_norm": 1.6031440418089915, + "learning_rate": 3.5417359012355395e-06, + "loss": 0.6365, + "step": 19785 + }, + { + "epoch": 0.6064116709574598, + "grad_norm": 1.847892840615589, + "learning_rate": 3.541261168374598e-06, + "loss": 0.6912, + "step": 19786 + }, + { + "epoch": 0.6064423194802011, + "grad_norm": 1.7478959179047509, + "learning_rate": 3.5407864498869815e-06, + "loss": 0.6796, + "step": 19787 + }, + { + "epoch": 0.6064729680029423, + "grad_norm": 1.980150079107268, + "learning_rate": 3.5403117457773708e-06, + "loss": 0.6781, + "step": 19788 + }, + { + "epoch": 0.6065036165256834, + "grad_norm": 1.9100413899355841, + "learning_rate": 3.539837056050441e-06, + "loss": 0.6552, + "step": 19789 + }, + { + "epoch": 0.6065342650484247, + "grad_norm": 1.7328067526245647, + "learning_rate": 3.5393623807108714e-06, + "loss": 0.4407, + "step": 19790 + }, + { + "epoch": 0.6065649135711658, + "grad_norm": 0.8343599035369724, + "learning_rate": 3.5388877197633378e-06, + "loss": 0.4247, + "step": 19791 + }, + { + "epoch": 0.6065955620939071, + "grad_norm": 0.7839160815157868, + "learning_rate": 3.5384130732125165e-06, + "loss": 0.3905, + "step": 19792 + }, + { + "epoch": 0.6066262106166482, + "grad_norm": 1.6899312272741387, + "learning_rate": 3.5379384410630858e-06, + "loss": 0.5955, + "step": 19793 + }, + { + "epoch": 0.6066568591393895, + "grad_norm": 0.7685895178232408, + "learning_rate": 3.5374638233197233e-06, + "loss": 0.4099, + "step": 19794 + }, + { + "epoch": 0.6066875076621306, + "grad_norm": 1.765403091072089, + "learning_rate": 3.5369892199871e-06, + "loss": 0.6771, + "step": 19795 + }, + { + "epoch": 0.6067181561848719, + "grad_norm": 1.6504663797453496, + "learning_rate": 3.5365146310699007e-06, + "loss": 0.5723, + "step": 19796 + }, + { + "epoch": 0.606748804707613, + "grad_norm": 1.682112974310743, + "learning_rate": 3.536040056572794e-06, + "loss": 0.4891, + "step": 19797 + }, + { + "epoch": 0.6067794532303543, + "grad_norm": 0.7792720194385934, + "learning_rate": 3.5355654965004604e-06, + "loss": 0.404, + "step": 19798 + }, + { + "epoch": 
0.6068101017530955, + "grad_norm": 1.9122569264816145, + "learning_rate": 3.535090950857575e-06, + "loss": 0.6001, + "step": 19799 + }, + { + "epoch": 0.6068407502758367, + "grad_norm": 1.8225283810696677, + "learning_rate": 3.534616419648812e-06, + "loss": 0.6481, + "step": 19800 + }, + { + "epoch": 0.6068713987985779, + "grad_norm": 2.0487625223453096, + "learning_rate": 3.534141902878849e-06, + "loss": 0.6764, + "step": 19801 + }, + { + "epoch": 0.6069020473213191, + "grad_norm": 1.7627226168554841, + "learning_rate": 3.533667400552362e-06, + "loss": 0.686, + "step": 19802 + }, + { + "epoch": 0.6069326958440603, + "grad_norm": 1.8590713536245902, + "learning_rate": 3.533192912674023e-06, + "loss": 0.6703, + "step": 19803 + }, + { + "epoch": 0.6069633443668015, + "grad_norm": 1.7631964112198881, + "learning_rate": 3.5327184392485124e-06, + "loss": 0.6254, + "step": 19804 + }, + { + "epoch": 0.6069939928895427, + "grad_norm": 2.0159379379428315, + "learning_rate": 3.5322439802804993e-06, + "loss": 0.6032, + "step": 19805 + }, + { + "epoch": 0.607024641412284, + "grad_norm": 1.8417101914602838, + "learning_rate": 3.5317695357746645e-06, + "loss": 0.6468, + "step": 19806 + }, + { + "epoch": 0.6070552899350251, + "grad_norm": 1.7044849242782691, + "learning_rate": 3.5312951057356793e-06, + "loss": 0.6524, + "step": 19807 + }, + { + "epoch": 0.6070859384577664, + "grad_norm": 1.5653782382498687, + "learning_rate": 3.5308206901682186e-06, + "loss": 0.6329, + "step": 19808 + }, + { + "epoch": 0.6071165869805075, + "grad_norm": 1.9046017985924415, + "learning_rate": 3.530346289076958e-06, + "loss": 0.5941, + "step": 19809 + }, + { + "epoch": 0.6071472355032488, + "grad_norm": 1.6972707762041424, + "learning_rate": 3.529871902466572e-06, + "loss": 0.5401, + "step": 19810 + }, + { + "epoch": 0.6071778840259899, + "grad_norm": 1.889569452955055, + "learning_rate": 3.5293975303417322e-06, + "loss": 0.5713, + "step": 19811 + }, + { + "epoch": 0.6072085325487312, + "grad_norm": 0.816738595147859, + "learning_rate": 3.5289231727071166e-06, + "loss": 0.4181, + "step": 19812 + }, + { + "epoch": 0.6072391810714723, + "grad_norm": 1.878728645662505, + "learning_rate": 3.528448829567398e-06, + "loss": 0.6558, + "step": 19813 + }, + { + "epoch": 0.6072698295942136, + "grad_norm": 1.7247100808581406, + "learning_rate": 3.527974500927247e-06, + "loss": 0.6106, + "step": 19814 + }, + { + "epoch": 0.6073004781169548, + "grad_norm": 1.9284134484195525, + "learning_rate": 3.527500186791343e-06, + "loss": 0.7001, + "step": 19815 + }, + { + "epoch": 0.607331126639696, + "grad_norm": 1.9285391547647186, + "learning_rate": 3.5270258871643526e-06, + "loss": 0.7526, + "step": 19816 + }, + { + "epoch": 0.6073617751624372, + "grad_norm": 1.8747985784738452, + "learning_rate": 3.5265516020509573e-06, + "loss": 0.6092, + "step": 19817 + }, + { + "epoch": 0.6073924236851784, + "grad_norm": 1.578195501837031, + "learning_rate": 3.526077331455824e-06, + "loss": 0.5992, + "step": 19818 + }, + { + "epoch": 0.6074230722079196, + "grad_norm": 1.5584207385150477, + "learning_rate": 3.5256030753836267e-06, + "loss": 0.6184, + "step": 19819 + }, + { + "epoch": 0.6074537207306607, + "grad_norm": 1.7001260790743822, + "learning_rate": 3.525128833839041e-06, + "loss": 0.549, + "step": 19820 + }, + { + "epoch": 0.607484369253402, + "grad_norm": 1.8762785756083225, + "learning_rate": 3.5246546068267382e-06, + "loss": 0.5242, + "step": 19821 + }, + { + "epoch": 0.6075150177761431, + "grad_norm": 0.8397631464301999, + "learning_rate": 
3.5241803943513907e-06, + "loss": 0.4394, + "step": 19822 + }, + { + "epoch": 0.6075456662988844, + "grad_norm": 1.8568891740747775, + "learning_rate": 3.523706196417672e-06, + "loss": 0.6122, + "step": 19823 + }, + { + "epoch": 0.6075763148216256, + "grad_norm": 1.6547677592271766, + "learning_rate": 3.523232013030252e-06, + "loss": 0.5767, + "step": 19824 + }, + { + "epoch": 0.6076069633443668, + "grad_norm": 1.6653974990947986, + "learning_rate": 3.522757844193807e-06, + "loss": 0.5521, + "step": 19825 + }, + { + "epoch": 0.607637611867108, + "grad_norm": 1.7149178027321768, + "learning_rate": 3.5222836899130077e-06, + "loss": 0.6067, + "step": 19826 + }, + { + "epoch": 0.6076682603898492, + "grad_norm": 1.558995923971291, + "learning_rate": 3.521809550192522e-06, + "loss": 0.5528, + "step": 19827 + }, + { + "epoch": 0.6076989089125904, + "grad_norm": 1.5862078799449848, + "learning_rate": 3.5213354250370278e-06, + "loss": 0.6373, + "step": 19828 + }, + { + "epoch": 0.6077295574353316, + "grad_norm": 1.6679829180558141, + "learning_rate": 3.5208613144511934e-06, + "loss": 0.6245, + "step": 19829 + }, + { + "epoch": 0.6077602059580728, + "grad_norm": 1.5328844209050148, + "learning_rate": 3.520387218439689e-06, + "loss": 0.5664, + "step": 19830 + }, + { + "epoch": 0.607790854480814, + "grad_norm": 1.7576178951702188, + "learning_rate": 3.5199131370071905e-06, + "loss": 0.5945, + "step": 19831 + }, + { + "epoch": 0.6078215030035552, + "grad_norm": 1.7425509670887922, + "learning_rate": 3.519439070158365e-06, + "loss": 0.6874, + "step": 19832 + }, + { + "epoch": 0.6078521515262965, + "grad_norm": 0.8087553211577486, + "learning_rate": 3.518965017897885e-06, + "loss": 0.4253, + "step": 19833 + }, + { + "epoch": 0.6078828000490376, + "grad_norm": 2.1512339057358907, + "learning_rate": 3.5184909802304228e-06, + "loss": 0.6956, + "step": 19834 + }, + { + "epoch": 0.6079134485717789, + "grad_norm": 1.698194853997438, + "learning_rate": 3.518016957160647e-06, + "loss": 0.633, + "step": 19835 + }, + { + "epoch": 0.60794409709452, + "grad_norm": 1.7992973390531433, + "learning_rate": 3.51754294869323e-06, + "loss": 0.6257, + "step": 19836 + }, + { + "epoch": 0.6079747456172613, + "grad_norm": 1.8991682446926836, + "learning_rate": 3.517068954832843e-06, + "loss": 0.7446, + "step": 19837 + }, + { + "epoch": 0.6080053941400024, + "grad_norm": 1.7600309365711333, + "learning_rate": 3.516594975584151e-06, + "loss": 0.6144, + "step": 19838 + }, + { + "epoch": 0.6080360426627437, + "grad_norm": 1.8087569855646752, + "learning_rate": 3.516121010951832e-06, + "loss": 0.5101, + "step": 19839 + }, + { + "epoch": 0.6080666911854848, + "grad_norm": 1.5616296503886604, + "learning_rate": 3.515647060940551e-06, + "loss": 0.5991, + "step": 19840 + }, + { + "epoch": 0.6080973397082261, + "grad_norm": 1.89290610392304, + "learning_rate": 3.5151731255549794e-06, + "loss": 0.6463, + "step": 19841 + }, + { + "epoch": 0.6081279882309673, + "grad_norm": 1.912765453764979, + "learning_rate": 3.5146992047997864e-06, + "loss": 0.5981, + "step": 19842 + }, + { + "epoch": 0.6081586367537085, + "grad_norm": 1.866503849868564, + "learning_rate": 3.514225298679642e-06, + "loss": 0.586, + "step": 19843 + }, + { + "epoch": 0.6081892852764497, + "grad_norm": 1.780013538159988, + "learning_rate": 3.513751407199217e-06, + "loss": 0.6632, + "step": 19844 + }, + { + "epoch": 0.6082199337991909, + "grad_norm": 1.8217372815226867, + "learning_rate": 3.5132775303631793e-06, + "loss": 0.7041, + "step": 19845 + }, + { + "epoch": 
0.6082505823219321, + "grad_norm": 0.8200774827083941, + "learning_rate": 3.5128036681761975e-06, + "loss": 0.4219, + "step": 19846 + }, + { + "epoch": 0.6082812308446733, + "grad_norm": 1.7671920434085933, + "learning_rate": 3.5123298206429425e-06, + "loss": 0.5507, + "step": 19847 + }, + { + "epoch": 0.6083118793674145, + "grad_norm": 1.9547127411662624, + "learning_rate": 3.5118559877680834e-06, + "loss": 0.6558, + "step": 19848 + }, + { + "epoch": 0.6083425278901557, + "grad_norm": 1.5800966823203682, + "learning_rate": 3.5113821695562867e-06, + "loss": 0.6576, + "step": 19849 + }, + { + "epoch": 0.6083731764128969, + "grad_norm": 2.001220162416779, + "learning_rate": 3.5109083660122233e-06, + "loss": 0.5534, + "step": 19850 + }, + { + "epoch": 0.608403824935638, + "grad_norm": 2.0051408058109415, + "learning_rate": 3.510434577140559e-06, + "loss": 0.6398, + "step": 19851 + }, + { + "epoch": 0.6084344734583793, + "grad_norm": 0.7807772613739978, + "learning_rate": 3.5099608029459653e-06, + "loss": 0.4302, + "step": 19852 + }, + { + "epoch": 0.6084651219811205, + "grad_norm": 1.9909854056090026, + "learning_rate": 3.5094870434331093e-06, + "loss": 0.6092, + "step": 19853 + }, + { + "epoch": 0.6084957705038617, + "grad_norm": 1.8100190810020773, + "learning_rate": 3.5090132986066572e-06, + "loss": 0.6256, + "step": 19854 + }, + { + "epoch": 0.6085264190266029, + "grad_norm": 1.9079610348385176, + "learning_rate": 3.50853956847128e-06, + "loss": 0.6136, + "step": 19855 + }, + { + "epoch": 0.6085570675493441, + "grad_norm": 0.788923251456102, + "learning_rate": 3.508065853031645e-06, + "loss": 0.4224, + "step": 19856 + }, + { + "epoch": 0.6085877160720853, + "grad_norm": 1.8834590988064142, + "learning_rate": 3.507592152292416e-06, + "loss": 0.6532, + "step": 19857 + }, + { + "epoch": 0.6086183645948265, + "grad_norm": 1.8913315812897915, + "learning_rate": 3.5071184662582664e-06, + "loss": 0.6287, + "step": 19858 + }, + { + "epoch": 0.6086490131175677, + "grad_norm": 2.0198441375274165, + "learning_rate": 3.5066447949338573e-06, + "loss": 0.643, + "step": 19859 + }, + { + "epoch": 0.608679661640309, + "grad_norm": 1.749678772057237, + "learning_rate": 3.5061711383238623e-06, + "loss": 0.7076, + "step": 19860 + }, + { + "epoch": 0.6087103101630501, + "grad_norm": 1.7512533073227199, + "learning_rate": 3.5056974964329443e-06, + "loss": 0.6596, + "step": 19861 + }, + { + "epoch": 0.6087409586857914, + "grad_norm": 1.770775768360001, + "learning_rate": 3.50522386926577e-06, + "loss": 0.591, + "step": 19862 + }, + { + "epoch": 0.6087716072085325, + "grad_norm": 0.7483094330874355, + "learning_rate": 3.5047502568270085e-06, + "loss": 0.4233, + "step": 19863 + }, + { + "epoch": 0.6088022557312738, + "grad_norm": 0.7642096561018992, + "learning_rate": 3.504276659121325e-06, + "loss": 0.3945, + "step": 19864 + }, + { + "epoch": 0.6088329042540149, + "grad_norm": 1.8880253453428506, + "learning_rate": 3.5038030761533858e-06, + "loss": 0.6634, + "step": 19865 + }, + { + "epoch": 0.6088635527767562, + "grad_norm": 1.6202578433774417, + "learning_rate": 3.5033295079278585e-06, + "loss": 0.5434, + "step": 19866 + }, + { + "epoch": 0.6088942012994973, + "grad_norm": 1.881760298172005, + "learning_rate": 3.5028559544494095e-06, + "loss": 0.5651, + "step": 19867 + }, + { + "epoch": 0.6089248498222386, + "grad_norm": 1.824633023744843, + "learning_rate": 3.5023824157227003e-06, + "loss": 0.5818, + "step": 19868 + }, + { + "epoch": 0.6089554983449798, + "grad_norm": 0.7841202913566347, + "learning_rate": 
3.501908891752404e-06, + "loss": 0.4092, + "step": 19869 + }, + { + "epoch": 0.608986146867721, + "grad_norm": 1.8313787207076386, + "learning_rate": 3.5014353825431796e-06, + "loss": 0.7043, + "step": 19870 + }, + { + "epoch": 0.6090167953904622, + "grad_norm": 1.7431600584855098, + "learning_rate": 3.5009618880996986e-06, + "loss": 0.603, + "step": 19871 + }, + { + "epoch": 0.6090474439132034, + "grad_norm": 1.9675497862217022, + "learning_rate": 3.5004884084266235e-06, + "loss": 0.6539, + "step": 19872 + }, + { + "epoch": 0.6090780924359446, + "grad_norm": 1.679792700256641, + "learning_rate": 3.5000149435286172e-06, + "loss": 0.6678, + "step": 19873 + }, + { + "epoch": 0.6091087409586858, + "grad_norm": 1.7563381830886322, + "learning_rate": 3.49954149341035e-06, + "loss": 0.6765, + "step": 19874 + }, + { + "epoch": 0.609139389481427, + "grad_norm": 1.6752940303745747, + "learning_rate": 3.4990680580764837e-06, + "loss": 0.5855, + "step": 19875 + }, + { + "epoch": 0.6091700380041682, + "grad_norm": 1.6066336306292253, + "learning_rate": 3.4985946375316828e-06, + "loss": 0.5804, + "step": 19876 + }, + { + "epoch": 0.6092006865269094, + "grad_norm": 1.658640494476141, + "learning_rate": 3.4981212317806133e-06, + "loss": 0.6445, + "step": 19877 + }, + { + "epoch": 0.6092313350496507, + "grad_norm": 1.7047676603108541, + "learning_rate": 3.49764784082794e-06, + "loss": 0.6486, + "step": 19878 + }, + { + "epoch": 0.6092619835723918, + "grad_norm": 0.831856462934535, + "learning_rate": 3.4971744646783267e-06, + "loss": 0.4365, + "step": 19879 + }, + { + "epoch": 0.6092926320951331, + "grad_norm": 1.5833089103476226, + "learning_rate": 3.49670110333644e-06, + "loss": 0.6065, + "step": 19880 + }, + { + "epoch": 0.6093232806178742, + "grad_norm": 1.8510959802298952, + "learning_rate": 3.496227756806938e-06, + "loss": 0.7035, + "step": 19881 + }, + { + "epoch": 0.6093539291406154, + "grad_norm": 1.9195591538779482, + "learning_rate": 3.495754425094493e-06, + "loss": 0.6472, + "step": 19882 + }, + { + "epoch": 0.6093845776633566, + "grad_norm": 1.880418633044292, + "learning_rate": 3.4952811082037626e-06, + "loss": 0.5369, + "step": 19883 + }, + { + "epoch": 0.6094152261860978, + "grad_norm": 1.7750952972561227, + "learning_rate": 3.4948078061394116e-06, + "loss": 0.6375, + "step": 19884 + }, + { + "epoch": 0.609445874708839, + "grad_norm": 2.0130049987325287, + "learning_rate": 3.4943345189061052e-06, + "loss": 0.653, + "step": 19885 + }, + { + "epoch": 0.6094765232315802, + "grad_norm": 1.6265635259484266, + "learning_rate": 3.493861246508506e-06, + "loss": 0.4815, + "step": 19886 + }, + { + "epoch": 0.6095071717543215, + "grad_norm": 1.6629872397322154, + "learning_rate": 3.493387988951277e-06, + "loss": 0.5591, + "step": 19887 + }, + { + "epoch": 0.6095378202770626, + "grad_norm": 1.8872296002934026, + "learning_rate": 3.492914746239081e-06, + "loss": 0.6434, + "step": 19888 + }, + { + "epoch": 0.6095684687998039, + "grad_norm": 1.918461795697678, + "learning_rate": 3.4924415183765826e-06, + "loss": 0.6454, + "step": 19889 + }, + { + "epoch": 0.609599117322545, + "grad_norm": 1.8464757526584508, + "learning_rate": 3.491968305368443e-06, + "loss": 0.744, + "step": 19890 + }, + { + "epoch": 0.6096297658452863, + "grad_norm": 1.6100091043605036, + "learning_rate": 3.4914951072193274e-06, + "loss": 0.5092, + "step": 19891 + }, + { + "epoch": 0.6096604143680274, + "grad_norm": 1.7687147008748894, + "learning_rate": 3.4910219239338938e-06, + "loss": 0.5995, + "step": 19892 + }, + { + "epoch": 
0.6096910628907687, + "grad_norm": 1.5155250534166345, + "learning_rate": 3.4905487555168093e-06, + "loss": 0.5917, + "step": 19893 + }, + { + "epoch": 0.6097217114135098, + "grad_norm": 1.8730217255316841, + "learning_rate": 3.490075601972734e-06, + "loss": 0.6159, + "step": 19894 + }, + { + "epoch": 0.6097523599362511, + "grad_norm": 1.8927900687616237, + "learning_rate": 3.4896024633063288e-06, + "loss": 0.6152, + "step": 19895 + }, + { + "epoch": 0.6097830084589922, + "grad_norm": 1.5789895417909938, + "learning_rate": 3.489129339522258e-06, + "loss": 0.5764, + "step": 19896 + }, + { + "epoch": 0.6098136569817335, + "grad_norm": 1.5458987367943215, + "learning_rate": 3.4886562306251815e-06, + "loss": 0.5397, + "step": 19897 + }, + { + "epoch": 0.6098443055044747, + "grad_norm": 1.6694037899560668, + "learning_rate": 3.4881831366197627e-06, + "loss": 0.6273, + "step": 19898 + }, + { + "epoch": 0.6098749540272159, + "grad_norm": 1.7981876854416465, + "learning_rate": 3.4877100575106622e-06, + "loss": 0.577, + "step": 19899 + }, + { + "epoch": 0.6099056025499571, + "grad_norm": 1.7622098770432861, + "learning_rate": 3.4872369933025404e-06, + "loss": 0.5667, + "step": 19900 + }, + { + "epoch": 0.6099362510726983, + "grad_norm": 1.5508928805578188, + "learning_rate": 3.4867639440000617e-06, + "loss": 0.5388, + "step": 19901 + }, + { + "epoch": 0.6099668995954395, + "grad_norm": 1.9583716968132352, + "learning_rate": 3.486290909607884e-06, + "loss": 0.6497, + "step": 19902 + }, + { + "epoch": 0.6099975481181807, + "grad_norm": 1.7337148880418711, + "learning_rate": 3.4858178901306684e-06, + "loss": 0.5717, + "step": 19903 + }, + { + "epoch": 0.6100281966409219, + "grad_norm": 1.611154654461563, + "learning_rate": 3.4853448855730775e-06, + "loss": 0.5219, + "step": 19904 + }, + { + "epoch": 0.6100588451636632, + "grad_norm": 1.7984799026917166, + "learning_rate": 3.48487189593977e-06, + "loss": 0.6286, + "step": 19905 + }, + { + "epoch": 0.6100894936864043, + "grad_norm": 2.0346893959338175, + "learning_rate": 3.484398921235408e-06, + "loss": 0.5543, + "step": 19906 + }, + { + "epoch": 0.6101201422091456, + "grad_norm": 2.0966612113385854, + "learning_rate": 3.4839259614646516e-06, + "loss": 0.7436, + "step": 19907 + }, + { + "epoch": 0.6101507907318867, + "grad_norm": 1.8670729184800414, + "learning_rate": 3.483453016632159e-06, + "loss": 0.6204, + "step": 19908 + }, + { + "epoch": 0.610181439254628, + "grad_norm": 1.7293615047528035, + "learning_rate": 3.4829800867425933e-06, + "loss": 0.5887, + "step": 19909 + }, + { + "epoch": 0.6102120877773691, + "grad_norm": 1.7984363781133135, + "learning_rate": 3.4825071718006142e-06, + "loss": 0.6578, + "step": 19910 + }, + { + "epoch": 0.6102427363001104, + "grad_norm": 1.7992087854119703, + "learning_rate": 3.4820342718108767e-06, + "loss": 0.5944, + "step": 19911 + }, + { + "epoch": 0.6102733848228515, + "grad_norm": 1.7283814123056962, + "learning_rate": 3.4815613867780474e-06, + "loss": 0.6713, + "step": 19912 + }, + { + "epoch": 0.6103040333455927, + "grad_norm": 1.923431473036008, + "learning_rate": 3.481088516706781e-06, + "loss": 0.6474, + "step": 19913 + }, + { + "epoch": 0.610334681868334, + "grad_norm": 0.8359878807476588, + "learning_rate": 3.4806156616017374e-06, + "loss": 0.4338, + "step": 19914 + }, + { + "epoch": 0.6103653303910751, + "grad_norm": 1.6818956494768607, + "learning_rate": 3.480142821467577e-06, + "loss": 0.6096, + "step": 19915 + }, + { + "epoch": 0.6103959789138164, + "grad_norm": 1.8326107306015886, + 
"learning_rate": 3.4796699963089577e-06, + "loss": 0.627, + "step": 19916 + }, + { + "epoch": 0.6104266274365575, + "grad_norm": 1.7302530409009738, + "learning_rate": 3.4791971861305395e-06, + "loss": 0.5333, + "step": 19917 + }, + { + "epoch": 0.6104572759592988, + "grad_norm": 0.8355383066129372, + "learning_rate": 3.4787243909369806e-06, + "loss": 0.4323, + "step": 19918 + }, + { + "epoch": 0.6104879244820399, + "grad_norm": 0.8307742647583123, + "learning_rate": 3.478251610732939e-06, + "loss": 0.4168, + "step": 19919 + }, + { + "epoch": 0.6105185730047812, + "grad_norm": 2.4704300369888146, + "learning_rate": 3.4777788455230744e-06, + "loss": 0.6208, + "step": 19920 + }, + { + "epoch": 0.6105492215275223, + "grad_norm": 1.5325835654375162, + "learning_rate": 3.477306095312045e-06, + "loss": 0.5168, + "step": 19921 + }, + { + "epoch": 0.6105798700502636, + "grad_norm": 0.806266526003367, + "learning_rate": 3.476833360104505e-06, + "loss": 0.4253, + "step": 19922 + }, + { + "epoch": 0.6106105185730047, + "grad_norm": 1.5570284589386811, + "learning_rate": 3.476360639905119e-06, + "loss": 0.6509, + "step": 19923 + }, + { + "epoch": 0.610641167095746, + "grad_norm": 1.6737145086587373, + "learning_rate": 3.4758879347185386e-06, + "loss": 0.6152, + "step": 19924 + }, + { + "epoch": 0.6106718156184872, + "grad_norm": 1.8060341235781885, + "learning_rate": 3.475415244549427e-06, + "loss": 0.5418, + "step": 19925 + }, + { + "epoch": 0.6107024641412284, + "grad_norm": 1.8681786690255777, + "learning_rate": 3.4749425694024386e-06, + "loss": 0.642, + "step": 19926 + }, + { + "epoch": 0.6107331126639696, + "grad_norm": 1.916182210316396, + "learning_rate": 3.4744699092822296e-06, + "loss": 0.5551, + "step": 19927 + }, + { + "epoch": 0.6107637611867108, + "grad_norm": 1.6375086658605327, + "learning_rate": 3.4739972641934606e-06, + "loss": 0.5446, + "step": 19928 + }, + { + "epoch": 0.610794409709452, + "grad_norm": 1.6022005910285757, + "learning_rate": 3.4735246341407867e-06, + "loss": 0.6808, + "step": 19929 + }, + { + "epoch": 0.6108250582321932, + "grad_norm": 1.9138705683633364, + "learning_rate": 3.473052019128864e-06, + "loss": 0.7126, + "step": 19930 + }, + { + "epoch": 0.6108557067549344, + "grad_norm": 1.619608381796362, + "learning_rate": 3.472579419162352e-06, + "loss": 0.5743, + "step": 19931 + }, + { + "epoch": 0.6108863552776757, + "grad_norm": 1.865997241075373, + "learning_rate": 3.472106834245904e-06, + "loss": 0.6533, + "step": 19932 + }, + { + "epoch": 0.6109170038004168, + "grad_norm": 1.8611485881773138, + "learning_rate": 3.4716342643841796e-06, + "loss": 0.7076, + "step": 19933 + }, + { + "epoch": 0.6109476523231581, + "grad_norm": 0.7800293524032437, + "learning_rate": 3.471161709581835e-06, + "loss": 0.4148, + "step": 19934 + }, + { + "epoch": 0.6109783008458992, + "grad_norm": 1.6934593387435393, + "learning_rate": 3.470689169843522e-06, + "loss": 0.472, + "step": 19935 + }, + { + "epoch": 0.6110089493686405, + "grad_norm": 1.6768976846375907, + "learning_rate": 3.4702166451739026e-06, + "loss": 0.6152, + "step": 19936 + }, + { + "epoch": 0.6110395978913816, + "grad_norm": 1.7192839380700915, + "learning_rate": 3.4697441355776296e-06, + "loss": 0.6182, + "step": 19937 + }, + { + "epoch": 0.6110702464141229, + "grad_norm": 1.7433112750589683, + "learning_rate": 3.4692716410593587e-06, + "loss": 0.5994, + "step": 19938 + }, + { + "epoch": 0.611100894936864, + "grad_norm": 2.0226069442733827, + "learning_rate": 3.468799161623746e-06, + "loss": 0.6901, + "step": 19939 + }, 
+ { + "epoch": 0.6111315434596053, + "grad_norm": 0.7689345265940392, + "learning_rate": 3.468326697275447e-06, + "loss": 0.4199, + "step": 19940 + }, + { + "epoch": 0.6111621919823464, + "grad_norm": 1.9488739564342525, + "learning_rate": 3.467854248019116e-06, + "loss": 0.7379, + "step": 19941 + }, + { + "epoch": 0.6111928405050877, + "grad_norm": 0.8082612425966784, + "learning_rate": 3.4673818138594107e-06, + "loss": 0.4128, + "step": 19942 + }, + { + "epoch": 0.6112234890278289, + "grad_norm": 0.8985551354173343, + "learning_rate": 3.466909394800983e-06, + "loss": 0.4202, + "step": 19943 + }, + { + "epoch": 0.61125413755057, + "grad_norm": 1.614728505051623, + "learning_rate": 3.4664369908484912e-06, + "loss": 0.4841, + "step": 19944 + }, + { + "epoch": 0.6112847860733113, + "grad_norm": 1.8198223968984328, + "learning_rate": 3.4659646020065874e-06, + "loss": 0.6967, + "step": 19945 + }, + { + "epoch": 0.6113154345960524, + "grad_norm": 1.9245423536681656, + "learning_rate": 3.4654922282799256e-06, + "loss": 0.6509, + "step": 19946 + }, + { + "epoch": 0.6113460831187937, + "grad_norm": 1.634879772646968, + "learning_rate": 3.4650198696731627e-06, + "loss": 0.6194, + "step": 19947 + }, + { + "epoch": 0.6113767316415348, + "grad_norm": 1.9784908846312632, + "learning_rate": 3.4645475261909524e-06, + "loss": 0.6865, + "step": 19948 + }, + { + "epoch": 0.6114073801642761, + "grad_norm": 1.7688147713421396, + "learning_rate": 3.464075197837946e-06, + "loss": 0.681, + "step": 19949 + }, + { + "epoch": 0.6114380286870172, + "grad_norm": 1.944159711976541, + "learning_rate": 3.463602884618801e-06, + "loss": 0.62, + "step": 19950 + }, + { + "epoch": 0.6114686772097585, + "grad_norm": 1.8201761058787178, + "learning_rate": 3.4631305865381693e-06, + "loss": 0.65, + "step": 19951 + }, + { + "epoch": 0.6114993257324997, + "grad_norm": 1.725570094939191, + "learning_rate": 3.4626583036007055e-06, + "loss": 0.6386, + "step": 19952 + }, + { + "epoch": 0.6115299742552409, + "grad_norm": 1.75938033345263, + "learning_rate": 3.462186035811065e-06, + "loss": 0.5829, + "step": 19953 + }, + { + "epoch": 0.6115606227779821, + "grad_norm": 1.7441176420148536, + "learning_rate": 3.4617137831738945e-06, + "loss": 0.6262, + "step": 19954 + }, + { + "epoch": 0.6115912713007233, + "grad_norm": 1.7248525209283723, + "learning_rate": 3.4612415456938553e-06, + "loss": 0.5748, + "step": 19955 + }, + { + "epoch": 0.6116219198234645, + "grad_norm": 1.6703006748645273, + "learning_rate": 3.4607693233755958e-06, + "loss": 0.4721, + "step": 19956 + }, + { + "epoch": 0.6116525683462057, + "grad_norm": 1.6894366233872347, + "learning_rate": 3.460297116223769e-06, + "loss": 0.6345, + "step": 19957 + }, + { + "epoch": 0.6116832168689469, + "grad_norm": 1.824493387359436, + "learning_rate": 3.4598249242430304e-06, + "loss": 0.5629, + "step": 19958 + }, + { + "epoch": 0.6117138653916881, + "grad_norm": 1.850409249663015, + "learning_rate": 3.4593527474380288e-06, + "loss": 0.6873, + "step": 19959 + }, + { + "epoch": 0.6117445139144293, + "grad_norm": 1.794035129319999, + "learning_rate": 3.45888058581342e-06, + "loss": 0.608, + "step": 19960 + }, + { + "epoch": 0.6117751624371706, + "grad_norm": 1.9510783093868642, + "learning_rate": 3.458408439373856e-06, + "loss": 0.6274, + "step": 19961 + }, + { + "epoch": 0.6118058109599117, + "grad_norm": 1.7245709593860234, + "learning_rate": 3.4579363081239857e-06, + "loss": 0.543, + "step": 19962 + }, + { + "epoch": 0.611836459482653, + "grad_norm": 1.5854482135506311, + 
"learning_rate": 3.4574641920684653e-06, + "loss": 0.6055, + "step": 19963 + }, + { + "epoch": 0.6118671080053941, + "grad_norm": 1.5911577796745842, + "learning_rate": 3.4569920912119458e-06, + "loss": 0.6409, + "step": 19964 + }, + { + "epoch": 0.6118977565281354, + "grad_norm": 1.7780183518939563, + "learning_rate": 3.456520005559075e-06, + "loss": 0.5224, + "step": 19965 + }, + { + "epoch": 0.6119284050508765, + "grad_norm": 1.8128372667428372, + "learning_rate": 3.4560479351145103e-06, + "loss": 0.5971, + "step": 19966 + }, + { + "epoch": 0.6119590535736178, + "grad_norm": 0.8567292074812936, + "learning_rate": 3.4555758798829e-06, + "loss": 0.4205, + "step": 19967 + }, + { + "epoch": 0.611989702096359, + "grad_norm": 1.7906051423839031, + "learning_rate": 3.4551038398688943e-06, + "loss": 0.6378, + "step": 19968 + }, + { + "epoch": 0.6120203506191002, + "grad_norm": 1.7714216259599602, + "learning_rate": 3.4546318150771463e-06, + "loss": 0.6599, + "step": 19969 + }, + { + "epoch": 0.6120509991418414, + "grad_norm": 1.8365157872297218, + "learning_rate": 3.454159805512306e-06, + "loss": 0.5142, + "step": 19970 + }, + { + "epoch": 0.6120816476645826, + "grad_norm": 1.8969356746377972, + "learning_rate": 3.453687811179025e-06, + "loss": 0.5953, + "step": 19971 + }, + { + "epoch": 0.6121122961873238, + "grad_norm": 1.7763733639534176, + "learning_rate": 3.4532158320819543e-06, + "loss": 0.6604, + "step": 19972 + }, + { + "epoch": 0.612142944710065, + "grad_norm": 2.0217318433874296, + "learning_rate": 3.452743868225743e-06, + "loss": 0.6248, + "step": 19973 + }, + { + "epoch": 0.6121735932328062, + "grad_norm": 0.7964805291022317, + "learning_rate": 3.4522719196150423e-06, + "loss": 0.4155, + "step": 19974 + }, + { + "epoch": 0.6122042417555473, + "grad_norm": 1.6651579033522297, + "learning_rate": 3.4517999862545045e-06, + "loss": 0.5984, + "step": 19975 + }, + { + "epoch": 0.6122348902782886, + "grad_norm": 2.1074343278902807, + "learning_rate": 3.4513280681487738e-06, + "loss": 0.7014, + "step": 19976 + }, + { + "epoch": 0.6122655388010297, + "grad_norm": 1.838961746501008, + "learning_rate": 3.4508561653025076e-06, + "loss": 0.5935, + "step": 19977 + }, + { + "epoch": 0.612296187323771, + "grad_norm": 1.803851408586033, + "learning_rate": 3.450384277720348e-06, + "loss": 0.5537, + "step": 19978 + }, + { + "epoch": 0.6123268358465122, + "grad_norm": 1.6646983827644715, + "learning_rate": 3.449912405406952e-06, + "loss": 0.6304, + "step": 19979 + }, + { + "epoch": 0.6123574843692534, + "grad_norm": 1.697830490527625, + "learning_rate": 3.449440548366965e-06, + "loss": 0.689, + "step": 19980 + }, + { + "epoch": 0.6123881328919946, + "grad_norm": 1.736357745544882, + "learning_rate": 3.4489687066050353e-06, + "loss": 0.6053, + "step": 19981 + }, + { + "epoch": 0.6124187814147358, + "grad_norm": 1.6854563891033905, + "learning_rate": 3.448496880125815e-06, + "loss": 0.6067, + "step": 19982 + }, + { + "epoch": 0.612449429937477, + "grad_norm": 2.12815909762349, + "learning_rate": 3.4480250689339522e-06, + "loss": 0.6525, + "step": 19983 + }, + { + "epoch": 0.6124800784602182, + "grad_norm": 1.7035006428275052, + "learning_rate": 3.4475532730340944e-06, + "loss": 0.5321, + "step": 19984 + }, + { + "epoch": 0.6125107269829594, + "grad_norm": 1.9481053465771747, + "learning_rate": 3.4470814924308926e-06, + "loss": 0.7404, + "step": 19985 + }, + { + "epoch": 0.6125413755057006, + "grad_norm": 1.8110605230459702, + "learning_rate": 3.446609727128993e-06, + "loss": 0.5532, + "step": 19986 + }, + 
{ + "epoch": 0.6125720240284418, + "grad_norm": 1.8468135554546095, + "learning_rate": 3.446137977133046e-06, + "loss": 0.6003, + "step": 19987 + }, + { + "epoch": 0.6126026725511831, + "grad_norm": 1.867071884782059, + "learning_rate": 3.4456662424477006e-06, + "loss": 0.6042, + "step": 19988 + }, + { + "epoch": 0.6126333210739242, + "grad_norm": 1.7744909770367028, + "learning_rate": 3.4451945230776007e-06, + "loss": 0.654, + "step": 19989 + }, + { + "epoch": 0.6126639695966655, + "grad_norm": 1.846208672481918, + "learning_rate": 3.4447228190273987e-06, + "loss": 0.6927, + "step": 19990 + }, + { + "epoch": 0.6126946181194066, + "grad_norm": 1.6803082565785885, + "learning_rate": 3.44425113030174e-06, + "loss": 0.5172, + "step": 19991 + }, + { + "epoch": 0.6127252666421479, + "grad_norm": 1.5672458651413137, + "learning_rate": 3.4437794569052724e-06, + "loss": 0.6223, + "step": 19992 + }, + { + "epoch": 0.612755915164889, + "grad_norm": 1.7549886826411691, + "learning_rate": 3.443307798842645e-06, + "loss": 0.6464, + "step": 19993 + }, + { + "epoch": 0.6127865636876303, + "grad_norm": 1.5864909348206622, + "learning_rate": 3.4428361561185043e-06, + "loss": 0.5562, + "step": 19994 + }, + { + "epoch": 0.6128172122103714, + "grad_norm": 1.7517261628391916, + "learning_rate": 3.442364528737496e-06, + "loss": 0.6504, + "step": 19995 + }, + { + "epoch": 0.6128478607331127, + "grad_norm": 1.8167620063522247, + "learning_rate": 3.4418929167042704e-06, + "loss": 0.6556, + "step": 19996 + }, + { + "epoch": 0.6128785092558539, + "grad_norm": 1.793509808038817, + "learning_rate": 3.4414213200234696e-06, + "loss": 0.5641, + "step": 19997 + }, + { + "epoch": 0.6129091577785951, + "grad_norm": 1.7114671785459328, + "learning_rate": 3.4409497386997472e-06, + "loss": 0.6209, + "step": 19998 + }, + { + "epoch": 0.6129398063013363, + "grad_norm": 1.791478707770155, + "learning_rate": 3.440478172737744e-06, + "loss": 0.6238, + "step": 19999 + }, + { + "epoch": 0.6129704548240775, + "grad_norm": 0.7932899176650432, + "learning_rate": 3.4400066221421073e-06, + "loss": 0.4272, + "step": 20000 + }, + { + "epoch": 0.6130011033468187, + "grad_norm": 1.5582187793502618, + "learning_rate": 3.439535086917486e-06, + "loss": 0.5355, + "step": 20001 + }, + { + "epoch": 0.6130317518695599, + "grad_norm": 1.6432438115527819, + "learning_rate": 3.4390635670685244e-06, + "loss": 0.6106, + "step": 20002 + }, + { + "epoch": 0.6130624003923011, + "grad_norm": 0.7702442281193594, + "learning_rate": 3.438592062599868e-06, + "loss": 0.4168, + "step": 20003 + }, + { + "epoch": 0.6130930489150423, + "grad_norm": 1.7376324153501788, + "learning_rate": 3.438120573516165e-06, + "loss": 0.5901, + "step": 20004 + }, + { + "epoch": 0.6131236974377835, + "grad_norm": 1.753736795617146, + "learning_rate": 3.437649099822058e-06, + "loss": 0.6226, + "step": 20005 + }, + { + "epoch": 0.6131543459605246, + "grad_norm": 1.777063877877705, + "learning_rate": 3.437177641522196e-06, + "loss": 0.7053, + "step": 20006 + }, + { + "epoch": 0.6131849944832659, + "grad_norm": 1.8088983242274714, + "learning_rate": 3.4367061986212223e-06, + "loss": 0.603, + "step": 20007 + }, + { + "epoch": 0.6132156430060071, + "grad_norm": 1.9045621350712796, + "learning_rate": 3.4362347711237797e-06, + "loss": 0.7115, + "step": 20008 + }, + { + "epoch": 0.6132462915287483, + "grad_norm": 1.8527516443235297, + "learning_rate": 3.4357633590345195e-06, + "loss": 0.6156, + "step": 20009 + }, + { + "epoch": 0.6132769400514895, + "grad_norm": 1.6793077033945087, + 
"learning_rate": 3.435291962358082e-06, + "loss": 0.5611, + "step": 20010 + }, + { + "epoch": 0.6133075885742307, + "grad_norm": 1.5734035033854812, + "learning_rate": 3.434820581099112e-06, + "loss": 0.5752, + "step": 20011 + }, + { + "epoch": 0.6133382370969719, + "grad_norm": 1.6116163536965884, + "learning_rate": 3.4343492152622564e-06, + "loss": 0.6367, + "step": 20012 + }, + { + "epoch": 0.6133688856197131, + "grad_norm": 1.6179526099450392, + "learning_rate": 3.4338778648521575e-06, + "loss": 0.6605, + "step": 20013 + }, + { + "epoch": 0.6133995341424543, + "grad_norm": 1.7544778727898451, + "learning_rate": 3.433406529873462e-06, + "loss": 0.6575, + "step": 20014 + }, + { + "epoch": 0.6134301826651956, + "grad_norm": 1.799343005699092, + "learning_rate": 3.4329352103308123e-06, + "loss": 0.6923, + "step": 20015 + }, + { + "epoch": 0.6134608311879367, + "grad_norm": 1.4913997299974293, + "learning_rate": 3.432463906228852e-06, + "loss": 0.4809, + "step": 20016 + }, + { + "epoch": 0.613491479710678, + "grad_norm": 1.9645507705343332, + "learning_rate": 3.4319926175722272e-06, + "loss": 0.6636, + "step": 20017 + }, + { + "epoch": 0.6135221282334191, + "grad_norm": 1.59531779250335, + "learning_rate": 3.4315213443655816e-06, + "loss": 0.5708, + "step": 20018 + }, + { + "epoch": 0.6135527767561604, + "grad_norm": 1.8126964664500194, + "learning_rate": 3.4310500866135543e-06, + "loss": 0.6294, + "step": 20019 + }, + { + "epoch": 0.6135834252789015, + "grad_norm": 1.7932672370294707, + "learning_rate": 3.4305788443207944e-06, + "loss": 0.6159, + "step": 20020 + }, + { + "epoch": 0.6136140738016428, + "grad_norm": 0.7886402986879186, + "learning_rate": 3.4301076174919423e-06, + "loss": 0.4083, + "step": 20021 + }, + { + "epoch": 0.6136447223243839, + "grad_norm": 1.766195046218155, + "learning_rate": 3.4296364061316402e-06, + "loss": 0.6704, + "step": 20022 + }, + { + "epoch": 0.6136753708471252, + "grad_norm": 1.6482672573676276, + "learning_rate": 3.4291652102445337e-06, + "loss": 0.6541, + "step": 20023 + }, + { + "epoch": 0.6137060193698664, + "grad_norm": 0.8093002978717676, + "learning_rate": 3.4286940298352627e-06, + "loss": 0.4389, + "step": 20024 + }, + { + "epoch": 0.6137366678926076, + "grad_norm": 1.871917110026218, + "learning_rate": 3.4282228649084733e-06, + "loss": 0.6208, + "step": 20025 + }, + { + "epoch": 0.6137673164153488, + "grad_norm": 1.7021626125702656, + "learning_rate": 3.4277517154688055e-06, + "loss": 0.6087, + "step": 20026 + }, + { + "epoch": 0.61379796493809, + "grad_norm": 1.6645656957059078, + "learning_rate": 3.4272805815209015e-06, + "loss": 0.6762, + "step": 20027 + }, + { + "epoch": 0.6138286134608312, + "grad_norm": 1.5781858689575243, + "learning_rate": 3.4268094630694047e-06, + "loss": 0.6801, + "step": 20028 + }, + { + "epoch": 0.6138592619835724, + "grad_norm": 1.8485982816102924, + "learning_rate": 3.4263383601189594e-06, + "loss": 0.639, + "step": 20029 + }, + { + "epoch": 0.6138899105063136, + "grad_norm": 1.6246604818341814, + "learning_rate": 3.4258672726742005e-06, + "loss": 0.5879, + "step": 20030 + }, + { + "epoch": 0.6139205590290548, + "grad_norm": 1.796829506752128, + "learning_rate": 3.425396200739778e-06, + "loss": 0.5891, + "step": 20031 + }, + { + "epoch": 0.613951207551796, + "grad_norm": 0.7993164588078756, + "learning_rate": 3.4249251443203256e-06, + "loss": 0.4148, + "step": 20032 + }, + { + "epoch": 0.6139818560745373, + "grad_norm": 1.5002944215257816, + "learning_rate": 3.4244541034204926e-06, + "loss": 0.5566, + "step": 20033 
+ }, + { + "epoch": 0.6140125045972784, + "grad_norm": 1.8860573071433926, + "learning_rate": 3.4239830780449147e-06, + "loss": 0.599, + "step": 20034 + }, + { + "epoch": 0.6140431531200197, + "grad_norm": 1.783600602605681, + "learning_rate": 3.423512068198234e-06, + "loss": 0.5987, + "step": 20035 + }, + { + "epoch": 0.6140738016427608, + "grad_norm": 1.675587766348633, + "learning_rate": 3.423041073885094e-06, + "loss": 0.5072, + "step": 20036 + }, + { + "epoch": 0.614104450165502, + "grad_norm": 1.6340383517754276, + "learning_rate": 3.422570095110133e-06, + "loss": 0.5281, + "step": 20037 + }, + { + "epoch": 0.6141350986882432, + "grad_norm": 1.777752767049151, + "learning_rate": 3.4220991318779917e-06, + "loss": 0.6681, + "step": 20038 + }, + { + "epoch": 0.6141657472109844, + "grad_norm": 1.9688653279111723, + "learning_rate": 3.4216281841933126e-06, + "loss": 0.6657, + "step": 20039 + }, + { + "epoch": 0.6141963957337256, + "grad_norm": 1.5523403353885474, + "learning_rate": 3.4211572520607334e-06, + "loss": 0.482, + "step": 20040 + }, + { + "epoch": 0.6142270442564668, + "grad_norm": 0.7814596916900892, + "learning_rate": 3.4206863354848978e-06, + "loss": 0.4033, + "step": 20041 + }, + { + "epoch": 0.614257692779208, + "grad_norm": 1.8410437448592971, + "learning_rate": 3.420215434470443e-06, + "loss": 0.5989, + "step": 20042 + }, + { + "epoch": 0.6142883413019492, + "grad_norm": 1.7418446633119873, + "learning_rate": 3.4197445490220086e-06, + "loss": 0.6542, + "step": 20043 + }, + { + "epoch": 0.6143189898246905, + "grad_norm": 1.673367866451533, + "learning_rate": 3.419273679144237e-06, + "loss": 0.6668, + "step": 20044 + }, + { + "epoch": 0.6143496383474316, + "grad_norm": 1.6289735093462363, + "learning_rate": 3.418802824841766e-06, + "loss": 0.5541, + "step": 20045 + }, + { + "epoch": 0.6143802868701729, + "grad_norm": 1.5759952115592668, + "learning_rate": 3.4183319861192344e-06, + "loss": 0.6518, + "step": 20046 + }, + { + "epoch": 0.614410935392914, + "grad_norm": 1.5376172109979471, + "learning_rate": 3.417861162981283e-06, + "loss": 0.4987, + "step": 20047 + }, + { + "epoch": 0.6144415839156553, + "grad_norm": 1.782460504151838, + "learning_rate": 3.4173903554325517e-06, + "loss": 0.5727, + "step": 20048 + }, + { + "epoch": 0.6144722324383964, + "grad_norm": 1.8940553883762383, + "learning_rate": 3.4169195634776747e-06, + "loss": 0.596, + "step": 20049 + }, + { + "epoch": 0.6145028809611377, + "grad_norm": 0.7428395652807265, + "learning_rate": 3.416448787121298e-06, + "loss": 0.3997, + "step": 20050 + }, + { + "epoch": 0.6145335294838788, + "grad_norm": 1.8135091749693926, + "learning_rate": 3.4159780263680533e-06, + "loss": 0.6217, + "step": 20051 + }, + { + "epoch": 0.6145641780066201, + "grad_norm": 1.9277497824843124, + "learning_rate": 3.4155072812225852e-06, + "loss": 0.6291, + "step": 20052 + }, + { + "epoch": 0.6145948265293613, + "grad_norm": 1.8790093718581526, + "learning_rate": 3.4150365516895285e-06, + "loss": 0.658, + "step": 20053 + }, + { + "epoch": 0.6146254750521025, + "grad_norm": 0.7995765408707408, + "learning_rate": 3.4145658377735206e-06, + "loss": 0.4283, + "step": 20054 + }, + { + "epoch": 0.6146561235748437, + "grad_norm": 1.9624226167223278, + "learning_rate": 3.4140951394792033e-06, + "loss": 0.5583, + "step": 20055 + }, + { + "epoch": 0.6146867720975849, + "grad_norm": 1.8756018549397564, + "learning_rate": 3.4136244568112115e-06, + "loss": 0.5759, + "step": 20056 + }, + { + "epoch": 0.6147174206203261, + "grad_norm": 1.7837261931292492, + 
"learning_rate": 3.4131537897741828e-06, + "loss": 0.678, + "step": 20057 + }, + { + "epoch": 0.6147480691430673, + "grad_norm": 1.6980636749193287, + "learning_rate": 3.412683138372757e-06, + "loss": 0.539, + "step": 20058 + }, + { + "epoch": 0.6147787176658085, + "grad_norm": 1.7787679813962698, + "learning_rate": 3.412212502611569e-06, + "loss": 0.5993, + "step": 20059 + }, + { + "epoch": 0.6148093661885498, + "grad_norm": 1.8265360096688936, + "learning_rate": 3.4117418824952597e-06, + "loss": 0.5614, + "step": 20060 + }, + { + "epoch": 0.6148400147112909, + "grad_norm": 1.9015586254671593, + "learning_rate": 3.411271278028464e-06, + "loss": 0.6584, + "step": 20061 + }, + { + "epoch": 0.6148706632340322, + "grad_norm": 2.0393518713925034, + "learning_rate": 3.4108006892158162e-06, + "loss": 0.5675, + "step": 20062 + }, + { + "epoch": 0.6149013117567733, + "grad_norm": 1.7959364010374477, + "learning_rate": 3.4103301160619585e-06, + "loss": 0.591, + "step": 20063 + }, + { + "epoch": 0.6149319602795146, + "grad_norm": 1.7361004427660214, + "learning_rate": 3.4098595585715243e-06, + "loss": 0.6116, + "step": 20064 + }, + { + "epoch": 0.6149626088022557, + "grad_norm": 1.5804790946154477, + "learning_rate": 3.4093890167491493e-06, + "loss": 0.4916, + "step": 20065 + }, + { + "epoch": 0.614993257324997, + "grad_norm": 1.9567642457832108, + "learning_rate": 3.408918490599472e-06, + "loss": 0.6679, + "step": 20066 + }, + { + "epoch": 0.6150239058477381, + "grad_norm": 1.75380046389947, + "learning_rate": 3.4084479801271285e-06, + "loss": 0.644, + "step": 20067 + }, + { + "epoch": 0.6150545543704793, + "grad_norm": 2.027968584140985, + "learning_rate": 3.407977485336754e-06, + "loss": 0.6257, + "step": 20068 + }, + { + "epoch": 0.6150852028932206, + "grad_norm": 1.7250251230232734, + "learning_rate": 3.4075070062329847e-06, + "loss": 0.5776, + "step": 20069 + }, + { + "epoch": 0.6151158514159617, + "grad_norm": 1.6651339808987844, + "learning_rate": 3.4070365428204555e-06, + "loss": 0.5115, + "step": 20070 + }, + { + "epoch": 0.615146499938703, + "grad_norm": 1.7421881210814025, + "learning_rate": 3.406566095103804e-06, + "loss": 0.5492, + "step": 20071 + }, + { + "epoch": 0.6151771484614441, + "grad_norm": 1.8180253769082626, + "learning_rate": 3.4060956630876653e-06, + "loss": 0.6528, + "step": 20072 + }, + { + "epoch": 0.6152077969841854, + "grad_norm": 1.7928373504466275, + "learning_rate": 3.4056252467766703e-06, + "loss": 0.6316, + "step": 20073 + }, + { + "epoch": 0.6152384455069265, + "grad_norm": 1.9710521952462354, + "learning_rate": 3.4051548461754615e-06, + "loss": 0.6405, + "step": 20074 + }, + { + "epoch": 0.6152690940296678, + "grad_norm": 1.7231749671357202, + "learning_rate": 3.404684461288669e-06, + "loss": 0.6184, + "step": 20075 + }, + { + "epoch": 0.6152997425524089, + "grad_norm": 2.0552290939645004, + "learning_rate": 3.4042140921209265e-06, + "loss": 0.6759, + "step": 20076 + }, + { + "epoch": 0.6153303910751502, + "grad_norm": 1.5313125578000633, + "learning_rate": 3.4037437386768735e-06, + "loss": 0.5439, + "step": 20077 + }, + { + "epoch": 0.6153610395978913, + "grad_norm": 0.8309705053159724, + "learning_rate": 3.4032734009611403e-06, + "loss": 0.4118, + "step": 20078 + }, + { + "epoch": 0.6153916881206326, + "grad_norm": 1.7543353754075754, + "learning_rate": 3.4028030789783635e-06, + "loss": 0.6688, + "step": 20079 + }, + { + "epoch": 0.6154223366433738, + "grad_norm": 1.851021379307945, + "learning_rate": 3.4023327727331768e-06, + "loss": 0.6358, + "step": 20080 
+ }, + { + "epoch": 0.615452985166115, + "grad_norm": 1.6418874880417313, + "learning_rate": 3.4018624822302126e-06, + "loss": 0.6295, + "step": 20081 + }, + { + "epoch": 0.6154836336888562, + "grad_norm": 1.6983298574147334, + "learning_rate": 3.401392207474108e-06, + "loss": 0.6361, + "step": 20082 + }, + { + "epoch": 0.6155142822115974, + "grad_norm": 1.8624512245382157, + "learning_rate": 3.4009219484694954e-06, + "loss": 0.6783, + "step": 20083 + }, + { + "epoch": 0.6155449307343386, + "grad_norm": 0.7685748693144909, + "learning_rate": 3.4004517052210056e-06, + "loss": 0.4262, + "step": 20084 + }, + { + "epoch": 0.6155755792570798, + "grad_norm": 1.8430822870875407, + "learning_rate": 3.3999814777332774e-06, + "loss": 0.5559, + "step": 20085 + }, + { + "epoch": 0.615606227779821, + "grad_norm": 1.8603856270891457, + "learning_rate": 3.399511266010939e-06, + "loss": 0.5861, + "step": 20086 + }, + { + "epoch": 0.6156368763025623, + "grad_norm": 1.6607599641790691, + "learning_rate": 3.3990410700586262e-06, + "loss": 0.5714, + "step": 20087 + }, + { + "epoch": 0.6156675248253034, + "grad_norm": 1.9637178937525026, + "learning_rate": 3.398570889880971e-06, + "loss": 0.6514, + "step": 20088 + }, + { + "epoch": 0.6156981733480447, + "grad_norm": 0.7811621607024806, + "learning_rate": 3.3981007254826064e-06, + "loss": 0.402, + "step": 20089 + }, + { + "epoch": 0.6157288218707858, + "grad_norm": 1.8543559557747347, + "learning_rate": 3.3976305768681662e-06, + "loss": 0.6817, + "step": 20090 + }, + { + "epoch": 0.6157594703935271, + "grad_norm": 1.77774385883581, + "learning_rate": 3.3971604440422813e-06, + "loss": 0.5777, + "step": 20091 + }, + { + "epoch": 0.6157901189162682, + "grad_norm": 1.7158308121996813, + "learning_rate": 3.396690327009584e-06, + "loss": 0.5891, + "step": 20092 + }, + { + "epoch": 0.6158207674390095, + "grad_norm": 1.6585224702677124, + "learning_rate": 3.396220225774709e-06, + "loss": 0.5872, + "step": 20093 + }, + { + "epoch": 0.6158514159617506, + "grad_norm": 0.7957802946866845, + "learning_rate": 3.395750140342286e-06, + "loss": 0.4278, + "step": 20094 + }, + { + "epoch": 0.6158820644844919, + "grad_norm": 1.7610462325601763, + "learning_rate": 3.395280070716946e-06, + "loss": 0.6424, + "step": 20095 + }, + { + "epoch": 0.615912713007233, + "grad_norm": 0.7977072470730368, + "learning_rate": 3.3948100169033225e-06, + "loss": 0.4291, + "step": 20096 + }, + { + "epoch": 0.6159433615299743, + "grad_norm": 1.8617544704837792, + "learning_rate": 3.394339978906046e-06, + "loss": 0.6128, + "step": 20097 + }, + { + "epoch": 0.6159740100527155, + "grad_norm": 1.576848517484986, + "learning_rate": 3.39386995672975e-06, + "loss": 0.6353, + "step": 20098 + }, + { + "epoch": 0.6160046585754566, + "grad_norm": 2.0080551778808733, + "learning_rate": 3.3933999503790638e-06, + "loss": 0.5373, + "step": 20099 + }, + { + "epoch": 0.6160353070981979, + "grad_norm": 1.8456865700024587, + "learning_rate": 3.3929299598586183e-06, + "loss": 0.7118, + "step": 20100 + }, + { + "epoch": 0.616065955620939, + "grad_norm": 2.066478330550117, + "learning_rate": 3.3924599851730456e-06, + "loss": 0.4885, + "step": 20101 + }, + { + "epoch": 0.6160966041436803, + "grad_norm": 1.7648272848024962, + "learning_rate": 3.391990026326977e-06, + "loss": 0.673, + "step": 20102 + }, + { + "epoch": 0.6161272526664214, + "grad_norm": 1.6410537809195198, + "learning_rate": 3.3915200833250393e-06, + "loss": 0.561, + "step": 20103 + }, + { + "epoch": 0.6161579011891627, + "grad_norm": 1.5506389393267563, + 
"learning_rate": 3.391050156171869e-06, + "loss": 0.5648, + "step": 20104 + }, + { + "epoch": 0.6161885497119038, + "grad_norm": 1.6457183343203918, + "learning_rate": 3.39058024487209e-06, + "loss": 0.6095, + "step": 20105 + }, + { + "epoch": 0.6162191982346451, + "grad_norm": 2.170605742737231, + "learning_rate": 3.390110349430339e-06, + "loss": 0.6197, + "step": 20106 + }, + { + "epoch": 0.6162498467573863, + "grad_norm": 1.7833314171072312, + "learning_rate": 3.389640469851241e-06, + "loss": 0.5679, + "step": 20107 + }, + { + "epoch": 0.6162804952801275, + "grad_norm": 1.7404047261229634, + "learning_rate": 3.3891706061394263e-06, + "loss": 0.5651, + "step": 20108 + }, + { + "epoch": 0.6163111438028687, + "grad_norm": 0.7908685663589469, + "learning_rate": 3.388700758299527e-06, + "loss": 0.4402, + "step": 20109 + }, + { + "epoch": 0.6163417923256099, + "grad_norm": 1.9672364098608397, + "learning_rate": 3.388230926336172e-06, + "loss": 0.6924, + "step": 20110 + }, + { + "epoch": 0.6163724408483511, + "grad_norm": 1.6147982671771983, + "learning_rate": 3.3877611102539885e-06, + "loss": 0.5992, + "step": 20111 + }, + { + "epoch": 0.6164030893710923, + "grad_norm": 0.8396270476273787, + "learning_rate": 3.387291310057608e-06, + "loss": 0.4253, + "step": 20112 + }, + { + "epoch": 0.6164337378938335, + "grad_norm": 1.6659754559237625, + "learning_rate": 3.3868215257516583e-06, + "loss": 0.6147, + "step": 20113 + }, + { + "epoch": 0.6164643864165747, + "grad_norm": 1.6408083455195424, + "learning_rate": 3.38635175734077e-06, + "loss": 0.505, + "step": 20114 + }, + { + "epoch": 0.6164950349393159, + "grad_norm": 1.8624678023761863, + "learning_rate": 3.3858820048295714e-06, + "loss": 0.6347, + "step": 20115 + }, + { + "epoch": 0.6165256834620572, + "grad_norm": 1.7556317440369213, + "learning_rate": 3.3854122682226873e-06, + "loss": 0.6689, + "step": 20116 + }, + { + "epoch": 0.6165563319847983, + "grad_norm": 1.7116642879968686, + "learning_rate": 3.3849425475247533e-06, + "loss": 0.6298, + "step": 20117 + }, + { + "epoch": 0.6165869805075396, + "grad_norm": 1.7431970752394768, + "learning_rate": 3.384472842740392e-06, + "loss": 0.6799, + "step": 20118 + }, + { + "epoch": 0.6166176290302807, + "grad_norm": 1.781639446479928, + "learning_rate": 3.384003153874231e-06, + "loss": 0.6285, + "step": 20119 + }, + { + "epoch": 0.616648277553022, + "grad_norm": 1.5198096986571643, + "learning_rate": 3.383533480930903e-06, + "loss": 0.5719, + "step": 20120 + }, + { + "epoch": 0.6166789260757631, + "grad_norm": 1.8228237902557751, + "learning_rate": 3.383063823915032e-06, + "loss": 0.6642, + "step": 20121 + }, + { + "epoch": 0.6167095745985044, + "grad_norm": 1.6969054072013925, + "learning_rate": 3.3825941828312463e-06, + "loss": 0.5156, + "step": 20122 + }, + { + "epoch": 0.6167402231212455, + "grad_norm": 1.7962852661937943, + "learning_rate": 3.382124557684175e-06, + "loss": 0.6389, + "step": 20123 + }, + { + "epoch": 0.6167708716439868, + "grad_norm": 0.8166881409786969, + "learning_rate": 3.3816549484784434e-06, + "loss": 0.4102, + "step": 20124 + }, + { + "epoch": 0.616801520166728, + "grad_norm": 1.8446923382208367, + "learning_rate": 3.38118535521868e-06, + "loss": 0.5628, + "step": 20125 + }, + { + "epoch": 0.6168321686894692, + "grad_norm": 1.7080002601940938, + "learning_rate": 3.3807157779095135e-06, + "loss": 0.596, + "step": 20126 + }, + { + "epoch": 0.6168628172122104, + "grad_norm": 2.0288005672649434, + "learning_rate": 3.3802462165555653e-06, + "loss": 0.6754, + "step": 20127 + }, + 
{ + "epoch": 0.6168934657349516, + "grad_norm": 1.8660540853167669, + "learning_rate": 3.3797766711614686e-06, + "loss": 0.6963, + "step": 20128 + }, + { + "epoch": 0.6169241142576928, + "grad_norm": 1.8701639299549597, + "learning_rate": 3.379307141731846e-06, + "loss": 0.5953, + "step": 20129 + }, + { + "epoch": 0.6169547627804339, + "grad_norm": 1.6215945012345767, + "learning_rate": 3.3788376282713244e-06, + "loss": 0.6072, + "step": 20130 + }, + { + "epoch": 0.6169854113031752, + "grad_norm": 1.8189837603649583, + "learning_rate": 3.3783681307845307e-06, + "loss": 0.5808, + "step": 20131 + }, + { + "epoch": 0.6170160598259163, + "grad_norm": 1.944489605937557, + "learning_rate": 3.3778986492760895e-06, + "loss": 0.604, + "step": 20132 + }, + { + "epoch": 0.6170467083486576, + "grad_norm": 1.6125878079893068, + "learning_rate": 3.377429183750629e-06, + "loss": 0.5683, + "step": 20133 + }, + { + "epoch": 0.6170773568713988, + "grad_norm": 0.7854910555486416, + "learning_rate": 3.3769597342127745e-06, + "loss": 0.4374, + "step": 20134 + }, + { + "epoch": 0.61710800539414, + "grad_norm": 1.5384663473458964, + "learning_rate": 3.3764903006671496e-06, + "loss": 0.5734, + "step": 20135 + }, + { + "epoch": 0.6171386539168812, + "grad_norm": 1.791249556582372, + "learning_rate": 3.376020883118382e-06, + "loss": 0.6468, + "step": 20136 + }, + { + "epoch": 0.6171693024396224, + "grad_norm": 1.6413420723366758, + "learning_rate": 3.3755514815710976e-06, + "loss": 0.6181, + "step": 20137 + }, + { + "epoch": 0.6171999509623636, + "grad_norm": 1.7700133246230128, + "learning_rate": 3.375082096029918e-06, + "loss": 0.5366, + "step": 20138 + }, + { + "epoch": 0.6172305994851048, + "grad_norm": 1.5991085287537388, + "learning_rate": 3.374612726499471e-06, + "loss": 0.5338, + "step": 20139 + }, + { + "epoch": 0.617261248007846, + "grad_norm": 0.8077481718302417, + "learning_rate": 3.3741433729843796e-06, + "loss": 0.4374, + "step": 20140 + }, + { + "epoch": 0.6172918965305872, + "grad_norm": 1.8102057431877918, + "learning_rate": 3.3736740354892707e-06, + "loss": 0.5801, + "step": 20141 + }, + { + "epoch": 0.6173225450533284, + "grad_norm": 2.0758958102266907, + "learning_rate": 3.373204714018768e-06, + "loss": 0.5934, + "step": 20142 + }, + { + "epoch": 0.6173531935760697, + "grad_norm": 1.7244308644955924, + "learning_rate": 3.3727354085774944e-06, + "loss": 0.6884, + "step": 20143 + }, + { + "epoch": 0.6173838420988108, + "grad_norm": 1.911040435683925, + "learning_rate": 3.3722661191700757e-06, + "loss": 0.5593, + "step": 20144 + }, + { + "epoch": 0.6174144906215521, + "grad_norm": 1.851854049598556, + "learning_rate": 3.3717968458011364e-06, + "loss": 0.5899, + "step": 20145 + }, + { + "epoch": 0.6174451391442932, + "grad_norm": 1.7386765007510487, + "learning_rate": 3.371327588475297e-06, + "loss": 0.5877, + "step": 20146 + }, + { + "epoch": 0.6174757876670345, + "grad_norm": 1.8227363298679222, + "learning_rate": 3.3708583471971854e-06, + "loss": 0.6739, + "step": 20147 + }, + { + "epoch": 0.6175064361897756, + "grad_norm": 2.034815740539113, + "learning_rate": 3.3703891219714237e-06, + "loss": 0.5702, + "step": 20148 + }, + { + "epoch": 0.6175370847125169, + "grad_norm": 1.6314376710440703, + "learning_rate": 3.369919912802633e-06, + "loss": 0.5569, + "step": 20149 + }, + { + "epoch": 0.617567733235258, + "grad_norm": 1.7904103013076238, + "learning_rate": 3.36945071969544e-06, + "loss": 0.5922, + "step": 20150 + }, + { + "epoch": 0.6175983817579993, + "grad_norm": 1.7830589772156424, + 
"learning_rate": 3.368981542654465e-06, + "loss": 0.6451, + "step": 20151 + }, + { + "epoch": 0.6176290302807405, + "grad_norm": 1.84901746353147, + "learning_rate": 3.3685123816843335e-06, + "loss": 0.6594, + "step": 20152 + }, + { + "epoch": 0.6176596788034817, + "grad_norm": 1.6299561250564207, + "learning_rate": 3.3680432367896667e-06, + "loss": 0.5764, + "step": 20153 + }, + { + "epoch": 0.6176903273262229, + "grad_norm": 1.8330000694387496, + "learning_rate": 3.367574107975087e-06, + "loss": 0.6325, + "step": 20154 + }, + { + "epoch": 0.6177209758489641, + "grad_norm": 1.758613695028552, + "learning_rate": 3.3671049952452172e-06, + "loss": 0.5496, + "step": 20155 + }, + { + "epoch": 0.6177516243717053, + "grad_norm": 2.247166078667834, + "learning_rate": 3.366635898604681e-06, + "loss": 0.6587, + "step": 20156 + }, + { + "epoch": 0.6177822728944465, + "grad_norm": 1.7285824334085593, + "learning_rate": 3.3661668180580965e-06, + "loss": 0.6633, + "step": 20157 + }, + { + "epoch": 0.6178129214171877, + "grad_norm": 1.491972002124259, + "learning_rate": 3.3656977536100916e-06, + "loss": 0.5014, + "step": 20158 + }, + { + "epoch": 0.617843569939929, + "grad_norm": 1.9273970199439365, + "learning_rate": 3.3652287052652816e-06, + "loss": 0.552, + "step": 20159 + }, + { + "epoch": 0.6178742184626701, + "grad_norm": 1.7904234962480803, + "learning_rate": 3.3647596730282944e-06, + "loss": 0.6948, + "step": 20160 + }, + { + "epoch": 0.6179048669854112, + "grad_norm": 2.016214880902336, + "learning_rate": 3.3642906569037474e-06, + "loss": 0.6181, + "step": 20161 + }, + { + "epoch": 0.6179355155081525, + "grad_norm": 1.7250611016711082, + "learning_rate": 3.363821656896262e-06, + "loss": 0.6482, + "step": 20162 + }, + { + "epoch": 0.6179661640308937, + "grad_norm": 1.7145714035026383, + "learning_rate": 3.363352673010462e-06, + "loss": 0.5555, + "step": 20163 + }, + { + "epoch": 0.6179968125536349, + "grad_norm": 1.7784094036465417, + "learning_rate": 3.3628837052509666e-06, + "loss": 0.6503, + "step": 20164 + }, + { + "epoch": 0.6180274610763761, + "grad_norm": 1.65575748517617, + "learning_rate": 3.3624147536223962e-06, + "loss": 0.6329, + "step": 20165 + }, + { + "epoch": 0.6180581095991173, + "grad_norm": 1.9497082044326208, + "learning_rate": 3.3619458181293728e-06, + "loss": 0.5668, + "step": 20166 + }, + { + "epoch": 0.6180887581218585, + "grad_norm": 1.9706282840292184, + "learning_rate": 3.3614768987765155e-06, + "loss": 0.6362, + "step": 20167 + }, + { + "epoch": 0.6181194066445997, + "grad_norm": 1.8628491512968708, + "learning_rate": 3.361007995568446e-06, + "loss": 0.6386, + "step": 20168 + }, + { + "epoch": 0.6181500551673409, + "grad_norm": 0.8428001228888347, + "learning_rate": 3.360539108509786e-06, + "loss": 0.4167, + "step": 20169 + }, + { + "epoch": 0.6181807036900822, + "grad_norm": 1.6295799801404889, + "learning_rate": 3.3600702376051497e-06, + "loss": 0.5449, + "step": 20170 + }, + { + "epoch": 0.6182113522128233, + "grad_norm": 1.528273707203889, + "learning_rate": 3.359601382859165e-06, + "loss": 0.5452, + "step": 20171 + }, + { + "epoch": 0.6182420007355646, + "grad_norm": 1.7692876565528417, + "learning_rate": 3.359132544276446e-06, + "loss": 0.6733, + "step": 20172 + }, + { + "epoch": 0.6182726492583057, + "grad_norm": 1.9092084681602892, + "learning_rate": 3.358663721861613e-06, + "loss": 0.6309, + "step": 20173 + }, + { + "epoch": 0.618303297781047, + "grad_norm": 1.561089639851084, + "learning_rate": 3.358194915619287e-06, + "loss": 0.5611, + "step": 20174 + }, + 
{ + "epoch": 0.6183339463037881, + "grad_norm": 1.5333501741888176, + "learning_rate": 3.3577261255540873e-06, + "loss": 0.6625, + "step": 20175 + }, + { + "epoch": 0.6183645948265294, + "grad_norm": 1.7827188956186997, + "learning_rate": 3.3572573516706307e-06, + "loss": 0.5602, + "step": 20176 + }, + { + "epoch": 0.6183952433492705, + "grad_norm": 1.5323487877201103, + "learning_rate": 3.356788593973539e-06, + "loss": 0.6037, + "step": 20177 + }, + { + "epoch": 0.6184258918720118, + "grad_norm": 1.796592453421197, + "learning_rate": 3.356319852467428e-06, + "loss": 0.5454, + "step": 20178 + }, + { + "epoch": 0.618456540394753, + "grad_norm": 1.8166689707391754, + "learning_rate": 3.3558511271569194e-06, + "loss": 0.663, + "step": 20179 + }, + { + "epoch": 0.6184871889174942, + "grad_norm": 0.7813118637996507, + "learning_rate": 3.355382418046632e-06, + "loss": 0.4236, + "step": 20180 + }, + { + "epoch": 0.6185178374402354, + "grad_norm": 1.7033996039259882, + "learning_rate": 3.3549137251411788e-06, + "loss": 0.646, + "step": 20181 + }, + { + "epoch": 0.6185484859629766, + "grad_norm": 1.5789698097053504, + "learning_rate": 3.354445048445185e-06, + "loss": 0.5819, + "step": 20182 + }, + { + "epoch": 0.6185791344857178, + "grad_norm": 0.8122165581050435, + "learning_rate": 3.3539763879632636e-06, + "loss": 0.4266, + "step": 20183 + }, + { + "epoch": 0.618609783008459, + "grad_norm": 1.9753420438015512, + "learning_rate": 3.353507743700033e-06, + "loss": 0.5886, + "step": 20184 + }, + { + "epoch": 0.6186404315312002, + "grad_norm": 1.8699108436061764, + "learning_rate": 3.353039115660113e-06, + "loss": 0.5232, + "step": 20185 + }, + { + "epoch": 0.6186710800539414, + "grad_norm": 1.6004506469575097, + "learning_rate": 3.3525705038481194e-06, + "loss": 0.5839, + "step": 20186 + }, + { + "epoch": 0.6187017285766826, + "grad_norm": 1.7994586398366184, + "learning_rate": 3.352101908268671e-06, + "loss": 0.5595, + "step": 20187 + }, + { + "epoch": 0.6187323770994239, + "grad_norm": 1.9655813069651904, + "learning_rate": 3.3516333289263843e-06, + "loss": 0.5462, + "step": 20188 + }, + { + "epoch": 0.618763025622165, + "grad_norm": 2.020027794324559, + "learning_rate": 3.3511647658258747e-06, + "loss": 0.5941, + "step": 20189 + }, + { + "epoch": 0.6187936741449063, + "grad_norm": 1.8134028605840016, + "learning_rate": 3.3506962189717628e-06, + "loss": 0.6082, + "step": 20190 + }, + { + "epoch": 0.6188243226676474, + "grad_norm": 1.6212506126494288, + "learning_rate": 3.350227688368662e-06, + "loss": 0.5974, + "step": 20191 + }, + { + "epoch": 0.6188549711903886, + "grad_norm": 1.7866777048729323, + "learning_rate": 3.349759174021189e-06, + "loss": 0.6057, + "step": 20192 + }, + { + "epoch": 0.6188856197131298, + "grad_norm": 1.8912208845052416, + "learning_rate": 3.349290675933962e-06, + "loss": 0.6481, + "step": 20193 + }, + { + "epoch": 0.618916268235871, + "grad_norm": 0.7589454785487206, + "learning_rate": 3.348822194111595e-06, + "loss": 0.4081, + "step": 20194 + }, + { + "epoch": 0.6189469167586122, + "grad_norm": 1.6148396335491833, + "learning_rate": 3.3483537285587066e-06, + "loss": 0.655, + "step": 20195 + }, + { + "epoch": 0.6189775652813534, + "grad_norm": 1.8213883642862685, + "learning_rate": 3.3478852792799116e-06, + "loss": 0.6589, + "step": 20196 + }, + { + "epoch": 0.6190082138040947, + "grad_norm": 1.506378666272306, + "learning_rate": 3.3474168462798244e-06, + "loss": 0.5597, + "step": 20197 + }, + { + "epoch": 0.6190388623268358, + "grad_norm": 1.817051134606795, + 
"learning_rate": 3.3469484295630634e-06, + "loss": 0.6062, + "step": 20198 + }, + { + "epoch": 0.6190695108495771, + "grad_norm": 0.7835943665020696, + "learning_rate": 3.3464800291342432e-06, + "loss": 0.435, + "step": 20199 + }, + { + "epoch": 0.6191001593723182, + "grad_norm": 1.8491349150904448, + "learning_rate": 3.346011644997975e-06, + "loss": 0.6095, + "step": 20200 + }, + { + "epoch": 0.6191308078950595, + "grad_norm": 1.6737293931181991, + "learning_rate": 3.3455432771588803e-06, + "loss": 0.5608, + "step": 20201 + }, + { + "epoch": 0.6191614564178006, + "grad_norm": 1.7986370040188087, + "learning_rate": 3.345074925621571e-06, + "loss": 0.6124, + "step": 20202 + }, + { + "epoch": 0.6191921049405419, + "grad_norm": 0.7802500571522473, + "learning_rate": 3.3446065903906597e-06, + "loss": 0.4157, + "step": 20203 + }, + { + "epoch": 0.619222753463283, + "grad_norm": 0.796235291406591, + "learning_rate": 3.3441382714707647e-06, + "loss": 0.4253, + "step": 20204 + }, + { + "epoch": 0.6192534019860243, + "grad_norm": 1.9084031540822268, + "learning_rate": 3.3436699688664975e-06, + "loss": 0.6842, + "step": 20205 + }, + { + "epoch": 0.6192840505087654, + "grad_norm": 1.7462891332114057, + "learning_rate": 3.3432016825824753e-06, + "loss": 0.6696, + "step": 20206 + }, + { + "epoch": 0.6193146990315067, + "grad_norm": 1.4579958260338983, + "learning_rate": 3.3427334126233115e-06, + "loss": 0.5293, + "step": 20207 + }, + { + "epoch": 0.6193453475542479, + "grad_norm": 0.8879522752906471, + "learning_rate": 3.3422651589936173e-06, + "loss": 0.4081, + "step": 20208 + }, + { + "epoch": 0.6193759960769891, + "grad_norm": 1.6689715964937233, + "learning_rate": 3.3417969216980107e-06, + "loss": 0.6582, + "step": 20209 + }, + { + "epoch": 0.6194066445997303, + "grad_norm": 1.9936984428171518, + "learning_rate": 3.3413287007411034e-06, + "loss": 0.6188, + "step": 20210 + }, + { + "epoch": 0.6194372931224715, + "grad_norm": 1.5938704878399022, + "learning_rate": 3.340860496127506e-06, + "loss": 0.5489, + "step": 20211 + }, + { + "epoch": 0.6194679416452127, + "grad_norm": 1.805701834569897, + "learning_rate": 3.3403923078618378e-06, + "loss": 0.5001, + "step": 20212 + }, + { + "epoch": 0.6194985901679539, + "grad_norm": 0.8332632262703782, + "learning_rate": 3.3399241359487057e-06, + "loss": 0.4063, + "step": 20213 + }, + { + "epoch": 0.6195292386906951, + "grad_norm": 1.582178785840196, + "learning_rate": 3.339455980392729e-06, + "loss": 0.6177, + "step": 20214 + }, + { + "epoch": 0.6195598872134364, + "grad_norm": 1.721968169454078, + "learning_rate": 3.3389878411985165e-06, + "loss": 0.7397, + "step": 20215 + }, + { + "epoch": 0.6195905357361775, + "grad_norm": 1.818360576054386, + "learning_rate": 3.3385197183706803e-06, + "loss": 0.6073, + "step": 20216 + }, + { + "epoch": 0.6196211842589188, + "grad_norm": 2.004723700295565, + "learning_rate": 3.3380516119138357e-06, + "loss": 0.6168, + "step": 20217 + }, + { + "epoch": 0.6196518327816599, + "grad_norm": 1.832698154434655, + "learning_rate": 3.3375835218325934e-06, + "loss": 0.6414, + "step": 20218 + }, + { + "epoch": 0.6196824813044012, + "grad_norm": 1.7507301415207044, + "learning_rate": 3.337115448131566e-06, + "loss": 0.5776, + "step": 20219 + }, + { + "epoch": 0.6197131298271423, + "grad_norm": 1.8257291425200017, + "learning_rate": 3.336647390815366e-06, + "loss": 0.5422, + "step": 20220 + }, + { + "epoch": 0.6197437783498836, + "grad_norm": 2.776917784312109, + "learning_rate": 3.3361793498886035e-06, + "loss": 0.6815, + "step": 20221 
+ }, + { + "epoch": 0.6197744268726247, + "grad_norm": 1.859993231686729, + "learning_rate": 3.3357113253558927e-06, + "loss": 0.6811, + "step": 20222 + }, + { + "epoch": 0.6198050753953659, + "grad_norm": 1.9054208432868285, + "learning_rate": 3.3352433172218457e-06, + "loss": 0.6063, + "step": 20223 + }, + { + "epoch": 0.6198357239181072, + "grad_norm": 1.5185273551361282, + "learning_rate": 3.3347753254910686e-06, + "loss": 0.5093, + "step": 20224 + }, + { + "epoch": 0.6198663724408483, + "grad_norm": 1.7963638572879348, + "learning_rate": 3.3343073501681794e-06, + "loss": 0.6711, + "step": 20225 + }, + { + "epoch": 0.6198970209635896, + "grad_norm": 1.7263577331600735, + "learning_rate": 3.3338393912577848e-06, + "loss": 0.482, + "step": 20226 + }, + { + "epoch": 0.6199276694863307, + "grad_norm": 1.8635266827030574, + "learning_rate": 3.3333714487644963e-06, + "loss": 0.5707, + "step": 20227 + }, + { + "epoch": 0.619958318009072, + "grad_norm": 1.809728275822843, + "learning_rate": 3.3329035226929265e-06, + "loss": 0.5984, + "step": 20228 + }, + { + "epoch": 0.6199889665318131, + "grad_norm": 1.9311065160989602, + "learning_rate": 3.332435613047685e-06, + "loss": 0.6796, + "step": 20229 + }, + { + "epoch": 0.6200196150545544, + "grad_norm": 1.7066583139168552, + "learning_rate": 3.3319677198333804e-06, + "loss": 0.5842, + "step": 20230 + }, + { + "epoch": 0.6200502635772955, + "grad_norm": 1.8038349725520926, + "learning_rate": 3.331499843054626e-06, + "loss": 0.6446, + "step": 20231 + }, + { + "epoch": 0.6200809121000368, + "grad_norm": 1.8089073620933622, + "learning_rate": 3.3310319827160297e-06, + "loss": 0.6343, + "step": 20232 + }, + { + "epoch": 0.620111560622778, + "grad_norm": 2.0197043080886514, + "learning_rate": 3.330564138822203e-06, + "loss": 0.7085, + "step": 20233 + }, + { + "epoch": 0.6201422091455192, + "grad_norm": 1.98894423039122, + "learning_rate": 3.3300963113777563e-06, + "loss": 0.5954, + "step": 20234 + }, + { + "epoch": 0.6201728576682604, + "grad_norm": 1.64049986175199, + "learning_rate": 3.329628500387295e-06, + "loss": 0.5798, + "step": 20235 + }, + { + "epoch": 0.6202035061910016, + "grad_norm": 0.8058955364882344, + "learning_rate": 3.329160705855434e-06, + "loss": 0.4183, + "step": 20236 + }, + { + "epoch": 0.6202341547137428, + "grad_norm": 1.8519923416698856, + "learning_rate": 3.328692927786779e-06, + "loss": 0.6377, + "step": 20237 + }, + { + "epoch": 0.620264803236484, + "grad_norm": 1.8238466802383515, + "learning_rate": 3.32822516618594e-06, + "loss": 0.5604, + "step": 20238 + }, + { + "epoch": 0.6202954517592252, + "grad_norm": 1.7898427742158602, + "learning_rate": 3.327757421057526e-06, + "loss": 0.5983, + "step": 20239 + }, + { + "epoch": 0.6203261002819664, + "grad_norm": 1.5753255777413093, + "learning_rate": 3.327289692406146e-06, + "loss": 0.5343, + "step": 20240 + }, + { + "epoch": 0.6203567488047076, + "grad_norm": 1.7367159750636936, + "learning_rate": 3.3268219802364088e-06, + "loss": 0.728, + "step": 20241 + }, + { + "epoch": 0.6203873973274489, + "grad_norm": 1.7302307240977288, + "learning_rate": 3.3263542845529247e-06, + "loss": 0.6671, + "step": 20242 + }, + { + "epoch": 0.62041804585019, + "grad_norm": 0.7830594135103125, + "learning_rate": 3.3258866053602967e-06, + "loss": 0.4172, + "step": 20243 + }, + { + "epoch": 0.6204486943729313, + "grad_norm": 1.6552084617924954, + "learning_rate": 3.325418942663139e-06, + "loss": 0.583, + "step": 20244 + }, + { + "epoch": 0.6204793428956724, + "grad_norm": 1.917409874095901, + 
"learning_rate": 3.3249512964660556e-06, + "loss": 0.5608, + "step": 20245 + }, + { + "epoch": 0.6205099914184137, + "grad_norm": 0.7780374763971255, + "learning_rate": 3.3244836667736557e-06, + "loss": 0.4231, + "step": 20246 + }, + { + "epoch": 0.6205406399411548, + "grad_norm": 1.5445078919433746, + "learning_rate": 3.3240160535905475e-06, + "loss": 0.6421, + "step": 20247 + }, + { + "epoch": 0.6205712884638961, + "grad_norm": 1.464578852183084, + "learning_rate": 3.3235484569213373e-06, + "loss": 0.5651, + "step": 20248 + }, + { + "epoch": 0.6206019369866372, + "grad_norm": 1.6571093029808903, + "learning_rate": 3.3230808767706328e-06, + "loss": 0.6054, + "step": 20249 + }, + { + "epoch": 0.6206325855093785, + "grad_norm": 1.7118066763123327, + "learning_rate": 3.3226133131430428e-06, + "loss": 0.5402, + "step": 20250 + }, + { + "epoch": 0.6206632340321196, + "grad_norm": 1.6529037344699902, + "learning_rate": 3.3221457660431713e-06, + "loss": 0.6253, + "step": 20251 + }, + { + "epoch": 0.6206938825548609, + "grad_norm": 1.8441755516487852, + "learning_rate": 3.321678235475628e-06, + "loss": 0.6685, + "step": 20252 + }, + { + "epoch": 0.6207245310776021, + "grad_norm": 1.786404739532649, + "learning_rate": 3.3212107214450196e-06, + "loss": 0.729, + "step": 20253 + }, + { + "epoch": 0.6207551796003432, + "grad_norm": 0.7917822312430998, + "learning_rate": 3.320743223955948e-06, + "loss": 0.4137, + "step": 20254 + }, + { + "epoch": 0.6207858281230845, + "grad_norm": 0.7460083704553951, + "learning_rate": 3.3202757430130265e-06, + "loss": 0.4255, + "step": 20255 + }, + { + "epoch": 0.6208164766458256, + "grad_norm": 1.6232292317695647, + "learning_rate": 3.3198082786208575e-06, + "loss": 0.6378, + "step": 20256 + }, + { + "epoch": 0.6208471251685669, + "grad_norm": 0.7643778194200013, + "learning_rate": 3.3193408307840453e-06, + "loss": 0.4037, + "step": 20257 + }, + { + "epoch": 0.620877773691308, + "grad_norm": 1.7106503547500762, + "learning_rate": 3.3188733995072e-06, + "loss": 0.5706, + "step": 20258 + }, + { + "epoch": 0.6209084222140493, + "grad_norm": 1.4769577436935895, + "learning_rate": 3.3184059847949234e-06, + "loss": 0.5229, + "step": 20259 + }, + { + "epoch": 0.6209390707367904, + "grad_norm": 1.9320483240754838, + "learning_rate": 3.3179385866518236e-06, + "loss": 0.6895, + "step": 20260 + }, + { + "epoch": 0.6209697192595317, + "grad_norm": 1.866468509890451, + "learning_rate": 3.3174712050825066e-06, + "loss": 0.6111, + "step": 20261 + }, + { + "epoch": 0.6210003677822729, + "grad_norm": 1.7534415822628753, + "learning_rate": 3.3170038400915737e-06, + "loss": 0.5879, + "step": 20262 + }, + { + "epoch": 0.6210310163050141, + "grad_norm": 1.7856005616533261, + "learning_rate": 3.3165364916836346e-06, + "loss": 0.5977, + "step": 20263 + }, + { + "epoch": 0.6210616648277553, + "grad_norm": 0.8402744481044486, + "learning_rate": 3.3160691598632934e-06, + "loss": 0.4011, + "step": 20264 + }, + { + "epoch": 0.6210923133504965, + "grad_norm": 1.6813138584510794, + "learning_rate": 3.31560184463515e-06, + "loss": 0.5462, + "step": 20265 + }, + { + "epoch": 0.6211229618732377, + "grad_norm": 1.8232826358177254, + "learning_rate": 3.3151345460038154e-06, + "loss": 0.6067, + "step": 20266 + }, + { + "epoch": 0.6211536103959789, + "grad_norm": 1.8751735566520178, + "learning_rate": 3.3146672639738886e-06, + "loss": 0.5862, + "step": 20267 + }, + { + "epoch": 0.6211842589187201, + "grad_norm": 1.718952431019854, + "learning_rate": 3.3141999985499795e-06, + "loss": 0.602, + "step": 
20268 + }, + { + "epoch": 0.6212149074414614, + "grad_norm": 1.6916504470129161, + "learning_rate": 3.3137327497366885e-06, + "loss": 0.668, + "step": 20269 + }, + { + "epoch": 0.6212455559642025, + "grad_norm": 1.8486772216531933, + "learning_rate": 3.3132655175386188e-06, + "loss": 0.6202, + "step": 20270 + }, + { + "epoch": 0.6212762044869438, + "grad_norm": 1.8792604711451826, + "learning_rate": 3.312798301960376e-06, + "loss": 0.6994, + "step": 20271 + }, + { + "epoch": 0.6213068530096849, + "grad_norm": 1.7383082856937706, + "learning_rate": 3.312331103006564e-06, + "loss": 0.582, + "step": 20272 + }, + { + "epoch": 0.6213375015324262, + "grad_norm": 2.027527560204616, + "learning_rate": 3.3118639206817836e-06, + "loss": 0.6085, + "step": 20273 + }, + { + "epoch": 0.6213681500551673, + "grad_norm": 1.7976780809558874, + "learning_rate": 3.3113967549906424e-06, + "loss": 0.5998, + "step": 20274 + }, + { + "epoch": 0.6213987985779086, + "grad_norm": 1.83622945884188, + "learning_rate": 3.3109296059377405e-06, + "loss": 0.6099, + "step": 20275 + }, + { + "epoch": 0.6214294471006497, + "grad_norm": 1.7715738000063985, + "learning_rate": 3.310462473527679e-06, + "loss": 0.6451, + "step": 20276 + }, + { + "epoch": 0.621460095623391, + "grad_norm": 0.8126911903807653, + "learning_rate": 3.309995357765066e-06, + "loss": 0.4506, + "step": 20277 + }, + { + "epoch": 0.6214907441461321, + "grad_norm": 1.7454630780990872, + "learning_rate": 3.3095282586545e-06, + "loss": 0.6104, + "step": 20278 + }, + { + "epoch": 0.6215213926688734, + "grad_norm": 1.8246766550232363, + "learning_rate": 3.309061176200584e-06, + "loss": 0.6514, + "step": 20279 + }, + { + "epoch": 0.6215520411916146, + "grad_norm": 1.5436349489393768, + "learning_rate": 3.3085941104079217e-06, + "loss": 0.5307, + "step": 20280 + }, + { + "epoch": 0.6215826897143558, + "grad_norm": 1.8161213362215118, + "learning_rate": 3.3081270612811132e-06, + "loss": 0.5771, + "step": 20281 + }, + { + "epoch": 0.621613338237097, + "grad_norm": 1.7401683448170122, + "learning_rate": 3.3076600288247627e-06, + "loss": 0.5709, + "step": 20282 + }, + { + "epoch": 0.6216439867598382, + "grad_norm": 1.6237879937285198, + "learning_rate": 3.3071930130434717e-06, + "loss": 0.5173, + "step": 20283 + }, + { + "epoch": 0.6216746352825794, + "grad_norm": 1.5740785258381982, + "learning_rate": 3.306726013941839e-06, + "loss": 0.5582, + "step": 20284 + }, + { + "epoch": 0.6217052838053205, + "grad_norm": 1.7549576802363949, + "learning_rate": 3.306259031524469e-06, + "loss": 0.6157, + "step": 20285 + }, + { + "epoch": 0.6217359323280618, + "grad_norm": 1.654972909522068, + "learning_rate": 3.305792065795962e-06, + "loss": 0.5781, + "step": 20286 + }, + { + "epoch": 0.6217665808508029, + "grad_norm": 1.7546523505294391, + "learning_rate": 3.3053251167609214e-06, + "loss": 0.6361, + "step": 20287 + }, + { + "epoch": 0.6217972293735442, + "grad_norm": 1.922707803871604, + "learning_rate": 3.3048581844239436e-06, + "loss": 0.7269, + "step": 20288 + }, + { + "epoch": 0.6218278778962854, + "grad_norm": 1.608989309380751, + "learning_rate": 3.304391268789632e-06, + "loss": 0.5602, + "step": 20289 + }, + { + "epoch": 0.6218585264190266, + "grad_norm": 1.8347130798533224, + "learning_rate": 3.303924369862588e-06, + "loss": 0.5915, + "step": 20290 + }, + { + "epoch": 0.6218891749417678, + "grad_norm": 1.9657816977499252, + "learning_rate": 3.3034574876474113e-06, + "loss": 0.6583, + "step": 20291 + }, + { + "epoch": 0.621919823464509, + "grad_norm": 1.8948444954543957, + 
"learning_rate": 3.3029906221487e-06, + "loss": 0.7181, + "step": 20292 + }, + { + "epoch": 0.6219504719872502, + "grad_norm": 1.8685280136679954, + "learning_rate": 3.302523773371058e-06, + "loss": 0.5616, + "step": 20293 + }, + { + "epoch": 0.6219811205099914, + "grad_norm": 1.643774882835605, + "learning_rate": 3.302056941319083e-06, + "loss": 0.5841, + "step": 20294 + }, + { + "epoch": 0.6220117690327326, + "grad_norm": 1.7692959729720847, + "learning_rate": 3.301590125997376e-06, + "loss": 0.4703, + "step": 20295 + }, + { + "epoch": 0.6220424175554738, + "grad_norm": 1.796664848839257, + "learning_rate": 3.301123327410537e-06, + "loss": 0.6873, + "step": 20296 + }, + { + "epoch": 0.622073066078215, + "grad_norm": 1.8708423276441348, + "learning_rate": 3.300656545563161e-06, + "loss": 0.5791, + "step": 20297 + }, + { + "epoch": 0.6221037146009563, + "grad_norm": 0.803032825832607, + "learning_rate": 3.300189780459855e-06, + "loss": 0.3939, + "step": 20298 + }, + { + "epoch": 0.6221343631236974, + "grad_norm": 1.6575116225559088, + "learning_rate": 3.299723032105212e-06, + "loss": 0.5859, + "step": 20299 + }, + { + "epoch": 0.6221650116464387, + "grad_norm": 1.65154945947927, + "learning_rate": 3.2992563005038323e-06, + "loss": 0.6077, + "step": 20300 + }, + { + "epoch": 0.6221956601691798, + "grad_norm": 0.8148196781307027, + "learning_rate": 3.298789585660317e-06, + "loss": 0.4276, + "step": 20301 + }, + { + "epoch": 0.6222263086919211, + "grad_norm": 1.9583855049003036, + "learning_rate": 3.298322887579263e-06, + "loss": 0.5757, + "step": 20302 + }, + { + "epoch": 0.6222569572146622, + "grad_norm": 1.7772601282395664, + "learning_rate": 3.2978562062652674e-06, + "loss": 0.6573, + "step": 20303 + }, + { + "epoch": 0.6222876057374035, + "grad_norm": 1.7623419196084364, + "learning_rate": 3.2973895417229312e-06, + "loss": 0.6273, + "step": 20304 + }, + { + "epoch": 0.6223182542601446, + "grad_norm": 1.7326755011155137, + "learning_rate": 3.296922893956851e-06, + "loss": 0.6264, + "step": 20305 + }, + { + "epoch": 0.6223489027828859, + "grad_norm": 1.6891226011979017, + "learning_rate": 3.2964562629716256e-06, + "loss": 0.5726, + "step": 20306 + }, + { + "epoch": 0.622379551305627, + "grad_norm": 0.802196698562019, + "learning_rate": 3.295989648771854e-06, + "loss": 0.4148, + "step": 20307 + }, + { + "epoch": 0.6224101998283683, + "grad_norm": 0.8168891851276758, + "learning_rate": 3.2955230513621294e-06, + "loss": 0.4203, + "step": 20308 + }, + { + "epoch": 0.6224408483511095, + "grad_norm": 1.7899758591254133, + "learning_rate": 3.295056470747055e-06, + "loss": 0.5803, + "step": 20309 + }, + { + "epoch": 0.6224714968738507, + "grad_norm": 1.5988560178038764, + "learning_rate": 3.294589906931225e-06, + "loss": 0.6357, + "step": 20310 + }, + { + "epoch": 0.6225021453965919, + "grad_norm": 1.8241047506168757, + "learning_rate": 3.294123359919235e-06, + "loss": 0.6923, + "step": 20311 + }, + { + "epoch": 0.6225327939193331, + "grad_norm": 1.666165394336207, + "learning_rate": 3.2936568297156856e-06, + "loss": 0.6176, + "step": 20312 + }, + { + "epoch": 0.6225634424420743, + "grad_norm": 1.6028958570489444, + "learning_rate": 3.293190316325171e-06, + "loss": 0.5905, + "step": 20313 + }, + { + "epoch": 0.6225940909648155, + "grad_norm": 0.7626684794128763, + "learning_rate": 3.2927238197522897e-06, + "loss": 0.416, + "step": 20314 + }, + { + "epoch": 0.6226247394875567, + "grad_norm": 1.7209162462787801, + "learning_rate": 3.292257340001638e-06, + "loss": 0.5611, + "step": 20315 + }, + { + 
"epoch": 0.6226553880102978, + "grad_norm": 1.8789140285084516, + "learning_rate": 3.29179087707781e-06, + "loss": 0.5867, + "step": 20316 + }, + { + "epoch": 0.6226860365330391, + "grad_norm": 0.7696046103276575, + "learning_rate": 3.291324430985405e-06, + "loss": 0.3895, + "step": 20317 + }, + { + "epoch": 0.6227166850557803, + "grad_norm": 1.6435855185850445, + "learning_rate": 3.2908580017290185e-06, + "loss": 0.6059, + "step": 20318 + }, + { + "epoch": 0.6227473335785215, + "grad_norm": 1.816689819963533, + "learning_rate": 3.2903915893132423e-06, + "loss": 0.5818, + "step": 20319 + }, + { + "epoch": 0.6227779821012627, + "grad_norm": 1.639843259619909, + "learning_rate": 3.2899251937426783e-06, + "loss": 0.5368, + "step": 20320 + }, + { + "epoch": 0.6228086306240039, + "grad_norm": 1.607569428194566, + "learning_rate": 3.289458815021916e-06, + "loss": 0.5668, + "step": 20321 + }, + { + "epoch": 0.6228392791467451, + "grad_norm": 1.815978739524941, + "learning_rate": 3.288992453155556e-06, + "loss": 0.591, + "step": 20322 + }, + { + "epoch": 0.6228699276694863, + "grad_norm": 1.8753536457022737, + "learning_rate": 3.288526108148191e-06, + "loss": 0.6184, + "step": 20323 + }, + { + "epoch": 0.6229005761922275, + "grad_norm": 1.6819420059112766, + "learning_rate": 3.2880597800044144e-06, + "loss": 0.5346, + "step": 20324 + }, + { + "epoch": 0.6229312247149688, + "grad_norm": 1.9935844452801965, + "learning_rate": 3.2875934687288245e-06, + "loss": 0.7069, + "step": 20325 + }, + { + "epoch": 0.6229618732377099, + "grad_norm": 0.8123960545416904, + "learning_rate": 3.287127174326014e-06, + "loss": 0.4327, + "step": 20326 + }, + { + "epoch": 0.6229925217604512, + "grad_norm": 1.8942519072496673, + "learning_rate": 3.286660896800577e-06, + "loss": 0.6237, + "step": 20327 + }, + { + "epoch": 0.6230231702831923, + "grad_norm": 1.7309968365861423, + "learning_rate": 3.2861946361571094e-06, + "loss": 0.5178, + "step": 20328 + }, + { + "epoch": 0.6230538188059336, + "grad_norm": 1.7044302756570844, + "learning_rate": 3.2857283924002055e-06, + "loss": 0.6262, + "step": 20329 + }, + { + "epoch": 0.6230844673286747, + "grad_norm": 1.8045162583839167, + "learning_rate": 3.285262165534456e-06, + "loss": 0.6029, + "step": 20330 + }, + { + "epoch": 0.623115115851416, + "grad_norm": 0.8083474492370243, + "learning_rate": 3.2847959555644582e-06, + "loss": 0.4329, + "step": 20331 + }, + { + "epoch": 0.6231457643741571, + "grad_norm": 1.8422977198961485, + "learning_rate": 3.284329762494804e-06, + "loss": 0.6299, + "step": 20332 + }, + { + "epoch": 0.6231764128968984, + "grad_norm": 0.8584408713131739, + "learning_rate": 3.283863586330088e-06, + "loss": 0.43, + "step": 20333 + }, + { + "epoch": 0.6232070614196396, + "grad_norm": 0.791280621692549, + "learning_rate": 3.2833974270749047e-06, + "loss": 0.4245, + "step": 20334 + }, + { + "epoch": 0.6232377099423808, + "grad_norm": 1.5476357913936305, + "learning_rate": 3.2829312847338434e-06, + "loss": 0.5018, + "step": 20335 + }, + { + "epoch": 0.623268358465122, + "grad_norm": 1.754124445269533, + "learning_rate": 3.282465159311501e-06, + "loss": 0.6202, + "step": 20336 + }, + { + "epoch": 0.6232990069878632, + "grad_norm": 1.85968245389962, + "learning_rate": 3.28199905081247e-06, + "loss": 0.6297, + "step": 20337 + }, + { + "epoch": 0.6233296555106044, + "grad_norm": 2.2036690317707026, + "learning_rate": 3.281532959241338e-06, + "loss": 0.5342, + "step": 20338 + }, + { + "epoch": 0.6233603040333456, + "grad_norm": 1.5703158189431474, + "learning_rate": 
3.281066884602705e-06, + "loss": 0.59, + "step": 20339 + }, + { + "epoch": 0.6233909525560868, + "grad_norm": 1.6905688738418474, + "learning_rate": 3.280600826901157e-06, + "loss": 0.5884, + "step": 20340 + }, + { + "epoch": 0.623421601078828, + "grad_norm": 1.5981890486819021, + "learning_rate": 3.280134786141292e-06, + "loss": 0.6263, + "step": 20341 + }, + { + "epoch": 0.6234522496015692, + "grad_norm": 1.652927819809463, + "learning_rate": 3.279668762327698e-06, + "loss": 0.6346, + "step": 20342 + }, + { + "epoch": 0.6234828981243105, + "grad_norm": 1.6077979940870588, + "learning_rate": 3.2792027554649663e-06, + "loss": 0.5915, + "step": 20343 + }, + { + "epoch": 0.6235135466470516, + "grad_norm": 1.7878547800258524, + "learning_rate": 3.278736765557692e-06, + "loss": 0.5834, + "step": 20344 + }, + { + "epoch": 0.6235441951697929, + "grad_norm": 1.9281936758420593, + "learning_rate": 3.278270792610464e-06, + "loss": 0.641, + "step": 20345 + }, + { + "epoch": 0.623574843692534, + "grad_norm": 1.793462129773038, + "learning_rate": 3.2778048366278737e-06, + "loss": 0.5899, + "step": 20346 + }, + { + "epoch": 0.6236054922152752, + "grad_norm": 1.64032234033022, + "learning_rate": 3.277338897614514e-06, + "loss": 0.5885, + "step": 20347 + }, + { + "epoch": 0.6236361407380164, + "grad_norm": 1.6299078736882084, + "learning_rate": 3.2768729755749734e-06, + "loss": 0.5807, + "step": 20348 + }, + { + "epoch": 0.6236667892607576, + "grad_norm": 0.8005536494164044, + "learning_rate": 3.2764070705138463e-06, + "loss": 0.4075, + "step": 20349 + }, + { + "epoch": 0.6236974377834988, + "grad_norm": 1.6081490721336686, + "learning_rate": 3.2759411824357213e-06, + "loss": 0.4646, + "step": 20350 + }, + { + "epoch": 0.62372808630624, + "grad_norm": 1.8984516010884738, + "learning_rate": 3.2754753113451864e-06, + "loss": 0.6364, + "step": 20351 + }, + { + "epoch": 0.6237587348289813, + "grad_norm": 1.5935941187687028, + "learning_rate": 3.275009457246837e-06, + "loss": 0.5267, + "step": 20352 + }, + { + "epoch": 0.6237893833517224, + "grad_norm": 0.7880893687312328, + "learning_rate": 3.2745436201452606e-06, + "loss": 0.4124, + "step": 20353 + }, + { + "epoch": 0.6238200318744637, + "grad_norm": 1.800094339058846, + "learning_rate": 3.274077800045046e-06, + "loss": 0.6226, + "step": 20354 + }, + { + "epoch": 0.6238506803972048, + "grad_norm": 1.8859072830697772, + "learning_rate": 3.2736119969507858e-06, + "loss": 0.6094, + "step": 20355 + }, + { + "epoch": 0.6238813289199461, + "grad_norm": 1.6618415705967766, + "learning_rate": 3.2731462108670676e-06, + "loss": 0.582, + "step": 20356 + }, + { + "epoch": 0.6239119774426872, + "grad_norm": 1.674550309431135, + "learning_rate": 3.2726804417984816e-06, + "loss": 0.6844, + "step": 20357 + }, + { + "epoch": 0.6239426259654285, + "grad_norm": 0.7582691168273324, + "learning_rate": 3.272214689749618e-06, + "loss": 0.3979, + "step": 20358 + }, + { + "epoch": 0.6239732744881696, + "grad_norm": 1.744573601631207, + "learning_rate": 3.271748954725063e-06, + "loss": 0.7002, + "step": 20359 + }, + { + "epoch": 0.6240039230109109, + "grad_norm": 1.8245557596815511, + "learning_rate": 3.2712832367294094e-06, + "loss": 0.6901, + "step": 20360 + }, + { + "epoch": 0.624034571533652, + "grad_norm": 1.896003557606396, + "learning_rate": 3.2708175357672457e-06, + "loss": 0.5929, + "step": 20361 + }, + { + "epoch": 0.6240652200563933, + "grad_norm": 1.8240041549048385, + "learning_rate": 3.2703518518431552e-06, + "loss": 0.6042, + "step": 20362 + }, + { + "epoch": 
0.6240958685791345, + "grad_norm": 1.9600886708099714, + "learning_rate": 3.269886184961735e-06, + "loss": 0.7216, + "step": 20363 + }, + { + "epoch": 0.6241265171018757, + "grad_norm": 1.8511518542612924, + "learning_rate": 3.2694205351275666e-06, + "loss": 0.6166, + "step": 20364 + }, + { + "epoch": 0.6241571656246169, + "grad_norm": 1.6118154282500918, + "learning_rate": 3.2689549023452405e-06, + "loss": 0.714, + "step": 20365 + }, + { + "epoch": 0.6241878141473581, + "grad_norm": 1.9426385151142616, + "learning_rate": 3.268489286619345e-06, + "loss": 0.6424, + "step": 20366 + }, + { + "epoch": 0.6242184626700993, + "grad_norm": 1.6605808552361634, + "learning_rate": 3.2680236879544667e-06, + "loss": 0.5355, + "step": 20367 + }, + { + "epoch": 0.6242491111928405, + "grad_norm": 1.6083695094367465, + "learning_rate": 3.2675581063551954e-06, + "loss": 0.6074, + "step": 20368 + }, + { + "epoch": 0.6242797597155817, + "grad_norm": 1.7227796347585513, + "learning_rate": 3.2670925418261167e-06, + "loss": 0.5748, + "step": 20369 + }, + { + "epoch": 0.624310408238323, + "grad_norm": 1.8902592493141552, + "learning_rate": 3.2666269943718175e-06, + "loss": 0.6068, + "step": 20370 + }, + { + "epoch": 0.6243410567610641, + "grad_norm": 1.6467182939628897, + "learning_rate": 3.266161463996888e-06, + "loss": 0.6319, + "step": 20371 + }, + { + "epoch": 0.6243717052838054, + "grad_norm": 1.8178273140435153, + "learning_rate": 3.2656959507059137e-06, + "loss": 0.6566, + "step": 20372 + }, + { + "epoch": 0.6244023538065465, + "grad_norm": 1.722985262863499, + "learning_rate": 3.265230454503478e-06, + "loss": 0.6157, + "step": 20373 + }, + { + "epoch": 0.6244330023292878, + "grad_norm": 1.7619916967352776, + "learning_rate": 3.2647649753941733e-06, + "loss": 0.4641, + "step": 20374 + }, + { + "epoch": 0.6244636508520289, + "grad_norm": 1.8129922640181055, + "learning_rate": 3.2642995133825815e-06, + "loss": 0.7045, + "step": 20375 + }, + { + "epoch": 0.6244942993747702, + "grad_norm": 1.6435794533744255, + "learning_rate": 3.263834068473292e-06, + "loss": 0.5969, + "step": 20376 + }, + { + "epoch": 0.6245249478975113, + "grad_norm": 1.7739813364489714, + "learning_rate": 3.2633686406708888e-06, + "loss": 0.5727, + "step": 20377 + }, + { + "epoch": 0.6245555964202525, + "grad_norm": 1.5870277322673492, + "learning_rate": 3.2629032299799577e-06, + "loss": 0.5729, + "step": 20378 + }, + { + "epoch": 0.6245862449429938, + "grad_norm": 1.768998406895267, + "learning_rate": 3.262437836405088e-06, + "loss": 0.6144, + "step": 20379 + }, + { + "epoch": 0.6246168934657349, + "grad_norm": 1.6581526581348325, + "learning_rate": 3.261972459950862e-06, + "loss": 0.6162, + "step": 20380 + }, + { + "epoch": 0.6246475419884762, + "grad_norm": 1.8139782596069052, + "learning_rate": 3.2615071006218644e-06, + "loss": 0.5692, + "step": 20381 + }, + { + "epoch": 0.6246781905112173, + "grad_norm": 1.668284183705762, + "learning_rate": 3.261041758422685e-06, + "loss": 0.5361, + "step": 20382 + }, + { + "epoch": 0.6247088390339586, + "grad_norm": 1.8377136690709146, + "learning_rate": 3.260576433357905e-06, + "loss": 0.6033, + "step": 20383 + }, + { + "epoch": 0.6247394875566997, + "grad_norm": 1.6874868871884394, + "learning_rate": 3.2601111254321083e-06, + "loss": 0.5689, + "step": 20384 + }, + { + "epoch": 0.624770136079441, + "grad_norm": 1.8614180307712744, + "learning_rate": 3.2596458346498836e-06, + "loss": 0.6049, + "step": 20385 + }, + { + "epoch": 0.6248007846021821, + "grad_norm": 2.088708877864155, + "learning_rate": 
3.2591805610158134e-06, + "loss": 0.5319, + "step": 20386 + }, + { + "epoch": 0.6248314331249234, + "grad_norm": 1.7560566205853574, + "learning_rate": 3.258715304534483e-06, + "loss": 0.5586, + "step": 20387 + }, + { + "epoch": 0.6248620816476645, + "grad_norm": 0.8121636423564433, + "learning_rate": 3.2582500652104765e-06, + "loss": 0.4195, + "step": 20388 + }, + { + "epoch": 0.6248927301704058, + "grad_norm": 1.911015421768257, + "learning_rate": 3.2577848430483767e-06, + "loss": 0.6333, + "step": 20389 + }, + { + "epoch": 0.624923378693147, + "grad_norm": 1.6424680494893706, + "learning_rate": 3.2573196380527693e-06, + "loss": 0.526, + "step": 20390 + }, + { + "epoch": 0.6249540272158882, + "grad_norm": 1.6998173430338215, + "learning_rate": 3.2568544502282384e-06, + "loss": 0.5998, + "step": 20391 + }, + { + "epoch": 0.6249846757386294, + "grad_norm": 0.7878365790477732, + "learning_rate": 3.256389279579364e-06, + "loss": 0.4231, + "step": 20392 + }, + { + "epoch": 0.6250153242613706, + "grad_norm": 1.725711407392629, + "learning_rate": 3.255924126110735e-06, + "loss": 0.5711, + "step": 20393 + }, + { + "epoch": 0.6250459727841118, + "grad_norm": 0.8305176933610717, + "learning_rate": 3.2554589898269284e-06, + "loss": 0.4259, + "step": 20394 + }, + { + "epoch": 0.625076621306853, + "grad_norm": 1.921070093976453, + "learning_rate": 3.2549938707325346e-06, + "loss": 0.6169, + "step": 20395 + }, + { + "epoch": 0.6251072698295942, + "grad_norm": 0.8106677232238452, + "learning_rate": 3.2545287688321308e-06, + "loss": 0.4137, + "step": 20396 + }, + { + "epoch": 0.6251379183523355, + "grad_norm": 1.6726559229054971, + "learning_rate": 3.2540636841303006e-06, + "loss": 0.6721, + "step": 20397 + }, + { + "epoch": 0.6251685668750766, + "grad_norm": 0.804605592780331, + "learning_rate": 3.2535986166316292e-06, + "loss": 0.4206, + "step": 20398 + }, + { + "epoch": 0.6251992153978179, + "grad_norm": 1.9475696233068989, + "learning_rate": 3.2531335663406976e-06, + "loss": 0.6364, + "step": 20399 + }, + { + "epoch": 0.625229863920559, + "grad_norm": 0.7782212109358141, + "learning_rate": 3.2526685332620867e-06, + "loss": 0.4139, + "step": 20400 + }, + { + "epoch": 0.6252605124433003, + "grad_norm": 1.4953785584247812, + "learning_rate": 3.252203517400381e-06, + "loss": 0.5589, + "step": 20401 + }, + { + "epoch": 0.6252911609660414, + "grad_norm": 1.8340381746687302, + "learning_rate": 3.251738518760161e-06, + "loss": 0.6695, + "step": 20402 + }, + { + "epoch": 0.6253218094887827, + "grad_norm": 1.7836050866222852, + "learning_rate": 3.2512735373460068e-06, + "loss": 0.6785, + "step": 20403 + }, + { + "epoch": 0.6253524580115238, + "grad_norm": 0.7441211898227595, + "learning_rate": 3.250808573162505e-06, + "loss": 0.4194, + "step": 20404 + }, + { + "epoch": 0.6253831065342651, + "grad_norm": 1.9205063166449636, + "learning_rate": 3.250343626214231e-06, + "loss": 0.6786, + "step": 20405 + }, + { + "epoch": 0.6254137550570062, + "grad_norm": 1.7143745164095099, + "learning_rate": 3.2498786965057716e-06, + "loss": 0.6051, + "step": 20406 + }, + { + "epoch": 0.6254444035797475, + "grad_norm": 1.5788217210460607, + "learning_rate": 3.249413784041704e-06, + "loss": 0.6828, + "step": 20407 + }, + { + "epoch": 0.6254750521024887, + "grad_norm": 1.6364565687579948, + "learning_rate": 3.248948888826609e-06, + "loss": 0.6263, + "step": 20408 + }, + { + "epoch": 0.6255057006252298, + "grad_norm": 1.7279428820741478, + "learning_rate": 3.2484840108650706e-06, + "loss": 0.6829, + "step": 20409 + }, + { + 
"epoch": 0.6255363491479711, + "grad_norm": 1.5996601511924269, + "learning_rate": 3.2480191501616663e-06, + "loss": 0.6076, + "step": 20410 + }, + { + "epoch": 0.6255669976707122, + "grad_norm": 1.8576862064175677, + "learning_rate": 3.2475543067209768e-06, + "loss": 0.6153, + "step": 20411 + }, + { + "epoch": 0.6255976461934535, + "grad_norm": 1.6860829977843599, + "learning_rate": 3.247089480547585e-06, + "loss": 0.631, + "step": 20412 + }, + { + "epoch": 0.6256282947161946, + "grad_norm": 1.7965759458967256, + "learning_rate": 3.246624671646067e-06, + "loss": 0.5522, + "step": 20413 + }, + { + "epoch": 0.6256589432389359, + "grad_norm": 1.8125782503103876, + "learning_rate": 3.2461598800210065e-06, + "loss": 0.5906, + "step": 20414 + }, + { + "epoch": 0.625689591761677, + "grad_norm": 0.8180241995716859, + "learning_rate": 3.245695105676982e-06, + "loss": 0.4388, + "step": 20415 + }, + { + "epoch": 0.6257202402844183, + "grad_norm": 1.587144770849988, + "learning_rate": 3.2452303486185698e-06, + "loss": 0.5478, + "step": 20416 + }, + { + "epoch": 0.6257508888071595, + "grad_norm": 1.9141237999106469, + "learning_rate": 3.244765608850354e-06, + "loss": 0.6707, + "step": 20417 + }, + { + "epoch": 0.6257815373299007, + "grad_norm": 1.7289395855264227, + "learning_rate": 3.244300886376912e-06, + "loss": 0.6214, + "step": 20418 + }, + { + "epoch": 0.6258121858526419, + "grad_norm": 2.114457695560023, + "learning_rate": 3.2438361812028212e-06, + "loss": 0.5347, + "step": 20419 + }, + { + "epoch": 0.6258428343753831, + "grad_norm": 1.6362951064822926, + "learning_rate": 3.243371493332663e-06, + "loss": 0.5914, + "step": 20420 + }, + { + "epoch": 0.6258734828981243, + "grad_norm": 1.8352994697187999, + "learning_rate": 3.2429068227710137e-06, + "loss": 0.5857, + "step": 20421 + }, + { + "epoch": 0.6259041314208655, + "grad_norm": 1.668150125146379, + "learning_rate": 3.2424421695224538e-06, + "loss": 0.6115, + "step": 20422 + }, + { + "epoch": 0.6259347799436067, + "grad_norm": 1.7070529019543141, + "learning_rate": 3.241977533591561e-06, + "loss": 0.5622, + "step": 20423 + }, + { + "epoch": 0.625965428466348, + "grad_norm": 1.6477864802836408, + "learning_rate": 3.241512914982913e-06, + "loss": 0.5678, + "step": 20424 + }, + { + "epoch": 0.6259960769890891, + "grad_norm": 1.6655588720691676, + "learning_rate": 3.2410483137010885e-06, + "loss": 0.5555, + "step": 20425 + }, + { + "epoch": 0.6260267255118304, + "grad_norm": 1.7064498146858182, + "learning_rate": 3.240583729750666e-06, + "loss": 0.5831, + "step": 20426 + }, + { + "epoch": 0.6260573740345715, + "grad_norm": 1.7088160534663974, + "learning_rate": 3.24011916313622e-06, + "loss": 0.5875, + "step": 20427 + }, + { + "epoch": 0.6260880225573128, + "grad_norm": 1.9328780278290016, + "learning_rate": 3.2396546138623313e-06, + "loss": 0.6331, + "step": 20428 + }, + { + "epoch": 0.6261186710800539, + "grad_norm": 1.9587404320508106, + "learning_rate": 3.2391900819335766e-06, + "loss": 0.6288, + "step": 20429 + }, + { + "epoch": 0.6261493196027952, + "grad_norm": 1.7886592236209582, + "learning_rate": 3.2387255673545317e-06, + "loss": 0.6424, + "step": 20430 + }, + { + "epoch": 0.6261799681255363, + "grad_norm": 1.7280053724756865, + "learning_rate": 3.2382610701297743e-06, + "loss": 0.6325, + "step": 20431 + }, + { + "epoch": 0.6262106166482776, + "grad_norm": 1.8011089882694236, + "learning_rate": 3.2377965902638807e-06, + "loss": 0.6152, + "step": 20432 + }, + { + "epoch": 0.6262412651710187, + "grad_norm": 1.882961606247245, + 
"learning_rate": 3.23733212776143e-06, + "loss": 0.6018, + "step": 20433 + }, + { + "epoch": 0.62627191369376, + "grad_norm": 1.9815683876309718, + "learning_rate": 3.2368676826269972e-06, + "loss": 0.6004, + "step": 20434 + }, + { + "epoch": 0.6263025622165012, + "grad_norm": 1.8445282798463316, + "learning_rate": 3.2364032548651554e-06, + "loss": 0.5566, + "step": 20435 + }, + { + "epoch": 0.6263332107392424, + "grad_norm": 1.8066637419344729, + "learning_rate": 3.2359388444804863e-06, + "loss": 0.6572, + "step": 20436 + }, + { + "epoch": 0.6263638592619836, + "grad_norm": 2.0538076822163123, + "learning_rate": 3.2354744514775626e-06, + "loss": 0.6052, + "step": 20437 + }, + { + "epoch": 0.6263945077847248, + "grad_norm": 1.7889066421034765, + "learning_rate": 3.235010075860959e-06, + "loss": 0.5492, + "step": 20438 + }, + { + "epoch": 0.626425156307466, + "grad_norm": 1.9447879462225675, + "learning_rate": 3.2345457176352546e-06, + "loss": 0.5667, + "step": 20439 + }, + { + "epoch": 0.6264558048302071, + "grad_norm": 1.7472301028634778, + "learning_rate": 3.2340813768050213e-06, + "loss": 0.6228, + "step": 20440 + }, + { + "epoch": 0.6264864533529484, + "grad_norm": 1.8716509960965149, + "learning_rate": 3.233617053374837e-06, + "loss": 0.6336, + "step": 20441 + }, + { + "epoch": 0.6265171018756895, + "grad_norm": 0.783958501784461, + "learning_rate": 3.233152747349276e-06, + "loss": 0.412, + "step": 20442 + }, + { + "epoch": 0.6265477503984308, + "grad_norm": 1.7617463650923086, + "learning_rate": 3.232688458732912e-06, + "loss": 0.5231, + "step": 20443 + }, + { + "epoch": 0.626578398921172, + "grad_norm": 1.761286456259328, + "learning_rate": 3.2322241875303217e-06, + "loss": 0.6493, + "step": 20444 + }, + { + "epoch": 0.6266090474439132, + "grad_norm": 1.8232358520418952, + "learning_rate": 3.23175993374608e-06, + "loss": 0.6284, + "step": 20445 + }, + { + "epoch": 0.6266396959666544, + "grad_norm": 1.7009347975579192, + "learning_rate": 3.231295697384757e-06, + "loss": 0.5573, + "step": 20446 + }, + { + "epoch": 0.6266703444893956, + "grad_norm": 1.8839956452164062, + "learning_rate": 3.2308314784509333e-06, + "loss": 0.6797, + "step": 20447 + }, + { + "epoch": 0.6267009930121368, + "grad_norm": 1.9484133073937024, + "learning_rate": 3.230367276949176e-06, + "loss": 0.5288, + "step": 20448 + }, + { + "epoch": 0.626731641534878, + "grad_norm": 1.7012399299564873, + "learning_rate": 3.2299030928840665e-06, + "loss": 0.6086, + "step": 20449 + }, + { + "epoch": 0.6267622900576192, + "grad_norm": 2.0397944674013577, + "learning_rate": 3.2294389262601733e-06, + "loss": 0.6307, + "step": 20450 + }, + { + "epoch": 0.6267929385803604, + "grad_norm": 2.127789739440519, + "learning_rate": 3.22897477708207e-06, + "loss": 0.6128, + "step": 20451 + }, + { + "epoch": 0.6268235871031016, + "grad_norm": 1.8922337541698242, + "learning_rate": 3.228510645354333e-06, + "loss": 0.626, + "step": 20452 + }, + { + "epoch": 0.6268542356258429, + "grad_norm": 1.993519886790638, + "learning_rate": 3.2280465310815335e-06, + "loss": 0.6786, + "step": 20453 + }, + { + "epoch": 0.626884884148584, + "grad_norm": 1.7312893206112885, + "learning_rate": 3.227582434268244e-06, + "loss": 0.6495, + "step": 20454 + }, + { + "epoch": 0.6269155326713253, + "grad_norm": 1.8053956613079936, + "learning_rate": 3.22711835491904e-06, + "loss": 0.539, + "step": 20455 + }, + { + "epoch": 0.6269461811940664, + "grad_norm": 1.8654875251556506, + "learning_rate": 3.2266542930384926e-06, + "loss": 0.554, + "step": 20456 + }, + { + 
"epoch": 0.6269768297168077, + "grad_norm": 1.6583036652400782, + "learning_rate": 3.226190248631171e-06, + "loss": 0.5642, + "step": 20457 + }, + { + "epoch": 0.6270074782395488, + "grad_norm": 1.6920328105842262, + "learning_rate": 3.2257262217016546e-06, + "loss": 0.587, + "step": 20458 + }, + { + "epoch": 0.6270381267622901, + "grad_norm": 0.7741976987031327, + "learning_rate": 3.2252622122545076e-06, + "loss": 0.414, + "step": 20459 + }, + { + "epoch": 0.6270687752850312, + "grad_norm": 1.8388461893146304, + "learning_rate": 3.2247982202943096e-06, + "loss": 0.608, + "step": 20460 + }, + { + "epoch": 0.6270994238077725, + "grad_norm": 3.7219588187060415, + "learning_rate": 3.2243342458256287e-06, + "loss": 0.6858, + "step": 20461 + }, + { + "epoch": 0.6271300723305137, + "grad_norm": 1.821827794665857, + "learning_rate": 3.223870288853035e-06, + "loss": 0.5733, + "step": 20462 + }, + { + "epoch": 0.6271607208532549, + "grad_norm": 1.8240108833655269, + "learning_rate": 3.223406349381103e-06, + "loss": 0.6649, + "step": 20463 + }, + { + "epoch": 0.6271913693759961, + "grad_norm": 1.7588539859886048, + "learning_rate": 3.2229424274144028e-06, + "loss": 0.5148, + "step": 20464 + }, + { + "epoch": 0.6272220178987373, + "grad_norm": 1.8946433088644223, + "learning_rate": 3.222478522957504e-06, + "loss": 0.5866, + "step": 20465 + }, + { + "epoch": 0.6272526664214785, + "grad_norm": 1.8426665500586485, + "learning_rate": 3.2220146360149806e-06, + "loss": 0.6565, + "step": 20466 + }, + { + "epoch": 0.6272833149442197, + "grad_norm": 0.7799195657227673, + "learning_rate": 3.2215507665914015e-06, + "loss": 0.4088, + "step": 20467 + }, + { + "epoch": 0.6273139634669609, + "grad_norm": 1.80781682289389, + "learning_rate": 3.2210869146913374e-06, + "loss": 0.712, + "step": 20468 + }, + { + "epoch": 0.6273446119897022, + "grad_norm": 1.7039281101464476, + "learning_rate": 3.220623080319361e-06, + "loss": 0.6075, + "step": 20469 + }, + { + "epoch": 0.6273752605124433, + "grad_norm": 1.9981046673091818, + "learning_rate": 3.2201592634800375e-06, + "loss": 0.6606, + "step": 20470 + }, + { + "epoch": 0.6274059090351845, + "grad_norm": 1.8207161280310507, + "learning_rate": 3.2196954641779433e-06, + "loss": 0.6384, + "step": 20471 + }, + { + "epoch": 0.6274365575579257, + "grad_norm": 1.7900117174229335, + "learning_rate": 3.219231682417644e-06, + "loss": 0.6822, + "step": 20472 + }, + { + "epoch": 0.6274672060806669, + "grad_norm": 1.7019631922860596, + "learning_rate": 3.2187679182037096e-06, + "loss": 0.6497, + "step": 20473 + }, + { + "epoch": 0.6274978546034081, + "grad_norm": 1.7090889026841425, + "learning_rate": 3.2183041715407117e-06, + "loss": 0.5887, + "step": 20474 + }, + { + "epoch": 0.6275285031261493, + "grad_norm": 1.8083928628977999, + "learning_rate": 3.217840442433218e-06, + "loss": 0.6276, + "step": 20475 + }, + { + "epoch": 0.6275591516488905, + "grad_norm": 1.8476766770211812, + "learning_rate": 3.2173767308857982e-06, + "loss": 0.6272, + "step": 20476 + }, + { + "epoch": 0.6275898001716317, + "grad_norm": 1.5746272060019286, + "learning_rate": 3.2169130369030234e-06, + "loss": 0.5692, + "step": 20477 + }, + { + "epoch": 0.627620448694373, + "grad_norm": 1.9338928181694697, + "learning_rate": 3.216449360489458e-06, + "loss": 0.6447, + "step": 20478 + }, + { + "epoch": 0.6276510972171141, + "grad_norm": 1.5949492130975411, + "learning_rate": 3.2159857016496763e-06, + "loss": 0.5901, + "step": 20479 + }, + { + "epoch": 0.6276817457398554, + "grad_norm": 1.6052734125449388, + 
"learning_rate": 3.215522060388243e-06, + "loss": 0.5325, + "step": 20480 + }, + { + "epoch": 0.6277123942625965, + "grad_norm": 0.7859366377630111, + "learning_rate": 3.215058436709726e-06, + "loss": 0.427, + "step": 20481 + }, + { + "epoch": 0.6277430427853378, + "grad_norm": 1.700496816493946, + "learning_rate": 3.214594830618696e-06, + "loss": 0.6146, + "step": 20482 + }, + { + "epoch": 0.6277736913080789, + "grad_norm": 1.8769930907121721, + "learning_rate": 3.21413124211972e-06, + "loss": 0.7597, + "step": 20483 + }, + { + "epoch": 0.6278043398308202, + "grad_norm": 1.893411442780474, + "learning_rate": 3.2136676712173647e-06, + "loss": 0.6334, + "step": 20484 + }, + { + "epoch": 0.6278349883535613, + "grad_norm": 1.8860265075972134, + "learning_rate": 3.2132041179162e-06, + "loss": 0.6567, + "step": 20485 + }, + { + "epoch": 0.6278656368763026, + "grad_norm": 1.7284712234150863, + "learning_rate": 3.212740582220791e-06, + "loss": 0.5035, + "step": 20486 + }, + { + "epoch": 0.6278962853990437, + "grad_norm": 1.6846336214219417, + "learning_rate": 3.212277064135708e-06, + "loss": 0.6497, + "step": 20487 + }, + { + "epoch": 0.627926933921785, + "grad_norm": 1.8520473621275255, + "learning_rate": 3.211813563665517e-06, + "loss": 0.6373, + "step": 20488 + }, + { + "epoch": 0.6279575824445262, + "grad_norm": 0.8420123145079044, + "learning_rate": 3.2113500808147814e-06, + "loss": 0.435, + "step": 20489 + }, + { + "epoch": 0.6279882309672674, + "grad_norm": 1.8600798430082806, + "learning_rate": 3.2108866155880745e-06, + "loss": 0.6494, + "step": 20490 + }, + { + "epoch": 0.6280188794900086, + "grad_norm": 1.8019334874980244, + "learning_rate": 3.2104231679899584e-06, + "loss": 0.6098, + "step": 20491 + }, + { + "epoch": 0.6280495280127498, + "grad_norm": 1.8203123231175113, + "learning_rate": 3.2099597380249998e-06, + "loss": 0.6239, + "step": 20492 + }, + { + "epoch": 0.628080176535491, + "grad_norm": 1.6813828116600422, + "learning_rate": 3.2094963256977663e-06, + "loss": 0.617, + "step": 20493 + }, + { + "epoch": 0.6281108250582322, + "grad_norm": 0.8011237811660112, + "learning_rate": 3.209032931012823e-06, + "loss": 0.4387, + "step": 20494 + }, + { + "epoch": 0.6281414735809734, + "grad_norm": 2.1130738426754254, + "learning_rate": 3.208569553974738e-06, + "loss": 0.5551, + "step": 20495 + }, + { + "epoch": 0.6281721221037146, + "grad_norm": 1.5869359285553717, + "learning_rate": 3.2081061945880756e-06, + "loss": 0.5181, + "step": 20496 + }, + { + "epoch": 0.6282027706264558, + "grad_norm": 1.6043372518582864, + "learning_rate": 3.207642852857399e-06, + "loss": 0.6475, + "step": 20497 + }, + { + "epoch": 0.6282334191491971, + "grad_norm": 1.6510017739327898, + "learning_rate": 3.207179528787278e-06, + "loss": 0.5349, + "step": 20498 + }, + { + "epoch": 0.6282640676719382, + "grad_norm": 1.7433039551315448, + "learning_rate": 3.206716222382277e-06, + "loss": 0.6375, + "step": 20499 + }, + { + "epoch": 0.6282947161946795, + "grad_norm": 1.6904331284704677, + "learning_rate": 3.206252933646956e-06, + "loss": 0.5358, + "step": 20500 + }, + { + "epoch": 0.6283253647174206, + "grad_norm": 1.8309344876804101, + "learning_rate": 3.2057896625858875e-06, + "loss": 0.5888, + "step": 20501 + }, + { + "epoch": 0.6283560132401618, + "grad_norm": 1.6218136051997558, + "learning_rate": 3.2053264092036297e-06, + "loss": 0.5355, + "step": 20502 + }, + { + "epoch": 0.628386661762903, + "grad_norm": 0.795315415937686, + "learning_rate": 3.204863173504752e-06, + "loss": 0.4131, + "step": 20503 + }, + { + 
"epoch": 0.6284173102856442, + "grad_norm": 1.9966415876676118, + "learning_rate": 3.2043999554938165e-06, + "loss": 0.6269, + "step": 20504 + }, + { + "epoch": 0.6284479588083854, + "grad_norm": 1.7124076749619856, + "learning_rate": 3.203936755175386e-06, + "loss": 0.5952, + "step": 20505 + }, + { + "epoch": 0.6284786073311266, + "grad_norm": 1.680833286997206, + "learning_rate": 3.2034735725540283e-06, + "loss": 0.6289, + "step": 20506 + }, + { + "epoch": 0.6285092558538679, + "grad_norm": 0.8005603629553749, + "learning_rate": 3.203010407634305e-06, + "loss": 0.4334, + "step": 20507 + }, + { + "epoch": 0.628539904376609, + "grad_norm": 1.7747090889027841, + "learning_rate": 3.202547260420778e-06, + "loss": 0.5894, + "step": 20508 + }, + { + "epoch": 0.6285705528993503, + "grad_norm": 1.7359692676921357, + "learning_rate": 3.202084130918014e-06, + "loss": 0.5404, + "step": 20509 + }, + { + "epoch": 0.6286012014220914, + "grad_norm": 1.9273072856443914, + "learning_rate": 3.201621019130576e-06, + "loss": 0.6928, + "step": 20510 + }, + { + "epoch": 0.6286318499448327, + "grad_norm": 1.8085345810200577, + "learning_rate": 3.2011579250630244e-06, + "loss": 0.6372, + "step": 20511 + }, + { + "epoch": 0.6286624984675738, + "grad_norm": 1.5751519364508104, + "learning_rate": 3.2006948487199264e-06, + "loss": 0.5913, + "step": 20512 + }, + { + "epoch": 0.6286931469903151, + "grad_norm": 0.7463297456308166, + "learning_rate": 3.2002317901058387e-06, + "loss": 0.4282, + "step": 20513 + }, + { + "epoch": 0.6287237955130562, + "grad_norm": 1.7798602902930591, + "learning_rate": 3.199768749225331e-06, + "loss": 0.6026, + "step": 20514 + }, + { + "epoch": 0.6287544440357975, + "grad_norm": 1.831598368895332, + "learning_rate": 3.199305726082962e-06, + "loss": 0.6533, + "step": 20515 + }, + { + "epoch": 0.6287850925585386, + "grad_norm": 1.771422039323073, + "learning_rate": 3.1988427206832927e-06, + "loss": 0.6829, + "step": 20516 + }, + { + "epoch": 0.6288157410812799, + "grad_norm": 1.7063523417246873, + "learning_rate": 3.1983797330308886e-06, + "loss": 0.6047, + "step": 20517 + }, + { + "epoch": 0.6288463896040211, + "grad_norm": 1.679340403531877, + "learning_rate": 3.1979167631303087e-06, + "loss": 0.6184, + "step": 20518 + }, + { + "epoch": 0.6288770381267623, + "grad_norm": 1.7877455528216395, + "learning_rate": 3.1974538109861164e-06, + "loss": 0.6479, + "step": 20519 + }, + { + "epoch": 0.6289076866495035, + "grad_norm": 1.7314249929324064, + "learning_rate": 3.1969908766028736e-06, + "loss": 0.6238, + "step": 20520 + }, + { + "epoch": 0.6289383351722447, + "grad_norm": 1.5963073937045704, + "learning_rate": 3.1965279599851397e-06, + "loss": 0.6055, + "step": 20521 + }, + { + "epoch": 0.6289689836949859, + "grad_norm": 1.7368209585925733, + "learning_rate": 3.1960650611374777e-06, + "loss": 0.5616, + "step": 20522 + }, + { + "epoch": 0.6289996322177271, + "grad_norm": 1.8918427245116092, + "learning_rate": 3.1956021800644497e-06, + "loss": 0.5901, + "step": 20523 + }, + { + "epoch": 0.6290302807404683, + "grad_norm": 0.7764907233409674, + "learning_rate": 3.1951393167706137e-06, + "loss": 0.4163, + "step": 20524 + }, + { + "epoch": 0.6290609292632096, + "grad_norm": 1.7027222447945773, + "learning_rate": 3.194676471260533e-06, + "loss": 0.5882, + "step": 20525 + }, + { + "epoch": 0.6290915777859507, + "grad_norm": 2.088807083021128, + "learning_rate": 3.194213643538766e-06, + "loss": 0.671, + "step": 20526 + }, + { + "epoch": 0.629122226308692, + "grad_norm": 1.5957325761922654, + 
"learning_rate": 3.193750833609873e-06, + "loss": 0.5463, + "step": 20527 + }, + { + "epoch": 0.6291528748314331, + "grad_norm": 1.7772445845143667, + "learning_rate": 3.193288041478416e-06, + "loss": 0.5916, + "step": 20528 + }, + { + "epoch": 0.6291835233541744, + "grad_norm": 1.7740015768492643, + "learning_rate": 3.192825267148954e-06, + "loss": 0.6072, + "step": 20529 + }, + { + "epoch": 0.6292141718769155, + "grad_norm": 2.106563592016565, + "learning_rate": 3.1923625106260483e-06, + "loss": 0.6333, + "step": 20530 + }, + { + "epoch": 0.6292448203996568, + "grad_norm": 1.7958682209491599, + "learning_rate": 3.1918997719142573e-06, + "loss": 0.5547, + "step": 20531 + }, + { + "epoch": 0.6292754689223979, + "grad_norm": 0.7988015724369831, + "learning_rate": 3.1914370510181382e-06, + "loss": 0.4105, + "step": 20532 + }, + { + "epoch": 0.6293061174451391, + "grad_norm": 1.794436708051027, + "learning_rate": 3.190974347942255e-06, + "loss": 0.6329, + "step": 20533 + }, + { + "epoch": 0.6293367659678804, + "grad_norm": 1.7730830234880592, + "learning_rate": 3.1905116626911636e-06, + "loss": 0.5987, + "step": 20534 + }, + { + "epoch": 0.6293674144906215, + "grad_norm": 1.7648875254092415, + "learning_rate": 3.1900489952694225e-06, + "loss": 0.6218, + "step": 20535 + }, + { + "epoch": 0.6293980630133628, + "grad_norm": 1.9367203694194854, + "learning_rate": 3.1895863456815933e-06, + "loss": 0.6553, + "step": 20536 + }, + { + "epoch": 0.6294287115361039, + "grad_norm": 1.7725007012459248, + "learning_rate": 3.189123713932233e-06, + "loss": 0.6672, + "step": 20537 + }, + { + "epoch": 0.6294593600588452, + "grad_norm": 0.8029795212166516, + "learning_rate": 3.1886611000258984e-06, + "loss": 0.4001, + "step": 20538 + }, + { + "epoch": 0.6294900085815863, + "grad_norm": 1.859428869761475, + "learning_rate": 3.1881985039671515e-06, + "loss": 0.7105, + "step": 20539 + }, + { + "epoch": 0.6295206571043276, + "grad_norm": 1.6612694277913398, + "learning_rate": 3.187735925760547e-06, + "loss": 0.58, + "step": 20540 + }, + { + "epoch": 0.6295513056270687, + "grad_norm": 0.8203464200345841, + "learning_rate": 3.1872733654106443e-06, + "loss": 0.4438, + "step": 20541 + }, + { + "epoch": 0.62958195414981, + "grad_norm": 1.9265863411516104, + "learning_rate": 3.1868108229220024e-06, + "loss": 0.6644, + "step": 20542 + }, + { + "epoch": 0.6296126026725511, + "grad_norm": 1.9143156778344304, + "learning_rate": 3.1863482982991745e-06, + "loss": 0.6529, + "step": 20543 + }, + { + "epoch": 0.6296432511952924, + "grad_norm": 1.7126392400511523, + "learning_rate": 3.1858857915467234e-06, + "loss": 0.702, + "step": 20544 + }, + { + "epoch": 0.6296738997180336, + "grad_norm": 1.8685763281745644, + "learning_rate": 3.185423302669204e-06, + "loss": 0.6082, + "step": 20545 + }, + { + "epoch": 0.6297045482407748, + "grad_norm": 1.7058653054479038, + "learning_rate": 3.184960831671171e-06, + "loss": 0.4668, + "step": 20546 + }, + { + "epoch": 0.629735196763516, + "grad_norm": 1.7875947542725796, + "learning_rate": 3.184498378557184e-06, + "loss": 0.593, + "step": 20547 + }, + { + "epoch": 0.6297658452862572, + "grad_norm": 1.881378653866311, + "learning_rate": 3.1840359433317993e-06, + "loss": 0.7228, + "step": 20548 + }, + { + "epoch": 0.6297964938089984, + "grad_norm": 1.801178029497944, + "learning_rate": 3.1835735259995725e-06, + "loss": 0.602, + "step": 20549 + }, + { + "epoch": 0.6298271423317396, + "grad_norm": 1.868347343136499, + "learning_rate": 3.1831111265650616e-06, + "loss": 0.6731, + "step": 20550 + }, + 
{ + "epoch": 0.6298577908544808, + "grad_norm": 0.7827174154144594, + "learning_rate": 3.1826487450328203e-06, + "loss": 0.431, + "step": 20551 + }, + { + "epoch": 0.629888439377222, + "grad_norm": 1.6960540523379326, + "learning_rate": 3.182186381407407e-06, + "loss": 0.6657, + "step": 20552 + }, + { + "epoch": 0.6299190878999632, + "grad_norm": 1.8209737319765598, + "learning_rate": 3.181724035693378e-06, + "loss": 0.5876, + "step": 20553 + }, + { + "epoch": 0.6299497364227045, + "grad_norm": 1.8294161839086198, + "learning_rate": 3.1812617078952834e-06, + "loss": 0.6667, + "step": 20554 + }, + { + "epoch": 0.6299803849454456, + "grad_norm": 1.9009205347016842, + "learning_rate": 3.1807993980176855e-06, + "loss": 0.6454, + "step": 20555 + }, + { + "epoch": 0.6300110334681869, + "grad_norm": 1.8061743720510557, + "learning_rate": 3.1803371060651343e-06, + "loss": 0.5747, + "step": 20556 + }, + { + "epoch": 0.630041681990928, + "grad_norm": 1.582172718577165, + "learning_rate": 3.1798748320421895e-06, + "loss": 0.5808, + "step": 20557 + }, + { + "epoch": 0.6300723305136693, + "grad_norm": 1.843620824646968, + "learning_rate": 3.1794125759534033e-06, + "loss": 0.5741, + "step": 20558 + }, + { + "epoch": 0.6301029790364104, + "grad_norm": 1.7558867673622744, + "learning_rate": 3.178950337803329e-06, + "loss": 0.6156, + "step": 20559 + }, + { + "epoch": 0.6301336275591517, + "grad_norm": 1.4763604531454075, + "learning_rate": 3.1784881175965248e-06, + "loss": 0.5795, + "step": 20560 + }, + { + "epoch": 0.6301642760818928, + "grad_norm": 1.956979708653327, + "learning_rate": 3.1780259153375426e-06, + "loss": 0.6788, + "step": 20561 + }, + { + "epoch": 0.6301949246046341, + "grad_norm": 1.6646531729223066, + "learning_rate": 3.177563731030937e-06, + "loss": 0.6202, + "step": 20562 + }, + { + "epoch": 0.6302255731273753, + "grad_norm": 1.779383527753664, + "learning_rate": 3.1771015646812625e-06, + "loss": 0.5744, + "step": 20563 + }, + { + "epoch": 0.6302562216501164, + "grad_norm": 1.580131632275643, + "learning_rate": 3.1766394162930734e-06, + "loss": 0.5977, + "step": 20564 + }, + { + "epoch": 0.6302868701728577, + "grad_norm": 1.6584162494130699, + "learning_rate": 3.1761772858709204e-06, + "loss": 0.6188, + "step": 20565 + }, + { + "epoch": 0.6303175186955988, + "grad_norm": 1.9418258011750256, + "learning_rate": 3.1757151734193624e-06, + "loss": 0.655, + "step": 20566 + }, + { + "epoch": 0.6303481672183401, + "grad_norm": 1.8963419212253279, + "learning_rate": 3.175253078942947e-06, + "loss": 0.6015, + "step": 20567 + }, + { + "epoch": 0.6303788157410812, + "grad_norm": 2.1602437910487513, + "learning_rate": 3.174791002446231e-06, + "loss": 0.6215, + "step": 20568 + }, + { + "epoch": 0.6304094642638225, + "grad_norm": 1.6880441293261115, + "learning_rate": 3.1743289439337665e-06, + "loss": 0.5146, + "step": 20569 + }, + { + "epoch": 0.6304401127865636, + "grad_norm": 1.6558278318128472, + "learning_rate": 3.173866903410105e-06, + "loss": 0.5971, + "step": 20570 + }, + { + "epoch": 0.6304707613093049, + "grad_norm": 0.8087431404706698, + "learning_rate": 3.173404880879801e-06, + "loss": 0.4368, + "step": 20571 + }, + { + "epoch": 0.6305014098320461, + "grad_norm": 1.7317480515544545, + "learning_rate": 3.1729428763474057e-06, + "loss": 0.6039, + "step": 20572 + }, + { + "epoch": 0.6305320583547873, + "grad_norm": 1.8064338019759336, + "learning_rate": 3.1724808898174712e-06, + "loss": 0.5586, + "step": 20573 + }, + { + "epoch": 0.6305627068775285, + "grad_norm": 1.867065751662398, + 
"learning_rate": 3.1720189212945513e-06, + "loss": 0.613, + "step": 20574 + }, + { + "epoch": 0.6305933554002697, + "grad_norm": 0.7741085325664374, + "learning_rate": 3.171556970783195e-06, + "loss": 0.43, + "step": 20575 + }, + { + "epoch": 0.6306240039230109, + "grad_norm": 1.9318572465099035, + "learning_rate": 3.1710950382879586e-06, + "loss": 0.6125, + "step": 20576 + }, + { + "epoch": 0.6306546524457521, + "grad_norm": 2.0730497728469066, + "learning_rate": 3.170633123813389e-06, + "loss": 0.5474, + "step": 20577 + }, + { + "epoch": 0.6306853009684933, + "grad_norm": 1.827468457307641, + "learning_rate": 3.1701712273640383e-06, + "loss": 0.6095, + "step": 20578 + }, + { + "epoch": 0.6307159494912346, + "grad_norm": 0.7667473426642679, + "learning_rate": 3.16970934894446e-06, + "loss": 0.4024, + "step": 20579 + }, + { + "epoch": 0.6307465980139757, + "grad_norm": 1.6213528710827545, + "learning_rate": 3.169247488559204e-06, + "loss": 0.5356, + "step": 20580 + }, + { + "epoch": 0.630777246536717, + "grad_norm": 1.8555208996407504, + "learning_rate": 3.168785646212819e-06, + "loss": 0.6919, + "step": 20581 + }, + { + "epoch": 0.6308078950594581, + "grad_norm": 0.8222068065796254, + "learning_rate": 3.1683238219098596e-06, + "loss": 0.4233, + "step": 20582 + }, + { + "epoch": 0.6308385435821994, + "grad_norm": 1.5315070045170853, + "learning_rate": 3.1678620156548744e-06, + "loss": 0.5118, + "step": 20583 + }, + { + "epoch": 0.6308691921049405, + "grad_norm": 1.8615123182478486, + "learning_rate": 3.167400227452411e-06, + "loss": 0.6402, + "step": 20584 + }, + { + "epoch": 0.6308998406276818, + "grad_norm": 2.0494866756675765, + "learning_rate": 3.166938457307025e-06, + "loss": 0.6676, + "step": 20585 + }, + { + "epoch": 0.6309304891504229, + "grad_norm": 2.1002029564933653, + "learning_rate": 3.1664767052232603e-06, + "loss": 0.591, + "step": 20586 + }, + { + "epoch": 0.6309611376731642, + "grad_norm": 1.5041327994469118, + "learning_rate": 3.1660149712056728e-06, + "loss": 0.6404, + "step": 20587 + }, + { + "epoch": 0.6309917861959053, + "grad_norm": 2.0587708563225022, + "learning_rate": 3.1655532552588077e-06, + "loss": 0.6243, + "step": 20588 + }, + { + "epoch": 0.6310224347186466, + "grad_norm": 1.7662165017183171, + "learning_rate": 3.1650915573872154e-06, + "loss": 0.5585, + "step": 20589 + }, + { + "epoch": 0.6310530832413878, + "grad_norm": 1.4973126711300524, + "learning_rate": 3.164629877595446e-06, + "loss": 0.5887, + "step": 20590 + }, + { + "epoch": 0.631083731764129, + "grad_norm": 1.7082285640907753, + "learning_rate": 3.164168215888048e-06, + "loss": 0.6655, + "step": 20591 + }, + { + "epoch": 0.6311143802868702, + "grad_norm": 1.6874912253715182, + "learning_rate": 3.163706572269569e-06, + "loss": 0.6148, + "step": 20592 + }, + { + "epoch": 0.6311450288096114, + "grad_norm": 1.9638621004739583, + "learning_rate": 3.163244946744561e-06, + "loss": 0.5443, + "step": 20593 + }, + { + "epoch": 0.6311756773323526, + "grad_norm": 0.7703000803427188, + "learning_rate": 3.162783339317569e-06, + "loss": 0.4259, + "step": 20594 + }, + { + "epoch": 0.6312063258550937, + "grad_norm": 1.681140422628178, + "learning_rate": 3.1623217499931434e-06, + "loss": 0.5737, + "step": 20595 + }, + { + "epoch": 0.631236974377835, + "grad_norm": 2.046869515834786, + "learning_rate": 3.161860178775833e-06, + "loss": 0.6525, + "step": 20596 + }, + { + "epoch": 0.6312676229005761, + "grad_norm": 1.5897377279239533, + "learning_rate": 3.161398625670182e-06, + "loss": 0.6606, + "step": 20597 + }, + { 
+ "epoch": 0.6312982714233174, + "grad_norm": 1.9451053887809673, + "learning_rate": 3.1609370906807436e-06, + "loss": 0.6237, + "step": 20598 + }, + { + "epoch": 0.6313289199460586, + "grad_norm": 1.530129722489017, + "learning_rate": 3.1604755738120614e-06, + "loss": 0.5867, + "step": 20599 + }, + { + "epoch": 0.6313595684687998, + "grad_norm": 1.8331877045745735, + "learning_rate": 3.1600140750686827e-06, + "loss": 0.6106, + "step": 20600 + }, + { + "epoch": 0.631390216991541, + "grad_norm": 2.216866519762971, + "learning_rate": 3.1595525944551574e-06, + "loss": 0.6911, + "step": 20601 + }, + { + "epoch": 0.6314208655142822, + "grad_norm": 1.7790874022630763, + "learning_rate": 3.159091131976031e-06, + "loss": 0.5726, + "step": 20602 + }, + { + "epoch": 0.6314515140370234, + "grad_norm": 1.876145080165682, + "learning_rate": 3.1586296876358506e-06, + "loss": 0.7303, + "step": 20603 + }, + { + "epoch": 0.6314821625597646, + "grad_norm": 1.6534027226277344, + "learning_rate": 3.1581682614391634e-06, + "loss": 0.4839, + "step": 20604 + }, + { + "epoch": 0.6315128110825058, + "grad_norm": 2.12536289350892, + "learning_rate": 3.157706853390515e-06, + "loss": 0.6279, + "step": 20605 + }, + { + "epoch": 0.631543459605247, + "grad_norm": 1.630770071855235, + "learning_rate": 3.157245463494453e-06, + "loss": 0.5992, + "step": 20606 + }, + { + "epoch": 0.6315741081279882, + "grad_norm": 1.5676693378052788, + "learning_rate": 3.1567840917555237e-06, + "loss": 0.5882, + "step": 20607 + }, + { + "epoch": 0.6316047566507295, + "grad_norm": 1.7569519793180206, + "learning_rate": 3.15632273817827e-06, + "loss": 0.6535, + "step": 20608 + }, + { + "epoch": 0.6316354051734706, + "grad_norm": 1.756850533703347, + "learning_rate": 3.1558614027672417e-06, + "loss": 0.5994, + "step": 20609 + }, + { + "epoch": 0.6316660536962119, + "grad_norm": 1.9394946359765757, + "learning_rate": 3.1554000855269833e-06, + "loss": 0.6319, + "step": 20610 + }, + { + "epoch": 0.631696702218953, + "grad_norm": 1.8386236975274692, + "learning_rate": 3.1549387864620386e-06, + "loss": 0.6354, + "step": 20611 + }, + { + "epoch": 0.6317273507416943, + "grad_norm": 1.5295921269765844, + "learning_rate": 3.154477505576955e-06, + "loss": 0.5849, + "step": 20612 + }, + { + "epoch": 0.6317579992644354, + "grad_norm": 1.6778470506476795, + "learning_rate": 3.1540162428762755e-06, + "loss": 0.6357, + "step": 20613 + }, + { + "epoch": 0.6317886477871767, + "grad_norm": 1.7632759186345714, + "learning_rate": 3.153554998364547e-06, + "loss": 0.5655, + "step": 20614 + }, + { + "epoch": 0.6318192963099178, + "grad_norm": 0.8188165454928438, + "learning_rate": 3.153093772046314e-06, + "loss": 0.4111, + "step": 20615 + }, + { + "epoch": 0.6318499448326591, + "grad_norm": 1.6837122106806413, + "learning_rate": 3.152632563926119e-06, + "loss": 0.5584, + "step": 20616 + }, + { + "epoch": 0.6318805933554003, + "grad_norm": 1.5986635299457, + "learning_rate": 3.1521713740085103e-06, + "loss": 0.5812, + "step": 20617 + }, + { + "epoch": 0.6319112418781415, + "grad_norm": 1.6471723104657379, + "learning_rate": 3.1517102022980296e-06, + "loss": 0.5986, + "step": 20618 + }, + { + "epoch": 0.6319418904008827, + "grad_norm": 1.9774148434510037, + "learning_rate": 3.1512490487992197e-06, + "loss": 0.6772, + "step": 20619 + }, + { + "epoch": 0.6319725389236239, + "grad_norm": 1.5982896377206557, + "learning_rate": 3.150787913516627e-06, + "loss": 0.5555, + "step": 20620 + }, + { + "epoch": 0.6320031874463651, + "grad_norm": 1.6664134737525644, + 
"learning_rate": 3.1503267964547927e-06, + "loss": 0.5339, + "step": 20621 + }, + { + "epoch": 0.6320338359691063, + "grad_norm": 1.6941792531551343, + "learning_rate": 3.149865697618263e-06, + "loss": 0.5764, + "step": 20622 + }, + { + "epoch": 0.6320644844918475, + "grad_norm": 1.790384044287892, + "learning_rate": 3.1494046170115798e-06, + "loss": 0.6632, + "step": 20623 + }, + { + "epoch": 0.6320951330145888, + "grad_norm": 1.9339876163400387, + "learning_rate": 3.148943554639286e-06, + "loss": 0.5556, + "step": 20624 + }, + { + "epoch": 0.6321257815373299, + "grad_norm": 1.9379931227921448, + "learning_rate": 3.148482510505926e-06, + "loss": 0.6388, + "step": 20625 + }, + { + "epoch": 0.632156430060071, + "grad_norm": 1.8772127855070257, + "learning_rate": 3.1480214846160405e-06, + "loss": 0.6077, + "step": 20626 + }, + { + "epoch": 0.6321870785828123, + "grad_norm": 0.7572178440886999, + "learning_rate": 3.147560476974173e-06, + "loss": 0.4057, + "step": 20627 + }, + { + "epoch": 0.6322177271055535, + "grad_norm": 1.703632634075055, + "learning_rate": 3.147099487584868e-06, + "loss": 0.6379, + "step": 20628 + }, + { + "epoch": 0.6322483756282947, + "grad_norm": 1.8470388947696161, + "learning_rate": 3.1466385164526625e-06, + "loss": 0.6445, + "step": 20629 + }, + { + "epoch": 0.6322790241510359, + "grad_norm": 1.8465695549401877, + "learning_rate": 3.1461775635821053e-06, + "loss": 0.5803, + "step": 20630 + }, + { + "epoch": 0.6323096726737771, + "grad_norm": 1.937339525887373, + "learning_rate": 3.1457166289777335e-06, + "loss": 0.6823, + "step": 20631 + }, + { + "epoch": 0.6323403211965183, + "grad_norm": 1.7028379008634038, + "learning_rate": 3.14525571264409e-06, + "loss": 0.5574, + "step": 20632 + }, + { + "epoch": 0.6323709697192595, + "grad_norm": 1.9807999561834733, + "learning_rate": 3.1447948145857165e-06, + "loss": 0.712, + "step": 20633 + }, + { + "epoch": 0.6324016182420007, + "grad_norm": 0.81856154631167, + "learning_rate": 3.1443339348071544e-06, + "loss": 0.4258, + "step": 20634 + }, + { + "epoch": 0.632432266764742, + "grad_norm": 1.7948460803441175, + "learning_rate": 3.1438730733129445e-06, + "loss": 0.606, + "step": 20635 + }, + { + "epoch": 0.6324629152874831, + "grad_norm": 1.9084181791448542, + "learning_rate": 3.1434122301076286e-06, + "loss": 0.6423, + "step": 20636 + }, + { + "epoch": 0.6324935638102244, + "grad_norm": 0.8170446817760217, + "learning_rate": 3.142951405195749e-06, + "loss": 0.4158, + "step": 20637 + }, + { + "epoch": 0.6325242123329655, + "grad_norm": 1.542378784662037, + "learning_rate": 3.14249059858184e-06, + "loss": 0.6526, + "step": 20638 + }, + { + "epoch": 0.6325548608557068, + "grad_norm": 1.4663188862908076, + "learning_rate": 3.1420298102704505e-06, + "loss": 0.6211, + "step": 20639 + }, + { + "epoch": 0.6325855093784479, + "grad_norm": 1.8705255262160958, + "learning_rate": 3.1415690402661124e-06, + "loss": 0.6272, + "step": 20640 + }, + { + "epoch": 0.6326161579011892, + "grad_norm": 1.7091958030761236, + "learning_rate": 3.1411082885733747e-06, + "loss": 0.5832, + "step": 20641 + }, + { + "epoch": 0.6326468064239303, + "grad_norm": 1.540790482789843, + "learning_rate": 3.1406475551967703e-06, + "loss": 0.5966, + "step": 20642 + }, + { + "epoch": 0.6326774549466716, + "grad_norm": 1.704117779905222, + "learning_rate": 3.140186840140841e-06, + "loss": 0.652, + "step": 20643 + }, + { + "epoch": 0.6327081034694128, + "grad_norm": 1.6621512693707894, + "learning_rate": 3.139726143410127e-06, + "loss": 0.5557, + "step": 20644 + }, + { 
+ "epoch": 0.632738751992154, + "grad_norm": 1.7688976151503824, + "learning_rate": 3.139265465009168e-06, + "loss": 0.5231, + "step": 20645 + }, + { + "epoch": 0.6327694005148952, + "grad_norm": 1.8595458453847666, + "learning_rate": 3.1388048049425007e-06, + "loss": 0.6687, + "step": 20646 + }, + { + "epoch": 0.6328000490376364, + "grad_norm": 1.6697849703705367, + "learning_rate": 3.1383441632146673e-06, + "loss": 0.5878, + "step": 20647 + }, + { + "epoch": 0.6328306975603776, + "grad_norm": 1.8111921188063522, + "learning_rate": 3.1378835398302043e-06, + "loss": 0.5992, + "step": 20648 + }, + { + "epoch": 0.6328613460831188, + "grad_norm": 1.9836239718026927, + "learning_rate": 3.137422934793652e-06, + "loss": 0.5876, + "step": 20649 + }, + { + "epoch": 0.63289199460586, + "grad_norm": 1.7511467841299693, + "learning_rate": 3.136962348109549e-06, + "loss": 0.5737, + "step": 20650 + }, + { + "epoch": 0.6329226431286012, + "grad_norm": 1.7257239336226786, + "learning_rate": 3.13650177978243e-06, + "loss": 0.5994, + "step": 20651 + }, + { + "epoch": 0.6329532916513424, + "grad_norm": 0.7676942231849078, + "learning_rate": 3.1360412298168384e-06, + "loss": 0.4153, + "step": 20652 + }, + { + "epoch": 0.6329839401740837, + "grad_norm": 1.785741364067304, + "learning_rate": 3.1355806982173086e-06, + "loss": 0.5917, + "step": 20653 + }, + { + "epoch": 0.6330145886968248, + "grad_norm": 1.5664620779334737, + "learning_rate": 3.1351201849883784e-06, + "loss": 0.5447, + "step": 20654 + }, + { + "epoch": 0.6330452372195661, + "grad_norm": 1.9927579832337827, + "learning_rate": 3.134659690134587e-06, + "loss": 0.529, + "step": 20655 + }, + { + "epoch": 0.6330758857423072, + "grad_norm": 1.4434713403960462, + "learning_rate": 3.1341992136604706e-06, + "loss": 0.6052, + "step": 20656 + }, + { + "epoch": 0.6331065342650484, + "grad_norm": 1.792927197868223, + "learning_rate": 3.1337387555705667e-06, + "loss": 0.6195, + "step": 20657 + }, + { + "epoch": 0.6331371827877896, + "grad_norm": 1.7096608060098095, + "learning_rate": 3.1332783158694123e-06, + "loss": 0.5858, + "step": 20658 + }, + { + "epoch": 0.6331678313105308, + "grad_norm": 0.7789655629882515, + "learning_rate": 3.132817894561544e-06, + "loss": 0.3992, + "step": 20659 + }, + { + "epoch": 0.633198479833272, + "grad_norm": 1.627742769662724, + "learning_rate": 3.1323574916515e-06, + "loss": 0.5448, + "step": 20660 + }, + { + "epoch": 0.6332291283560132, + "grad_norm": 1.9604169610543662, + "learning_rate": 3.1318971071438154e-06, + "loss": 0.6273, + "step": 20661 + }, + { + "epoch": 0.6332597768787545, + "grad_norm": 1.6850477708633635, + "learning_rate": 3.131436741043024e-06, + "loss": 0.6057, + "step": 20662 + }, + { + "epoch": 0.6332904254014956, + "grad_norm": 1.7508374088878074, + "learning_rate": 3.130976393353668e-06, + "loss": 0.5696, + "step": 20663 + }, + { + "epoch": 0.6333210739242369, + "grad_norm": 1.6820357576574265, + "learning_rate": 3.1305160640802786e-06, + "loss": 0.5659, + "step": 20664 + }, + { + "epoch": 0.633351722446978, + "grad_norm": 0.8005494773952327, + "learning_rate": 3.1300557532273913e-06, + "loss": 0.4154, + "step": 20665 + }, + { + "epoch": 0.6333823709697193, + "grad_norm": 2.121048267443057, + "learning_rate": 3.129595460799544e-06, + "loss": 0.6209, + "step": 20666 + }, + { + "epoch": 0.6334130194924604, + "grad_norm": 1.6316664668968632, + "learning_rate": 3.129135186801271e-06, + "loss": 0.6373, + "step": 20667 + }, + { + "epoch": 0.6334436680152017, + "grad_norm": 0.7860853090688118, + 
"learning_rate": 3.128674931237108e-06, + "loss": 0.4027, + "step": 20668 + }, + { + "epoch": 0.6334743165379428, + "grad_norm": 0.783565625571643, + "learning_rate": 3.12821469411159e-06, + "loss": 0.4269, + "step": 20669 + }, + { + "epoch": 0.6335049650606841, + "grad_norm": 1.6944847869223791, + "learning_rate": 3.1277544754292505e-06, + "loss": 0.6434, + "step": 20670 + }, + { + "epoch": 0.6335356135834253, + "grad_norm": 1.9638019898648351, + "learning_rate": 3.127294275194627e-06, + "loss": 0.6575, + "step": 20671 + }, + { + "epoch": 0.6335662621061665, + "grad_norm": 1.764127111351941, + "learning_rate": 3.1268340934122515e-06, + "loss": 0.6211, + "step": 20672 + }, + { + "epoch": 0.6335969106289077, + "grad_norm": 1.9439015957825199, + "learning_rate": 3.126373930086658e-06, + "loss": 0.7041, + "step": 20673 + }, + { + "epoch": 0.6336275591516489, + "grad_norm": 1.5400391520516552, + "learning_rate": 3.125913785222382e-06, + "loss": 0.5409, + "step": 20674 + }, + { + "epoch": 0.6336582076743901, + "grad_norm": 1.869693419567552, + "learning_rate": 3.1254536588239566e-06, + "loss": 0.5724, + "step": 20675 + }, + { + "epoch": 0.6336888561971313, + "grad_norm": 1.7124002567647634, + "learning_rate": 3.1249935508959163e-06, + "loss": 0.6345, + "step": 20676 + }, + { + "epoch": 0.6337195047198725, + "grad_norm": 1.76919890963178, + "learning_rate": 3.1245334614427946e-06, + "loss": 0.5624, + "step": 20677 + }, + { + "epoch": 0.6337501532426137, + "grad_norm": 1.6288898076252858, + "learning_rate": 3.1240733904691234e-06, + "loss": 0.5905, + "step": 20678 + }, + { + "epoch": 0.6337808017653549, + "grad_norm": 1.6719277939478288, + "learning_rate": 3.1236133379794386e-06, + "loss": 0.6118, + "step": 20679 + }, + { + "epoch": 0.6338114502880962, + "grad_norm": 1.784195875608321, + "learning_rate": 3.1231533039782724e-06, + "loss": 0.5937, + "step": 20680 + }, + { + "epoch": 0.6338420988108373, + "grad_norm": 1.9387210325390238, + "learning_rate": 3.1226932884701537e-06, + "loss": 0.6575, + "step": 20681 + }, + { + "epoch": 0.6338727473335786, + "grad_norm": 1.7842824923323402, + "learning_rate": 3.1222332914596214e-06, + "loss": 0.6379, + "step": 20682 + }, + { + "epoch": 0.6339033958563197, + "grad_norm": 1.6486796625906053, + "learning_rate": 3.121773312951202e-06, + "loss": 0.6144, + "step": 20683 + }, + { + "epoch": 0.633934044379061, + "grad_norm": 1.6961487813083702, + "learning_rate": 3.121313352949434e-06, + "loss": 0.6761, + "step": 20684 + }, + { + "epoch": 0.6339646929018021, + "grad_norm": 1.6994503236953893, + "learning_rate": 3.120853411458845e-06, + "loss": 0.4963, + "step": 20685 + }, + { + "epoch": 0.6339953414245434, + "grad_norm": 1.6317287324016387, + "learning_rate": 3.120393488483967e-06, + "loss": 0.564, + "step": 20686 + }, + { + "epoch": 0.6340259899472845, + "grad_norm": 0.8342582578644038, + "learning_rate": 3.119933584029334e-06, + "loss": 0.4299, + "step": 20687 + }, + { + "epoch": 0.6340566384700257, + "grad_norm": 1.5945745698619094, + "learning_rate": 3.1194736980994764e-06, + "loss": 0.5525, + "step": 20688 + }, + { + "epoch": 0.634087286992767, + "grad_norm": 1.6316919949621114, + "learning_rate": 3.1190138306989247e-06, + "loss": 0.5966, + "step": 20689 + }, + { + "epoch": 0.6341179355155081, + "grad_norm": 1.9843328154729234, + "learning_rate": 3.118553981832212e-06, + "loss": 0.6246, + "step": 20690 + }, + { + "epoch": 0.6341485840382494, + "grad_norm": 1.7264863304432023, + "learning_rate": 3.118094151503869e-06, + "loss": 0.6204, + "step": 20691 + }, 
+ { + "epoch": 0.6341792325609905, + "grad_norm": 1.87262400540405, + "learning_rate": 3.1176343397184226e-06, + "loss": 0.652, + "step": 20692 + }, + { + "epoch": 0.6342098810837318, + "grad_norm": 1.8246652633270235, + "learning_rate": 3.1171745464804106e-06, + "loss": 0.5507, + "step": 20693 + }, + { + "epoch": 0.6342405296064729, + "grad_norm": 1.7981692481134053, + "learning_rate": 3.1167147717943556e-06, + "loss": 0.5903, + "step": 20694 + }, + { + "epoch": 0.6342711781292142, + "grad_norm": 1.8515550743151101, + "learning_rate": 3.116255015664795e-06, + "loss": 0.6251, + "step": 20695 + }, + { + "epoch": 0.6343018266519553, + "grad_norm": 1.7470580730824272, + "learning_rate": 3.1157952780962555e-06, + "loss": 0.6048, + "step": 20696 + }, + { + "epoch": 0.6343324751746966, + "grad_norm": 1.8126182300481366, + "learning_rate": 3.1153355590932655e-06, + "loss": 0.6087, + "step": 20697 + }, + { + "epoch": 0.6343631236974377, + "grad_norm": 1.7193292729844105, + "learning_rate": 3.114875858660358e-06, + "loss": 0.5922, + "step": 20698 + }, + { + "epoch": 0.634393772220179, + "grad_norm": 1.8383755260953547, + "learning_rate": 3.11441617680206e-06, + "loss": 0.7079, + "step": 20699 + }, + { + "epoch": 0.6344244207429202, + "grad_norm": 1.585724875955732, + "learning_rate": 3.113956513522902e-06, + "loss": 0.5635, + "step": 20700 + }, + { + "epoch": 0.6344550692656614, + "grad_norm": 0.7649192265579107, + "learning_rate": 3.1134968688274135e-06, + "loss": 0.4186, + "step": 20701 + }, + { + "epoch": 0.6344857177884026, + "grad_norm": 1.7539112272766955, + "learning_rate": 3.1130372427201215e-06, + "loss": 0.5538, + "step": 20702 + }, + { + "epoch": 0.6345163663111438, + "grad_norm": 1.9766151059212718, + "learning_rate": 3.1125776352055583e-06, + "loss": 0.6451, + "step": 20703 + }, + { + "epoch": 0.634547014833885, + "grad_norm": 0.8349811476476676, + "learning_rate": 3.112118046288251e-06, + "loss": 0.4445, + "step": 20704 + }, + { + "epoch": 0.6345776633566262, + "grad_norm": 1.685995451779664, + "learning_rate": 3.1116584759727255e-06, + "loss": 0.6308, + "step": 20705 + }, + { + "epoch": 0.6346083118793674, + "grad_norm": 0.7845147499677652, + "learning_rate": 3.1111989242635144e-06, + "loss": 0.4286, + "step": 20706 + }, + { + "epoch": 0.6346389604021087, + "grad_norm": 1.8753007470413738, + "learning_rate": 3.110739391165142e-06, + "loss": 0.5859, + "step": 20707 + }, + { + "epoch": 0.6346696089248498, + "grad_norm": 1.8892174419275034, + "learning_rate": 3.110279876682137e-06, + "loss": 0.6037, + "step": 20708 + }, + { + "epoch": 0.6347002574475911, + "grad_norm": 1.9597293029049454, + "learning_rate": 3.1098203808190297e-06, + "loss": 0.6857, + "step": 20709 + }, + { + "epoch": 0.6347309059703322, + "grad_norm": 1.748061334780207, + "learning_rate": 3.1093609035803446e-06, + "loss": 0.6302, + "step": 20710 + }, + { + "epoch": 0.6347615544930735, + "grad_norm": 1.7251760077981622, + "learning_rate": 3.1089014449706114e-06, + "loss": 0.5281, + "step": 20711 + }, + { + "epoch": 0.6347922030158146, + "grad_norm": 1.8622796236793049, + "learning_rate": 3.1084420049943553e-06, + "loss": 0.5936, + "step": 20712 + }, + { + "epoch": 0.6348228515385559, + "grad_norm": 0.8254477866063524, + "learning_rate": 3.107982583656104e-06, + "loss": 0.456, + "step": 20713 + }, + { + "epoch": 0.634853500061297, + "grad_norm": 1.7493987351132745, + "learning_rate": 3.1075231809603847e-06, + "loss": 0.646, + "step": 20714 + }, + { + "epoch": 0.6348841485840383, + "grad_norm": 1.7347529176181884, + 
"learning_rate": 3.107063796911725e-06, + "loss": 0.5633, + "step": 20715 + }, + { + "epoch": 0.6349147971067794, + "grad_norm": 1.5588999394506122, + "learning_rate": 3.106604431514648e-06, + "loss": 0.5298, + "step": 20716 + }, + { + "epoch": 0.6349454456295207, + "grad_norm": 2.1070557937290464, + "learning_rate": 3.106145084773683e-06, + "loss": 0.7073, + "step": 20717 + }, + { + "epoch": 0.6349760941522619, + "grad_norm": 1.4559751378403538, + "learning_rate": 3.105685756693355e-06, + "loss": 0.6169, + "step": 20718 + }, + { + "epoch": 0.635006742675003, + "grad_norm": 1.7545055464141541, + "learning_rate": 3.105226447278189e-06, + "loss": 0.6001, + "step": 20719 + }, + { + "epoch": 0.6350373911977443, + "grad_norm": 2.010896640535331, + "learning_rate": 3.104767156532713e-06, + "loss": 0.6092, + "step": 20720 + }, + { + "epoch": 0.6350680397204854, + "grad_norm": 1.8211945212488774, + "learning_rate": 3.1043078844614495e-06, + "loss": 0.5776, + "step": 20721 + }, + { + "epoch": 0.6350986882432267, + "grad_norm": 1.6801811921834031, + "learning_rate": 3.1038486310689266e-06, + "loss": 0.6434, + "step": 20722 + }, + { + "epoch": 0.6351293367659678, + "grad_norm": 1.9084939570896358, + "learning_rate": 3.1033893963596695e-06, + "loss": 0.62, + "step": 20723 + }, + { + "epoch": 0.6351599852887091, + "grad_norm": 1.678188712909797, + "learning_rate": 3.1029301803381984e-06, + "loss": 0.5414, + "step": 20724 + }, + { + "epoch": 0.6351906338114502, + "grad_norm": 1.7202728135473977, + "learning_rate": 3.1024709830090453e-06, + "loss": 0.6325, + "step": 20725 + }, + { + "epoch": 0.6352212823341915, + "grad_norm": 1.5128955332200558, + "learning_rate": 3.10201180437673e-06, + "loss": 0.5299, + "step": 20726 + }, + { + "epoch": 0.6352519308569327, + "grad_norm": 1.7823915709131202, + "learning_rate": 3.1015526444457767e-06, + "loss": 0.594, + "step": 20727 + }, + { + "epoch": 0.6352825793796739, + "grad_norm": 1.7995719105405628, + "learning_rate": 3.1010935032207123e-06, + "loss": 0.6309, + "step": 20728 + }, + { + "epoch": 0.6353132279024151, + "grad_norm": 1.6143757755593895, + "learning_rate": 3.100634380706058e-06, + "loss": 0.6208, + "step": 20729 + }, + { + "epoch": 0.6353438764251563, + "grad_norm": 1.8525868785719348, + "learning_rate": 3.10017527690634e-06, + "loss": 0.6816, + "step": 20730 + }, + { + "epoch": 0.6353745249478975, + "grad_norm": 1.7924813190803395, + "learning_rate": 3.099716191826082e-06, + "loss": 0.6229, + "step": 20731 + }, + { + "epoch": 0.6354051734706387, + "grad_norm": 1.5411852721801524, + "learning_rate": 3.099257125469805e-06, + "loss": 0.513, + "step": 20732 + }, + { + "epoch": 0.6354358219933799, + "grad_norm": 1.810903732085817, + "learning_rate": 3.0987980778420346e-06, + "loss": 0.6326, + "step": 20733 + }, + { + "epoch": 0.6354664705161212, + "grad_norm": 1.7219994353601173, + "learning_rate": 3.0983390489472948e-06, + "loss": 0.5279, + "step": 20734 + }, + { + "epoch": 0.6354971190388623, + "grad_norm": 1.8075873216516853, + "learning_rate": 3.0978800387901033e-06, + "loss": 0.564, + "step": 20735 + }, + { + "epoch": 0.6355277675616036, + "grad_norm": 1.625941775909582, + "learning_rate": 3.09742104737499e-06, + "loss": 0.6157, + "step": 20736 + }, + { + "epoch": 0.6355584160843447, + "grad_norm": 1.918405518408745, + "learning_rate": 3.0969620747064704e-06, + "loss": 0.6774, + "step": 20737 + }, + { + "epoch": 0.635589064607086, + "grad_norm": 1.8661379868164054, + "learning_rate": 3.096503120789074e-06, + "loss": 0.6543, + "step": 20738 + }, + { + 
"epoch": 0.6356197131298271, + "grad_norm": 1.8516986269632212, + "learning_rate": 3.0960441856273183e-06, + "loss": 0.5529, + "step": 20739 + }, + { + "epoch": 0.6356503616525684, + "grad_norm": 1.7677891394658867, + "learning_rate": 3.095585269225725e-06, + "loss": 0.6468, + "step": 20740 + }, + { + "epoch": 0.6356810101753095, + "grad_norm": 1.7048311899874087, + "learning_rate": 3.095126371588818e-06, + "loss": 0.6026, + "step": 20741 + }, + { + "epoch": 0.6357116586980508, + "grad_norm": 0.805821047706876, + "learning_rate": 3.0946674927211186e-06, + "loss": 0.4158, + "step": 20742 + }, + { + "epoch": 0.635742307220792, + "grad_norm": 1.5526840647372389, + "learning_rate": 3.094208632627147e-06, + "loss": 0.584, + "step": 20743 + }, + { + "epoch": 0.6357729557435332, + "grad_norm": 1.7209800510983295, + "learning_rate": 3.093749791311427e-06, + "loss": 0.6148, + "step": 20744 + }, + { + "epoch": 0.6358036042662744, + "grad_norm": 1.6227862533295507, + "learning_rate": 3.093290968778478e-06, + "loss": 0.5791, + "step": 20745 + }, + { + "epoch": 0.6358342527890156, + "grad_norm": 1.6800777326369807, + "learning_rate": 3.0928321650328187e-06, + "loss": 0.6031, + "step": 20746 + }, + { + "epoch": 0.6358649013117568, + "grad_norm": 0.8041956756685935, + "learning_rate": 3.092373380078974e-06, + "loss": 0.4025, + "step": 20747 + }, + { + "epoch": 0.635895549834498, + "grad_norm": 1.633522288838291, + "learning_rate": 3.091914613921461e-06, + "loss": 0.5498, + "step": 20748 + }, + { + "epoch": 0.6359261983572392, + "grad_norm": 1.945194600379558, + "learning_rate": 3.091455866564803e-06, + "loss": 0.6248, + "step": 20749 + }, + { + "epoch": 0.6359568468799803, + "grad_norm": 1.5826273406896285, + "learning_rate": 3.0909971380135184e-06, + "loss": 0.6764, + "step": 20750 + }, + { + "epoch": 0.6359874954027216, + "grad_norm": 1.8995301778292903, + "learning_rate": 3.0905384282721262e-06, + "loss": 0.5867, + "step": 20751 + }, + { + "epoch": 0.6360181439254627, + "grad_norm": 1.6237423635985324, + "learning_rate": 3.0900797373451485e-06, + "loss": 0.5721, + "step": 20752 + }, + { + "epoch": 0.636048792448204, + "grad_norm": 1.9006585319336424, + "learning_rate": 3.089621065237104e-06, + "loss": 0.6314, + "step": 20753 + }, + { + "epoch": 0.6360794409709452, + "grad_norm": 1.7085909100560808, + "learning_rate": 3.089162411952511e-06, + "loss": 0.6339, + "step": 20754 + }, + { + "epoch": 0.6361100894936864, + "grad_norm": 1.9079465449074857, + "learning_rate": 3.0887037774958906e-06, + "loss": 0.6506, + "step": 20755 + }, + { + "epoch": 0.6361407380164276, + "grad_norm": 1.608747392705523, + "learning_rate": 3.088245161871759e-06, + "loss": 0.5973, + "step": 20756 + }, + { + "epoch": 0.6361713865391688, + "grad_norm": 1.7511192536432985, + "learning_rate": 3.0877865650846395e-06, + "loss": 0.6483, + "step": 20757 + }, + { + "epoch": 0.63620203506191, + "grad_norm": 1.6679070793796198, + "learning_rate": 3.087327987139048e-06, + "loss": 0.5989, + "step": 20758 + }, + { + "epoch": 0.6362326835846512, + "grad_norm": 0.9018726766502613, + "learning_rate": 3.0868694280395006e-06, + "loss": 0.4217, + "step": 20759 + }, + { + "epoch": 0.6362633321073924, + "grad_norm": 1.6853655902241722, + "learning_rate": 3.086410887790522e-06, + "loss": 0.5818, + "step": 20760 + }, + { + "epoch": 0.6362939806301336, + "grad_norm": 1.7330579107832071, + "learning_rate": 3.0859523663966244e-06, + "loss": 0.5794, + "step": 20761 + }, + { + "epoch": 0.6363246291528748, + "grad_norm": 1.5137190501381517, + 
"learning_rate": 3.0854938638623276e-06, + "loss": 0.5135, + "step": 20762 + }, + { + "epoch": 0.6363552776756161, + "grad_norm": 1.6309018880138884, + "learning_rate": 3.08503538019215e-06, + "loss": 0.5969, + "step": 20763 + }, + { + "epoch": 0.6363859261983572, + "grad_norm": 0.7762416183018767, + "learning_rate": 3.084576915390609e-06, + "loss": 0.3814, + "step": 20764 + }, + { + "epoch": 0.6364165747210985, + "grad_norm": 1.5975441405017001, + "learning_rate": 3.084118469462221e-06, + "loss": 0.5701, + "step": 20765 + }, + { + "epoch": 0.6364472232438396, + "grad_norm": 1.9710726948535584, + "learning_rate": 3.083660042411505e-06, + "loss": 0.611, + "step": 20766 + }, + { + "epoch": 0.6364778717665809, + "grad_norm": 1.6819686964749772, + "learning_rate": 3.0832016342429754e-06, + "loss": 0.5844, + "step": 20767 + }, + { + "epoch": 0.636508520289322, + "grad_norm": 1.9542812015225854, + "learning_rate": 3.0827432449611527e-06, + "loss": 0.6829, + "step": 20768 + }, + { + "epoch": 0.6365391688120633, + "grad_norm": 1.6637659494359303, + "learning_rate": 3.082284874570551e-06, + "loss": 0.5898, + "step": 20769 + }, + { + "epoch": 0.6365698173348044, + "grad_norm": 2.024374164530236, + "learning_rate": 3.081826523075685e-06, + "loss": 0.5967, + "step": 20770 + }, + { + "epoch": 0.6366004658575457, + "grad_norm": 1.7248286120876788, + "learning_rate": 3.0813681904810748e-06, + "loss": 0.5899, + "step": 20771 + }, + { + "epoch": 0.6366311143802869, + "grad_norm": 1.6568043729092918, + "learning_rate": 3.080909876791235e-06, + "loss": 0.5613, + "step": 20772 + }, + { + "epoch": 0.6366617629030281, + "grad_norm": 1.598670166236102, + "learning_rate": 3.0804515820106797e-06, + "loss": 0.5506, + "step": 20773 + }, + { + "epoch": 0.6366924114257693, + "grad_norm": 1.6298587287012913, + "learning_rate": 3.079993306143927e-06, + "loss": 0.5517, + "step": 20774 + }, + { + "epoch": 0.6367230599485105, + "grad_norm": 1.940021306158929, + "learning_rate": 3.0795350491954904e-06, + "loss": 0.6429, + "step": 20775 + }, + { + "epoch": 0.6367537084712517, + "grad_norm": 1.7102904570554656, + "learning_rate": 3.079076811169888e-06, + "loss": 0.581, + "step": 20776 + }, + { + "epoch": 0.6367843569939929, + "grad_norm": 0.8256550794199924, + "learning_rate": 3.0786185920716337e-06, + "loss": 0.4028, + "step": 20777 + }, + { + "epoch": 0.6368150055167341, + "grad_norm": 1.9625729542955992, + "learning_rate": 3.0781603919052394e-06, + "loss": 0.6392, + "step": 20778 + }, + { + "epoch": 0.6368456540394754, + "grad_norm": 2.18449653889642, + "learning_rate": 3.0777022106752254e-06, + "loss": 0.627, + "step": 20779 + }, + { + "epoch": 0.6368763025622165, + "grad_norm": 1.954177229188417, + "learning_rate": 3.0772440483861022e-06, + "loss": 0.6413, + "step": 20780 + }, + { + "epoch": 0.6369069510849577, + "grad_norm": 1.8953200469717564, + "learning_rate": 3.076785905042384e-06, + "loss": 0.603, + "step": 20781 + }, + { + "epoch": 0.6369375996076989, + "grad_norm": 1.9052628866777463, + "learning_rate": 3.076327780648588e-06, + "loss": 0.6535, + "step": 20782 + }, + { + "epoch": 0.6369682481304401, + "grad_norm": 1.8254420840343883, + "learning_rate": 3.0758696752092255e-06, + "loss": 0.5654, + "step": 20783 + }, + { + "epoch": 0.6369988966531813, + "grad_norm": 1.8849125165137934, + "learning_rate": 3.0754115887288116e-06, + "loss": 0.6154, + "step": 20784 + }, + { + "epoch": 0.6370295451759225, + "grad_norm": 1.8373616220283386, + "learning_rate": 3.0749535212118608e-06, + "loss": 0.4966, + "step": 20785 + }, + 
{ + "epoch": 0.6370601936986637, + "grad_norm": 0.7789431234287083, + "learning_rate": 3.074495472662884e-06, + "loss": 0.4258, + "step": 20786 + }, + { + "epoch": 0.6370908422214049, + "grad_norm": 1.712499141347845, + "learning_rate": 3.0740374430863966e-06, + "loss": 0.624, + "step": 20787 + }, + { + "epoch": 0.6371214907441461, + "grad_norm": 1.697145762909831, + "learning_rate": 3.0735794324869127e-06, + "loss": 0.6045, + "step": 20788 + }, + { + "epoch": 0.6371521392668873, + "grad_norm": 1.7751652211420068, + "learning_rate": 3.0731214408689403e-06, + "loss": 0.6006, + "step": 20789 + }, + { + "epoch": 0.6371827877896286, + "grad_norm": 1.7818007102097972, + "learning_rate": 3.0726634682369984e-06, + "loss": 0.4706, + "step": 20790 + }, + { + "epoch": 0.6372134363123697, + "grad_norm": 2.092155918159525, + "learning_rate": 3.0722055145955953e-06, + "loss": 0.615, + "step": 20791 + }, + { + "epoch": 0.637244084835111, + "grad_norm": 1.8446557955383678, + "learning_rate": 3.0717475799492436e-06, + "loss": 0.6708, + "step": 20792 + }, + { + "epoch": 0.6372747333578521, + "grad_norm": 1.8346214689158706, + "learning_rate": 3.0712896643024576e-06, + "loss": 0.6449, + "step": 20793 + }, + { + "epoch": 0.6373053818805934, + "grad_norm": 0.7786941214282904, + "learning_rate": 3.0708317676597464e-06, + "loss": 0.4118, + "step": 20794 + }, + { + "epoch": 0.6373360304033345, + "grad_norm": 1.700238370634972, + "learning_rate": 3.070373890025625e-06, + "loss": 0.5595, + "step": 20795 + }, + { + "epoch": 0.6373666789260758, + "grad_norm": 0.7759493659696406, + "learning_rate": 3.0699160314046033e-06, + "loss": 0.4277, + "step": 20796 + }, + { + "epoch": 0.6373973274488169, + "grad_norm": 0.7932835121403086, + "learning_rate": 3.0694581918011916e-06, + "loss": 0.405, + "step": 20797 + }, + { + "epoch": 0.6374279759715582, + "grad_norm": 1.5189108988748656, + "learning_rate": 3.0690003712199037e-06, + "loss": 0.5082, + "step": 20798 + }, + { + "epoch": 0.6374586244942994, + "grad_norm": 1.6864294167771214, + "learning_rate": 3.068542569665249e-06, + "loss": 0.5178, + "step": 20799 + }, + { + "epoch": 0.6374892730170406, + "grad_norm": 1.7815893354722427, + "learning_rate": 3.0680847871417373e-06, + "loss": 0.7169, + "step": 20800 + }, + { + "epoch": 0.6375199215397818, + "grad_norm": 0.7896652928234837, + "learning_rate": 3.0676270236538823e-06, + "loss": 0.4017, + "step": 20801 + }, + { + "epoch": 0.637550570062523, + "grad_norm": 1.9293322428764794, + "learning_rate": 3.0671692792061904e-06, + "loss": 0.6043, + "step": 20802 + }, + { + "epoch": 0.6375812185852642, + "grad_norm": 1.765895216102873, + "learning_rate": 3.0667115538031766e-06, + "loss": 0.6598, + "step": 20803 + }, + { + "epoch": 0.6376118671080054, + "grad_norm": 1.8536283606148627, + "learning_rate": 3.0662538474493473e-06, + "loss": 0.5379, + "step": 20804 + }, + { + "epoch": 0.6376425156307466, + "grad_norm": 1.7521735712332338, + "learning_rate": 3.0657961601492133e-06, + "loss": 0.5772, + "step": 20805 + }, + { + "epoch": 0.6376731641534878, + "grad_norm": 1.8172627705706177, + "learning_rate": 3.065338491907285e-06, + "loss": 0.5809, + "step": 20806 + }, + { + "epoch": 0.637703812676229, + "grad_norm": 1.6873373955823663, + "learning_rate": 3.0648808427280717e-06, + "loss": 0.5142, + "step": 20807 + }, + { + "epoch": 0.6377344611989703, + "grad_norm": 1.9342826787258904, + "learning_rate": 3.0644232126160815e-06, + "loss": 0.6187, + "step": 20808 + }, + { + "epoch": 0.6377651097217114, + "grad_norm": 1.8912942595751077, + 
"learning_rate": 3.0639656015758256e-06, + "loss": 0.606, + "step": 20809 + }, + { + "epoch": 0.6377957582444527, + "grad_norm": 1.679694424267562, + "learning_rate": 3.063508009611812e-06, + "loss": 0.5798, + "step": 20810 + }, + { + "epoch": 0.6378264067671938, + "grad_norm": 1.7033374097883043, + "learning_rate": 3.0630504367285493e-06, + "loss": 0.6319, + "step": 20811 + }, + { + "epoch": 0.637857055289935, + "grad_norm": 1.8485960313811587, + "learning_rate": 3.0625928829305474e-06, + "loss": 0.6283, + "step": 20812 + }, + { + "epoch": 0.6378877038126762, + "grad_norm": 1.6787363351736748, + "learning_rate": 3.0621353482223116e-06, + "loss": 0.5762, + "step": 20813 + }, + { + "epoch": 0.6379183523354174, + "grad_norm": 1.58004350929684, + "learning_rate": 3.061677832608353e-06, + "loss": 0.6073, + "step": 20814 + }, + { + "epoch": 0.6379490008581586, + "grad_norm": 1.9471504013080134, + "learning_rate": 3.0612203360931792e-06, + "loss": 0.5577, + "step": 20815 + }, + { + "epoch": 0.6379796493808998, + "grad_norm": 1.7969324806485552, + "learning_rate": 3.060762858681296e-06, + "loss": 0.5927, + "step": 20816 + }, + { + "epoch": 0.638010297903641, + "grad_norm": 2.084676090664511, + "learning_rate": 3.0603054003772137e-06, + "loss": 0.758, + "step": 20817 + }, + { + "epoch": 0.6380409464263822, + "grad_norm": 1.7335488762259328, + "learning_rate": 3.059847961185438e-06, + "loss": 0.6249, + "step": 20818 + }, + { + "epoch": 0.6380715949491235, + "grad_norm": 1.600467229060709, + "learning_rate": 3.059390541110476e-06, + "loss": 0.5868, + "step": 20819 + }, + { + "epoch": 0.6381022434718646, + "grad_norm": 1.7707459556726173, + "learning_rate": 3.0589331401568383e-06, + "loss": 0.5936, + "step": 20820 + }, + { + "epoch": 0.6381328919946059, + "grad_norm": 1.742249705019826, + "learning_rate": 3.0584757583290257e-06, + "loss": 0.5792, + "step": 20821 + }, + { + "epoch": 0.638163540517347, + "grad_norm": 0.8794829893263938, + "learning_rate": 3.0580183956315513e-06, + "loss": 0.4191, + "step": 20822 + }, + { + "epoch": 0.6381941890400883, + "grad_norm": 1.8252997114398348, + "learning_rate": 3.057561052068917e-06, + "loss": 0.6546, + "step": 20823 + }, + { + "epoch": 0.6382248375628294, + "grad_norm": 1.7494143023746072, + "learning_rate": 3.0571037276456293e-06, + "loss": 0.5939, + "step": 20824 + }, + { + "epoch": 0.6382554860855707, + "grad_norm": 1.8106515336402587, + "learning_rate": 3.0566464223661975e-06, + "loss": 0.509, + "step": 20825 + }, + { + "epoch": 0.6382861346083119, + "grad_norm": 1.701098665071213, + "learning_rate": 3.056189136235126e-06, + "loss": 0.612, + "step": 20826 + }, + { + "epoch": 0.6383167831310531, + "grad_norm": 2.058963955123678, + "learning_rate": 3.0557318692569183e-06, + "loss": 0.6174, + "step": 20827 + }, + { + "epoch": 0.6383474316537943, + "grad_norm": 0.7731531915306822, + "learning_rate": 3.0552746214360834e-06, + "loss": 0.4104, + "step": 20828 + }, + { + "epoch": 0.6383780801765355, + "grad_norm": 1.414537643824622, + "learning_rate": 3.054817392777124e-06, + "loss": 0.4102, + "step": 20829 + }, + { + "epoch": 0.6384087286992767, + "grad_norm": 1.9355990402704064, + "learning_rate": 3.054360183284548e-06, + "loss": 0.5959, + "step": 20830 + }, + { + "epoch": 0.6384393772220179, + "grad_norm": 0.8116651220819597, + "learning_rate": 3.0539029929628596e-06, + "loss": 0.4325, + "step": 20831 + }, + { + "epoch": 0.6384700257447591, + "grad_norm": 1.768266110084487, + "learning_rate": 3.05344582181656e-06, + "loss": 0.646, + "step": 20832 + }, + { + 
"epoch": 0.6385006742675003, + "grad_norm": 1.8597697060972205, + "learning_rate": 3.05298866985016e-06, + "loss": 0.6415, + "step": 20833 + }, + { + "epoch": 0.6385313227902415, + "grad_norm": 1.6032786202166616, + "learning_rate": 3.05253153706816e-06, + "loss": 0.5309, + "step": 20834 + }, + { + "epoch": 0.6385619713129828, + "grad_norm": 0.7845641282909234, + "learning_rate": 3.052074423475063e-06, + "loss": 0.4234, + "step": 20835 + }, + { + "epoch": 0.6385926198357239, + "grad_norm": 1.6594699964804187, + "learning_rate": 3.051617329075377e-06, + "loss": 0.6009, + "step": 20836 + }, + { + "epoch": 0.6386232683584652, + "grad_norm": 2.017891248280549, + "learning_rate": 3.051160253873603e-06, + "loss": 0.5583, + "step": 20837 + }, + { + "epoch": 0.6386539168812063, + "grad_norm": 1.701894301073669, + "learning_rate": 3.0507031978742463e-06, + "loss": 0.5388, + "step": 20838 + }, + { + "epoch": 0.6386845654039476, + "grad_norm": 0.7792574149403756, + "learning_rate": 3.0502461610818095e-06, + "loss": 0.4183, + "step": 20839 + }, + { + "epoch": 0.6387152139266887, + "grad_norm": 1.636407405522348, + "learning_rate": 3.0497891435007952e-06, + "loss": 0.6127, + "step": 20840 + }, + { + "epoch": 0.63874586244943, + "grad_norm": 2.0106570176677643, + "learning_rate": 3.0493321451357093e-06, + "loss": 0.5891, + "step": 20841 + }, + { + "epoch": 0.6387765109721711, + "grad_norm": 1.8370597218458082, + "learning_rate": 3.0488751659910533e-06, + "loss": 0.663, + "step": 20842 + }, + { + "epoch": 0.6388071594949123, + "grad_norm": 1.951023602571019, + "learning_rate": 3.048418206071326e-06, + "loss": 0.6176, + "step": 20843 + }, + { + "epoch": 0.6388378080176536, + "grad_norm": 1.822892381381252, + "learning_rate": 3.0479612653810376e-06, + "loss": 0.6322, + "step": 20844 + }, + { + "epoch": 0.6388684565403947, + "grad_norm": 0.8027179814358247, + "learning_rate": 3.0475043439246845e-06, + "loss": 0.4141, + "step": 20845 + }, + { + "epoch": 0.638899105063136, + "grad_norm": 1.801303136613631, + "learning_rate": 3.0470474417067696e-06, + "loss": 0.5529, + "step": 20846 + }, + { + "epoch": 0.6389297535858771, + "grad_norm": 0.7499922204305984, + "learning_rate": 3.0465905587317967e-06, + "loss": 0.3873, + "step": 20847 + }, + { + "epoch": 0.6389604021086184, + "grad_norm": 1.8322789356415252, + "learning_rate": 3.0461336950042664e-06, + "loss": 0.559, + "step": 20848 + }, + { + "epoch": 0.6389910506313595, + "grad_norm": 1.7924364747825583, + "learning_rate": 3.0456768505286815e-06, + "loss": 0.5535, + "step": 20849 + }, + { + "epoch": 0.6390216991541008, + "grad_norm": 1.7818878166060101, + "learning_rate": 3.0452200253095433e-06, + "loss": 0.615, + "step": 20850 + }, + { + "epoch": 0.6390523476768419, + "grad_norm": 2.119262811562729, + "learning_rate": 3.044763219351351e-06, + "loss": 0.6816, + "step": 20851 + }, + { + "epoch": 0.6390829961995832, + "grad_norm": 2.0612372819276414, + "learning_rate": 3.044306432658607e-06, + "loss": 0.6953, + "step": 20852 + }, + { + "epoch": 0.6391136447223243, + "grad_norm": 1.6251638658547274, + "learning_rate": 3.043849665235814e-06, + "loss": 0.5426, + "step": 20853 + }, + { + "epoch": 0.6391442932450656, + "grad_norm": 1.7927938315436076, + "learning_rate": 3.0433929170874676e-06, + "loss": 0.6133, + "step": 20854 + }, + { + "epoch": 0.6391749417678068, + "grad_norm": 1.8336426898186706, + "learning_rate": 3.0429361882180747e-06, + "loss": 0.6554, + "step": 20855 + }, + { + "epoch": 0.639205590290548, + "grad_norm": 1.747375164301548, + "learning_rate": 
3.042479478632129e-06, + "loss": 0.6673, + "step": 20856 + }, + { + "epoch": 0.6392362388132892, + "grad_norm": 1.699313335320503, + "learning_rate": 3.042022788334137e-06, + "loss": 0.5385, + "step": 20857 + }, + { + "epoch": 0.6392668873360304, + "grad_norm": 1.75598010699695, + "learning_rate": 3.0415661173285938e-06, + "loss": 0.6328, + "step": 20858 + }, + { + "epoch": 0.6392975358587716, + "grad_norm": 1.7526804813159205, + "learning_rate": 3.04110946562e-06, + "loss": 0.416, + "step": 20859 + }, + { + "epoch": 0.6393281843815128, + "grad_norm": 1.8492593085994242, + "learning_rate": 3.040652833212857e-06, + "loss": 0.5941, + "step": 20860 + }, + { + "epoch": 0.639358832904254, + "grad_norm": 1.763220212027369, + "learning_rate": 3.040196220111663e-06, + "loss": 0.6948, + "step": 20861 + }, + { + "epoch": 0.6393894814269953, + "grad_norm": 0.7355954041464429, + "learning_rate": 3.0397396263209165e-06, + "loss": 0.4115, + "step": 20862 + }, + { + "epoch": 0.6394201299497364, + "grad_norm": 1.64121574484959, + "learning_rate": 3.0392830518451177e-06, + "loss": 0.4618, + "step": 20863 + }, + { + "epoch": 0.6394507784724777, + "grad_norm": 1.8170878039150338, + "learning_rate": 3.0388264966887636e-06, + "loss": 0.6721, + "step": 20864 + }, + { + "epoch": 0.6394814269952188, + "grad_norm": 1.8284814883901526, + "learning_rate": 3.0383699608563557e-06, + "loss": 0.6853, + "step": 20865 + }, + { + "epoch": 0.6395120755179601, + "grad_norm": 1.6658274612028967, + "learning_rate": 3.0379134443523892e-06, + "loss": 0.5865, + "step": 20866 + }, + { + "epoch": 0.6395427240407012, + "grad_norm": 0.8177885961307907, + "learning_rate": 3.0374569471813632e-06, + "loss": 0.3993, + "step": 20867 + }, + { + "epoch": 0.6395733725634425, + "grad_norm": 1.6979396480432165, + "learning_rate": 3.0370004693477767e-06, + "loss": 0.5135, + "step": 20868 + }, + { + "epoch": 0.6396040210861836, + "grad_norm": 1.7935195958069206, + "learning_rate": 3.0365440108561265e-06, + "loss": 0.6176, + "step": 20869 + }, + { + "epoch": 0.6396346696089249, + "grad_norm": 1.564562154350732, + "learning_rate": 3.03608757171091e-06, + "loss": 0.6186, + "step": 20870 + }, + { + "epoch": 0.639665318131666, + "grad_norm": 1.7472851641884377, + "learning_rate": 3.035631151916626e-06, + "loss": 0.6128, + "step": 20871 + }, + { + "epoch": 0.6396959666544073, + "grad_norm": 1.778885916564394, + "learning_rate": 3.0351747514777716e-06, + "loss": 0.5044, + "step": 20872 + }, + { + "epoch": 0.6397266151771485, + "grad_norm": 1.6829218524104341, + "learning_rate": 3.034718370398841e-06, + "loss": 0.5584, + "step": 20873 + }, + { + "epoch": 0.6397572636998896, + "grad_norm": 1.95330892639344, + "learning_rate": 3.0342620086843354e-06, + "loss": 0.5398, + "step": 20874 + }, + { + "epoch": 0.6397879122226309, + "grad_norm": 1.9565225233411043, + "learning_rate": 3.033805666338746e-06, + "loss": 0.6151, + "step": 20875 + }, + { + "epoch": 0.639818560745372, + "grad_norm": 1.8471412130744307, + "learning_rate": 3.033349343366576e-06, + "loss": 0.6382, + "step": 20876 + }, + { + "epoch": 0.6398492092681133, + "grad_norm": 1.7505851492512259, + "learning_rate": 3.0328930397723167e-06, + "loss": 0.4478, + "step": 20877 + }, + { + "epoch": 0.6398798577908544, + "grad_norm": 1.798629736638038, + "learning_rate": 3.032436755560465e-06, + "loss": 0.5896, + "step": 20878 + }, + { + "epoch": 0.6399105063135957, + "grad_norm": 1.9190072365691064, + "learning_rate": 3.031980490735518e-06, + "loss": 0.5623, + "step": 20879 + }, + { + "epoch": 
0.6399411548363368, + "grad_norm": 0.8448323311607663, + "learning_rate": 3.031524245301971e-06, + "loss": 0.4262, + "step": 20880 + }, + { + "epoch": 0.6399718033590781, + "grad_norm": 2.1114935785514057, + "learning_rate": 3.031068019264318e-06, + "loss": 0.6833, + "step": 20881 + }, + { + "epoch": 0.6400024518818193, + "grad_norm": 1.6808977527850633, + "learning_rate": 3.0306118126270567e-06, + "loss": 0.5736, + "step": 20882 + }, + { + "epoch": 0.6400331004045605, + "grad_norm": 2.0452774940827836, + "learning_rate": 3.0301556253946806e-06, + "loss": 0.6389, + "step": 20883 + }, + { + "epoch": 0.6400637489273017, + "grad_norm": 1.8115667384828387, + "learning_rate": 3.0296994575716854e-06, + "loss": 0.6253, + "step": 20884 + }, + { + "epoch": 0.6400943974500429, + "grad_norm": 1.5097706508809876, + "learning_rate": 3.0292433091625672e-06, + "loss": 0.6029, + "step": 20885 + }, + { + "epoch": 0.6401250459727841, + "grad_norm": 1.8394961271288148, + "learning_rate": 3.028787180171815e-06, + "loss": 0.6258, + "step": 20886 + }, + { + "epoch": 0.6401556944955253, + "grad_norm": 1.9763652354018189, + "learning_rate": 3.028331070603931e-06, + "loss": 0.6143, + "step": 20887 + }, + { + "epoch": 0.6401863430182665, + "grad_norm": 1.7341911790173883, + "learning_rate": 3.027874980463404e-06, + "loss": 0.6305, + "step": 20888 + }, + { + "epoch": 0.6402169915410078, + "grad_norm": 1.5334916188350156, + "learning_rate": 3.0274189097547287e-06, + "loss": 0.542, + "step": 20889 + }, + { + "epoch": 0.6402476400637489, + "grad_norm": 1.6442644428473612, + "learning_rate": 3.0269628584824002e-06, + "loss": 0.5707, + "step": 20890 + }, + { + "epoch": 0.6402782885864902, + "grad_norm": 1.8630802320131412, + "learning_rate": 3.0265068266509107e-06, + "loss": 0.6952, + "step": 20891 + }, + { + "epoch": 0.6403089371092313, + "grad_norm": 1.8745275742972134, + "learning_rate": 3.0260508142647548e-06, + "loss": 0.6146, + "step": 20892 + }, + { + "epoch": 0.6403395856319726, + "grad_norm": 0.78250018558939, + "learning_rate": 3.025594821328426e-06, + "loss": 0.4057, + "step": 20893 + }, + { + "epoch": 0.6403702341547137, + "grad_norm": 1.7039953802759895, + "learning_rate": 3.0251388478464155e-06, + "loss": 0.632, + "step": 20894 + }, + { + "epoch": 0.640400882677455, + "grad_norm": 0.8004473299199153, + "learning_rate": 3.0246828938232183e-06, + "loss": 0.4172, + "step": 20895 + }, + { + "epoch": 0.6404315312001961, + "grad_norm": 0.7891897119682659, + "learning_rate": 3.0242269592633267e-06, + "loss": 0.3884, + "step": 20896 + }, + { + "epoch": 0.6404621797229374, + "grad_norm": 1.666478273085203, + "learning_rate": 3.02377104417123e-06, + "loss": 0.5409, + "step": 20897 + }, + { + "epoch": 0.6404928282456785, + "grad_norm": 1.7106219222252061, + "learning_rate": 3.0233151485514246e-06, + "loss": 0.576, + "step": 20898 + }, + { + "epoch": 0.6405234767684198, + "grad_norm": 0.7708077179022034, + "learning_rate": 3.0228592724084003e-06, + "loss": 0.4029, + "step": 20899 + }, + { + "epoch": 0.640554125291161, + "grad_norm": 1.9755926749275696, + "learning_rate": 3.022403415746649e-06, + "loss": 0.6685, + "step": 20900 + }, + { + "epoch": 0.6405847738139022, + "grad_norm": 1.73948970481558, + "learning_rate": 3.0219475785706633e-06, + "loss": 0.6169, + "step": 20901 + }, + { + "epoch": 0.6406154223366434, + "grad_norm": 0.8054436615880987, + "learning_rate": 3.021491760884934e-06, + "loss": 0.423, + "step": 20902 + }, + { + "epoch": 0.6406460708593846, + "grad_norm": 1.9655793258477947, + "learning_rate": 
3.0210359626939523e-06, + "loss": 0.7265, + "step": 20903 + }, + { + "epoch": 0.6406767193821258, + "grad_norm": 0.7632759740321504, + "learning_rate": 3.020580184002211e-06, + "loss": 0.409, + "step": 20904 + }, + { + "epoch": 0.6407073679048669, + "grad_norm": 1.6115644542989944, + "learning_rate": 3.0201244248141975e-06, + "loss": 0.5952, + "step": 20905 + }, + { + "epoch": 0.6407380164276082, + "grad_norm": 1.686613961228727, + "learning_rate": 3.019668685134406e-06, + "loss": 0.6319, + "step": 20906 + }, + { + "epoch": 0.6407686649503493, + "grad_norm": 1.8038764273722732, + "learning_rate": 3.0192129649673267e-06, + "loss": 0.6703, + "step": 20907 + }, + { + "epoch": 0.6407993134730906, + "grad_norm": 2.0734413421554048, + "learning_rate": 3.0187572643174465e-06, + "loss": 0.687, + "step": 20908 + }, + { + "epoch": 0.6408299619958318, + "grad_norm": 1.8700845069836407, + "learning_rate": 3.0183015831892592e-06, + "loss": 0.5547, + "step": 20909 + }, + { + "epoch": 0.640860610518573, + "grad_norm": 1.9084709760293195, + "learning_rate": 3.0178459215872528e-06, + "loss": 0.6611, + "step": 20910 + }, + { + "epoch": 0.6408912590413142, + "grad_norm": 2.172873013337938, + "learning_rate": 3.017390279515918e-06, + "loss": 0.6512, + "step": 20911 + }, + { + "epoch": 0.6409219075640554, + "grad_norm": 1.476396899479965, + "learning_rate": 3.0169346569797446e-06, + "loss": 0.5368, + "step": 20912 + }, + { + "epoch": 0.6409525560867966, + "grad_norm": 1.6230614798084653, + "learning_rate": 3.01647905398322e-06, + "loss": 0.5913, + "step": 20913 + }, + { + "epoch": 0.6409832046095378, + "grad_norm": 1.9106748147854964, + "learning_rate": 3.0160234705308366e-06, + "loss": 0.6549, + "step": 20914 + }, + { + "epoch": 0.641013853132279, + "grad_norm": 1.7581579637167442, + "learning_rate": 3.0155679066270803e-06, + "loss": 0.5527, + "step": 20915 + }, + { + "epoch": 0.6410445016550202, + "grad_norm": 1.4393800355144657, + "learning_rate": 3.015112362276441e-06, + "loss": 0.5294, + "step": 20916 + }, + { + "epoch": 0.6410751501777614, + "grad_norm": 1.8199089543394686, + "learning_rate": 3.0146568374834095e-06, + "loss": 0.6412, + "step": 20917 + }, + { + "epoch": 0.6411057987005027, + "grad_norm": 1.8883265897994361, + "learning_rate": 3.014201332252471e-06, + "loss": 0.5546, + "step": 20918 + }, + { + "epoch": 0.6411364472232438, + "grad_norm": 2.01805989357932, + "learning_rate": 3.013745846588114e-06, + "loss": 0.529, + "step": 20919 + }, + { + "epoch": 0.6411670957459851, + "grad_norm": 1.969316788888211, + "learning_rate": 3.0132903804948276e-06, + "loss": 0.6961, + "step": 20920 + }, + { + "epoch": 0.6411977442687262, + "grad_norm": 1.7751272703267766, + "learning_rate": 3.012834933977099e-06, + "loss": 0.6318, + "step": 20921 + }, + { + "epoch": 0.6412283927914675, + "grad_norm": 1.9606603863613112, + "learning_rate": 3.012379507039418e-06, + "loss": 0.6492, + "step": 20922 + }, + { + "epoch": 0.6412590413142086, + "grad_norm": 1.8090252507460425, + "learning_rate": 3.0119240996862693e-06, + "loss": 0.5614, + "step": 20923 + }, + { + "epoch": 0.6412896898369499, + "grad_norm": 1.841400348386901, + "learning_rate": 3.0114687119221398e-06, + "loss": 0.6929, + "step": 20924 + }, + { + "epoch": 0.641320338359691, + "grad_norm": 1.9458459295836614, + "learning_rate": 3.0110133437515197e-06, + "loss": 0.59, + "step": 20925 + }, + { + "epoch": 0.6413509868824323, + "grad_norm": 1.9071865931814367, + "learning_rate": 3.0105579951788944e-06, + "loss": 0.6642, + "step": 20926 + }, + { + "epoch": 
0.6413816354051735, + "grad_norm": 1.7848647871081138, + "learning_rate": 3.0101026662087474e-06, + "loss": 0.5841, + "step": 20927 + }, + { + "epoch": 0.6414122839279147, + "grad_norm": 0.8148957114239677, + "learning_rate": 3.0096473568455716e-06, + "loss": 0.4056, + "step": 20928 + }, + { + "epoch": 0.6414429324506559, + "grad_norm": 1.754237354844692, + "learning_rate": 3.009192067093846e-06, + "loss": 0.6096, + "step": 20929 + }, + { + "epoch": 0.6414735809733971, + "grad_norm": 0.8430566680548981, + "learning_rate": 3.0087367969580635e-06, + "loss": 0.4513, + "step": 20930 + }, + { + "epoch": 0.6415042294961383, + "grad_norm": 1.8064709920819997, + "learning_rate": 3.008281546442706e-06, + "loss": 0.6125, + "step": 20931 + }, + { + "epoch": 0.6415348780188795, + "grad_norm": 0.7888197179395905, + "learning_rate": 3.007826315552258e-06, + "loss": 0.4197, + "step": 20932 + }, + { + "epoch": 0.6415655265416207, + "grad_norm": 1.9742593179864667, + "learning_rate": 3.007371104291209e-06, + "loss": 0.766, + "step": 20933 + }, + { + "epoch": 0.641596175064362, + "grad_norm": 2.10539948353522, + "learning_rate": 3.0069159126640426e-06, + "loss": 0.6723, + "step": 20934 + }, + { + "epoch": 0.6416268235871031, + "grad_norm": 0.7505400119571873, + "learning_rate": 3.006460740675242e-06, + "loss": 0.4151, + "step": 20935 + }, + { + "epoch": 0.6416574721098443, + "grad_norm": 1.7590176167425478, + "learning_rate": 3.0060055883292947e-06, + "loss": 0.6503, + "step": 20936 + }, + { + "epoch": 0.6416881206325855, + "grad_norm": 1.6271562602597944, + "learning_rate": 3.0055504556306842e-06, + "loss": 0.5237, + "step": 20937 + }, + { + "epoch": 0.6417187691553267, + "grad_norm": 1.7688732617801208, + "learning_rate": 3.005095342583896e-06, + "loss": 0.5243, + "step": 20938 + }, + { + "epoch": 0.6417494176780679, + "grad_norm": 1.897675094861759, + "learning_rate": 3.0046402491934147e-06, + "loss": 0.6053, + "step": 20939 + }, + { + "epoch": 0.6417800662008091, + "grad_norm": 0.8266491036544462, + "learning_rate": 3.004185175463721e-06, + "loss": 0.409, + "step": 20940 + }, + { + "epoch": 0.6418107147235503, + "grad_norm": 1.7961352519011655, + "learning_rate": 3.003730121399304e-06, + "loss": 0.6083, + "step": 20941 + }, + { + "epoch": 0.6418413632462915, + "grad_norm": 1.805480231766555, + "learning_rate": 3.0032750870046435e-06, + "loss": 0.5858, + "step": 20942 + }, + { + "epoch": 0.6418720117690327, + "grad_norm": 1.7989562489092579, + "learning_rate": 3.0028200722842243e-06, + "loss": 0.6884, + "step": 20943 + }, + { + "epoch": 0.6419026602917739, + "grad_norm": 1.8393988196597817, + "learning_rate": 3.0023650772425306e-06, + "loss": 0.6324, + "step": 20944 + }, + { + "epoch": 0.6419333088145152, + "grad_norm": 2.125360618382555, + "learning_rate": 3.0019101018840446e-06, + "loss": 0.6124, + "step": 20945 + }, + { + "epoch": 0.6419639573372563, + "grad_norm": 1.6015596235789187, + "learning_rate": 3.001455146213249e-06, + "loss": 0.5178, + "step": 20946 + }, + { + "epoch": 0.6419946058599976, + "grad_norm": 1.7847388052588211, + "learning_rate": 3.001000210234628e-06, + "loss": 0.661, + "step": 20947 + }, + { + "epoch": 0.6420252543827387, + "grad_norm": 2.009190243027548, + "learning_rate": 3.0005452939526624e-06, + "loss": 0.5684, + "step": 20948 + }, + { + "epoch": 0.64205590290548, + "grad_norm": 1.8308180472183238, + "learning_rate": 3.0000903973718365e-06, + "loss": 0.6304, + "step": 20949 + }, + { + "epoch": 0.6420865514282211, + "grad_norm": 1.6920775024755144, + "learning_rate": 
2.9996355204966334e-06, + "loss": 0.5448, + "step": 20950 + }, + { + "epoch": 0.6421171999509624, + "grad_norm": 1.7829326122513764, + "learning_rate": 2.999180663331529e-06, + "loss": 0.6642, + "step": 20951 + }, + { + "epoch": 0.6421478484737035, + "grad_norm": 1.8335537105731217, + "learning_rate": 2.9987258258810135e-06, + "loss": 0.7462, + "step": 20952 + }, + { + "epoch": 0.6421784969964448, + "grad_norm": 1.8438001704086893, + "learning_rate": 2.998271008149563e-06, + "loss": 0.5734, + "step": 20953 + }, + { + "epoch": 0.642209145519186, + "grad_norm": 1.718906187779067, + "learning_rate": 2.9978162101416595e-06, + "loss": 0.5439, + "step": 20954 + }, + { + "epoch": 0.6422397940419272, + "grad_norm": 1.6315510544042133, + "learning_rate": 2.997361431861786e-06, + "loss": 0.5935, + "step": 20955 + }, + { + "epoch": 0.6422704425646684, + "grad_norm": 1.8306888298606787, + "learning_rate": 2.9969066733144225e-06, + "loss": 0.6746, + "step": 20956 + }, + { + "epoch": 0.6423010910874096, + "grad_norm": 1.6721192424830231, + "learning_rate": 2.9964519345040506e-06, + "loss": 0.6296, + "step": 20957 + }, + { + "epoch": 0.6423317396101508, + "grad_norm": 2.0987632080363356, + "learning_rate": 2.9959972154351504e-06, + "loss": 0.6119, + "step": 20958 + }, + { + "epoch": 0.642362388132892, + "grad_norm": 1.8025260614287055, + "learning_rate": 2.995542516112201e-06, + "loss": 0.6512, + "step": 20959 + }, + { + "epoch": 0.6423930366556332, + "grad_norm": 1.6638527207634572, + "learning_rate": 2.9950878365396865e-06, + "loss": 0.5611, + "step": 20960 + }, + { + "epoch": 0.6424236851783744, + "grad_norm": 1.6273878991773336, + "learning_rate": 2.994633176722083e-06, + "loss": 0.5465, + "step": 20961 + }, + { + "epoch": 0.6424543337011156, + "grad_norm": 0.7721737911985957, + "learning_rate": 2.9941785366638715e-06, + "loss": 0.4179, + "step": 20962 + }, + { + "epoch": 0.6424849822238569, + "grad_norm": 1.7939249794677954, + "learning_rate": 2.993723916369533e-06, + "loss": 0.5684, + "step": 20963 + }, + { + "epoch": 0.642515630746598, + "grad_norm": 1.8325601887498828, + "learning_rate": 2.9932693158435444e-06, + "loss": 0.5602, + "step": 20964 + }, + { + "epoch": 0.6425462792693393, + "grad_norm": 1.8910952027231787, + "learning_rate": 2.9928147350903887e-06, + "loss": 0.6077, + "step": 20965 + }, + { + "epoch": 0.6425769277920804, + "grad_norm": 1.9632363546498912, + "learning_rate": 2.9923601741145427e-06, + "loss": 0.6928, + "step": 20966 + }, + { + "epoch": 0.6426075763148216, + "grad_norm": 0.806500874191416, + "learning_rate": 2.9919056329204845e-06, + "loss": 0.4065, + "step": 20967 + }, + { + "epoch": 0.6426382248375628, + "grad_norm": 2.0742282520629884, + "learning_rate": 2.991451111512694e-06, + "loss": 0.6628, + "step": 20968 + }, + { + "epoch": 0.642668873360304, + "grad_norm": 1.7275879708591269, + "learning_rate": 2.9909966098956515e-06, + "loss": 0.5758, + "step": 20969 + }, + { + "epoch": 0.6426995218830452, + "grad_norm": 1.8221478174289807, + "learning_rate": 2.9905421280738307e-06, + "loss": 0.6549, + "step": 20970 + }, + { + "epoch": 0.6427301704057864, + "grad_norm": 1.69785700359273, + "learning_rate": 2.9900876660517155e-06, + "loss": 0.5936, + "step": 20971 + }, + { + "epoch": 0.6427608189285277, + "grad_norm": 1.8476480763571146, + "learning_rate": 2.989633223833779e-06, + "loss": 0.6062, + "step": 20972 + }, + { + "epoch": 0.6427914674512688, + "grad_norm": 1.8151240332668985, + "learning_rate": 2.9891788014245e-06, + "loss": 0.587, + "step": 20973 + }, + { + "epoch": 
0.6428221159740101, + "grad_norm": 1.643825633811113, + "learning_rate": 2.9887243988283576e-06, + "loss": 0.6267, + "step": 20974 + }, + { + "epoch": 0.6428527644967512, + "grad_norm": 1.7436246873351386, + "learning_rate": 2.988270016049828e-06, + "loss": 0.5782, + "step": 20975 + }, + { + "epoch": 0.6428834130194925, + "grad_norm": 1.8644926427438544, + "learning_rate": 2.9878156530933887e-06, + "loss": 0.6857, + "step": 20976 + }, + { + "epoch": 0.6429140615422336, + "grad_norm": 1.8277126462260542, + "learning_rate": 2.9873613099635166e-06, + "loss": 0.5765, + "step": 20977 + }, + { + "epoch": 0.6429447100649749, + "grad_norm": 1.7313090300411214, + "learning_rate": 2.9869069866646882e-06, + "loss": 0.6876, + "step": 20978 + }, + { + "epoch": 0.642975358587716, + "grad_norm": 1.5905346058684497, + "learning_rate": 2.9864526832013813e-06, + "loss": 0.6042, + "step": 20979 + }, + { + "epoch": 0.6430060071104573, + "grad_norm": 1.713550154501349, + "learning_rate": 2.985998399578072e-06, + "loss": 0.5902, + "step": 20980 + }, + { + "epoch": 0.6430366556331985, + "grad_norm": 2.072029165843892, + "learning_rate": 2.9855441357992333e-06, + "loss": 0.6853, + "step": 20981 + }, + { + "epoch": 0.6430673041559397, + "grad_norm": 1.9120746320955169, + "learning_rate": 2.985089891869346e-06, + "loss": 0.5946, + "step": 20982 + }, + { + "epoch": 0.6430979526786809, + "grad_norm": 1.8650811525069086, + "learning_rate": 2.9846356677928805e-06, + "loss": 0.6627, + "step": 20983 + }, + { + "epoch": 0.6431286012014221, + "grad_norm": 1.7701648970645192, + "learning_rate": 2.9841814635743188e-06, + "loss": 0.6401, + "step": 20984 + }, + { + "epoch": 0.6431592497241633, + "grad_norm": 1.871479201552037, + "learning_rate": 2.9837272792181316e-06, + "loss": 0.5712, + "step": 20985 + }, + { + "epoch": 0.6431898982469045, + "grad_norm": 2.0699458863876066, + "learning_rate": 2.983273114728795e-06, + "loss": 0.6691, + "step": 20986 + }, + { + "epoch": 0.6432205467696457, + "grad_norm": 1.8057510151141931, + "learning_rate": 2.9828189701107848e-06, + "loss": 0.6485, + "step": 20987 + }, + { + "epoch": 0.643251195292387, + "grad_norm": 2.0309236753783857, + "learning_rate": 2.982364845368576e-06, + "loss": 0.643, + "step": 20988 + }, + { + "epoch": 0.6432818438151281, + "grad_norm": 1.9728257654397194, + "learning_rate": 2.9819107405066407e-06, + "loss": 0.5737, + "step": 20989 + }, + { + "epoch": 0.6433124923378694, + "grad_norm": 2.062404089591156, + "learning_rate": 2.9814566555294567e-06, + "loss": 0.6754, + "step": 20990 + }, + { + "epoch": 0.6433431408606105, + "grad_norm": 1.7222001229284045, + "learning_rate": 2.9810025904414962e-06, + "loss": 0.5925, + "step": 20991 + }, + { + "epoch": 0.6433737893833518, + "grad_norm": 1.8642275509272657, + "learning_rate": 2.980548545247234e-06, + "loss": 0.5887, + "step": 20992 + }, + { + "epoch": 0.6434044379060929, + "grad_norm": 1.8696884984116773, + "learning_rate": 2.9800945199511453e-06, + "loss": 0.6272, + "step": 20993 + }, + { + "epoch": 0.6434350864288342, + "grad_norm": 1.887812694099518, + "learning_rate": 2.979640514557699e-06, + "loss": 0.6563, + "step": 20994 + }, + { + "epoch": 0.6434657349515753, + "grad_norm": 0.8029272613027989, + "learning_rate": 2.979186529071374e-06, + "loss": 0.4192, + "step": 20995 + }, + { + "epoch": 0.6434963834743166, + "grad_norm": 1.8495311343219827, + "learning_rate": 2.978732563496641e-06, + "loss": 0.6538, + "step": 20996 + }, + { + "epoch": 0.6435270319970577, + "grad_norm": 1.796215339550137, + "learning_rate": 
2.978278617837972e-06, + "loss": 0.5905, + "step": 20997 + }, + { + "epoch": 0.6435576805197989, + "grad_norm": 2.103815345328435, + "learning_rate": 2.9778246920998418e-06, + "loss": 0.4832, + "step": 20998 + }, + { + "epoch": 0.6435883290425402, + "grad_norm": 1.6652219151860823, + "learning_rate": 2.9773707862867217e-06, + "loss": 0.5407, + "step": 20999 + }, + { + "epoch": 0.6436189775652813, + "grad_norm": 1.6586337788511814, + "learning_rate": 2.976916900403085e-06, + "loss": 0.6027, + "step": 21000 + }, + { + "epoch": 0.6436496260880226, + "grad_norm": 1.668081363949488, + "learning_rate": 2.976463034453404e-06, + "loss": 0.5888, + "step": 21001 + }, + { + "epoch": 0.6436802746107637, + "grad_norm": 1.837330276898257, + "learning_rate": 2.9760091884421493e-06, + "loss": 0.5971, + "step": 21002 + }, + { + "epoch": 0.643710923133505, + "grad_norm": 1.481435085621304, + "learning_rate": 2.9755553623737952e-06, + "loss": 0.532, + "step": 21003 + }, + { + "epoch": 0.6437415716562461, + "grad_norm": 1.8851120271193418, + "learning_rate": 2.975101556252814e-06, + "loss": 0.5851, + "step": 21004 + }, + { + "epoch": 0.6437722201789874, + "grad_norm": 0.7575138331686915, + "learning_rate": 2.9746477700836717e-06, + "loss": 0.4058, + "step": 21005 + }, + { + "epoch": 0.6438028687017285, + "grad_norm": 1.659849862762036, + "learning_rate": 2.974194003870845e-06, + "loss": 0.6655, + "step": 21006 + }, + { + "epoch": 0.6438335172244698, + "grad_norm": 1.7362162764906448, + "learning_rate": 2.9737402576188036e-06, + "loss": 0.642, + "step": 21007 + }, + { + "epoch": 0.643864165747211, + "grad_norm": 1.6069120209005598, + "learning_rate": 2.9732865313320157e-06, + "loss": 0.5601, + "step": 21008 + }, + { + "epoch": 0.6438948142699522, + "grad_norm": 2.1638853066304398, + "learning_rate": 2.9728328250149557e-06, + "loss": 0.5607, + "step": 21009 + }, + { + "epoch": 0.6439254627926934, + "grad_norm": 1.817278597070588, + "learning_rate": 2.9723791386720915e-06, + "loss": 0.6707, + "step": 21010 + }, + { + "epoch": 0.6439561113154346, + "grad_norm": 1.7070496186017514, + "learning_rate": 2.9719254723078956e-06, + "loss": 0.5593, + "step": 21011 + }, + { + "epoch": 0.6439867598381758, + "grad_norm": 2.0322899174954667, + "learning_rate": 2.9714718259268383e-06, + "loss": 0.6211, + "step": 21012 + }, + { + "epoch": 0.644017408360917, + "grad_norm": 1.7190635619277534, + "learning_rate": 2.9710181995333843e-06, + "loss": 0.5995, + "step": 21013 + }, + { + "epoch": 0.6440480568836582, + "grad_norm": 1.681527425184132, + "learning_rate": 2.9705645931320103e-06, + "loss": 0.5375, + "step": 21014 + }, + { + "epoch": 0.6440787054063994, + "grad_norm": 0.7992014039451616, + "learning_rate": 2.970111006727182e-06, + "loss": 0.4187, + "step": 21015 + }, + { + "epoch": 0.6441093539291406, + "grad_norm": 2.2078089463085044, + "learning_rate": 2.969657440323368e-06, + "loss": 0.6325, + "step": 21016 + }, + { + "epoch": 0.6441400024518819, + "grad_norm": 1.837136099941989, + "learning_rate": 2.96920389392504e-06, + "loss": 0.6531, + "step": 21017 + }, + { + "epoch": 0.644170650974623, + "grad_norm": 0.8417423358237371, + "learning_rate": 2.9687503675366643e-06, + "loss": 0.3978, + "step": 21018 + }, + { + "epoch": 0.6442012994973643, + "grad_norm": 1.7010808253545409, + "learning_rate": 2.9682968611627116e-06, + "loss": 0.5782, + "step": 21019 + }, + { + "epoch": 0.6442319480201054, + "grad_norm": 2.0145115516314505, + "learning_rate": 2.9678433748076508e-06, + "loss": 0.6721, + "step": 21020 + }, + { + "epoch": 
0.6442625965428467, + "grad_norm": 1.7609762204043515, + "learning_rate": 2.9673899084759474e-06, + "loss": 0.7038, + "step": 21021 + }, + { + "epoch": 0.6442932450655878, + "grad_norm": 1.8833348522531936, + "learning_rate": 2.9669364621720725e-06, + "loss": 0.6267, + "step": 21022 + }, + { + "epoch": 0.6443238935883291, + "grad_norm": 1.7571005522085357, + "learning_rate": 2.9664830359004937e-06, + "loss": 0.6155, + "step": 21023 + }, + { + "epoch": 0.6443545421110702, + "grad_norm": 1.679236796513888, + "learning_rate": 2.9660296296656754e-06, + "loss": 0.6345, + "step": 21024 + }, + { + "epoch": 0.6443851906338115, + "grad_norm": 0.8220455114531299, + "learning_rate": 2.9655762434720904e-06, + "loss": 0.4158, + "step": 21025 + }, + { + "epoch": 0.6444158391565527, + "grad_norm": 1.6714165671185939, + "learning_rate": 2.9651228773242015e-06, + "loss": 0.4912, + "step": 21026 + }, + { + "epoch": 0.6444464876792939, + "grad_norm": 2.2245758705687066, + "learning_rate": 2.9646695312264774e-06, + "loss": 0.62, + "step": 21027 + }, + { + "epoch": 0.6444771362020351, + "grad_norm": 1.8707654506106925, + "learning_rate": 2.9642162051833858e-06, + "loss": 0.5947, + "step": 21028 + }, + { + "epoch": 0.6445077847247762, + "grad_norm": 1.6659198150010934, + "learning_rate": 2.9637628991993917e-06, + "loss": 0.6482, + "step": 21029 + }, + { + "epoch": 0.6445384332475175, + "grad_norm": 1.7606111501557313, + "learning_rate": 2.9633096132789636e-06, + "loss": 0.5771, + "step": 21030 + }, + { + "epoch": 0.6445690817702586, + "grad_norm": 0.7673408325159138, + "learning_rate": 2.962856347426567e-06, + "loss": 0.4249, + "step": 21031 + }, + { + "epoch": 0.6445997302929999, + "grad_norm": 1.6904308169989124, + "learning_rate": 2.962403101646667e-06, + "loss": 0.5321, + "step": 21032 + }, + { + "epoch": 0.644630378815741, + "grad_norm": 0.7966371976847993, + "learning_rate": 2.9619498759437325e-06, + "loss": 0.4277, + "step": 21033 + }, + { + "epoch": 0.6446610273384823, + "grad_norm": 1.8285068551912063, + "learning_rate": 2.9614966703222274e-06, + "loss": 0.6849, + "step": 21034 + }, + { + "epoch": 0.6446916758612234, + "grad_norm": 1.5962495399277816, + "learning_rate": 2.9610434847866144e-06, + "loss": 0.6014, + "step": 21035 + }, + { + "epoch": 0.6447223243839647, + "grad_norm": 1.7048864687640142, + "learning_rate": 2.9605903193413647e-06, + "loss": 0.6133, + "step": 21036 + }, + { + "epoch": 0.6447529729067059, + "grad_norm": 1.6343121209170526, + "learning_rate": 2.9601371739909374e-06, + "loss": 0.5328, + "step": 21037 + }, + { + "epoch": 0.6447836214294471, + "grad_norm": 0.7632541864386995, + "learning_rate": 2.9596840487398036e-06, + "loss": 0.3986, + "step": 21038 + }, + { + "epoch": 0.6448142699521883, + "grad_norm": 1.9160848562733785, + "learning_rate": 2.959230943592424e-06, + "loss": 0.6045, + "step": 21039 + }, + { + "epoch": 0.6448449184749295, + "grad_norm": 1.8602872499721659, + "learning_rate": 2.9587778585532623e-06, + "loss": 0.5988, + "step": 21040 + }, + { + "epoch": 0.6448755669976707, + "grad_norm": 1.6533398596883757, + "learning_rate": 2.9583247936267863e-06, + "loss": 0.5197, + "step": 21041 + }, + { + "epoch": 0.6449062155204119, + "grad_norm": 1.5787614593967725, + "learning_rate": 2.957871748817458e-06, + "loss": 0.5951, + "step": 21042 + }, + { + "epoch": 0.6449368640431531, + "grad_norm": 1.8778732925876302, + "learning_rate": 2.9574187241297413e-06, + "loss": 0.6716, + "step": 21043 + }, + { + "epoch": 0.6449675125658944, + "grad_norm": 2.0846205184740856, + 
"learning_rate": 2.9569657195681013e-06, + "loss": 0.746, + "step": 21044 + }, + { + "epoch": 0.6449981610886355, + "grad_norm": 1.8550061271525367, + "learning_rate": 2.9565127351370005e-06, + "loss": 0.6663, + "step": 21045 + }, + { + "epoch": 0.6450288096113768, + "grad_norm": 1.7876983778838194, + "learning_rate": 2.956059770840902e-06, + "loss": 0.559, + "step": 21046 + }, + { + "epoch": 0.6450594581341179, + "grad_norm": 1.6636222606051054, + "learning_rate": 2.955606826684272e-06, + "loss": 0.6464, + "step": 21047 + }, + { + "epoch": 0.6450901066568592, + "grad_norm": 0.8503492276306204, + "learning_rate": 2.9551539026715675e-06, + "loss": 0.4274, + "step": 21048 + }, + { + "epoch": 0.6451207551796003, + "grad_norm": 1.9464427995456908, + "learning_rate": 2.954700998807258e-06, + "loss": 0.7069, + "step": 21049 + }, + { + "epoch": 0.6451514037023416, + "grad_norm": 1.5290711532276853, + "learning_rate": 2.9542481150958013e-06, + "loss": 0.5653, + "step": 21050 + }, + { + "epoch": 0.6451820522250827, + "grad_norm": 1.8557137370110337, + "learning_rate": 2.9537952515416613e-06, + "loss": 0.5861, + "step": 21051 + }, + { + "epoch": 0.645212700747824, + "grad_norm": 1.7367645016555997, + "learning_rate": 2.9533424081493e-06, + "loss": 0.5616, + "step": 21052 + }, + { + "epoch": 0.6452433492705651, + "grad_norm": 1.5950392024213773, + "learning_rate": 2.952889584923181e-06, + "loss": 0.6075, + "step": 21053 + }, + { + "epoch": 0.6452739977933064, + "grad_norm": 0.7808104690278662, + "learning_rate": 2.952436781867762e-06, + "loss": 0.4227, + "step": 21054 + }, + { + "epoch": 0.6453046463160476, + "grad_norm": 1.7093300988571691, + "learning_rate": 2.95198399898751e-06, + "loss": 0.684, + "step": 21055 + }, + { + "epoch": 0.6453352948387888, + "grad_norm": 1.8579296474286733, + "learning_rate": 2.951531236286882e-06, + "loss": 0.5417, + "step": 21056 + }, + { + "epoch": 0.64536594336153, + "grad_norm": 0.7704892286651932, + "learning_rate": 2.951078493770343e-06, + "loss": 0.4151, + "step": 21057 + }, + { + "epoch": 0.6453965918842712, + "grad_norm": 0.7724461603744557, + "learning_rate": 2.9506257714423504e-06, + "loss": 0.4153, + "step": 21058 + }, + { + "epoch": 0.6454272404070124, + "grad_norm": 1.7444206583517092, + "learning_rate": 2.9501730693073656e-06, + "loss": 0.6188, + "step": 21059 + }, + { + "epoch": 0.6454578889297535, + "grad_norm": 1.7655774389024705, + "learning_rate": 2.949720387369851e-06, + "loss": 0.6805, + "step": 21060 + }, + { + "epoch": 0.6454885374524948, + "grad_norm": 0.7674097070876732, + "learning_rate": 2.9492677256342663e-06, + "loss": 0.421, + "step": 21061 + }, + { + "epoch": 0.6455191859752359, + "grad_norm": 1.7022899712800552, + "learning_rate": 2.94881508410507e-06, + "loss": 0.5289, + "step": 21062 + }, + { + "epoch": 0.6455498344979772, + "grad_norm": 1.9332852544526309, + "learning_rate": 2.9483624627867246e-06, + "loss": 0.6355, + "step": 21063 + }, + { + "epoch": 0.6455804830207184, + "grad_norm": 1.7329857223468927, + "learning_rate": 2.9479098616836876e-06, + "loss": 0.583, + "step": 21064 + }, + { + "epoch": 0.6456111315434596, + "grad_norm": 2.039179570074876, + "learning_rate": 2.947457280800421e-06, + "loss": 0.7315, + "step": 21065 + }, + { + "epoch": 0.6456417800662008, + "grad_norm": 0.741342308490082, + "learning_rate": 2.9470047201413843e-06, + "loss": 0.3865, + "step": 21066 + }, + { + "epoch": 0.645672428588942, + "grad_norm": 2.064885180566934, + "learning_rate": 2.946552179711032e-06, + "loss": 0.5963, + "step": 21067 + }, + { + 
"epoch": 0.6457030771116832, + "grad_norm": 1.676821279954323, + "learning_rate": 2.94609965951383e-06, + "loss": 0.5353, + "step": 21068 + }, + { + "epoch": 0.6457337256344244, + "grad_norm": 1.8785115232010858, + "learning_rate": 2.9456471595542317e-06, + "loss": 0.6068, + "step": 21069 + }, + { + "epoch": 0.6457643741571656, + "grad_norm": 1.5440533591328476, + "learning_rate": 2.945194679836697e-06, + "loss": 0.5293, + "step": 21070 + }, + { + "epoch": 0.6457950226799068, + "grad_norm": 1.807335537109018, + "learning_rate": 2.944742220365686e-06, + "loss": 0.6429, + "step": 21071 + }, + { + "epoch": 0.645825671202648, + "grad_norm": 1.6436060433569277, + "learning_rate": 2.9442897811456545e-06, + "loss": 0.5353, + "step": 21072 + }, + { + "epoch": 0.6458563197253893, + "grad_norm": 1.5641597428820417, + "learning_rate": 2.943837362181063e-06, + "loss": 0.5808, + "step": 21073 + }, + { + "epoch": 0.6458869682481304, + "grad_norm": 1.9725553625037746, + "learning_rate": 2.9433849634763678e-06, + "loss": 0.6936, + "step": 21074 + }, + { + "epoch": 0.6459176167708717, + "grad_norm": 1.8841043067230283, + "learning_rate": 2.9429325850360257e-06, + "loss": 0.572, + "step": 21075 + }, + { + "epoch": 0.6459482652936128, + "grad_norm": 1.7218583637982035, + "learning_rate": 2.942480226864497e-06, + "loss": 0.6084, + "step": 21076 + }, + { + "epoch": 0.6459789138163541, + "grad_norm": 1.6445157216200248, + "learning_rate": 2.9420278889662366e-06, + "loss": 0.6416, + "step": 21077 + }, + { + "epoch": 0.6460095623390952, + "grad_norm": 1.7477090042604655, + "learning_rate": 2.9415755713457008e-06, + "loss": 0.5699, + "step": 21078 + }, + { + "epoch": 0.6460402108618365, + "grad_norm": 1.8170711622752906, + "learning_rate": 2.9411232740073493e-06, + "loss": 0.6456, + "step": 21079 + }, + { + "epoch": 0.6460708593845776, + "grad_norm": 1.8927656138916304, + "learning_rate": 2.9406709969556356e-06, + "loss": 0.5613, + "step": 21080 + }, + { + "epoch": 0.6461015079073189, + "grad_norm": 1.9687504339570812, + "learning_rate": 2.940218740195018e-06, + "loss": 0.607, + "step": 21081 + }, + { + "epoch": 0.6461321564300601, + "grad_norm": 1.9200141980377499, + "learning_rate": 2.9397665037299518e-06, + "loss": 0.5819, + "step": 21082 + }, + { + "epoch": 0.6461628049528013, + "grad_norm": 0.8087039149161467, + "learning_rate": 2.939314287564893e-06, + "loss": 0.4262, + "step": 21083 + }, + { + "epoch": 0.6461934534755425, + "grad_norm": 1.8286800537738268, + "learning_rate": 2.9388620917042987e-06, + "loss": 0.5639, + "step": 21084 + }, + { + "epoch": 0.6462241019982837, + "grad_norm": 1.7632521463527175, + "learning_rate": 2.9384099161526236e-06, + "loss": 0.6155, + "step": 21085 + }, + { + "epoch": 0.6462547505210249, + "grad_norm": 1.9858184747728163, + "learning_rate": 2.937957760914322e-06, + "loss": 0.6567, + "step": 21086 + }, + { + "epoch": 0.6462853990437661, + "grad_norm": 1.9522122950975636, + "learning_rate": 2.9375056259938507e-06, + "loss": 0.6355, + "step": 21087 + }, + { + "epoch": 0.6463160475665073, + "grad_norm": 1.6124045825217008, + "learning_rate": 2.9370535113956654e-06, + "loss": 0.6058, + "step": 21088 + }, + { + "epoch": 0.6463466960892486, + "grad_norm": 1.8893855618264075, + "learning_rate": 2.9366014171242173e-06, + "loss": 0.5698, + "step": 21089 + }, + { + "epoch": 0.6463773446119897, + "grad_norm": 1.9129041004145912, + "learning_rate": 2.936149343183966e-06, + "loss": 0.6013, + "step": 21090 + }, + { + "epoch": 0.6464079931347309, + "grad_norm": 1.573051642312755, + 
"learning_rate": 2.93569728957936e-06, + "loss": 0.6247, + "step": 21091 + }, + { + "epoch": 0.6464386416574721, + "grad_norm": 2.172468182780583, + "learning_rate": 2.9352452563148605e-06, + "loss": 0.534, + "step": 21092 + }, + { + "epoch": 0.6464692901802133, + "grad_norm": 1.7850522317934556, + "learning_rate": 2.934793243394916e-06, + "loss": 0.5695, + "step": 21093 + }, + { + "epoch": 0.6464999387029545, + "grad_norm": 1.8810438865020465, + "learning_rate": 2.9343412508239817e-06, + "loss": 0.5617, + "step": 21094 + }, + { + "epoch": 0.6465305872256957, + "grad_norm": 1.755742512039154, + "learning_rate": 2.9338892786065123e-06, + "loss": 0.5459, + "step": 21095 + }, + { + "epoch": 0.6465612357484369, + "grad_norm": 1.7562126765887414, + "learning_rate": 2.93343732674696e-06, + "loss": 0.6225, + "step": 21096 + }, + { + "epoch": 0.6465918842711781, + "grad_norm": 1.7439500190591355, + "learning_rate": 2.932985395249778e-06, + "loss": 0.6106, + "step": 21097 + }, + { + "epoch": 0.6466225327939193, + "grad_norm": 1.9021496397015736, + "learning_rate": 2.932533484119421e-06, + "loss": 0.6022, + "step": 21098 + }, + { + "epoch": 0.6466531813166605, + "grad_norm": 1.6918649308941631, + "learning_rate": 2.932081593360341e-06, + "loss": 0.5448, + "step": 21099 + }, + { + "epoch": 0.6466838298394018, + "grad_norm": 1.7964940530503002, + "learning_rate": 2.9316297229769873e-06, + "loss": 0.6483, + "step": 21100 + }, + { + "epoch": 0.6467144783621429, + "grad_norm": 1.8754506456607745, + "learning_rate": 2.9311778729738183e-06, + "loss": 0.5801, + "step": 21101 + }, + { + "epoch": 0.6467451268848842, + "grad_norm": 0.8133008321127955, + "learning_rate": 2.930726043355281e-06, + "loss": 0.4237, + "step": 21102 + }, + { + "epoch": 0.6467757754076253, + "grad_norm": 1.8481924912387875, + "learning_rate": 2.9302742341258306e-06, + "loss": 0.622, + "step": 21103 + }, + { + "epoch": 0.6468064239303666, + "grad_norm": 1.6480315725387804, + "learning_rate": 2.9298224452899177e-06, + "loss": 0.6296, + "step": 21104 + }, + { + "epoch": 0.6468370724531077, + "grad_norm": 1.6148978585258464, + "learning_rate": 2.9293706768519925e-06, + "loss": 0.6, + "step": 21105 + }, + { + "epoch": 0.646867720975849, + "grad_norm": 1.8354928195784268, + "learning_rate": 2.9289189288165098e-06, + "loss": 0.5793, + "step": 21106 + }, + { + "epoch": 0.6468983694985901, + "grad_norm": 2.012238370347361, + "learning_rate": 2.928467201187918e-06, + "loss": 0.5938, + "step": 21107 + }, + { + "epoch": 0.6469290180213314, + "grad_norm": 0.7675828883084757, + "learning_rate": 2.928015493970668e-06, + "loss": 0.4207, + "step": 21108 + }, + { + "epoch": 0.6469596665440726, + "grad_norm": 0.8399874306463831, + "learning_rate": 2.9275638071692137e-06, + "loss": 0.4124, + "step": 21109 + }, + { + "epoch": 0.6469903150668138, + "grad_norm": 1.5121660365564231, + "learning_rate": 2.9271121407879997e-06, + "loss": 0.5803, + "step": 21110 + }, + { + "epoch": 0.647020963589555, + "grad_norm": 1.7434384686928905, + "learning_rate": 2.9266604948314843e-06, + "loss": 0.683, + "step": 21111 + }, + { + "epoch": 0.6470516121122962, + "grad_norm": 1.808521531234815, + "learning_rate": 2.9262088693041114e-06, + "loss": 0.5951, + "step": 21112 + }, + { + "epoch": 0.6470822606350374, + "grad_norm": 1.8915911092996693, + "learning_rate": 2.9257572642103327e-06, + "loss": 0.6614, + "step": 21113 + }, + { + "epoch": 0.6471129091577786, + "grad_norm": 1.855243921515353, + "learning_rate": 2.9253056795545996e-06, + "loss": 0.6377, + "step": 21114 + }, + 
{ + "epoch": 0.6471435576805198, + "grad_norm": 1.7102678305042989, + "learning_rate": 2.92485411534136e-06, + "loss": 0.5442, + "step": 21115 + }, + { + "epoch": 0.647174206203261, + "grad_norm": 1.780200960569097, + "learning_rate": 2.9244025715750634e-06, + "loss": 0.6373, + "step": 21116 + }, + { + "epoch": 0.6472048547260022, + "grad_norm": 1.735541583037074, + "learning_rate": 2.9239510482601596e-06, + "loss": 0.6502, + "step": 21117 + }, + { + "epoch": 0.6472355032487435, + "grad_norm": 0.8467610629161428, + "learning_rate": 2.9234995454010968e-06, + "loss": 0.4469, + "step": 21118 + }, + { + "epoch": 0.6472661517714846, + "grad_norm": 0.7852418083843932, + "learning_rate": 2.923048063002325e-06, + "loss": 0.415, + "step": 21119 + }, + { + "epoch": 0.6472968002942259, + "grad_norm": 2.077640419924069, + "learning_rate": 2.9225966010682933e-06, + "loss": 0.5951, + "step": 21120 + }, + { + "epoch": 0.647327448816967, + "grad_norm": 1.8722041886870604, + "learning_rate": 2.9221451596034457e-06, + "loss": 0.6656, + "step": 21121 + }, + { + "epoch": 0.6473580973397082, + "grad_norm": 2.0673057279004503, + "learning_rate": 2.921693738612236e-06, + "loss": 0.62, + "step": 21122 + }, + { + "epoch": 0.6473887458624494, + "grad_norm": 1.7577717962595458, + "learning_rate": 2.9212423380991093e-06, + "loss": 0.5959, + "step": 21123 + }, + { + "epoch": 0.6474193943851906, + "grad_norm": 1.639820658626309, + "learning_rate": 2.9207909580685123e-06, + "loss": 0.5838, + "step": 21124 + }, + { + "epoch": 0.6474500429079318, + "grad_norm": 1.860991513179502, + "learning_rate": 2.9203395985248954e-06, + "loss": 0.552, + "step": 21125 + }, + { + "epoch": 0.647480691430673, + "grad_norm": 1.8683505662772788, + "learning_rate": 2.919888259472704e-06, + "loss": 0.6343, + "step": 21126 + }, + { + "epoch": 0.6475113399534143, + "grad_norm": 1.5699244469001552, + "learning_rate": 2.919436940916386e-06, + "loss": 0.5847, + "step": 21127 + }, + { + "epoch": 0.6475419884761554, + "grad_norm": 1.6017646491597, + "learning_rate": 2.918985642860388e-06, + "loss": 0.6283, + "step": 21128 + }, + { + "epoch": 0.6475726369988967, + "grad_norm": 1.7878410336250978, + "learning_rate": 2.9185343653091557e-06, + "loss": 0.5069, + "step": 21129 + }, + { + "epoch": 0.6476032855216378, + "grad_norm": 1.8608042688921929, + "learning_rate": 2.9180831082671396e-06, + "loss": 0.6307, + "step": 21130 + }, + { + "epoch": 0.6476339340443791, + "grad_norm": 0.8599520112500522, + "learning_rate": 2.9176318717387808e-06, + "loss": 0.4044, + "step": 21131 + }, + { + "epoch": 0.6476645825671202, + "grad_norm": 1.611384541410393, + "learning_rate": 2.917180655728529e-06, + "loss": 0.6131, + "step": 21132 + }, + { + "epoch": 0.6476952310898615, + "grad_norm": 1.7721456486040053, + "learning_rate": 2.91672946024083e-06, + "loss": 0.5455, + "step": 21133 + }, + { + "epoch": 0.6477258796126026, + "grad_norm": 1.9095913117728673, + "learning_rate": 2.9162782852801268e-06, + "loss": 0.5474, + "step": 21134 + }, + { + "epoch": 0.6477565281353439, + "grad_norm": 1.7384303483988046, + "learning_rate": 2.9158271308508667e-06, + "loss": 0.6695, + "step": 21135 + }, + { + "epoch": 0.647787176658085, + "grad_norm": 0.8121881124974981, + "learning_rate": 2.915375996957498e-06, + "loss": 0.4109, + "step": 21136 + }, + { + "epoch": 0.6478178251808263, + "grad_norm": 1.604354354405965, + "learning_rate": 2.91492488360446e-06, + "loss": 0.5956, + "step": 21137 + }, + { + "epoch": 0.6478484737035675, + "grad_norm": 1.942405227634912, + "learning_rate": 
2.9144737907962036e-06, + "loss": 0.6558, + "step": 21138 + }, + { + "epoch": 0.6478791222263087, + "grad_norm": 1.9170202817936783, + "learning_rate": 2.914022718537168e-06, + "loss": 0.5813, + "step": 21139 + }, + { + "epoch": 0.6479097707490499, + "grad_norm": 1.9090023908856981, + "learning_rate": 2.9135716668318e-06, + "loss": 0.7029, + "step": 21140 + }, + { + "epoch": 0.6479404192717911, + "grad_norm": 0.7880923781902116, + "learning_rate": 2.9131206356845463e-06, + "loss": 0.4069, + "step": 21141 + }, + { + "epoch": 0.6479710677945323, + "grad_norm": 0.7758152766280073, + "learning_rate": 2.912669625099847e-06, + "loss": 0.4276, + "step": 21142 + }, + { + "epoch": 0.6480017163172735, + "grad_norm": 1.8069106612031296, + "learning_rate": 2.912218635082148e-06, + "loss": 0.5428, + "step": 21143 + }, + { + "epoch": 0.6480323648400147, + "grad_norm": 1.6486191954485507, + "learning_rate": 2.911767665635895e-06, + "loss": 0.6203, + "step": 21144 + }, + { + "epoch": 0.648063013362756, + "grad_norm": 1.8802256165336388, + "learning_rate": 2.9113167167655277e-06, + "loss": 0.6457, + "step": 21145 + }, + { + "epoch": 0.6480936618854971, + "grad_norm": 1.7027322461471488, + "learning_rate": 2.9108657884754908e-06, + "loss": 0.6454, + "step": 21146 + }, + { + "epoch": 0.6481243104082384, + "grad_norm": 1.982031393459396, + "learning_rate": 2.9104148807702302e-06, + "loss": 0.726, + "step": 21147 + }, + { + "epoch": 0.6481549589309795, + "grad_norm": 1.704862750619392, + "learning_rate": 2.9099639936541845e-06, + "loss": 0.6609, + "step": 21148 + }, + { + "epoch": 0.6481856074537208, + "grad_norm": 1.6929620249277366, + "learning_rate": 2.9095131271318e-06, + "loss": 0.4622, + "step": 21149 + }, + { + "epoch": 0.6482162559764619, + "grad_norm": 1.700069254323458, + "learning_rate": 2.909062281207515e-06, + "loss": 0.6096, + "step": 21150 + }, + { + "epoch": 0.6482469044992032, + "grad_norm": 1.895587541348312, + "learning_rate": 2.908611455885775e-06, + "loss": 0.646, + "step": 21151 + }, + { + "epoch": 0.6482775530219443, + "grad_norm": 1.6285748442183108, + "learning_rate": 2.9081606511710236e-06, + "loss": 0.5956, + "step": 21152 + }, + { + "epoch": 0.6483082015446855, + "grad_norm": 1.7471545530695725, + "learning_rate": 2.9077098670676983e-06, + "loss": 0.5917, + "step": 21153 + }, + { + "epoch": 0.6483388500674268, + "grad_norm": 1.7607605239051305, + "learning_rate": 2.9072591035802435e-06, + "loss": 0.5505, + "step": 21154 + }, + { + "epoch": 0.6483694985901679, + "grad_norm": 1.9027771933896056, + "learning_rate": 2.9068083607131015e-06, + "loss": 0.5425, + "step": 21155 + }, + { + "epoch": 0.6484001471129092, + "grad_norm": 1.7423160500742827, + "learning_rate": 2.9063576384707103e-06, + "loss": 0.5722, + "step": 21156 + }, + { + "epoch": 0.6484307956356503, + "grad_norm": 1.809088495803579, + "learning_rate": 2.905906936857513e-06, + "loss": 0.6681, + "step": 21157 + }, + { + "epoch": 0.6484614441583916, + "grad_norm": 1.7987354788663625, + "learning_rate": 2.9054562558779524e-06, + "loss": 0.7025, + "step": 21158 + }, + { + "epoch": 0.6484920926811327, + "grad_norm": 2.1503475656295308, + "learning_rate": 2.905005595536465e-06, + "loss": 0.6516, + "step": 21159 + }, + { + "epoch": 0.648522741203874, + "grad_norm": 1.9583277809473716, + "learning_rate": 2.9045549558374965e-06, + "loss": 0.5306, + "step": 21160 + }, + { + "epoch": 0.6485533897266151, + "grad_norm": 1.7166517404883155, + "learning_rate": 2.9041043367854815e-06, + "loss": 0.6016, + "step": 21161 + }, + { + "epoch": 
0.6485840382493564, + "grad_norm": 2.032727493643695, + "learning_rate": 2.903653738384863e-06, + "loss": 0.6567, + "step": 21162 + }, + { + "epoch": 0.6486146867720975, + "grad_norm": 1.7719016219203076, + "learning_rate": 2.9032031606400825e-06, + "loss": 0.6314, + "step": 21163 + }, + { + "epoch": 0.6486453352948388, + "grad_norm": 1.7362033803184573, + "learning_rate": 2.9027526035555754e-06, + "loss": 0.5182, + "step": 21164 + }, + { + "epoch": 0.64867598381758, + "grad_norm": 1.5613111881475064, + "learning_rate": 2.9023020671357837e-06, + "loss": 0.5889, + "step": 21165 + }, + { + "epoch": 0.6487066323403212, + "grad_norm": 1.9237689323728724, + "learning_rate": 2.9018515513851487e-06, + "loss": 0.6048, + "step": 21166 + }, + { + "epoch": 0.6487372808630624, + "grad_norm": 1.8604632758553856, + "learning_rate": 2.901401056308105e-06, + "loss": 0.7054, + "step": 21167 + }, + { + "epoch": 0.6487679293858036, + "grad_norm": 1.9474732301319921, + "learning_rate": 2.9009505819090934e-06, + "loss": 0.6712, + "step": 21168 + }, + { + "epoch": 0.6487985779085448, + "grad_norm": 1.7387950692233634, + "learning_rate": 2.9005001281925548e-06, + "loss": 0.5382, + "step": 21169 + }, + { + "epoch": 0.648829226431286, + "grad_norm": 1.7994046859184873, + "learning_rate": 2.9000496951629244e-06, + "loss": 0.685, + "step": 21170 + }, + { + "epoch": 0.6488598749540272, + "grad_norm": 1.7989103298272864, + "learning_rate": 2.8995992828246428e-06, + "loss": 0.5844, + "step": 21171 + }, + { + "epoch": 0.6488905234767685, + "grad_norm": 1.7989579418927184, + "learning_rate": 2.8991488911821455e-06, + "loss": 0.6086, + "step": 21172 + }, + { + "epoch": 0.6489211719995096, + "grad_norm": 1.7615356759731056, + "learning_rate": 2.898698520239871e-06, + "loss": 0.6283, + "step": 21173 + }, + { + "epoch": 0.6489518205222509, + "grad_norm": 1.649936277201448, + "learning_rate": 2.8982481700022604e-06, + "loss": 0.6339, + "step": 21174 + }, + { + "epoch": 0.648982469044992, + "grad_norm": 1.716035554710949, + "learning_rate": 2.8977978404737458e-06, + "loss": 0.6125, + "step": 21175 + }, + { + "epoch": 0.6490131175677333, + "grad_norm": 1.8757485054746303, + "learning_rate": 2.8973475316587667e-06, + "loss": 0.6171, + "step": 21176 + }, + { + "epoch": 0.6490437660904744, + "grad_norm": 1.7267486242738959, + "learning_rate": 2.8968972435617624e-06, + "loss": 0.634, + "step": 21177 + }, + { + "epoch": 0.6490744146132157, + "grad_norm": 1.8470426653077208, + "learning_rate": 2.896446976187166e-06, + "loss": 0.5931, + "step": 21178 + }, + { + "epoch": 0.6491050631359568, + "grad_norm": 0.8611766667368776, + "learning_rate": 2.8959967295394183e-06, + "loss": 0.4365, + "step": 21179 + }, + { + "epoch": 0.6491357116586981, + "grad_norm": 1.7686044124546272, + "learning_rate": 2.8955465036229503e-06, + "loss": 0.6226, + "step": 21180 + }, + { + "epoch": 0.6491663601814393, + "grad_norm": 1.575978917546248, + "learning_rate": 2.8950962984422015e-06, + "loss": 0.5646, + "step": 21181 + }, + { + "epoch": 0.6491970087041805, + "grad_norm": 1.4925726885758075, + "learning_rate": 2.894646114001609e-06, + "loss": 0.4188, + "step": 21182 + }, + { + "epoch": 0.6492276572269217, + "grad_norm": 1.598731890765702, + "learning_rate": 2.8941959503056053e-06, + "loss": 0.5492, + "step": 21183 + }, + { + "epoch": 0.6492583057496628, + "grad_norm": 1.9705932064123075, + "learning_rate": 2.8937458073586276e-06, + "loss": 0.5887, + "step": 21184 + }, + { + "epoch": 0.6492889542724041, + "grad_norm": 1.7816568043978585, + "learning_rate": 
2.893295685165114e-06, + "loss": 0.604, + "step": 21185 + }, + { + "epoch": 0.6493196027951452, + "grad_norm": 1.6858637421507754, + "learning_rate": 2.8928455837294944e-06, + "loss": 0.5467, + "step": 21186 + }, + { + "epoch": 0.6493502513178865, + "grad_norm": 1.8989091993893334, + "learning_rate": 2.892395503056207e-06, + "loss": 0.5998, + "step": 21187 + }, + { + "epoch": 0.6493808998406276, + "grad_norm": 0.8110550240504627, + "learning_rate": 2.891945443149687e-06, + "loss": 0.4099, + "step": 21188 + }, + { + "epoch": 0.6494115483633689, + "grad_norm": 1.8799060395671585, + "learning_rate": 2.891495404014366e-06, + "loss": 0.473, + "step": 21189 + }, + { + "epoch": 0.64944219688611, + "grad_norm": 1.7422757295252915, + "learning_rate": 2.891045385654683e-06, + "loss": 0.5963, + "step": 21190 + }, + { + "epoch": 0.6494728454088513, + "grad_norm": 1.5385168079886702, + "learning_rate": 2.8905953880750646e-06, + "loss": 0.6162, + "step": 21191 + }, + { + "epoch": 0.6495034939315925, + "grad_norm": 0.799519465808415, + "learning_rate": 2.8901454112799543e-06, + "loss": 0.4259, + "step": 21192 + }, + { + "epoch": 0.6495341424543337, + "grad_norm": 1.7277339769676863, + "learning_rate": 2.889695455273781e-06, + "loss": 0.5723, + "step": 21193 + }, + { + "epoch": 0.6495647909770749, + "grad_norm": 1.8321414596059333, + "learning_rate": 2.889245520060976e-06, + "loss": 0.6246, + "step": 21194 + }, + { + "epoch": 0.6495954394998161, + "grad_norm": 1.6956784863492766, + "learning_rate": 2.8887956056459755e-06, + "loss": 0.5601, + "step": 21195 + }, + { + "epoch": 0.6496260880225573, + "grad_norm": 0.7587203388059279, + "learning_rate": 2.8883457120332136e-06, + "loss": 0.4251, + "step": 21196 + }, + { + "epoch": 0.6496567365452985, + "grad_norm": 1.7883616665203155, + "learning_rate": 2.8878958392271194e-06, + "loss": 0.5282, + "step": 21197 + }, + { + "epoch": 0.6496873850680397, + "grad_norm": 0.800220178420437, + "learning_rate": 2.8874459872321283e-06, + "loss": 0.4208, + "step": 21198 + }, + { + "epoch": 0.649718033590781, + "grad_norm": 1.8058867057800312, + "learning_rate": 2.886996156052673e-06, + "loss": 0.598, + "step": 21199 + }, + { + "epoch": 0.6497486821135221, + "grad_norm": 1.7320011961183668, + "learning_rate": 2.8865463456931865e-06, + "loss": 0.6444, + "step": 21200 + }, + { + "epoch": 0.6497793306362634, + "grad_norm": 1.8427165299402053, + "learning_rate": 2.886096556158099e-06, + "loss": 0.5632, + "step": 21201 + }, + { + "epoch": 0.6498099791590045, + "grad_norm": 1.8284642128628787, + "learning_rate": 2.88564678745184e-06, + "loss": 0.5987, + "step": 21202 + }, + { + "epoch": 0.6498406276817458, + "grad_norm": 1.6419436000731673, + "learning_rate": 2.8851970395788474e-06, + "loss": 0.5843, + "step": 21203 + }, + { + "epoch": 0.6498712762044869, + "grad_norm": 1.5981116149353911, + "learning_rate": 2.8847473125435497e-06, + "loss": 0.6088, + "step": 21204 + }, + { + "epoch": 0.6499019247272282, + "grad_norm": 1.7118018300186095, + "learning_rate": 2.884297606350377e-06, + "loss": 0.5668, + "step": 21205 + }, + { + "epoch": 0.6499325732499693, + "grad_norm": 1.6861687739202071, + "learning_rate": 2.8838479210037603e-06, + "loss": 0.6042, + "step": 21206 + }, + { + "epoch": 0.6499632217727106, + "grad_norm": 1.685440166693117, + "learning_rate": 2.883398256508133e-06, + "loss": 0.542, + "step": 21207 + }, + { + "epoch": 0.6499938702954517, + "grad_norm": 1.784920782231317, + "learning_rate": 2.8829486128679234e-06, + "loss": 0.6449, + "step": 21208 + }, + { + "epoch": 
0.650024518818193, + "grad_norm": 1.740621647039357, + "learning_rate": 2.8824989900875623e-06, + "loss": 0.5856, + "step": 21209 + }, + { + "epoch": 0.6500551673409342, + "grad_norm": 1.602555210302882, + "learning_rate": 2.8820493881714807e-06, + "loss": 0.5886, + "step": 21210 + }, + { + "epoch": 0.6500858158636754, + "grad_norm": 1.49393186501103, + "learning_rate": 2.8815998071241097e-06, + "loss": 0.4778, + "step": 21211 + }, + { + "epoch": 0.6501164643864166, + "grad_norm": 2.0772169712941735, + "learning_rate": 2.881150246949878e-06, + "loss": 0.6532, + "step": 21212 + }, + { + "epoch": 0.6501471129091578, + "grad_norm": 1.6153053139431912, + "learning_rate": 2.880700707653211e-06, + "loss": 0.5672, + "step": 21213 + }, + { + "epoch": 0.650177761431899, + "grad_norm": 1.7516002040321517, + "learning_rate": 2.8802511892385466e-06, + "loss": 0.65, + "step": 21214 + }, + { + "epoch": 0.6502084099546401, + "grad_norm": 1.6399470995009913, + "learning_rate": 2.87980169171031e-06, + "loss": 0.5751, + "step": 21215 + }, + { + "epoch": 0.6502390584773814, + "grad_norm": 1.8344177127918808, + "learning_rate": 2.879352215072927e-06, + "loss": 0.5936, + "step": 21216 + }, + { + "epoch": 0.6502697070001225, + "grad_norm": 0.8814447387557726, + "learning_rate": 2.8789027593308295e-06, + "loss": 0.4303, + "step": 21217 + }, + { + "epoch": 0.6503003555228638, + "grad_norm": 1.643847818859935, + "learning_rate": 2.878453324488446e-06, + "loss": 0.6061, + "step": 21218 + }, + { + "epoch": 0.650331004045605, + "grad_norm": 0.8341278583720135, + "learning_rate": 2.878003910550206e-06, + "loss": 0.4248, + "step": 21219 + }, + { + "epoch": 0.6503616525683462, + "grad_norm": 1.7257604707586836, + "learning_rate": 2.877554517520535e-06, + "loss": 0.5552, + "step": 21220 + }, + { + "epoch": 0.6503923010910874, + "grad_norm": 1.6141672823343647, + "learning_rate": 2.8771051454038622e-06, + "loss": 0.5126, + "step": 21221 + }, + { + "epoch": 0.6504229496138286, + "grad_norm": 1.9471511245778435, + "learning_rate": 2.876655794204618e-06, + "loss": 0.6218, + "step": 21222 + }, + { + "epoch": 0.6504535981365698, + "grad_norm": 1.8022541075280618, + "learning_rate": 2.876206463927227e-06, + "loss": 0.5669, + "step": 21223 + }, + { + "epoch": 0.650484246659311, + "grad_norm": 1.7477643556336988, + "learning_rate": 2.8757571545761152e-06, + "loss": 0.5793, + "step": 21224 + }, + { + "epoch": 0.6505148951820522, + "grad_norm": 1.6830370210107148, + "learning_rate": 2.875307866155712e-06, + "loss": 0.684, + "step": 21225 + }, + { + "epoch": 0.6505455437047934, + "grad_norm": 1.7597900418226666, + "learning_rate": 2.8748585986704437e-06, + "loss": 0.5938, + "step": 21226 + }, + { + "epoch": 0.6505761922275346, + "grad_norm": 2.1334000782459386, + "learning_rate": 2.8744093521247396e-06, + "loss": 0.7154, + "step": 21227 + }, + { + "epoch": 0.6506068407502759, + "grad_norm": 1.8665082455807185, + "learning_rate": 2.8739601265230216e-06, + "loss": 0.6835, + "step": 21228 + }, + { + "epoch": 0.650637489273017, + "grad_norm": 1.6899821178448424, + "learning_rate": 2.873510921869719e-06, + "loss": 0.5967, + "step": 21229 + }, + { + "epoch": 0.6506681377957583, + "grad_norm": 1.9551793373273603, + "learning_rate": 2.8730617381692583e-06, + "loss": 0.5295, + "step": 21230 + }, + { + "epoch": 0.6506987863184994, + "grad_norm": 1.8725811655307594, + "learning_rate": 2.872612575426066e-06, + "loss": 0.6987, + "step": 21231 + }, + { + "epoch": 0.6507294348412407, + "grad_norm": 1.6799131357534627, + "learning_rate": 
2.8721634336445616e-06, + "loss": 0.5779, + "step": 21232 + }, + { + "epoch": 0.6507600833639818, + "grad_norm": 1.9195982480304454, + "learning_rate": 2.871714312829179e-06, + "loss": 0.6317, + "step": 21233 + }, + { + "epoch": 0.6507907318867231, + "grad_norm": 1.7695712341826315, + "learning_rate": 2.87126521298434e-06, + "loss": 0.5855, + "step": 21234 + }, + { + "epoch": 0.6508213804094642, + "grad_norm": 1.857968635188257, + "learning_rate": 2.870816134114468e-06, + "loss": 0.6945, + "step": 21235 + }, + { + "epoch": 0.6508520289322055, + "grad_norm": 1.7349246287400557, + "learning_rate": 2.8703670762239886e-06, + "loss": 0.5465, + "step": 21236 + }, + { + "epoch": 0.6508826774549467, + "grad_norm": 1.928458098893157, + "learning_rate": 2.8699180393173275e-06, + "loss": 0.65, + "step": 21237 + }, + { + "epoch": 0.6509133259776879, + "grad_norm": 1.8367076171108876, + "learning_rate": 2.8694690233989116e-06, + "loss": 0.6777, + "step": 21238 + }, + { + "epoch": 0.6509439745004291, + "grad_norm": 0.7894611929357419, + "learning_rate": 2.86902002847316e-06, + "loss": 0.4101, + "step": 21239 + }, + { + "epoch": 0.6509746230231703, + "grad_norm": 2.1297880243960097, + "learning_rate": 2.8685710545444996e-06, + "loss": 0.5948, + "step": 21240 + }, + { + "epoch": 0.6510052715459115, + "grad_norm": 1.7856966460464796, + "learning_rate": 2.8681221016173554e-06, + "loss": 0.5588, + "step": 21241 + }, + { + "epoch": 0.6510359200686527, + "grad_norm": 1.6827954908615188, + "learning_rate": 2.86767316969615e-06, + "loss": 0.6482, + "step": 21242 + }, + { + "epoch": 0.6510665685913939, + "grad_norm": 1.5325770105071128, + "learning_rate": 2.867224258785303e-06, + "loss": 0.7642, + "step": 21243 + }, + { + "epoch": 0.6510972171141352, + "grad_norm": 1.6935520650756108, + "learning_rate": 2.8667753688892442e-06, + "loss": 0.5718, + "step": 21244 + }, + { + "epoch": 0.6511278656368763, + "grad_norm": 1.7514405810737828, + "learning_rate": 2.866326500012392e-06, + "loss": 0.6971, + "step": 21245 + }, + { + "epoch": 0.6511585141596175, + "grad_norm": 0.7555831316386609, + "learning_rate": 2.865877652159172e-06, + "loss": 0.3941, + "step": 21246 + }, + { + "epoch": 0.6511891626823587, + "grad_norm": 1.8872497561424857, + "learning_rate": 2.865428825334004e-06, + "loss": 0.6136, + "step": 21247 + }, + { + "epoch": 0.6512198112050999, + "grad_norm": 1.7010337812715528, + "learning_rate": 2.864980019541312e-06, + "loss": 0.5895, + "step": 21248 + }, + { + "epoch": 0.6512504597278411, + "grad_norm": 1.8035856592868469, + "learning_rate": 2.8645312347855204e-06, + "loss": 0.6678, + "step": 21249 + }, + { + "epoch": 0.6512811082505823, + "grad_norm": 1.969030452576208, + "learning_rate": 2.8640824710710464e-06, + "loss": 0.6398, + "step": 21250 + }, + { + "epoch": 0.6513117567733235, + "grad_norm": 1.8479062458822282, + "learning_rate": 2.8636337284023143e-06, + "loss": 0.5462, + "step": 21251 + }, + { + "epoch": 0.6513424052960647, + "grad_norm": 1.968232606651758, + "learning_rate": 2.863185006783748e-06, + "loss": 0.5814, + "step": 21252 + }, + { + "epoch": 0.651373053818806, + "grad_norm": 1.7303614399284084, + "learning_rate": 2.8627363062197664e-06, + "loss": 0.5715, + "step": 21253 + }, + { + "epoch": 0.6514037023415471, + "grad_norm": 1.8185477764252511, + "learning_rate": 2.862287626714787e-06, + "loss": 0.6112, + "step": 21254 + }, + { + "epoch": 0.6514343508642884, + "grad_norm": 1.8878519290671683, + "learning_rate": 2.861838968273238e-06, + "loss": 0.6289, + "step": 21255 + }, + { + "epoch": 
0.6514649993870295, + "grad_norm": 1.7001433963712367, + "learning_rate": 2.8613903308995356e-06, + "loss": 0.6092, + "step": 21256 + }, + { + "epoch": 0.6514956479097708, + "grad_norm": 1.7344412787167536, + "learning_rate": 2.8609417145981034e-06, + "loss": 0.5913, + "step": 21257 + }, + { + "epoch": 0.6515262964325119, + "grad_norm": 1.8469518716786788, + "learning_rate": 2.860493119373357e-06, + "loss": 0.5899, + "step": 21258 + }, + { + "epoch": 0.6515569449552532, + "grad_norm": 1.7826957864843893, + "learning_rate": 2.86004454522972e-06, + "loss": 0.5407, + "step": 21259 + }, + { + "epoch": 0.6515875934779943, + "grad_norm": 1.684368817851811, + "learning_rate": 2.859595992171613e-06, + "loss": 0.5819, + "step": 21260 + }, + { + "epoch": 0.6516182420007356, + "grad_norm": 1.790402687595252, + "learning_rate": 2.859147460203453e-06, + "loss": 0.5792, + "step": 21261 + }, + { + "epoch": 0.6516488905234767, + "grad_norm": 1.7982282405105912, + "learning_rate": 2.8586989493296603e-06, + "loss": 0.632, + "step": 21262 + }, + { + "epoch": 0.651679539046218, + "grad_norm": 1.6061255393857625, + "learning_rate": 2.858250459554657e-06, + "loss": 0.5485, + "step": 21263 + }, + { + "epoch": 0.6517101875689592, + "grad_norm": 0.7793641632987017, + "learning_rate": 2.857801990882858e-06, + "loss": 0.4127, + "step": 21264 + }, + { + "epoch": 0.6517408360917004, + "grad_norm": 1.7612306682307781, + "learning_rate": 2.857353543318684e-06, + "loss": 0.6836, + "step": 21265 + }, + { + "epoch": 0.6517714846144416, + "grad_norm": 2.1102349750964806, + "learning_rate": 2.856905116866556e-06, + "loss": 0.6783, + "step": 21266 + }, + { + "epoch": 0.6518021331371828, + "grad_norm": 1.7029737107648075, + "learning_rate": 2.856456711530887e-06, + "loss": 0.5762, + "step": 21267 + }, + { + "epoch": 0.651832781659924, + "grad_norm": 2.036318230140799, + "learning_rate": 2.856008327316102e-06, + "loss": 0.631, + "step": 21268 + }, + { + "epoch": 0.6518634301826652, + "grad_norm": 2.269805594552434, + "learning_rate": 2.855559964226613e-06, + "loss": 0.5681, + "step": 21269 + }, + { + "epoch": 0.6518940787054064, + "grad_norm": 1.7823261270592703, + "learning_rate": 2.85511162226684e-06, + "loss": 0.6134, + "step": 21270 + }, + { + "epoch": 0.6519247272281476, + "grad_norm": 1.5204890332309307, + "learning_rate": 2.8546633014412035e-06, + "loss": 0.5343, + "step": 21271 + }, + { + "epoch": 0.6519553757508888, + "grad_norm": 1.465870070499181, + "learning_rate": 2.8542150017541158e-06, + "loss": 0.5311, + "step": 21272 + }, + { + "epoch": 0.6519860242736301, + "grad_norm": 1.999488372551191, + "learning_rate": 2.8537667232099975e-06, + "loss": 0.5963, + "step": 21273 + }, + { + "epoch": 0.6520166727963712, + "grad_norm": 1.773266655286881, + "learning_rate": 2.8533184658132662e-06, + "loss": 0.624, + "step": 21274 + }, + { + "epoch": 0.6520473213191125, + "grad_norm": 1.6956460727019083, + "learning_rate": 2.852870229568335e-06, + "loss": 0.5685, + "step": 21275 + }, + { + "epoch": 0.6520779698418536, + "grad_norm": 1.5781281792389195, + "learning_rate": 2.8524220144796257e-06, + "loss": 0.5017, + "step": 21276 + }, + { + "epoch": 0.6521086183645948, + "grad_norm": 1.718496685750533, + "learning_rate": 2.851973820551549e-06, + "loss": 0.5422, + "step": 21277 + }, + { + "epoch": 0.652139266887336, + "grad_norm": 1.6688679117427467, + "learning_rate": 2.8515256477885247e-06, + "loss": 0.5941, + "step": 21278 + }, + { + "epoch": 0.6521699154100772, + "grad_norm": 1.8301493005288247, + "learning_rate": 
2.8510774961949694e-06, + "loss": 0.6195, + "step": 21279 + }, + { + "epoch": 0.6522005639328184, + "grad_norm": 1.6109496712362787, + "learning_rate": 2.8506293657752947e-06, + "loss": 0.5913, + "step": 21280 + }, + { + "epoch": 0.6522312124555596, + "grad_norm": 1.6002666353760469, + "learning_rate": 2.85018125653392e-06, + "loss": 0.5721, + "step": 21281 + }, + { + "epoch": 0.6522618609783009, + "grad_norm": 1.8789734160034501, + "learning_rate": 2.8497331684752605e-06, + "loss": 0.6653, + "step": 21282 + }, + { + "epoch": 0.652292509501042, + "grad_norm": 1.4848791988639805, + "learning_rate": 2.849285101603729e-06, + "loss": 0.5491, + "step": 21283 + }, + { + "epoch": 0.6523231580237833, + "grad_norm": 1.7250018791356405, + "learning_rate": 2.848837055923741e-06, + "loss": 0.5566, + "step": 21284 + }, + { + "epoch": 0.6523538065465244, + "grad_norm": 1.9791771555770643, + "learning_rate": 2.8483890314397145e-06, + "loss": 0.6162, + "step": 21285 + }, + { + "epoch": 0.6523844550692657, + "grad_norm": 0.8216696050369154, + "learning_rate": 2.8479410281560595e-06, + "loss": 0.4313, + "step": 21286 + }, + { + "epoch": 0.6524151035920068, + "grad_norm": 1.795562735929404, + "learning_rate": 2.8474930460771933e-06, + "loss": 0.6012, + "step": 21287 + }, + { + "epoch": 0.6524457521147481, + "grad_norm": 1.7404170462642976, + "learning_rate": 2.8470450852075273e-06, + "loss": 0.5782, + "step": 21288 + }, + { + "epoch": 0.6524764006374892, + "grad_norm": 1.8185102155859447, + "learning_rate": 2.8465971455514774e-06, + "loss": 0.6216, + "step": 21289 + }, + { + "epoch": 0.6525070491602305, + "grad_norm": 0.7909121015602187, + "learning_rate": 2.8461492271134585e-06, + "loss": 0.4259, + "step": 21290 + }, + { + "epoch": 0.6525376976829717, + "grad_norm": 1.9363808443877004, + "learning_rate": 2.8457013298978797e-06, + "loss": 0.5356, + "step": 21291 + }, + { + "epoch": 0.6525683462057129, + "grad_norm": 1.6250558620808369, + "learning_rate": 2.8452534539091574e-06, + "loss": 0.508, + "step": 21292 + }, + { + "epoch": 0.6525989947284541, + "grad_norm": 1.8639833037339757, + "learning_rate": 2.8448055991517065e-06, + "loss": 0.617, + "step": 21293 + }, + { + "epoch": 0.6526296432511953, + "grad_norm": 1.8812704096061443, + "learning_rate": 2.844357765629935e-06, + "loss": 0.5315, + "step": 21294 + }, + { + "epoch": 0.6526602917739365, + "grad_norm": 1.784642359976055, + "learning_rate": 2.843909953348258e-06, + "loss": 0.5469, + "step": 21295 + }, + { + "epoch": 0.6526909402966777, + "grad_norm": 1.9470396566618584, + "learning_rate": 2.8434621623110904e-06, + "loss": 0.6087, + "step": 21296 + }, + { + "epoch": 0.6527215888194189, + "grad_norm": 1.6800681093831844, + "learning_rate": 2.8430143925228394e-06, + "loss": 0.5542, + "step": 21297 + }, + { + "epoch": 0.6527522373421601, + "grad_norm": 0.7936767794946292, + "learning_rate": 2.8425666439879207e-06, + "loss": 0.4285, + "step": 21298 + }, + { + "epoch": 0.6527828858649013, + "grad_norm": 1.683818675194742, + "learning_rate": 2.8421189167107422e-06, + "loss": 0.5717, + "step": 21299 + }, + { + "epoch": 0.6528135343876426, + "grad_norm": 1.8078149400919798, + "learning_rate": 2.8416712106957213e-06, + "loss": 0.5887, + "step": 21300 + }, + { + "epoch": 0.6528441829103837, + "grad_norm": 1.7395698808094664, + "learning_rate": 2.8412235259472663e-06, + "loss": 0.6359, + "step": 21301 + }, + { + "epoch": 0.652874831433125, + "grad_norm": 1.8483640527452332, + "learning_rate": 2.840775862469787e-06, + "loss": 0.7238, + "step": 21302 + }, + { + 
"epoch": 0.6529054799558661, + "grad_norm": 1.6397164688823607, + "learning_rate": 2.840328220267695e-06, + "loss": 0.5554, + "step": 21303 + }, + { + "epoch": 0.6529361284786074, + "grad_norm": 1.604516101728577, + "learning_rate": 2.8398805993454037e-06, + "loss": 0.6128, + "step": 21304 + }, + { + "epoch": 0.6529667770013485, + "grad_norm": 1.7447030010683995, + "learning_rate": 2.8394329997073193e-06, + "loss": 0.6095, + "step": 21305 + }, + { + "epoch": 0.6529974255240898, + "grad_norm": 1.7445245682703994, + "learning_rate": 2.838985421357855e-06, + "loss": 0.5583, + "step": 21306 + }, + { + "epoch": 0.6530280740468309, + "grad_norm": 1.9321295189929912, + "learning_rate": 2.8385378643014215e-06, + "loss": 0.598, + "step": 21307 + }, + { + "epoch": 0.6530587225695721, + "grad_norm": 1.731247451481944, + "learning_rate": 2.838090328542426e-06, + "loss": 0.6032, + "step": 21308 + }, + { + "epoch": 0.6530893710923134, + "grad_norm": 1.8826909866198405, + "learning_rate": 2.8376428140852812e-06, + "loss": 0.6947, + "step": 21309 + }, + { + "epoch": 0.6531200196150545, + "grad_norm": 1.8252688647063942, + "learning_rate": 2.8371953209343918e-06, + "loss": 0.6317, + "step": 21310 + }, + { + "epoch": 0.6531506681377958, + "grad_norm": 1.5934642280761977, + "learning_rate": 2.8367478490941737e-06, + "loss": 0.605, + "step": 21311 + }, + { + "epoch": 0.6531813166605369, + "grad_norm": 1.6214022684839153, + "learning_rate": 2.8363003985690323e-06, + "loss": 0.5221, + "step": 21312 + }, + { + "epoch": 0.6532119651832782, + "grad_norm": 0.7944624214209801, + "learning_rate": 2.8358529693633752e-06, + "loss": 0.4188, + "step": 21313 + }, + { + "epoch": 0.6532426137060193, + "grad_norm": 1.8671225218315655, + "learning_rate": 2.835405561481612e-06, + "loss": 0.5864, + "step": 21314 + }, + { + "epoch": 0.6532732622287606, + "grad_norm": 2.055091508494061, + "learning_rate": 2.834958174928154e-06, + "loss": 0.5939, + "step": 21315 + }, + { + "epoch": 0.6533039107515017, + "grad_norm": 1.5950895197191584, + "learning_rate": 2.834510809707405e-06, + "loss": 0.5518, + "step": 21316 + }, + { + "epoch": 0.653334559274243, + "grad_norm": 2.0254718337616233, + "learning_rate": 2.8340634658237747e-06, + "loss": 0.7646, + "step": 21317 + }, + { + "epoch": 0.6533652077969841, + "grad_norm": 0.773364667662555, + "learning_rate": 2.8336161432816716e-06, + "loss": 0.4081, + "step": 21318 + }, + { + "epoch": 0.6533958563197254, + "grad_norm": 1.7952748845514566, + "learning_rate": 2.833168842085505e-06, + "loss": 0.6092, + "step": 21319 + }, + { + "epoch": 0.6534265048424666, + "grad_norm": 2.120822073540666, + "learning_rate": 2.8327215622396803e-06, + "loss": 0.6302, + "step": 21320 + }, + { + "epoch": 0.6534571533652078, + "grad_norm": 2.0177830149006026, + "learning_rate": 2.8322743037486022e-06, + "loss": 0.5659, + "step": 21321 + }, + { + "epoch": 0.653487801887949, + "grad_norm": 1.7319602964921188, + "learning_rate": 2.83182706661668e-06, + "loss": 0.6359, + "step": 21322 + }, + { + "epoch": 0.6535184504106902, + "grad_norm": 1.92217999987481, + "learning_rate": 2.8313798508483226e-06, + "loss": 0.6367, + "step": 21323 + }, + { + "epoch": 0.6535490989334314, + "grad_norm": 1.7670934720178357, + "learning_rate": 2.8309326564479328e-06, + "loss": 0.6811, + "step": 21324 + }, + { + "epoch": 0.6535797474561726, + "grad_norm": 1.7417479093757247, + "learning_rate": 2.830485483419918e-06, + "loss": 0.5745, + "step": 21325 + }, + { + "epoch": 0.6536103959789138, + "grad_norm": 1.6967015977644937, + 
"learning_rate": 2.830038331768685e-06, + "loss": 0.5466, + "step": 21326 + }, + { + "epoch": 0.653641044501655, + "grad_norm": 1.802012046078078, + "learning_rate": 2.8295912014986417e-06, + "loss": 0.5816, + "step": 21327 + }, + { + "epoch": 0.6536716930243962, + "grad_norm": 2.118250131593261, + "learning_rate": 2.8291440926141912e-06, + "loss": 0.6706, + "step": 21328 + }, + { + "epoch": 0.6537023415471375, + "grad_norm": 1.6269912458498377, + "learning_rate": 2.828697005119736e-06, + "loss": 0.5524, + "step": 21329 + }, + { + "epoch": 0.6537329900698786, + "grad_norm": 1.8699192862395253, + "learning_rate": 2.8282499390196883e-06, + "loss": 0.6831, + "step": 21330 + }, + { + "epoch": 0.6537636385926199, + "grad_norm": 1.858497525275795, + "learning_rate": 2.82780289431845e-06, + "loss": 0.6496, + "step": 21331 + }, + { + "epoch": 0.653794287115361, + "grad_norm": 1.853466753555718, + "learning_rate": 2.827355871020423e-06, + "loss": 0.6839, + "step": 21332 + }, + { + "epoch": 0.6538249356381023, + "grad_norm": 1.6103226386996734, + "learning_rate": 2.826908869130015e-06, + "loss": 0.5667, + "step": 21333 + }, + { + "epoch": 0.6538555841608434, + "grad_norm": 1.5213772031661985, + "learning_rate": 2.8264618886516315e-06, + "loss": 0.4859, + "step": 21334 + }, + { + "epoch": 0.6538862326835847, + "grad_norm": 1.7795844428591496, + "learning_rate": 2.8260149295896734e-06, + "loss": 0.6298, + "step": 21335 + }, + { + "epoch": 0.6539168812063259, + "grad_norm": 1.9608365407717556, + "learning_rate": 2.825567991948546e-06, + "loss": 0.6217, + "step": 21336 + }, + { + "epoch": 0.6539475297290671, + "grad_norm": 0.793819808991368, + "learning_rate": 2.825121075732654e-06, + "loss": 0.4265, + "step": 21337 + }, + { + "epoch": 0.6539781782518083, + "grad_norm": 1.7915097344224713, + "learning_rate": 2.8246741809464024e-06, + "loss": 0.5412, + "step": 21338 + }, + { + "epoch": 0.6540088267745494, + "grad_norm": 1.6954834402440484, + "learning_rate": 2.824227307594193e-06, + "loss": 0.6256, + "step": 21339 + }, + { + "epoch": 0.6540394752972907, + "grad_norm": 1.7572247510514007, + "learning_rate": 2.823780455680424e-06, + "loss": 0.6379, + "step": 21340 + }, + { + "epoch": 0.6540701238200318, + "grad_norm": 1.8605836941332285, + "learning_rate": 2.8233336252095073e-06, + "loss": 0.5859, + "step": 21341 + }, + { + "epoch": 0.6541007723427731, + "grad_norm": 1.8494269988494862, + "learning_rate": 2.8228868161858413e-06, + "loss": 0.6101, + "step": 21342 + }, + { + "epoch": 0.6541314208655142, + "grad_norm": 1.5956202420017223, + "learning_rate": 2.8224400286138264e-06, + "loss": 0.5879, + "step": 21343 + }, + { + "epoch": 0.6541620693882555, + "grad_norm": 1.6258477160739158, + "learning_rate": 2.8219932624978675e-06, + "loss": 0.6344, + "step": 21344 + }, + { + "epoch": 0.6541927179109966, + "grad_norm": 1.5823091397818323, + "learning_rate": 2.8215465178423663e-06, + "loss": 0.5935, + "step": 21345 + }, + { + "epoch": 0.6542233664337379, + "grad_norm": 1.8037322012381838, + "learning_rate": 2.821099794651726e-06, + "loss": 0.5646, + "step": 21346 + }, + { + "epoch": 0.6542540149564791, + "grad_norm": 0.7943416766926213, + "learning_rate": 2.820653092930345e-06, + "loss": 0.4077, + "step": 21347 + }, + { + "epoch": 0.6542846634792203, + "grad_norm": 1.8806490402076028, + "learning_rate": 2.820206412682627e-06, + "loss": 0.5666, + "step": 21348 + }, + { + "epoch": 0.6543153120019615, + "grad_norm": 2.1407043417170604, + "learning_rate": 2.819759753912975e-06, + "loss": 0.615, + "step": 21349 + }, 
+ { + "epoch": 0.6543459605247027, + "grad_norm": 1.7005081437394414, + "learning_rate": 2.8193131166257875e-06, + "loss": 0.6077, + "step": 21350 + }, + { + "epoch": 0.6543766090474439, + "grad_norm": 1.9937684609137134, + "learning_rate": 2.8188665008254622e-06, + "loss": 0.7495, + "step": 21351 + }, + { + "epoch": 0.6544072575701851, + "grad_norm": 1.784961525153754, + "learning_rate": 2.8184199065164076e-06, + "loss": 0.6008, + "step": 21352 + }, + { + "epoch": 0.6544379060929263, + "grad_norm": 0.7607827053930196, + "learning_rate": 2.8179733337030167e-06, + "loss": 0.4229, + "step": 21353 + }, + { + "epoch": 0.6544685546156676, + "grad_norm": 2.1606061379555017, + "learning_rate": 2.817526782389696e-06, + "loss": 0.7106, + "step": 21354 + }, + { + "epoch": 0.6544992031384087, + "grad_norm": 1.726005207485666, + "learning_rate": 2.8170802525808398e-06, + "loss": 0.5766, + "step": 21355 + }, + { + "epoch": 0.65452985166115, + "grad_norm": 1.9241542227306423, + "learning_rate": 2.816633744280851e-06, + "loss": 0.6002, + "step": 21356 + }, + { + "epoch": 0.6545605001838911, + "grad_norm": 1.7305166963724843, + "learning_rate": 2.8161872574941295e-06, + "loss": 0.5939, + "step": 21357 + }, + { + "epoch": 0.6545911487066324, + "grad_norm": 1.9890081510662239, + "learning_rate": 2.8157407922250725e-06, + "loss": 0.7018, + "step": 21358 + }, + { + "epoch": 0.6546217972293735, + "grad_norm": 1.6465511675158606, + "learning_rate": 2.8152943484780804e-06, + "loss": 0.6212, + "step": 21359 + }, + { + "epoch": 0.6546524457521148, + "grad_norm": 0.7962310765239404, + "learning_rate": 2.8148479262575536e-06, + "loss": 0.4079, + "step": 21360 + }, + { + "epoch": 0.6546830942748559, + "grad_norm": 1.8056985674638402, + "learning_rate": 2.81440152556789e-06, + "loss": 0.606, + "step": 21361 + }, + { + "epoch": 0.6547137427975972, + "grad_norm": 1.7814889018897935, + "learning_rate": 2.8139551464134827e-06, + "loss": 0.581, + "step": 21362 + }, + { + "epoch": 0.6547443913203383, + "grad_norm": 1.866648695423336, + "learning_rate": 2.813508788798739e-06, + "loss": 0.6171, + "step": 21363 + }, + { + "epoch": 0.6547750398430796, + "grad_norm": 0.7874163180229834, + "learning_rate": 2.81306245272805e-06, + "loss": 0.4129, + "step": 21364 + }, + { + "epoch": 0.6548056883658208, + "grad_norm": 1.8992730968718252, + "learning_rate": 2.812616138205819e-06, + "loss": 0.6101, + "step": 21365 + }, + { + "epoch": 0.654836336888562, + "grad_norm": 2.0343960113230346, + "learning_rate": 2.812169845236439e-06, + "loss": 0.4744, + "step": 21366 + }, + { + "epoch": 0.6548669854113032, + "grad_norm": 1.7765874448461836, + "learning_rate": 2.8117235738243087e-06, + "loss": 0.6421, + "step": 21367 + }, + { + "epoch": 0.6548976339340444, + "grad_norm": 2.0368885179776677, + "learning_rate": 2.811277323973828e-06, + "loss": 0.5797, + "step": 21368 + }, + { + "epoch": 0.6549282824567856, + "grad_norm": 1.8019804626918114, + "learning_rate": 2.8108310956893896e-06, + "loss": 0.5726, + "step": 21369 + }, + { + "epoch": 0.6549589309795267, + "grad_norm": 1.8652578130153599, + "learning_rate": 2.810384888975393e-06, + "loss": 0.5432, + "step": 21370 + }, + { + "epoch": 0.654989579502268, + "grad_norm": 1.7492508064616112, + "learning_rate": 2.8099387038362357e-06, + "loss": 0.6255, + "step": 21371 + }, + { + "epoch": 0.6550202280250091, + "grad_norm": 1.958122173182121, + "learning_rate": 2.809492540276312e-06, + "loss": 0.5661, + "step": 21372 + }, + { + "epoch": 0.6550508765477504, + "grad_norm": 1.888378787912314, + 
"learning_rate": 2.809046398300019e-06, + "loss": 0.6351, + "step": 21373 + }, + { + "epoch": 0.6550815250704916, + "grad_norm": 1.8524963938061052, + "learning_rate": 2.8086002779117515e-06, + "loss": 0.6459, + "step": 21374 + }, + { + "epoch": 0.6551121735932328, + "grad_norm": 1.822399755024566, + "learning_rate": 2.8081541791159063e-06, + "loss": 0.6425, + "step": 21375 + }, + { + "epoch": 0.655142822115974, + "grad_norm": 1.8367212465075515, + "learning_rate": 2.8077081019168804e-06, + "loss": 0.5941, + "step": 21376 + }, + { + "epoch": 0.6551734706387152, + "grad_norm": 1.671080667648415, + "learning_rate": 2.807262046319066e-06, + "loss": 0.5817, + "step": 21377 + }, + { + "epoch": 0.6552041191614564, + "grad_norm": 1.7395092774025627, + "learning_rate": 2.806816012326859e-06, + "loss": 0.5477, + "step": 21378 + }, + { + "epoch": 0.6552347676841976, + "grad_norm": 1.8312941197103, + "learning_rate": 2.806369999944657e-06, + "loss": 0.6954, + "step": 21379 + }, + { + "epoch": 0.6552654162069388, + "grad_norm": 1.5791424416012165, + "learning_rate": 2.8059240091768514e-06, + "loss": 0.5195, + "step": 21380 + }, + { + "epoch": 0.65529606472968, + "grad_norm": 0.8753246533064106, + "learning_rate": 2.805478040027837e-06, + "loss": 0.4082, + "step": 21381 + }, + { + "epoch": 0.6553267132524212, + "grad_norm": 2.030137607772288, + "learning_rate": 2.8050320925020112e-06, + "loss": 0.5327, + "step": 21382 + }, + { + "epoch": 0.6553573617751625, + "grad_norm": 1.6772432959405064, + "learning_rate": 2.8045861666037645e-06, + "loss": 0.5427, + "step": 21383 + }, + { + "epoch": 0.6553880102979036, + "grad_norm": 1.8305614797393084, + "learning_rate": 2.8041402623374936e-06, + "loss": 0.5631, + "step": 21384 + }, + { + "epoch": 0.6554186588206449, + "grad_norm": 1.836189654496562, + "learning_rate": 2.8036943797075884e-06, + "loss": 0.606, + "step": 21385 + }, + { + "epoch": 0.655449307343386, + "grad_norm": 1.9407774768354578, + "learning_rate": 2.8032485187184446e-06, + "loss": 0.5522, + "step": 21386 + }, + { + "epoch": 0.6554799558661273, + "grad_norm": 1.7512094129034395, + "learning_rate": 2.802802679374457e-06, + "loss": 0.6022, + "step": 21387 + }, + { + "epoch": 0.6555106043888684, + "grad_norm": 0.7955712021507424, + "learning_rate": 2.8023568616800147e-06, + "loss": 0.4286, + "step": 21388 + }, + { + "epoch": 0.6555412529116097, + "grad_norm": 0.7683781135435055, + "learning_rate": 2.8019110656395124e-06, + "loss": 0.3855, + "step": 21389 + }, + { + "epoch": 0.6555719014343508, + "grad_norm": 1.675714912351998, + "learning_rate": 2.8014652912573453e-06, + "loss": 0.588, + "step": 21390 + }, + { + "epoch": 0.6556025499570921, + "grad_norm": 0.8165591852590856, + "learning_rate": 2.8010195385379014e-06, + "loss": 0.4282, + "step": 21391 + }, + { + "epoch": 0.6556331984798333, + "grad_norm": 1.8730074767920943, + "learning_rate": 2.800573807485574e-06, + "loss": 0.6121, + "step": 21392 + }, + { + "epoch": 0.6556638470025745, + "grad_norm": 1.7476470119869174, + "learning_rate": 2.8001280981047574e-06, + "loss": 0.5662, + "step": 21393 + }, + { + "epoch": 0.6556944955253157, + "grad_norm": 0.7720529416656503, + "learning_rate": 2.7996824103998398e-06, + "loss": 0.4027, + "step": 21394 + }, + { + "epoch": 0.6557251440480569, + "grad_norm": 1.835786044141044, + "learning_rate": 2.7992367443752167e-06, + "loss": 0.5341, + "step": 21395 + }, + { + "epoch": 0.6557557925707981, + "grad_norm": 1.7132231868070382, + "learning_rate": 2.7987911000352752e-06, + "loss": 0.5091, + "step": 21396 + }, 
+ { + "epoch": 0.6557864410935393, + "grad_norm": 1.7975188789055885, + "learning_rate": 2.7983454773844078e-06, + "loss": 0.5903, + "step": 21397 + }, + { + "epoch": 0.6558170896162805, + "grad_norm": 1.9274067754698736, + "learning_rate": 2.797899876427008e-06, + "loss": 0.6309, + "step": 21398 + }, + { + "epoch": 0.6558477381390218, + "grad_norm": 0.7706900440948502, + "learning_rate": 2.7974542971674614e-06, + "loss": 0.4024, + "step": 21399 + }, + { + "epoch": 0.6558783866617629, + "grad_norm": 1.8359159892811296, + "learning_rate": 2.797008739610162e-06, + "loss": 0.6392, + "step": 21400 + }, + { + "epoch": 0.655909035184504, + "grad_norm": 2.043488798396027, + "learning_rate": 2.7965632037595002e-06, + "loss": 0.6412, + "step": 21401 + }, + { + "epoch": 0.6559396837072453, + "grad_norm": 1.5969109078370662, + "learning_rate": 2.7961176896198637e-06, + "loss": 0.5749, + "step": 21402 + }, + { + "epoch": 0.6559703322299865, + "grad_norm": 1.7616998132091244, + "learning_rate": 2.7956721971956435e-06, + "loss": 0.6139, + "step": 21403 + }, + { + "epoch": 0.6560009807527277, + "grad_norm": 1.6731910416905222, + "learning_rate": 2.7952267264912314e-06, + "loss": 0.5579, + "step": 21404 + }, + { + "epoch": 0.6560316292754689, + "grad_norm": 1.8561570697987677, + "learning_rate": 2.7947812775110117e-06, + "loss": 0.7399, + "step": 21405 + }, + { + "epoch": 0.6560622777982101, + "grad_norm": 1.6410941805015573, + "learning_rate": 2.7943358502593787e-06, + "loss": 0.5599, + "step": 21406 + }, + { + "epoch": 0.6560929263209513, + "grad_norm": 1.788246589235603, + "learning_rate": 2.793890444740715e-06, + "loss": 0.5775, + "step": 21407 + }, + { + "epoch": 0.6561235748436925, + "grad_norm": 1.7844929961597795, + "learning_rate": 2.793445060959417e-06, + "loss": 0.6102, + "step": 21408 + }, + { + "epoch": 0.6561542233664337, + "grad_norm": 1.933517557375261, + "learning_rate": 2.7929996989198695e-06, + "loss": 0.7438, + "step": 21409 + }, + { + "epoch": 0.656184871889175, + "grad_norm": 1.7894284626828527, + "learning_rate": 2.7925543586264588e-06, + "loss": 0.6153, + "step": 21410 + }, + { + "epoch": 0.6562155204119161, + "grad_norm": 1.9463686830526585, + "learning_rate": 2.7921090400835747e-06, + "loss": 0.5999, + "step": 21411 + }, + { + "epoch": 0.6562461689346574, + "grad_norm": 1.9690931130307114, + "learning_rate": 2.7916637432956066e-06, + "loss": 0.6692, + "step": 21412 + }, + { + "epoch": 0.6562768174573985, + "grad_norm": 1.7138930721778811, + "learning_rate": 2.7912184682669396e-06, + "loss": 0.5676, + "step": 21413 + }, + { + "epoch": 0.6563074659801398, + "grad_norm": 1.9772924041243076, + "learning_rate": 2.7907732150019617e-06, + "loss": 0.5125, + "step": 21414 + }, + { + "epoch": 0.6563381145028809, + "grad_norm": 1.8918132071837734, + "learning_rate": 2.790327983505062e-06, + "loss": 0.6882, + "step": 21415 + }, + { + "epoch": 0.6563687630256222, + "grad_norm": 1.7300934410674211, + "learning_rate": 2.789882773780625e-06, + "loss": 0.5882, + "step": 21416 + }, + { + "epoch": 0.6563994115483633, + "grad_norm": 1.836069452102581, + "learning_rate": 2.78943758583304e-06, + "loss": 0.5685, + "step": 21417 + }, + { + "epoch": 0.6564300600711046, + "grad_norm": 1.7625981569615596, + "learning_rate": 2.7889924196666908e-06, + "loss": 0.6105, + "step": 21418 + }, + { + "epoch": 0.6564607085938458, + "grad_norm": 0.8228398055770814, + "learning_rate": 2.788547275285964e-06, + "loss": 0.423, + "step": 21419 + }, + { + "epoch": 0.656491357116587, + "grad_norm": 0.820798669964529, + 
"learning_rate": 2.788102152695249e-06, + "loss": 0.4202, + "step": 21420 + }, + { + "epoch": 0.6565220056393282, + "grad_norm": 1.87706840021976, + "learning_rate": 2.787657051898928e-06, + "loss": 0.6449, + "step": 21421 + }, + { + "epoch": 0.6565526541620694, + "grad_norm": 1.9619239857161566, + "learning_rate": 2.787211972901387e-06, + "loss": 0.678, + "step": 21422 + }, + { + "epoch": 0.6565833026848106, + "grad_norm": 1.76018640023994, + "learning_rate": 2.7867669157070155e-06, + "loss": 0.5837, + "step": 21423 + }, + { + "epoch": 0.6566139512075518, + "grad_norm": 1.7099207056463877, + "learning_rate": 2.7863218803201938e-06, + "loss": 0.6195, + "step": 21424 + }, + { + "epoch": 0.656644599730293, + "grad_norm": 2.0067135984873556, + "learning_rate": 2.7858768667453107e-06, + "loss": 0.6877, + "step": 21425 + }, + { + "epoch": 0.6566752482530342, + "grad_norm": 1.771443326904726, + "learning_rate": 2.7854318749867454e-06, + "loss": 0.5652, + "step": 21426 + }, + { + "epoch": 0.6567058967757754, + "grad_norm": 1.8287287731246717, + "learning_rate": 2.784986905048891e-06, + "loss": 0.6309, + "step": 21427 + }, + { + "epoch": 0.6567365452985167, + "grad_norm": 1.732869690521806, + "learning_rate": 2.7845419569361263e-06, + "loss": 0.6053, + "step": 21428 + }, + { + "epoch": 0.6567671938212578, + "grad_norm": 2.0272022919921477, + "learning_rate": 2.784097030652835e-06, + "loss": 0.6055, + "step": 21429 + }, + { + "epoch": 0.6567978423439991, + "grad_norm": 1.9430908254828452, + "learning_rate": 2.7836521262034034e-06, + "loss": 0.5548, + "step": 21430 + }, + { + "epoch": 0.6568284908667402, + "grad_norm": 0.7731673754756109, + "learning_rate": 2.7832072435922154e-06, + "loss": 0.3976, + "step": 21431 + }, + { + "epoch": 0.6568591393894814, + "grad_norm": 1.7014729388832104, + "learning_rate": 2.7827623828236523e-06, + "loss": 0.5851, + "step": 21432 + }, + { + "epoch": 0.6568897879122226, + "grad_norm": 1.8149339303456398, + "learning_rate": 2.7823175439020984e-06, + "loss": 0.6258, + "step": 21433 + }, + { + "epoch": 0.6569204364349638, + "grad_norm": 1.7933067542538084, + "learning_rate": 2.781872726831939e-06, + "loss": 0.5702, + "step": 21434 + }, + { + "epoch": 0.656951084957705, + "grad_norm": 1.7256444794088968, + "learning_rate": 2.781427931617554e-06, + "loss": 0.6548, + "step": 21435 + }, + { + "epoch": 0.6569817334804462, + "grad_norm": 1.9809864560984254, + "learning_rate": 2.7809831582633284e-06, + "loss": 0.6632, + "step": 21436 + }, + { + "epoch": 0.6570123820031875, + "grad_norm": 1.7009967307899136, + "learning_rate": 2.7805384067736397e-06, + "loss": 0.5729, + "step": 21437 + }, + { + "epoch": 0.6570430305259286, + "grad_norm": 0.7930365048687101, + "learning_rate": 2.780093677152878e-06, + "loss": 0.411, + "step": 21438 + }, + { + "epoch": 0.6570736790486699, + "grad_norm": 1.6757414595312399, + "learning_rate": 2.7796489694054214e-06, + "loss": 0.5178, + "step": 21439 + }, + { + "epoch": 0.657104327571411, + "grad_norm": 2.021057404563855, + "learning_rate": 2.7792042835356492e-06, + "loss": 0.6243, + "step": 21440 + }, + { + "epoch": 0.6571349760941523, + "grad_norm": 1.6826506211352288, + "learning_rate": 2.778759619547946e-06, + "loss": 0.5399, + "step": 21441 + }, + { + "epoch": 0.6571656246168934, + "grad_norm": 1.572373738305891, + "learning_rate": 2.7783149774466944e-06, + "loss": 0.5329, + "step": 21442 + }, + { + "epoch": 0.6571962731396347, + "grad_norm": 0.7847245247327581, + "learning_rate": 2.7778703572362714e-06, + "loss": 0.4321, + "step": 21443 + }, 
+ { + "epoch": 0.6572269216623758, + "grad_norm": 1.7575531844163093, + "learning_rate": 2.7774257589210606e-06, + "loss": 0.5917, + "step": 21444 + }, + { + "epoch": 0.6572575701851171, + "grad_norm": 1.7529745341291707, + "learning_rate": 2.7769811825054427e-06, + "loss": 0.5609, + "step": 21445 + }, + { + "epoch": 0.6572882187078583, + "grad_norm": 1.8983470438467638, + "learning_rate": 2.7765366279938e-06, + "loss": 0.6398, + "step": 21446 + }, + { + "epoch": 0.6573188672305995, + "grad_norm": 2.1469990357227355, + "learning_rate": 2.7760920953905104e-06, + "loss": 0.6888, + "step": 21447 + }, + { + "epoch": 0.6573495157533407, + "grad_norm": 0.7791425998460099, + "learning_rate": 2.7756475846999503e-06, + "loss": 0.4114, + "step": 21448 + }, + { + "epoch": 0.6573801642760819, + "grad_norm": 1.7374972484889717, + "learning_rate": 2.775203095926508e-06, + "loss": 0.5828, + "step": 21449 + }, + { + "epoch": 0.6574108127988231, + "grad_norm": 1.7875469925565528, + "learning_rate": 2.7747586290745586e-06, + "loss": 0.5868, + "step": 21450 + }, + { + "epoch": 0.6574414613215643, + "grad_norm": 1.7230954233584932, + "learning_rate": 2.77431418414848e-06, + "loss": 0.6817, + "step": 21451 + }, + { + "epoch": 0.6574721098443055, + "grad_norm": 1.7219905387557268, + "learning_rate": 2.7738697611526533e-06, + "loss": 0.5675, + "step": 21452 + }, + { + "epoch": 0.6575027583670467, + "grad_norm": 1.8774119118948334, + "learning_rate": 2.773425360091457e-06, + "loss": 0.6906, + "step": 21453 + }, + { + "epoch": 0.6575334068897879, + "grad_norm": 1.6040308426815049, + "learning_rate": 2.7729809809692734e-06, + "loss": 0.5911, + "step": 21454 + }, + { + "epoch": 0.6575640554125292, + "grad_norm": 0.7889255899845492, + "learning_rate": 2.772536623790475e-06, + "loss": 0.4332, + "step": 21455 + }, + { + "epoch": 0.6575947039352703, + "grad_norm": 1.633094350418341, + "learning_rate": 2.7720922885594433e-06, + "loss": 0.6735, + "step": 21456 + }, + { + "epoch": 0.6576253524580116, + "grad_norm": 1.8829556949435886, + "learning_rate": 2.771647975280558e-06, + "loss": 0.5492, + "step": 21457 + }, + { + "epoch": 0.6576560009807527, + "grad_norm": 2.0008337972066834, + "learning_rate": 2.7712036839581956e-06, + "loss": 0.6076, + "step": 21458 + }, + { + "epoch": 0.657686649503494, + "grad_norm": 1.9801198143774943, + "learning_rate": 2.77075941459673e-06, + "loss": 0.6555, + "step": 21459 + }, + { + "epoch": 0.6577172980262351, + "grad_norm": 0.834755436007959, + "learning_rate": 2.7703151672005457e-06, + "loss": 0.4148, + "step": 21460 + }, + { + "epoch": 0.6577479465489764, + "grad_norm": 1.7638191808940262, + "learning_rate": 2.7698709417740165e-06, + "loss": 0.5781, + "step": 21461 + }, + { + "epoch": 0.6577785950717175, + "grad_norm": 1.649848447513304, + "learning_rate": 2.769426738321518e-06, + "loss": 0.631, + "step": 21462 + }, + { + "epoch": 0.6578092435944587, + "grad_norm": 1.6204362602507218, + "learning_rate": 2.768982556847429e-06, + "loss": 0.6183, + "step": 21463 + }, + { + "epoch": 0.6578398921172, + "grad_norm": 1.908098309170488, + "learning_rate": 2.768538397356125e-06, + "loss": 0.6501, + "step": 21464 + }, + { + "epoch": 0.6578705406399411, + "grad_norm": 1.8337082271385612, + "learning_rate": 2.768094259851985e-06, + "loss": 0.6493, + "step": 21465 + }, + { + "epoch": 0.6579011891626824, + "grad_norm": 2.0553952473764108, + "learning_rate": 2.767650144339381e-06, + "loss": 0.6004, + "step": 21466 + }, + { + "epoch": 0.6579318376854235, + "grad_norm": 1.8095415513808435, + 
"learning_rate": 2.7672060508226923e-06, + "loss": 0.5802, + "step": 21467 + }, + { + "epoch": 0.6579624862081648, + "grad_norm": 1.7116132439298601, + "learning_rate": 2.766761979306295e-06, + "loss": 0.5562, + "step": 21468 + }, + { + "epoch": 0.6579931347309059, + "grad_norm": 1.5598689487150472, + "learning_rate": 2.7663179297945637e-06, + "loss": 0.5838, + "step": 21469 + }, + { + "epoch": 0.6580237832536472, + "grad_norm": 1.5680076178926563, + "learning_rate": 2.765873902291871e-06, + "loss": 0.5194, + "step": 21470 + }, + { + "epoch": 0.6580544317763883, + "grad_norm": 1.8408705274193828, + "learning_rate": 2.765429896802595e-06, + "loss": 0.5613, + "step": 21471 + }, + { + "epoch": 0.6580850802991296, + "grad_norm": 1.9248889907337876, + "learning_rate": 2.7649859133311092e-06, + "loss": 0.6403, + "step": 21472 + }, + { + "epoch": 0.6581157288218707, + "grad_norm": 0.801285040487242, + "learning_rate": 2.764541951881791e-06, + "loss": 0.4217, + "step": 21473 + }, + { + "epoch": 0.658146377344612, + "grad_norm": 1.9648930038157644, + "learning_rate": 2.7640980124590113e-06, + "loss": 0.6504, + "step": 21474 + }, + { + "epoch": 0.6581770258673532, + "grad_norm": 0.836619507053905, + "learning_rate": 2.7636540950671463e-06, + "loss": 0.4283, + "step": 21475 + }, + { + "epoch": 0.6582076743900944, + "grad_norm": 1.7431472848891634, + "learning_rate": 2.7632101997105708e-06, + "loss": 0.572, + "step": 21476 + }, + { + "epoch": 0.6582383229128356, + "grad_norm": 1.7886661261760541, + "learning_rate": 2.7627663263936582e-06, + "loss": 0.5813, + "step": 21477 + }, + { + "epoch": 0.6582689714355768, + "grad_norm": 1.7460627676862313, + "learning_rate": 2.7623224751207773e-06, + "loss": 0.6555, + "step": 21478 + }, + { + "epoch": 0.658299619958318, + "grad_norm": 1.862391460025425, + "learning_rate": 2.7618786458963096e-06, + "loss": 0.5805, + "step": 21479 + }, + { + "epoch": 0.6583302684810592, + "grad_norm": 1.6738460248197384, + "learning_rate": 2.761434838724622e-06, + "loss": 0.5989, + "step": 21480 + }, + { + "epoch": 0.6583609170038004, + "grad_norm": 1.720947459032968, + "learning_rate": 2.760991053610092e-06, + "loss": 0.6247, + "step": 21481 + }, + { + "epoch": 0.6583915655265417, + "grad_norm": 2.042967023918439, + "learning_rate": 2.7605472905570875e-06, + "loss": 0.6358, + "step": 21482 + }, + { + "epoch": 0.6584222140492828, + "grad_norm": 1.9177689567420126, + "learning_rate": 2.7601035495699843e-06, + "loss": 0.6417, + "step": 21483 + }, + { + "epoch": 0.6584528625720241, + "grad_norm": 1.8227800717051492, + "learning_rate": 2.7596598306531554e-06, + "loss": 0.5814, + "step": 21484 + }, + { + "epoch": 0.6584835110947652, + "grad_norm": 1.7769285728126032, + "learning_rate": 2.75921613381097e-06, + "loss": 0.5867, + "step": 21485 + }, + { + "epoch": 0.6585141596175065, + "grad_norm": 2.020969631932591, + "learning_rate": 2.7587724590478005e-06, + "loss": 0.5988, + "step": 21486 + }, + { + "epoch": 0.6585448081402476, + "grad_norm": 1.723253082137025, + "learning_rate": 2.7583288063680214e-06, + "loss": 0.558, + "step": 21487 + }, + { + "epoch": 0.6585754566629889, + "grad_norm": 1.6533756714876287, + "learning_rate": 2.757885175776003e-06, + "loss": 0.5641, + "step": 21488 + }, + { + "epoch": 0.65860610518573, + "grad_norm": 1.683996333490617, + "learning_rate": 2.7574415672761113e-06, + "loss": 0.5901, + "step": 21489 + }, + { + "epoch": 0.6586367537084713, + "grad_norm": 1.753071012756386, + "learning_rate": 2.7569979808727255e-06, + "loss": 0.5454, + "step": 21490 + }, + 
{ + "epoch": 0.6586674022312125, + "grad_norm": 1.7711615060288997, + "learning_rate": 2.75655441657021e-06, + "loss": 0.518, + "step": 21491 + }, + { + "epoch": 0.6586980507539537, + "grad_norm": 0.8612131672942541, + "learning_rate": 2.75611087437294e-06, + "loss": 0.4125, + "step": 21492 + }, + { + "epoch": 0.6587286992766949, + "grad_norm": 2.0404758361711552, + "learning_rate": 2.7556673542852825e-06, + "loss": 0.6272, + "step": 21493 + }, + { + "epoch": 0.658759347799436, + "grad_norm": 1.8332728054432315, + "learning_rate": 2.7552238563116086e-06, + "loss": 0.6759, + "step": 21494 + }, + { + "epoch": 0.6587899963221773, + "grad_norm": 1.9638841247828014, + "learning_rate": 2.75478038045629e-06, + "loss": 0.6842, + "step": 21495 + }, + { + "epoch": 0.6588206448449184, + "grad_norm": 1.7801380693844397, + "learning_rate": 2.754336926723693e-06, + "loss": 0.6025, + "step": 21496 + }, + { + "epoch": 0.6588512933676597, + "grad_norm": 1.7232117836939604, + "learning_rate": 2.7538934951181884e-06, + "loss": 0.6765, + "step": 21497 + }, + { + "epoch": 0.6588819418904008, + "grad_norm": 1.6719622582908924, + "learning_rate": 2.7534500856441483e-06, + "loss": 0.5977, + "step": 21498 + }, + { + "epoch": 0.6589125904131421, + "grad_norm": 1.6980389681814771, + "learning_rate": 2.7530066983059365e-06, + "loss": 0.675, + "step": 21499 + }, + { + "epoch": 0.6589432389358832, + "grad_norm": 1.5229736662851396, + "learning_rate": 2.752563333107926e-06, + "loss": 0.6246, + "step": 21500 + }, + { + "epoch": 0.6589738874586245, + "grad_norm": 1.6620355819166588, + "learning_rate": 2.7521199900544847e-06, + "loss": 0.611, + "step": 21501 + }, + { + "epoch": 0.6590045359813657, + "grad_norm": 1.7958642887485639, + "learning_rate": 2.7516766691499797e-06, + "loss": 0.5974, + "step": 21502 + }, + { + "epoch": 0.6590351845041069, + "grad_norm": 0.7659042486333623, + "learning_rate": 2.7512333703987803e-06, + "loss": 0.4383, + "step": 21503 + }, + { + "epoch": 0.6590658330268481, + "grad_norm": 0.8044829084614901, + "learning_rate": 2.750790093805253e-06, + "loss": 0.4116, + "step": 21504 + }, + { + "epoch": 0.6590964815495893, + "grad_norm": 1.696736221505712, + "learning_rate": 2.750346839373766e-06, + "loss": 0.5643, + "step": 21505 + }, + { + "epoch": 0.6591271300723305, + "grad_norm": 1.8267301597143992, + "learning_rate": 2.7499036071086893e-06, + "loss": 0.6107, + "step": 21506 + }, + { + "epoch": 0.6591577785950717, + "grad_norm": 1.6637777422088096, + "learning_rate": 2.749460397014385e-06, + "loss": 0.499, + "step": 21507 + }, + { + "epoch": 0.6591884271178129, + "grad_norm": 1.6534723771215374, + "learning_rate": 2.749017209095225e-06, + "loss": 0.5507, + "step": 21508 + }, + { + "epoch": 0.6592190756405542, + "grad_norm": 1.9149328596595268, + "learning_rate": 2.7485740433555753e-06, + "loss": 0.5118, + "step": 21509 + }, + { + "epoch": 0.6592497241632953, + "grad_norm": 1.9992374615773663, + "learning_rate": 2.7481308997998e-06, + "loss": 0.5744, + "step": 21510 + }, + { + "epoch": 0.6592803726860366, + "grad_norm": 0.7632218121518454, + "learning_rate": 2.7476877784322662e-06, + "loss": 0.4075, + "step": 21511 + }, + { + "epoch": 0.6593110212087777, + "grad_norm": 1.993489130158554, + "learning_rate": 2.7472446792573435e-06, + "loss": 0.665, + "step": 21512 + }, + { + "epoch": 0.659341669731519, + "grad_norm": 2.273815751371751, + "learning_rate": 2.746801602279394e-06, + "loss": 0.521, + "step": 21513 + }, + { + "epoch": 0.6593723182542601, + "grad_norm": 1.639558440252497, + "learning_rate": 
2.7463585475027866e-06, + "loss": 0.5419, + "step": 21514 + }, + { + "epoch": 0.6594029667770014, + "grad_norm": 2.0137219968727518, + "learning_rate": 2.7459155149318828e-06, + "loss": 0.585, + "step": 21515 + }, + { + "epoch": 0.6594336152997425, + "grad_norm": 1.9614122976336996, + "learning_rate": 2.74547250457105e-06, + "loss": 0.6188, + "step": 21516 + }, + { + "epoch": 0.6594642638224838, + "grad_norm": 1.6206149718606815, + "learning_rate": 2.7450295164246556e-06, + "loss": 0.5549, + "step": 21517 + }, + { + "epoch": 0.659494912345225, + "grad_norm": 1.8376572320685785, + "learning_rate": 2.74458655049706e-06, + "loss": 0.5499, + "step": 21518 + }, + { + "epoch": 0.6595255608679662, + "grad_norm": 1.8103713992504833, + "learning_rate": 2.7441436067926307e-06, + "loss": 0.6372, + "step": 21519 + }, + { + "epoch": 0.6595562093907074, + "grad_norm": 1.5792782202166433, + "learning_rate": 2.743700685315734e-06, + "loss": 0.5758, + "step": 21520 + }, + { + "epoch": 0.6595868579134486, + "grad_norm": 1.5733577469159525, + "learning_rate": 2.743257786070729e-06, + "loss": 0.538, + "step": 21521 + }, + { + "epoch": 0.6596175064361898, + "grad_norm": 0.817641026258193, + "learning_rate": 2.742814909061985e-06, + "loss": 0.4401, + "step": 21522 + }, + { + "epoch": 0.659648154958931, + "grad_norm": 1.6958151536407273, + "learning_rate": 2.74237205429386e-06, + "loss": 0.63, + "step": 21523 + }, + { + "epoch": 0.6596788034816722, + "grad_norm": 1.530046735493897, + "learning_rate": 2.741929221770723e-06, + "loss": 0.528, + "step": 21524 + }, + { + "epoch": 0.6597094520044133, + "grad_norm": 1.8600064502647262, + "learning_rate": 2.7414864114969355e-06, + "loss": 0.6607, + "step": 21525 + }, + { + "epoch": 0.6597401005271546, + "grad_norm": 1.9045637684318661, + "learning_rate": 2.7410436234768584e-06, + "loss": 0.6131, + "step": 21526 + }, + { + "epoch": 0.6597707490498957, + "grad_norm": 1.6772566923906214, + "learning_rate": 2.740600857714857e-06, + "loss": 0.5622, + "step": 21527 + }, + { + "epoch": 0.659801397572637, + "grad_norm": 1.701592667383242, + "learning_rate": 2.7401581142152945e-06, + "loss": 0.6135, + "step": 21528 + }, + { + "epoch": 0.6598320460953782, + "grad_norm": 1.8768467008774672, + "learning_rate": 2.7397153929825317e-06, + "loss": 0.6213, + "step": 21529 + }, + { + "epoch": 0.6598626946181194, + "grad_norm": 1.9539176478754672, + "learning_rate": 2.73927269402093e-06, + "loss": 0.5667, + "step": 21530 + }, + { + "epoch": 0.6598933431408606, + "grad_norm": 1.6713626916907818, + "learning_rate": 2.7388300173348557e-06, + "loss": 0.5412, + "step": 21531 + }, + { + "epoch": 0.6599239916636018, + "grad_norm": 1.5035663659673983, + "learning_rate": 2.7383873629286658e-06, + "loss": 0.5724, + "step": 21532 + }, + { + "epoch": 0.659954640186343, + "grad_norm": 1.9126941408902194, + "learning_rate": 2.737944730806725e-06, + "loss": 0.5336, + "step": 21533 + }, + { + "epoch": 0.6599852887090842, + "grad_norm": 1.689923643008036, + "learning_rate": 2.737502120973391e-06, + "loss": 0.6043, + "step": 21534 + }, + { + "epoch": 0.6600159372318254, + "grad_norm": 1.828934207479329, + "learning_rate": 2.737059533433031e-06, + "loss": 0.6762, + "step": 21535 + }, + { + "epoch": 0.6600465857545667, + "grad_norm": 1.8531350767067252, + "learning_rate": 2.7366169681900013e-06, + "loss": 0.6725, + "step": 21536 + }, + { + "epoch": 0.6600772342773078, + "grad_norm": 1.6062059220299936, + "learning_rate": 2.7361744252486626e-06, + "loss": 0.5926, + "step": 21537 + }, + { + "epoch": 
0.6601078828000491, + "grad_norm": 1.971469613210802, + "learning_rate": 2.735731904613377e-06, + "loss": 0.6022, + "step": 21538 + }, + { + "epoch": 0.6601385313227902, + "grad_norm": 0.8330317321473742, + "learning_rate": 2.735289406288505e-06, + "loss": 0.4123, + "step": 21539 + }, + { + "epoch": 0.6601691798455315, + "grad_norm": 0.796145475427795, + "learning_rate": 2.734846930278405e-06, + "loss": 0.4189, + "step": 21540 + }, + { + "epoch": 0.6601998283682726, + "grad_norm": 2.192381388925538, + "learning_rate": 2.734404476587438e-06, + "loss": 0.5789, + "step": 21541 + }, + { + "epoch": 0.6602304768910139, + "grad_norm": 0.8248332022730025, + "learning_rate": 2.7339620452199646e-06, + "loss": 0.399, + "step": 21542 + }, + { + "epoch": 0.660261125413755, + "grad_norm": 1.5603856769723785, + "learning_rate": 2.7335196361803408e-06, + "loss": 0.5551, + "step": 21543 + }, + { + "epoch": 0.6602917739364963, + "grad_norm": 1.8895327806982183, + "learning_rate": 2.7330772494729304e-06, + "loss": 0.6728, + "step": 21544 + }, + { + "epoch": 0.6603224224592374, + "grad_norm": 1.6787650886546819, + "learning_rate": 2.732634885102086e-06, + "loss": 0.6147, + "step": 21545 + }, + { + "epoch": 0.6603530709819787, + "grad_norm": 1.7989540061256508, + "learning_rate": 2.732192543072174e-06, + "loss": 0.5477, + "step": 21546 + }, + { + "epoch": 0.6603837195047199, + "grad_norm": 1.7007861870807959, + "learning_rate": 2.7317502233875487e-06, + "loss": 0.5878, + "step": 21547 + }, + { + "epoch": 0.6604143680274611, + "grad_norm": 1.7632442269052195, + "learning_rate": 2.731307926052568e-06, + "loss": 0.6268, + "step": 21548 + }, + { + "epoch": 0.6604450165502023, + "grad_norm": 1.7604807792956512, + "learning_rate": 2.730865651071589e-06, + "loss": 0.5974, + "step": 21549 + }, + { + "epoch": 0.6604756650729435, + "grad_norm": 1.7561672465149578, + "learning_rate": 2.7304233984489746e-06, + "loss": 0.5738, + "step": 21550 + }, + { + "epoch": 0.6605063135956847, + "grad_norm": 1.6682654405176869, + "learning_rate": 2.7299811681890764e-06, + "loss": 0.5863, + "step": 21551 + }, + { + "epoch": 0.6605369621184259, + "grad_norm": 2.027838838418941, + "learning_rate": 2.729538960296255e-06, + "loss": 0.5484, + "step": 21552 + }, + { + "epoch": 0.6605676106411671, + "grad_norm": 1.6518838157418538, + "learning_rate": 2.7290967747748676e-06, + "loss": 0.6032, + "step": 21553 + }, + { + "epoch": 0.6605982591639084, + "grad_norm": 0.875093992631532, + "learning_rate": 2.7286546116292722e-06, + "loss": 0.4407, + "step": 21554 + }, + { + "epoch": 0.6606289076866495, + "grad_norm": 1.9339168953581356, + "learning_rate": 2.7282124708638242e-06, + "loss": 0.6577, + "step": 21555 + }, + { + "epoch": 0.6606595562093907, + "grad_norm": 2.127470116126005, + "learning_rate": 2.7277703524828757e-06, + "loss": 0.5717, + "step": 21556 + }, + { + "epoch": 0.6606902047321319, + "grad_norm": 1.7942876737475884, + "learning_rate": 2.7273282564907918e-06, + "loss": 0.6679, + "step": 21557 + }, + { + "epoch": 0.6607208532548731, + "grad_norm": 0.9574425388695201, + "learning_rate": 2.7268861828919237e-06, + "loss": 0.4097, + "step": 21558 + }, + { + "epoch": 0.6607515017776143, + "grad_norm": 1.6320787254976235, + "learning_rate": 2.7264441316906253e-06, + "loss": 0.5772, + "step": 21559 + }, + { + "epoch": 0.6607821503003555, + "grad_norm": 1.7786931316992867, + "learning_rate": 2.7260021028912553e-06, + "loss": 0.6436, + "step": 21560 + }, + { + "epoch": 0.6608127988230967, + "grad_norm": 1.658557711262421, + "learning_rate": 
2.7255600964981683e-06, + "loss": 0.5484, + "step": 21561 + }, + { + "epoch": 0.6608434473458379, + "grad_norm": 1.8948624157069234, + "learning_rate": 2.725118112515721e-06, + "loss": 0.6332, + "step": 21562 + }, + { + "epoch": 0.6608740958685791, + "grad_norm": 1.8971097549965976, + "learning_rate": 2.7246761509482657e-06, + "loss": 0.6163, + "step": 21563 + }, + { + "epoch": 0.6609047443913203, + "grad_norm": 1.7157232450663977, + "learning_rate": 2.7242342118001584e-06, + "loss": 0.5168, + "step": 21564 + }, + { + "epoch": 0.6609353929140616, + "grad_norm": 1.8341015839617858, + "learning_rate": 2.7237922950757554e-06, + "loss": 0.6107, + "step": 21565 + }, + { + "epoch": 0.6609660414368027, + "grad_norm": 1.7759934973913203, + "learning_rate": 2.7233504007794093e-06, + "loss": 0.6154, + "step": 21566 + }, + { + "epoch": 0.660996689959544, + "grad_norm": 1.852157856039164, + "learning_rate": 2.722908528915472e-06, + "loss": 0.6101, + "step": 21567 + }, + { + "epoch": 0.6610273384822851, + "grad_norm": 0.8050933777101579, + "learning_rate": 2.7224666794883002e-06, + "loss": 0.3972, + "step": 21568 + }, + { + "epoch": 0.6610579870050264, + "grad_norm": 1.7076365226815056, + "learning_rate": 2.7220248525022485e-06, + "loss": 0.5562, + "step": 21569 + }, + { + "epoch": 0.6610886355277675, + "grad_norm": 1.7780439233911303, + "learning_rate": 2.721583047961667e-06, + "loss": 0.5781, + "step": 21570 + }, + { + "epoch": 0.6611192840505088, + "grad_norm": 1.8939405953988937, + "learning_rate": 2.72114126587091e-06, + "loss": 0.5864, + "step": 21571 + }, + { + "epoch": 0.6611499325732499, + "grad_norm": 1.433383535742693, + "learning_rate": 2.7206995062343323e-06, + "loss": 0.5104, + "step": 21572 + }, + { + "epoch": 0.6611805810959912, + "grad_norm": 0.7718158450347843, + "learning_rate": 2.720257769056287e-06, + "loss": 0.4289, + "step": 21573 + }, + { + "epoch": 0.6612112296187324, + "grad_norm": 1.752903155322817, + "learning_rate": 2.719816054341125e-06, + "loss": 0.6026, + "step": 21574 + }, + { + "epoch": 0.6612418781414736, + "grad_norm": 1.827992795456818, + "learning_rate": 2.719374362093195e-06, + "loss": 0.6149, + "step": 21575 + }, + { + "epoch": 0.6612725266642148, + "grad_norm": 2.1059792567354747, + "learning_rate": 2.7189326923168567e-06, + "loss": 0.6103, + "step": 21576 + }, + { + "epoch": 0.661303175186956, + "grad_norm": 1.8301729853000004, + "learning_rate": 2.7184910450164586e-06, + "loss": 0.6011, + "step": 21577 + }, + { + "epoch": 0.6613338237096972, + "grad_norm": 1.8084727295392298, + "learning_rate": 2.7180494201963505e-06, + "loss": 0.6056, + "step": 21578 + }, + { + "epoch": 0.6613644722324384, + "grad_norm": 1.9405263271188797, + "learning_rate": 2.7176078178608844e-06, + "loss": 0.5901, + "step": 21579 + }, + { + "epoch": 0.6613951207551796, + "grad_norm": 1.6768347640468002, + "learning_rate": 2.7171662380144124e-06, + "loss": 0.5753, + "step": 21580 + }, + { + "epoch": 0.6614257692779208, + "grad_norm": 1.577359222494013, + "learning_rate": 2.716724680661288e-06, + "loss": 0.5063, + "step": 21581 + }, + { + "epoch": 0.661456417800662, + "grad_norm": 1.7660016096519895, + "learning_rate": 2.7162831458058573e-06, + "loss": 0.5905, + "step": 21582 + }, + { + "epoch": 0.6614870663234033, + "grad_norm": 1.8069809767744862, + "learning_rate": 2.7158416334524728e-06, + "loss": 0.5882, + "step": 21583 + }, + { + "epoch": 0.6615177148461444, + "grad_norm": 1.5376269007638905, + "learning_rate": 2.7154001436054876e-06, + "loss": 0.5646, + "step": 21584 + }, + { + 
"epoch": 0.6615483633688857, + "grad_norm": 1.744267026042035, + "learning_rate": 2.714958676269249e-06, + "loss": 0.6341, + "step": 21585 + }, + { + "epoch": 0.6615790118916268, + "grad_norm": 1.8934413213415127, + "learning_rate": 2.7145172314481037e-06, + "loss": 0.5058, + "step": 21586 + }, + { + "epoch": 0.661609660414368, + "grad_norm": 1.7910909033319193, + "learning_rate": 2.714075809146409e-06, + "loss": 0.5368, + "step": 21587 + }, + { + "epoch": 0.6616403089371092, + "grad_norm": 1.9721723985922783, + "learning_rate": 2.7136344093685075e-06, + "loss": 0.6653, + "step": 21588 + }, + { + "epoch": 0.6616709574598504, + "grad_norm": 0.8166070576298633, + "learning_rate": 2.713193032118754e-06, + "loss": 0.4193, + "step": 21589 + }, + { + "epoch": 0.6617016059825916, + "grad_norm": 1.8908691572356664, + "learning_rate": 2.7127516774014915e-06, + "loss": 0.6503, + "step": 21590 + }, + { + "epoch": 0.6617322545053328, + "grad_norm": 1.7341968030479875, + "learning_rate": 2.712310345221073e-06, + "loss": 0.5548, + "step": 21591 + }, + { + "epoch": 0.6617629030280741, + "grad_norm": 1.9088976633640542, + "learning_rate": 2.711869035581848e-06, + "loss": 0.5832, + "step": 21592 + }, + { + "epoch": 0.6617935515508152, + "grad_norm": 1.807869251415954, + "learning_rate": 2.71142774848816e-06, + "loss": 0.6015, + "step": 21593 + }, + { + "epoch": 0.6618242000735565, + "grad_norm": 1.7640407376717484, + "learning_rate": 2.71098648394436e-06, + "loss": 0.5707, + "step": 21594 + }, + { + "epoch": 0.6618548485962976, + "grad_norm": 1.679918443092076, + "learning_rate": 2.7105452419547982e-06, + "loss": 0.594, + "step": 21595 + }, + { + "epoch": 0.6618854971190389, + "grad_norm": 1.7674715114096493, + "learning_rate": 2.7101040225238205e-06, + "loss": 0.6144, + "step": 21596 + }, + { + "epoch": 0.66191614564178, + "grad_norm": 1.711294392817366, + "learning_rate": 2.709662825655769e-06, + "loss": 0.6497, + "step": 21597 + }, + { + "epoch": 0.6619467941645213, + "grad_norm": 1.851577763702357, + "learning_rate": 2.7092216513549997e-06, + "loss": 0.6532, + "step": 21598 + }, + { + "epoch": 0.6619774426872624, + "grad_norm": 1.7772208448827134, + "learning_rate": 2.708780499625854e-06, + "loss": 0.5828, + "step": 21599 + }, + { + "epoch": 0.6620080912100037, + "grad_norm": 1.8914462140150827, + "learning_rate": 2.7083393704726824e-06, + "loss": 0.6179, + "step": 21600 + }, + { + "epoch": 0.6620387397327449, + "grad_norm": 1.7427351153103696, + "learning_rate": 2.7078982638998265e-06, + "loss": 0.5561, + "step": 21601 + }, + { + "epoch": 0.6620693882554861, + "grad_norm": 2.151466722543075, + "learning_rate": 2.7074571799116354e-06, + "loss": 0.6452, + "step": 21602 + }, + { + "epoch": 0.6621000367782273, + "grad_norm": 1.534078966649181, + "learning_rate": 2.7070161185124582e-06, + "loss": 0.5247, + "step": 21603 + }, + { + "epoch": 0.6621306853009685, + "grad_norm": 1.9637878099656076, + "learning_rate": 2.706575079706636e-06, + "loss": 0.5128, + "step": 21604 + }, + { + "epoch": 0.6621613338237097, + "grad_norm": 1.8838484895535739, + "learning_rate": 2.7061340634985155e-06, + "loss": 0.6187, + "step": 21605 + }, + { + "epoch": 0.6621919823464509, + "grad_norm": 1.8510398748768766, + "learning_rate": 2.7056930698924457e-06, + "loss": 0.6041, + "step": 21606 + }, + { + "epoch": 0.6622226308691921, + "grad_norm": 1.6898897072136867, + "learning_rate": 2.7052520988927666e-06, + "loss": 0.6292, + "step": 21607 + }, + { + "epoch": 0.6622532793919333, + "grad_norm": 1.8187702495234515, + 
"learning_rate": 2.7048111505038253e-06, + "loss": 0.6168, + "step": 21608 + }, + { + "epoch": 0.6622839279146745, + "grad_norm": 1.872935920678388, + "learning_rate": 2.7043702247299695e-06, + "loss": 0.579, + "step": 21609 + }, + { + "epoch": 0.6623145764374158, + "grad_norm": 1.7486427836893244, + "learning_rate": 2.703929321575539e-06, + "loss": 0.6125, + "step": 21610 + }, + { + "epoch": 0.6623452249601569, + "grad_norm": 1.8473758764409285, + "learning_rate": 2.703488441044883e-06, + "loss": 0.6151, + "step": 21611 + }, + { + "epoch": 0.6623758734828982, + "grad_norm": 1.6731164742313955, + "learning_rate": 2.7030475831423406e-06, + "loss": 0.5821, + "step": 21612 + }, + { + "epoch": 0.6624065220056393, + "grad_norm": 2.102696926475303, + "learning_rate": 2.702606747872258e-06, + "loss": 0.6943, + "step": 21613 + }, + { + "epoch": 0.6624371705283806, + "grad_norm": 1.6602672172630477, + "learning_rate": 2.7021659352389814e-06, + "loss": 0.541, + "step": 21614 + }, + { + "epoch": 0.6624678190511217, + "grad_norm": 1.91051320378168, + "learning_rate": 2.701725145246849e-06, + "loss": 0.633, + "step": 21615 + }, + { + "epoch": 0.662498467573863, + "grad_norm": 1.8458115516676274, + "learning_rate": 2.7012843779002074e-06, + "loss": 0.5583, + "step": 21616 + }, + { + "epoch": 0.6625291160966041, + "grad_norm": 1.823363658466097, + "learning_rate": 2.7008436332034004e-06, + "loss": 0.6502, + "step": 21617 + }, + { + "epoch": 0.6625597646193453, + "grad_norm": 1.9841055729389558, + "learning_rate": 2.700402911160768e-06, + "loss": 0.5889, + "step": 21618 + }, + { + "epoch": 0.6625904131420866, + "grad_norm": 1.869471807326267, + "learning_rate": 2.6999622117766553e-06, + "loss": 0.5362, + "step": 21619 + }, + { + "epoch": 0.6626210616648277, + "grad_norm": 1.5860968793409536, + "learning_rate": 2.6995215350554015e-06, + "loss": 0.611, + "step": 21620 + }, + { + "epoch": 0.662651710187569, + "grad_norm": 0.8178459550932387, + "learning_rate": 2.699080881001351e-06, + "loss": 0.4331, + "step": 21621 + }, + { + "epoch": 0.6626823587103101, + "grad_norm": 1.815615368066207, + "learning_rate": 2.698640249618848e-06, + "loss": 0.5572, + "step": 21622 + }, + { + "epoch": 0.6627130072330514, + "grad_norm": 1.7932215285430104, + "learning_rate": 2.6981996409122285e-06, + "loss": 0.5937, + "step": 21623 + }, + { + "epoch": 0.6627436557557925, + "grad_norm": 1.9212330998877178, + "learning_rate": 2.697759054885837e-06, + "loss": 0.6744, + "step": 21624 + }, + { + "epoch": 0.6627743042785338, + "grad_norm": 1.7683590531029654, + "learning_rate": 2.6973184915440165e-06, + "loss": 0.5433, + "step": 21625 + }, + { + "epoch": 0.6628049528012749, + "grad_norm": 0.8092198841019088, + "learning_rate": 2.6968779508911047e-06, + "loss": 0.4211, + "step": 21626 + }, + { + "epoch": 0.6628356013240162, + "grad_norm": 1.7044474508871004, + "learning_rate": 2.696437432931443e-06, + "loss": 0.5461, + "step": 21627 + }, + { + "epoch": 0.6628662498467573, + "grad_norm": 1.904394268427711, + "learning_rate": 2.695996937669375e-06, + "loss": 0.5658, + "step": 21628 + }, + { + "epoch": 0.6628968983694986, + "grad_norm": 0.8065110920360804, + "learning_rate": 2.6955564651092368e-06, + "loss": 0.4022, + "step": 21629 + }, + { + "epoch": 0.6629275468922398, + "grad_norm": 0.7757165727137288, + "learning_rate": 2.6951160152553724e-06, + "loss": 0.4018, + "step": 21630 + }, + { + "epoch": 0.662958195414981, + "grad_norm": 2.084307477116131, + "learning_rate": 2.694675588112117e-06, + "loss": 0.6457, + "step": 21631 + }, + { + 
"epoch": 0.6629888439377222, + "grad_norm": 1.8756422597812221, + "learning_rate": 2.6942351836838133e-06, + "loss": 0.614, + "step": 21632 + }, + { + "epoch": 0.6630194924604634, + "grad_norm": 1.8387963827598037, + "learning_rate": 2.6937948019748024e-06, + "loss": 0.6273, + "step": 21633 + }, + { + "epoch": 0.6630501409832046, + "grad_norm": 0.7968775243392415, + "learning_rate": 2.6933544429894192e-06, + "loss": 0.4129, + "step": 21634 + }, + { + "epoch": 0.6630807895059458, + "grad_norm": 1.6352382431184955, + "learning_rate": 2.6929141067320052e-06, + "loss": 0.5682, + "step": 21635 + }, + { + "epoch": 0.663111438028687, + "grad_norm": 1.7583134416968789, + "learning_rate": 2.6924737932069003e-06, + "loss": 0.6026, + "step": 21636 + }, + { + "epoch": 0.6631420865514283, + "grad_norm": 2.0956389936976887, + "learning_rate": 2.6920335024184398e-06, + "loss": 0.6252, + "step": 21637 + }, + { + "epoch": 0.6631727350741694, + "grad_norm": 1.8660405679483485, + "learning_rate": 2.691593234370964e-06, + "loss": 0.5877, + "step": 21638 + }, + { + "epoch": 0.6632033835969107, + "grad_norm": 1.694152498488241, + "learning_rate": 2.691152989068812e-06, + "loss": 0.6515, + "step": 21639 + }, + { + "epoch": 0.6632340321196518, + "grad_norm": 2.02462098468681, + "learning_rate": 2.690712766516319e-06, + "loss": 0.5467, + "step": 21640 + }, + { + "epoch": 0.6632646806423931, + "grad_norm": 0.7972345051753768, + "learning_rate": 2.6902725667178254e-06, + "loss": 0.4251, + "step": 21641 + }, + { + "epoch": 0.6632953291651342, + "grad_norm": 1.9979856538721357, + "learning_rate": 2.689832389677666e-06, + "loss": 0.6669, + "step": 21642 + }, + { + "epoch": 0.6633259776878755, + "grad_norm": 1.994568325520549, + "learning_rate": 2.6893922354001777e-06, + "loss": 0.5982, + "step": 21643 + }, + { + "epoch": 0.6633566262106166, + "grad_norm": 2.064187788516215, + "learning_rate": 2.6889521038897022e-06, + "loss": 0.618, + "step": 21644 + }, + { + "epoch": 0.6633872747333579, + "grad_norm": 1.9341538439770365, + "learning_rate": 2.68851199515057e-06, + "loss": 0.5841, + "step": 21645 + }, + { + "epoch": 0.663417923256099, + "grad_norm": 1.9577777591089658, + "learning_rate": 2.6880719091871212e-06, + "loss": 0.7012, + "step": 21646 + }, + { + "epoch": 0.6634485717788403, + "grad_norm": 0.8153195676144326, + "learning_rate": 2.687631846003693e-06, + "loss": 0.4488, + "step": 21647 + }, + { + "epoch": 0.6634792203015815, + "grad_norm": 1.8805587690596408, + "learning_rate": 2.6871918056046186e-06, + "loss": 0.5761, + "step": 21648 + }, + { + "epoch": 0.6635098688243226, + "grad_norm": 1.933143106321402, + "learning_rate": 2.6867517879942345e-06, + "loss": 0.6032, + "step": 21649 + }, + { + "epoch": 0.6635405173470639, + "grad_norm": 1.820904828154029, + "learning_rate": 2.686311793176879e-06, + "loss": 0.5834, + "step": 21650 + }, + { + "epoch": 0.663571165869805, + "grad_norm": 1.6440723339818986, + "learning_rate": 2.6858718211568834e-06, + "loss": 0.5969, + "step": 21651 + }, + { + "epoch": 0.6636018143925463, + "grad_norm": 1.7704486572601628, + "learning_rate": 2.685431871938587e-06, + "loss": 0.5596, + "step": 21652 + }, + { + "epoch": 0.6636324629152874, + "grad_norm": 1.7386740945273509, + "learning_rate": 2.6849919455263183e-06, + "loss": 0.5656, + "step": 21653 + }, + { + "epoch": 0.6636631114380287, + "grad_norm": 1.8068598890474552, + "learning_rate": 2.684552041924421e-06, + "loss": 0.5627, + "step": 21654 + }, + { + "epoch": 0.6636937599607698, + "grad_norm": 1.7204466989075942, + 
"learning_rate": 2.6841121611372234e-06, + "loss": 0.6029, + "step": 21655 + }, + { + "epoch": 0.6637244084835111, + "grad_norm": 0.8633375542956515, + "learning_rate": 2.6836723031690604e-06, + "loss": 0.4055, + "step": 21656 + }, + { + "epoch": 0.6637550570062523, + "grad_norm": 1.8805030048594964, + "learning_rate": 2.6832324680242667e-06, + "loss": 0.6158, + "step": 21657 + }, + { + "epoch": 0.6637857055289935, + "grad_norm": 1.8230181708129678, + "learning_rate": 2.682792655707178e-06, + "loss": 0.5564, + "step": 21658 + }, + { + "epoch": 0.6638163540517347, + "grad_norm": 1.738472238125573, + "learning_rate": 2.6823528662221245e-06, + "loss": 0.5379, + "step": 21659 + }, + { + "epoch": 0.6638470025744759, + "grad_norm": 1.9338681877105555, + "learning_rate": 2.681913099573441e-06, + "loss": 0.6523, + "step": 21660 + }, + { + "epoch": 0.6638776510972171, + "grad_norm": 1.8682449594808752, + "learning_rate": 2.6814733557654604e-06, + "loss": 0.5535, + "step": 21661 + }, + { + "epoch": 0.6639082996199583, + "grad_norm": 0.7365945246973161, + "learning_rate": 2.6810336348025185e-06, + "loss": 0.392, + "step": 21662 + }, + { + "epoch": 0.6639389481426995, + "grad_norm": 1.7516078418460117, + "learning_rate": 2.6805939366889455e-06, + "loss": 0.6714, + "step": 21663 + }, + { + "epoch": 0.6639695966654408, + "grad_norm": 1.9360002296328385, + "learning_rate": 2.680154261429072e-06, + "loss": 0.5626, + "step": 21664 + }, + { + "epoch": 0.6640002451881819, + "grad_norm": 1.7326823860343648, + "learning_rate": 2.679714609027232e-06, + "loss": 0.5761, + "step": 21665 + }, + { + "epoch": 0.6640308937109232, + "grad_norm": 0.809939101300181, + "learning_rate": 2.679274979487759e-06, + "loss": 0.4215, + "step": 21666 + }, + { + "epoch": 0.6640615422336643, + "grad_norm": 0.9068055763840914, + "learning_rate": 2.6788353728149826e-06, + "loss": 0.4172, + "step": 21667 + }, + { + "epoch": 0.6640921907564056, + "grad_norm": 1.6858867829001305, + "learning_rate": 2.6783957890132344e-06, + "loss": 0.6043, + "step": 21668 + }, + { + "epoch": 0.6641228392791467, + "grad_norm": 1.8805772670336987, + "learning_rate": 2.677956228086849e-06, + "loss": 0.626, + "step": 21669 + }, + { + "epoch": 0.664153487801888, + "grad_norm": 0.8020853288577546, + "learning_rate": 2.6775166900401527e-06, + "loss": 0.4195, + "step": 21670 + }, + { + "epoch": 0.6641841363246291, + "grad_norm": 1.8758240902496686, + "learning_rate": 2.6770771748774806e-06, + "loss": 0.5887, + "step": 21671 + }, + { + "epoch": 0.6642147848473704, + "grad_norm": 0.7726238870653127, + "learning_rate": 2.676637682603157e-06, + "loss": 0.4327, + "step": 21672 + }, + { + "epoch": 0.6642454333701115, + "grad_norm": 1.794578958743807, + "learning_rate": 2.6761982132215212e-06, + "loss": 0.6848, + "step": 21673 + }, + { + "epoch": 0.6642760818928528, + "grad_norm": 1.5768987340725549, + "learning_rate": 2.6757587667368996e-06, + "loss": 0.5269, + "step": 21674 + }, + { + "epoch": 0.664306730415594, + "grad_norm": 0.7660461303784329, + "learning_rate": 2.675319343153619e-06, + "loss": 0.4149, + "step": 21675 + }, + { + "epoch": 0.6643373789383352, + "grad_norm": 1.6838327546057696, + "learning_rate": 2.674879942476012e-06, + "loss": 0.6026, + "step": 21676 + }, + { + "epoch": 0.6643680274610764, + "grad_norm": 2.0019171874110837, + "learning_rate": 2.674440564708409e-06, + "loss": 0.7185, + "step": 21677 + }, + { + "epoch": 0.6643986759838176, + "grad_norm": 1.9212876392479885, + "learning_rate": 2.674001209855137e-06, + "loss": 0.6536, + "step": 21678 + 
}, + { + "epoch": 0.6644293245065588, + "grad_norm": 1.6645068912983156, + "learning_rate": 2.673561877920526e-06, + "loss": 0.5675, + "step": 21679 + }, + { + "epoch": 0.6644599730292999, + "grad_norm": 0.7743313832344311, + "learning_rate": 2.6731225689089045e-06, + "loss": 0.4031, + "step": 21680 + }, + { + "epoch": 0.6644906215520412, + "grad_norm": 1.7126437807462676, + "learning_rate": 2.672683282824604e-06, + "loss": 0.5468, + "step": 21681 + }, + { + "epoch": 0.6645212700747823, + "grad_norm": 2.052475634069661, + "learning_rate": 2.6722440196719514e-06, + "loss": 0.6648, + "step": 21682 + }, + { + "epoch": 0.6645519185975236, + "grad_norm": 1.947107079787878, + "learning_rate": 2.6718047794552693e-06, + "loss": 0.585, + "step": 21683 + }, + { + "epoch": 0.6645825671202648, + "grad_norm": 1.8241003534839666, + "learning_rate": 2.6713655621788944e-06, + "loss": 0.64, + "step": 21684 + }, + { + "epoch": 0.664613215643006, + "grad_norm": 1.909008721842043, + "learning_rate": 2.6709263678471504e-06, + "loss": 0.6625, + "step": 21685 + }, + { + "epoch": 0.6646438641657472, + "grad_norm": 2.024580151879502, + "learning_rate": 2.670487196464363e-06, + "loss": 0.5964, + "step": 21686 + }, + { + "epoch": 0.6646745126884884, + "grad_norm": 0.7961181381110762, + "learning_rate": 2.670048048034861e-06, + "loss": 0.3879, + "step": 21687 + }, + { + "epoch": 0.6647051612112296, + "grad_norm": 1.941036593300389, + "learning_rate": 2.6696089225629718e-06, + "loss": 0.6132, + "step": 21688 + }, + { + "epoch": 0.6647358097339708, + "grad_norm": 1.6716496772714313, + "learning_rate": 2.6691698200530247e-06, + "loss": 0.5282, + "step": 21689 + }, + { + "epoch": 0.664766458256712, + "grad_norm": 1.7716606481053125, + "learning_rate": 2.668730740509341e-06, + "loss": 0.5841, + "step": 21690 + }, + { + "epoch": 0.6647971067794533, + "grad_norm": 1.9130977620842151, + "learning_rate": 2.6682916839362504e-06, + "loss": 0.5418, + "step": 21691 + }, + { + "epoch": 0.6648277553021944, + "grad_norm": 1.6975565054679114, + "learning_rate": 2.6678526503380795e-06, + "loss": 0.5567, + "step": 21692 + }, + { + "epoch": 0.6648584038249357, + "grad_norm": 1.7522887445081332, + "learning_rate": 2.667413639719154e-06, + "loss": 0.6256, + "step": 21693 + }, + { + "epoch": 0.6648890523476768, + "grad_norm": 1.8036362513973183, + "learning_rate": 2.666974652083795e-06, + "loss": 0.6177, + "step": 21694 + }, + { + "epoch": 0.6649197008704181, + "grad_norm": 0.7574057368662901, + "learning_rate": 2.666535687436335e-06, + "loss": 0.4235, + "step": 21695 + }, + { + "epoch": 0.6649503493931592, + "grad_norm": 0.7555978450114201, + "learning_rate": 2.666096745781096e-06, + "loss": 0.4021, + "step": 21696 + }, + { + "epoch": 0.6649809979159005, + "grad_norm": 1.7347818939787671, + "learning_rate": 2.665657827122401e-06, + "loss": 0.5806, + "step": 21697 + }, + { + "epoch": 0.6650116464386416, + "grad_norm": 1.8631915815837012, + "learning_rate": 2.665218931464577e-06, + "loss": 0.6456, + "step": 21698 + }, + { + "epoch": 0.6650422949613829, + "grad_norm": 2.188907671497736, + "learning_rate": 2.6647800588119477e-06, + "loss": 0.5929, + "step": 21699 + }, + { + "epoch": 0.665072943484124, + "grad_norm": 1.7085057026855652, + "learning_rate": 2.6643412091688403e-06, + "loss": 0.5374, + "step": 21700 + }, + { + "epoch": 0.6651035920068653, + "grad_norm": 1.688935727055663, + "learning_rate": 2.663902382539575e-06, + "loss": 0.5882, + "step": 21701 + }, + { + "epoch": 0.6651342405296065, + "grad_norm": 1.9213591416142175, + 
"learning_rate": 2.6634635789284762e-06, + "loss": 0.6884, + "step": 21702 + }, + { + "epoch": 0.6651648890523477, + "grad_norm": 1.5488800214950245, + "learning_rate": 2.6630247983398717e-06, + "loss": 0.574, + "step": 21703 + }, + { + "epoch": 0.6651955375750889, + "grad_norm": 1.8804713634988603, + "learning_rate": 2.6625860407780806e-06, + "loss": 0.6615, + "step": 21704 + }, + { + "epoch": 0.6652261860978301, + "grad_norm": 1.7252351954832523, + "learning_rate": 2.6621473062474244e-06, + "loss": 0.6922, + "step": 21705 + }, + { + "epoch": 0.6652568346205713, + "grad_norm": 1.9323515566006433, + "learning_rate": 2.6617085947522325e-06, + "loss": 0.7265, + "step": 21706 + }, + { + "epoch": 0.6652874831433125, + "grad_norm": 1.7765543914720443, + "learning_rate": 2.6612699062968217e-06, + "loss": 0.6398, + "step": 21707 + }, + { + "epoch": 0.6653181316660537, + "grad_norm": 1.843078451481265, + "learning_rate": 2.66083124088552e-06, + "loss": 0.588, + "step": 21708 + }, + { + "epoch": 0.665348780188795, + "grad_norm": 1.8296316585814452, + "learning_rate": 2.660392598522643e-06, + "loss": 0.6319, + "step": 21709 + }, + { + "epoch": 0.6653794287115361, + "grad_norm": 1.554092191227008, + "learning_rate": 2.659953979212517e-06, + "loss": 0.5316, + "step": 21710 + }, + { + "epoch": 0.6654100772342773, + "grad_norm": 1.8009921647759242, + "learning_rate": 2.6595153829594654e-06, + "loss": 0.5957, + "step": 21711 + }, + { + "epoch": 0.6654407257570185, + "grad_norm": 1.888468843370118, + "learning_rate": 2.659076809767806e-06, + "loss": 0.632, + "step": 21712 + }, + { + "epoch": 0.6654713742797597, + "grad_norm": 1.6361278490794318, + "learning_rate": 2.6586382596418615e-06, + "loss": 0.5645, + "step": 21713 + }, + { + "epoch": 0.6655020228025009, + "grad_norm": 1.6634945467425553, + "learning_rate": 2.658199732585955e-06, + "loss": 0.5205, + "step": 21714 + }, + { + "epoch": 0.6655326713252421, + "grad_norm": 2.054229920654402, + "learning_rate": 2.657761228604404e-06, + "loss": 0.5977, + "step": 21715 + }, + { + "epoch": 0.6655633198479833, + "grad_norm": 1.7806419164648128, + "learning_rate": 2.657322747701532e-06, + "loss": 0.5371, + "step": 21716 + }, + { + "epoch": 0.6655939683707245, + "grad_norm": 1.7310081537416167, + "learning_rate": 2.656884289881657e-06, + "loss": 0.4919, + "step": 21717 + }, + { + "epoch": 0.6656246168934657, + "grad_norm": 1.8154833368220473, + "learning_rate": 2.656445855149101e-06, + "loss": 0.5856, + "step": 21718 + }, + { + "epoch": 0.6656552654162069, + "grad_norm": 1.6990315118360917, + "learning_rate": 2.656007443508185e-06, + "loss": 0.5531, + "step": 21719 + }, + { + "epoch": 0.6656859139389482, + "grad_norm": 1.905583549994219, + "learning_rate": 2.655569054963226e-06, + "loss": 0.554, + "step": 21720 + }, + { + "epoch": 0.6657165624616893, + "grad_norm": 2.2280663680870045, + "learning_rate": 2.6551306895185447e-06, + "loss": 0.6471, + "step": 21721 + }, + { + "epoch": 0.6657472109844306, + "grad_norm": 1.7563903987306289, + "learning_rate": 2.6546923471784623e-06, + "loss": 0.601, + "step": 21722 + }, + { + "epoch": 0.6657778595071717, + "grad_norm": 1.713204324020995, + "learning_rate": 2.6542540279472974e-06, + "loss": 0.5909, + "step": 21723 + }, + { + "epoch": 0.665808508029913, + "grad_norm": 1.9183831880112527, + "learning_rate": 2.653815731829362e-06, + "loss": 0.5132, + "step": 21724 + }, + { + "epoch": 0.6658391565526541, + "grad_norm": 1.685796614810446, + "learning_rate": 2.653377458828986e-06, + "loss": 0.6043, + "step": 21725 + }, + { + 
"epoch": 0.6658698050753954, + "grad_norm": 1.7548034069743326, + "learning_rate": 2.6529392089504798e-06, + "loss": 0.5351, + "step": 21726 + }, + { + "epoch": 0.6659004535981365, + "grad_norm": 1.6180287153776525, + "learning_rate": 2.6525009821981663e-06, + "loss": 0.4864, + "step": 21727 + }, + { + "epoch": 0.6659311021208778, + "grad_norm": 1.8019773370818044, + "learning_rate": 2.6520627785763588e-06, + "loss": 0.6416, + "step": 21728 + }, + { + "epoch": 0.665961750643619, + "grad_norm": 1.8128201080239, + "learning_rate": 2.6516245980893775e-06, + "loss": 0.596, + "step": 21729 + }, + { + "epoch": 0.6659923991663602, + "grad_norm": 1.8554715169320604, + "learning_rate": 2.6511864407415423e-06, + "loss": 0.6265, + "step": 21730 + }, + { + "epoch": 0.6660230476891014, + "grad_norm": 0.821342903526592, + "learning_rate": 2.6507483065371654e-06, + "loss": 0.4217, + "step": 21731 + }, + { + "epoch": 0.6660536962118426, + "grad_norm": 1.8665773494775497, + "learning_rate": 2.650310195480567e-06, + "loss": 0.6655, + "step": 21732 + }, + { + "epoch": 0.6660843447345838, + "grad_norm": 1.7327703661364549, + "learning_rate": 2.649872107576066e-06, + "loss": 0.623, + "step": 21733 + }, + { + "epoch": 0.666114993257325, + "grad_norm": 1.7633100359157101, + "learning_rate": 2.649434042827973e-06, + "loss": 0.5559, + "step": 21734 + }, + { + "epoch": 0.6661456417800662, + "grad_norm": 1.593736573419118, + "learning_rate": 2.648996001240609e-06, + "loss": 0.5391, + "step": 21735 + }, + { + "epoch": 0.6661762903028075, + "grad_norm": 1.6146371575368026, + "learning_rate": 2.6485579828182908e-06, + "loss": 0.5552, + "step": 21736 + }, + { + "epoch": 0.6662069388255486, + "grad_norm": 1.7173572988773618, + "learning_rate": 2.6481199875653296e-06, + "loss": 0.5605, + "step": 21737 + }, + { + "epoch": 0.6662375873482899, + "grad_norm": 0.8108957180134369, + "learning_rate": 2.6476820154860467e-06, + "loss": 0.426, + "step": 21738 + }, + { + "epoch": 0.666268235871031, + "grad_norm": 0.7640335582337809, + "learning_rate": 2.647244066584753e-06, + "loss": 0.3901, + "step": 21739 + }, + { + "epoch": 0.6662988843937723, + "grad_norm": 1.957197875910531, + "learning_rate": 2.6468061408657647e-06, + "loss": 0.5633, + "step": 21740 + }, + { + "epoch": 0.6663295329165134, + "grad_norm": 1.6906889582989744, + "learning_rate": 2.6463682383333998e-06, + "loss": 0.5972, + "step": 21741 + }, + { + "epoch": 0.6663601814392546, + "grad_norm": 1.7863786031279867, + "learning_rate": 2.6459303589919693e-06, + "loss": 0.6072, + "step": 21742 + }, + { + "epoch": 0.6663908299619958, + "grad_norm": 1.838144092667392, + "learning_rate": 2.645492502845789e-06, + "loss": 0.5812, + "step": 21743 + }, + { + "epoch": 0.666421478484737, + "grad_norm": 1.7037443664329464, + "learning_rate": 2.645054669899175e-06, + "loss": 0.5423, + "step": 21744 + }, + { + "epoch": 0.6664521270074782, + "grad_norm": 1.979050891703926, + "learning_rate": 2.6446168601564387e-06, + "loss": 0.6373, + "step": 21745 + }, + { + "epoch": 0.6664827755302194, + "grad_norm": 1.7134301956952473, + "learning_rate": 2.644179073621895e-06, + "loss": 0.5204, + "step": 21746 + }, + { + "epoch": 0.6665134240529607, + "grad_norm": 1.745413324314963, + "learning_rate": 2.643741310299859e-06, + "loss": 0.5691, + "step": 21747 + }, + { + "epoch": 0.6665440725757018, + "grad_norm": 1.6724740814933532, + "learning_rate": 2.643303570194641e-06, + "loss": 0.5327, + "step": 21748 + }, + { + "epoch": 0.6665747210984431, + "grad_norm": 1.6928458676556177, + "learning_rate": 
2.6428658533105585e-06, + "loss": 0.6383, + "step": 21749 + }, + { + "epoch": 0.6666053696211842, + "grad_norm": 1.8732388965927906, + "learning_rate": 2.64242815965192e-06, + "loss": 0.6053, + "step": 21750 + }, + { + "epoch": 0.6666360181439255, + "grad_norm": 2.040478276772601, + "learning_rate": 2.6419904892230386e-06, + "loss": 0.6502, + "step": 21751 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 1.5549842240925327, + "learning_rate": 2.641552842028231e-06, + "loss": 0.5904, + "step": 21752 + }, + { + "epoch": 0.6666973151894079, + "grad_norm": 1.871368125098814, + "learning_rate": 2.6411152180718046e-06, + "loss": 0.6787, + "step": 21753 + }, + { + "epoch": 0.666727963712149, + "grad_norm": 2.0451598403161957, + "learning_rate": 2.640677617358074e-06, + "loss": 0.6187, + "step": 21754 + }, + { + "epoch": 0.6667586122348903, + "grad_norm": 1.7607358796911818, + "learning_rate": 2.6402400398913525e-06, + "loss": 0.6366, + "step": 21755 + }, + { + "epoch": 0.6667892607576315, + "grad_norm": 1.7504035663679092, + "learning_rate": 2.6398024856759472e-06, + "loss": 0.5115, + "step": 21756 + }, + { + "epoch": 0.6668199092803727, + "grad_norm": 1.6302040474447985, + "learning_rate": 2.639364954716172e-06, + "loss": 0.5237, + "step": 21757 + }, + { + "epoch": 0.6668505578031139, + "grad_norm": 0.7794592297365178, + "learning_rate": 2.63892744701634e-06, + "loss": 0.4141, + "step": 21758 + }, + { + "epoch": 0.6668812063258551, + "grad_norm": 1.7474367501346983, + "learning_rate": 2.638489962580758e-06, + "loss": 0.492, + "step": 21759 + }, + { + "epoch": 0.6669118548485963, + "grad_norm": 1.9264060695286975, + "learning_rate": 2.63805250141374e-06, + "loss": 0.6582, + "step": 21760 + }, + { + "epoch": 0.6669425033713375, + "grad_norm": 1.861286271639303, + "learning_rate": 2.6376150635195942e-06, + "loss": 0.6946, + "step": 21761 + }, + { + "epoch": 0.6669731518940787, + "grad_norm": 1.8272180487281189, + "learning_rate": 2.6371776489026307e-06, + "loss": 0.5504, + "step": 21762 + }, + { + "epoch": 0.66700380041682, + "grad_norm": 1.877839428177604, + "learning_rate": 2.636740257567163e-06, + "loss": 0.6151, + "step": 21763 + }, + { + "epoch": 0.6670344489395611, + "grad_norm": 1.8476097635748956, + "learning_rate": 2.636302889517496e-06, + "loss": 0.5721, + "step": 21764 + }, + { + "epoch": 0.6670650974623024, + "grad_norm": 1.9632718238463824, + "learning_rate": 2.6358655447579407e-06, + "loss": 0.5718, + "step": 21765 + }, + { + "epoch": 0.6670957459850435, + "grad_norm": 1.584617836492945, + "learning_rate": 2.6354282232928098e-06, + "loss": 0.6377, + "step": 21766 + }, + { + "epoch": 0.6671263945077848, + "grad_norm": 1.8927743341584782, + "learning_rate": 2.634990925126407e-06, + "loss": 0.6333, + "step": 21767 + }, + { + "epoch": 0.6671570430305259, + "grad_norm": 1.7251728612189572, + "learning_rate": 2.6345536502630464e-06, + "loss": 0.6124, + "step": 21768 + }, + { + "epoch": 0.6671876915532672, + "grad_norm": 1.832822899232748, + "learning_rate": 2.634116398707032e-06, + "loss": 0.6117, + "step": 21769 + }, + { + "epoch": 0.6672183400760083, + "grad_norm": 1.8599116849979198, + "learning_rate": 2.633679170462674e-06, + "loss": 0.6882, + "step": 21770 + }, + { + "epoch": 0.6672489885987496, + "grad_norm": 1.834869324708159, + "learning_rate": 2.6332419655342823e-06, + "loss": 0.689, + "step": 21771 + }, + { + "epoch": 0.6672796371214907, + "grad_norm": 1.6259780988094112, + "learning_rate": 2.6328047839261608e-06, + "loss": 0.6464, + "step": 21772 + }, + { + "epoch": 
0.6673102856442319, + "grad_norm": 2.0771919227968203, + "learning_rate": 2.63236762564262e-06, + "loss": 0.6885, + "step": 21773 + }, + { + "epoch": 0.6673409341669732, + "grad_norm": 1.7883089867603832, + "learning_rate": 2.6319304906879682e-06, + "loss": 0.6111, + "step": 21774 + }, + { + "epoch": 0.6673715826897143, + "grad_norm": 1.8260899806295239, + "learning_rate": 2.63149337906651e-06, + "loss": 0.5338, + "step": 21775 + }, + { + "epoch": 0.6674022312124556, + "grad_norm": 1.7038441569638998, + "learning_rate": 2.631056290782553e-06, + "loss": 0.6304, + "step": 21776 + }, + { + "epoch": 0.6674328797351967, + "grad_norm": 0.7998893810166467, + "learning_rate": 2.6306192258404062e-06, + "loss": 0.4294, + "step": 21777 + }, + { + "epoch": 0.667463528257938, + "grad_norm": 0.8177583661295383, + "learning_rate": 2.6301821842443732e-06, + "loss": 0.4084, + "step": 21778 + }, + { + "epoch": 0.6674941767806791, + "grad_norm": 1.8950189572963978, + "learning_rate": 2.6297451659987626e-06, + "loss": 0.6155, + "step": 21779 + }, + { + "epoch": 0.6675248253034204, + "grad_norm": 1.7806663694520393, + "learning_rate": 2.629308171107876e-06, + "loss": 0.6011, + "step": 21780 + }, + { + "epoch": 0.6675554738261615, + "grad_norm": 1.6480931667166046, + "learning_rate": 2.628871199576026e-06, + "loss": 0.6341, + "step": 21781 + }, + { + "epoch": 0.6675861223489028, + "grad_norm": 0.805074232695288, + "learning_rate": 2.6284342514075155e-06, + "loss": 0.413, + "step": 21782 + }, + { + "epoch": 0.667616770871644, + "grad_norm": 1.8806413969001332, + "learning_rate": 2.627997326606646e-06, + "loss": 0.6129, + "step": 21783 + }, + { + "epoch": 0.6676474193943852, + "grad_norm": 1.6246263527755371, + "learning_rate": 2.6275604251777265e-06, + "loss": 0.5344, + "step": 21784 + }, + { + "epoch": 0.6676780679171264, + "grad_norm": 1.8006047704561625, + "learning_rate": 2.6271235471250633e-06, + "loss": 0.5977, + "step": 21785 + }, + { + "epoch": 0.6677087164398676, + "grad_norm": 1.871386147077009, + "learning_rate": 2.6266866924529566e-06, + "loss": 0.6588, + "step": 21786 + }, + { + "epoch": 0.6677393649626088, + "grad_norm": 1.80921635434187, + "learning_rate": 2.6262498611657134e-06, + "loss": 0.5934, + "step": 21787 + }, + { + "epoch": 0.66777001348535, + "grad_norm": 1.7041452954176963, + "learning_rate": 2.625813053267637e-06, + "loss": 0.6141, + "step": 21788 + }, + { + "epoch": 0.6678006620080912, + "grad_norm": 0.7353612311863992, + "learning_rate": 2.625376268763035e-06, + "loss": 0.3866, + "step": 21789 + }, + { + "epoch": 0.6678313105308324, + "grad_norm": 2.08789285857896, + "learning_rate": 2.6249395076562078e-06, + "loss": 0.597, + "step": 21790 + }, + { + "epoch": 0.6678619590535736, + "grad_norm": 1.6399787949182094, + "learning_rate": 2.6245027699514554e-06, + "loss": 0.6091, + "step": 21791 + }, + { + "epoch": 0.6678926075763149, + "grad_norm": 1.972475764159773, + "learning_rate": 2.624066055653089e-06, + "loss": 0.5898, + "step": 21792 + }, + { + "epoch": 0.667923256099056, + "grad_norm": 1.6776580357028634, + "learning_rate": 2.6236293647654077e-06, + "loss": 0.5694, + "step": 21793 + }, + { + "epoch": 0.6679539046217973, + "grad_norm": 1.6905984833971892, + "learning_rate": 2.623192697292712e-06, + "loss": 0.6396, + "step": 21794 + }, + { + "epoch": 0.6679845531445384, + "grad_norm": 1.7303202669769802, + "learning_rate": 2.622756053239307e-06, + "loss": 0.6049, + "step": 21795 + }, + { + "epoch": 0.6680152016672797, + "grad_norm": 1.7848460151553387, + "learning_rate": 
2.6223194326094966e-06, + "loss": 0.6592, + "step": 21796 + }, + { + "epoch": 0.6680458501900208, + "grad_norm": 1.6677995439412883, + "learning_rate": 2.621882835407579e-06, + "loss": 0.5392, + "step": 21797 + }, + { + "epoch": 0.6680764987127621, + "grad_norm": 1.7744597141837555, + "learning_rate": 2.621446261637859e-06, + "loss": 0.5476, + "step": 21798 + }, + { + "epoch": 0.6681071472355032, + "grad_norm": 1.6811277360610095, + "learning_rate": 2.6210097113046373e-06, + "loss": 0.6574, + "step": 21799 + }, + { + "epoch": 0.6681377957582445, + "grad_norm": 0.7990621046180609, + "learning_rate": 2.620573184412217e-06, + "loss": 0.4151, + "step": 21800 + }, + { + "epoch": 0.6681684442809857, + "grad_norm": 2.018444046301471, + "learning_rate": 2.6201366809648986e-06, + "loss": 0.5996, + "step": 21801 + }, + { + "epoch": 0.6681990928037269, + "grad_norm": 1.8067702412260174, + "learning_rate": 2.6197002009669804e-06, + "loss": 0.5403, + "step": 21802 + }, + { + "epoch": 0.6682297413264681, + "grad_norm": 1.6453818571563097, + "learning_rate": 2.6192637444227646e-06, + "loss": 0.6612, + "step": 21803 + }, + { + "epoch": 0.6682603898492092, + "grad_norm": 1.7281967892013785, + "learning_rate": 2.618827311336555e-06, + "loss": 0.5589, + "step": 21804 + }, + { + "epoch": 0.6682910383719505, + "grad_norm": 1.8425991632283667, + "learning_rate": 2.6183909017126462e-06, + "loss": 0.6101, + "step": 21805 + }, + { + "epoch": 0.6683216868946916, + "grad_norm": 1.7951409132608125, + "learning_rate": 2.617954515555342e-06, + "loss": 0.6326, + "step": 21806 + }, + { + "epoch": 0.6683523354174329, + "grad_norm": 0.8399534334575518, + "learning_rate": 2.6175181528689416e-06, + "loss": 0.4165, + "step": 21807 + }, + { + "epoch": 0.668382983940174, + "grad_norm": 1.8903120801785196, + "learning_rate": 2.617081813657746e-06, + "loss": 0.5984, + "step": 21808 + }, + { + "epoch": 0.6684136324629153, + "grad_norm": 1.7194771397245727, + "learning_rate": 2.6166454979260525e-06, + "loss": 0.5122, + "step": 21809 + }, + { + "epoch": 0.6684442809856564, + "grad_norm": 1.7805805318115193, + "learning_rate": 2.6162092056781573e-06, + "loss": 0.6878, + "step": 21810 + }, + { + "epoch": 0.6684749295083977, + "grad_norm": 1.7507898708541745, + "learning_rate": 2.615772936918367e-06, + "loss": 0.5477, + "step": 21811 + }, + { + "epoch": 0.6685055780311389, + "grad_norm": 2.012444596031975, + "learning_rate": 2.6153366916509757e-06, + "loss": 0.6295, + "step": 21812 + }, + { + "epoch": 0.6685362265538801, + "grad_norm": 1.527399299042975, + "learning_rate": 2.61490046988028e-06, + "loss": 0.4838, + "step": 21813 + }, + { + "epoch": 0.6685668750766213, + "grad_norm": 1.6129780715803586, + "learning_rate": 2.6144642716105805e-06, + "loss": 0.5908, + "step": 21814 + }, + { + "epoch": 0.6685975235993625, + "grad_norm": 1.6481019225285227, + "learning_rate": 2.6140280968461746e-06, + "loss": 0.4674, + "step": 21815 + }, + { + "epoch": 0.6686281721221037, + "grad_norm": 1.7160441047842128, + "learning_rate": 2.613591945591362e-06, + "loss": 0.5331, + "step": 21816 + }, + { + "epoch": 0.6686588206448449, + "grad_norm": 1.637035693289162, + "learning_rate": 2.6131558178504375e-06, + "loss": 0.5892, + "step": 21817 + }, + { + "epoch": 0.6686894691675861, + "grad_norm": 1.8870117312135972, + "learning_rate": 2.6127197136276987e-06, + "loss": 0.5939, + "step": 21818 + }, + { + "epoch": 0.6687201176903274, + "grad_norm": 1.893732794259613, + "learning_rate": 2.612283632927446e-06, + "loss": 0.6566, + "step": 21819 + }, + { + 
"epoch": 0.6687507662130685, + "grad_norm": 0.8500229698583389, + "learning_rate": 2.611847575753973e-06, + "loss": 0.4414, + "step": 21820 + }, + { + "epoch": 0.6687814147358098, + "grad_norm": 1.7188549151383585, + "learning_rate": 2.6114115421115727e-06, + "loss": 0.6113, + "step": 21821 + }, + { + "epoch": 0.6688120632585509, + "grad_norm": 1.7717505035948662, + "learning_rate": 2.6109755320045505e-06, + "loss": 0.5926, + "step": 21822 + }, + { + "epoch": 0.6688427117812922, + "grad_norm": 1.6584272827822855, + "learning_rate": 2.610539545437196e-06, + "loss": 0.5433, + "step": 21823 + }, + { + "epoch": 0.6688733603040333, + "grad_norm": 1.6764426320032495, + "learning_rate": 2.6101035824138064e-06, + "loss": 0.6461, + "step": 21824 + }, + { + "epoch": 0.6689040088267746, + "grad_norm": 1.935175299311811, + "learning_rate": 2.6096676429386767e-06, + "loss": 0.5884, + "step": 21825 + }, + { + "epoch": 0.6689346573495157, + "grad_norm": 1.7380349723181434, + "learning_rate": 2.6092317270161037e-06, + "loss": 0.5842, + "step": 21826 + }, + { + "epoch": 0.668965305872257, + "grad_norm": 1.7748275825446966, + "learning_rate": 2.608795834650385e-06, + "loss": 0.6192, + "step": 21827 + }, + { + "epoch": 0.6689959543949981, + "grad_norm": 1.7817083857804843, + "learning_rate": 2.6083599658458096e-06, + "loss": 0.5559, + "step": 21828 + }, + { + "epoch": 0.6690266029177394, + "grad_norm": 0.827901503270176, + "learning_rate": 2.607924120606676e-06, + "loss": 0.4074, + "step": 21829 + }, + { + "epoch": 0.6690572514404806, + "grad_norm": 1.925401904747045, + "learning_rate": 2.6074882989372798e-06, + "loss": 0.5586, + "step": 21830 + }, + { + "epoch": 0.6690878999632218, + "grad_norm": 1.7318397705161772, + "learning_rate": 2.6070525008419135e-06, + "loss": 0.6408, + "step": 21831 + }, + { + "epoch": 0.669118548485963, + "grad_norm": 1.6521919525647302, + "learning_rate": 2.6066167263248677e-06, + "loss": 0.6318, + "step": 21832 + }, + { + "epoch": 0.6691491970087042, + "grad_norm": 1.7387789478197684, + "learning_rate": 2.6061809753904426e-06, + "loss": 0.5737, + "step": 21833 + }, + { + "epoch": 0.6691798455314454, + "grad_norm": 1.8896502934150676, + "learning_rate": 2.6057452480429278e-06, + "loss": 0.6386, + "step": 21834 + }, + { + "epoch": 0.6692104940541865, + "grad_norm": 1.635676605922519, + "learning_rate": 2.6053095442866196e-06, + "loss": 0.549, + "step": 21835 + }, + { + "epoch": 0.6692411425769278, + "grad_norm": 0.780402807278071, + "learning_rate": 2.6048738641258063e-06, + "loss": 0.4127, + "step": 21836 + }, + { + "epoch": 0.669271791099669, + "grad_norm": 1.909412603638179, + "learning_rate": 2.6044382075647844e-06, + "loss": 0.7037, + "step": 21837 + }, + { + "epoch": 0.6693024396224102, + "grad_norm": 1.9294529611632103, + "learning_rate": 2.604002574607847e-06, + "loss": 0.5656, + "step": 21838 + }, + { + "epoch": 0.6693330881451514, + "grad_norm": 1.9236070174800846, + "learning_rate": 2.6035669652592843e-06, + "loss": 0.6855, + "step": 21839 + }, + { + "epoch": 0.6693637366678926, + "grad_norm": 0.7775235343556633, + "learning_rate": 2.6031313795233894e-06, + "loss": 0.4173, + "step": 21840 + }, + { + "epoch": 0.6693943851906338, + "grad_norm": 1.697687543109491, + "learning_rate": 2.6026958174044557e-06, + "loss": 0.6751, + "step": 21841 + }, + { + "epoch": 0.669425033713375, + "grad_norm": 1.7718614995535147, + "learning_rate": 2.602260278906772e-06, + "loss": 0.5478, + "step": 21842 + }, + { + "epoch": 0.6694556822361162, + "grad_norm": 1.683796511346792, + 
"learning_rate": 2.6018247640346304e-06, + "loss": 0.5541, + "step": 21843 + }, + { + "epoch": 0.6694863307588574, + "grad_norm": 1.7458918316275982, + "learning_rate": 2.601389272792326e-06, + "loss": 0.568, + "step": 21844 + }, + { + "epoch": 0.6695169792815986, + "grad_norm": 1.8848401908204522, + "learning_rate": 2.6009538051841443e-06, + "loss": 0.6253, + "step": 21845 + }, + { + "epoch": 0.6695476278043399, + "grad_norm": 1.99323362386098, + "learning_rate": 2.60051836121438e-06, + "loss": 0.5272, + "step": 21846 + }, + { + "epoch": 0.669578276327081, + "grad_norm": 1.6746610818631618, + "learning_rate": 2.600082940887321e-06, + "loss": 0.5897, + "step": 21847 + }, + { + "epoch": 0.6696089248498223, + "grad_norm": 1.7545188385779595, + "learning_rate": 2.599647544207259e-06, + "loss": 0.6836, + "step": 21848 + }, + { + "epoch": 0.6696395733725634, + "grad_norm": 1.7382290682547827, + "learning_rate": 2.5992121711784858e-06, + "loss": 0.6021, + "step": 21849 + }, + { + "epoch": 0.6696702218953047, + "grad_norm": 1.6388480240304144, + "learning_rate": 2.5987768218052866e-06, + "loss": 0.5864, + "step": 21850 + }, + { + "epoch": 0.6697008704180458, + "grad_norm": 1.6734256398329512, + "learning_rate": 2.5983414960919547e-06, + "loss": 0.7049, + "step": 21851 + }, + { + "epoch": 0.6697315189407871, + "grad_norm": 1.8380596193014391, + "learning_rate": 2.5979061940427798e-06, + "loss": 0.6003, + "step": 21852 + }, + { + "epoch": 0.6697621674635282, + "grad_norm": 1.8536572759849543, + "learning_rate": 2.5974709156620483e-06, + "loss": 0.6103, + "step": 21853 + }, + { + "epoch": 0.6697928159862695, + "grad_norm": 1.737591864590427, + "learning_rate": 2.5970356609540522e-06, + "loss": 0.5831, + "step": 21854 + }, + { + "epoch": 0.6698234645090106, + "grad_norm": 1.8558413165782046, + "learning_rate": 2.596600429923076e-06, + "loss": 0.6445, + "step": 21855 + }, + { + "epoch": 0.6698541130317519, + "grad_norm": 2.329877392365582, + "learning_rate": 2.5961652225734126e-06, + "loss": 0.6347, + "step": 21856 + }, + { + "epoch": 0.6698847615544931, + "grad_norm": 0.763769490040948, + "learning_rate": 2.5957300389093486e-06, + "loss": 0.3804, + "step": 21857 + }, + { + "epoch": 0.6699154100772343, + "grad_norm": 0.8363257762531874, + "learning_rate": 2.5952948789351708e-06, + "loss": 0.3996, + "step": 21858 + }, + { + "epoch": 0.6699460585999755, + "grad_norm": 1.5591184673541134, + "learning_rate": 2.594859742655167e-06, + "loss": 0.608, + "step": 21859 + }, + { + "epoch": 0.6699767071227167, + "grad_norm": 1.8001354598110497, + "learning_rate": 2.5944246300736274e-06, + "loss": 0.5626, + "step": 21860 + }, + { + "epoch": 0.6700073556454579, + "grad_norm": 1.82355604551239, + "learning_rate": 2.5939895411948355e-06, + "loss": 0.598, + "step": 21861 + }, + { + "epoch": 0.6700380041681991, + "grad_norm": 1.8772228697212776, + "learning_rate": 2.5935544760230813e-06, + "loss": 0.5242, + "step": 21862 + }, + { + "epoch": 0.6700686526909403, + "grad_norm": 1.6714449870754007, + "learning_rate": 2.5931194345626516e-06, + "loss": 0.6165, + "step": 21863 + }, + { + "epoch": 0.6700993012136816, + "grad_norm": 1.8712671482616585, + "learning_rate": 2.59268441681783e-06, + "loss": 0.6431, + "step": 21864 + }, + { + "epoch": 0.6701299497364227, + "grad_norm": 1.8077887011046494, + "learning_rate": 2.592249422792907e-06, + "loss": 0.4935, + "step": 21865 + }, + { + "epoch": 0.6701605982591639, + "grad_norm": 1.8427850973652644, + "learning_rate": 2.591814452492164e-06, + "loss": 0.6259, + "step": 21866 + }, 
+ { + "epoch": 0.6701912467819051, + "grad_norm": 0.7784150780545899, + "learning_rate": 2.59137950591989e-06, + "loss": 0.4192, + "step": 21867 + }, + { + "epoch": 0.6702218953046463, + "grad_norm": 1.7822494361266183, + "learning_rate": 2.590944583080372e-06, + "loss": 0.6342, + "step": 21868 + }, + { + "epoch": 0.6702525438273875, + "grad_norm": 1.7051453919181327, + "learning_rate": 2.5905096839778907e-06, + "loss": 0.5928, + "step": 21869 + }, + { + "epoch": 0.6702831923501287, + "grad_norm": 1.8105975139628954, + "learning_rate": 2.590074808616735e-06, + "loss": 0.6218, + "step": 21870 + }, + { + "epoch": 0.6703138408728699, + "grad_norm": 1.7948005109708136, + "learning_rate": 2.58963995700119e-06, + "loss": 0.6274, + "step": 21871 + }, + { + "epoch": 0.6703444893956111, + "grad_norm": 0.7944895090556647, + "learning_rate": 2.589205129135538e-06, + "loss": 0.4159, + "step": 21872 + }, + { + "epoch": 0.6703751379183523, + "grad_norm": 1.9204830522649756, + "learning_rate": 2.5887703250240637e-06, + "loss": 0.5743, + "step": 21873 + }, + { + "epoch": 0.6704057864410935, + "grad_norm": 2.233207877301456, + "learning_rate": 2.5883355446710547e-06, + "loss": 0.6179, + "step": 21874 + }, + { + "epoch": 0.6704364349638348, + "grad_norm": 1.757075605423191, + "learning_rate": 2.587900788080791e-06, + "loss": 0.6366, + "step": 21875 + }, + { + "epoch": 0.6704670834865759, + "grad_norm": 1.453824452833262, + "learning_rate": 2.58746605525756e-06, + "loss": 0.5107, + "step": 21876 + }, + { + "epoch": 0.6704977320093172, + "grad_norm": 0.7613078623405779, + "learning_rate": 2.5870313462056405e-06, + "loss": 0.4088, + "step": 21877 + }, + { + "epoch": 0.6705283805320583, + "grad_norm": 1.651804812997721, + "learning_rate": 2.5865966609293193e-06, + "loss": 0.4854, + "step": 21878 + }, + { + "epoch": 0.6705590290547996, + "grad_norm": 1.910364528452746, + "learning_rate": 2.5861619994328802e-06, + "loss": 0.6608, + "step": 21879 + }, + { + "epoch": 0.6705896775775407, + "grad_norm": 1.7281681620808704, + "learning_rate": 2.5857273617206024e-06, + "loss": 0.5249, + "step": 21880 + }, + { + "epoch": 0.670620326100282, + "grad_norm": 1.581519082963641, + "learning_rate": 2.5852927477967714e-06, + "loss": 0.6222, + "step": 21881 + }, + { + "epoch": 0.6706509746230231, + "grad_norm": 1.9336889877163839, + "learning_rate": 2.5848581576656707e-06, + "loss": 0.6034, + "step": 21882 + }, + { + "epoch": 0.6706816231457644, + "grad_norm": 1.8672289971073974, + "learning_rate": 2.5844235913315773e-06, + "loss": 0.5579, + "step": 21883 + }, + { + "epoch": 0.6707122716685056, + "grad_norm": 1.5387985580541126, + "learning_rate": 2.5839890487987773e-06, + "loss": 0.5583, + "step": 21884 + }, + { + "epoch": 0.6707429201912468, + "grad_norm": 1.9670665497685231, + "learning_rate": 2.5835545300715537e-06, + "loss": 0.6838, + "step": 21885 + }, + { + "epoch": 0.670773568713988, + "grad_norm": 1.8308636123118651, + "learning_rate": 2.583120035154183e-06, + "loss": 0.6435, + "step": 21886 + }, + { + "epoch": 0.6708042172367292, + "grad_norm": 1.750697408993787, + "learning_rate": 2.5826855640509507e-06, + "loss": 0.605, + "step": 21887 + }, + { + "epoch": 0.6708348657594704, + "grad_norm": 1.9030078499879495, + "learning_rate": 2.5822511167661328e-06, + "loss": 0.6535, + "step": 21888 + }, + { + "epoch": 0.6708655142822116, + "grad_norm": 1.8803665114584154, + "learning_rate": 2.581816693304017e-06, + "loss": 0.6222, + "step": 21889 + }, + { + "epoch": 0.6708961628049528, + "grad_norm": 1.9743882446630543, + 
"learning_rate": 2.58138229366888e-06, + "loss": 0.6108, + "step": 21890 + }, + { + "epoch": 0.670926811327694, + "grad_norm": 1.9457874564996942, + "learning_rate": 2.580947917865e-06, + "loss": 0.561, + "step": 21891 + }, + { + "epoch": 0.6709574598504352, + "grad_norm": 0.8115251864002397, + "learning_rate": 2.580513565896659e-06, + "loss": 0.4273, + "step": 21892 + }, + { + "epoch": 0.6709881083731765, + "grad_norm": 0.7654166954656801, + "learning_rate": 2.5800792377681386e-06, + "loss": 0.4032, + "step": 21893 + }, + { + "epoch": 0.6710187568959176, + "grad_norm": 2.000911911452428, + "learning_rate": 2.579644933483715e-06, + "loss": 0.6619, + "step": 21894 + }, + { + "epoch": 0.6710494054186589, + "grad_norm": 1.6832380698945404, + "learning_rate": 2.5792106530476695e-06, + "loss": 0.558, + "step": 21895 + }, + { + "epoch": 0.6710800539414, + "grad_norm": 1.6686782540391996, + "learning_rate": 2.578776396464281e-06, + "loss": 0.6301, + "step": 21896 + }, + { + "epoch": 0.6711107024641412, + "grad_norm": 0.8366396883628605, + "learning_rate": 2.5783421637378293e-06, + "loss": 0.434, + "step": 21897 + }, + { + "epoch": 0.6711413509868824, + "grad_norm": 1.8714111287381499, + "learning_rate": 2.5779079548725923e-06, + "loss": 0.5622, + "step": 21898 + }, + { + "epoch": 0.6711719995096236, + "grad_norm": 1.6037916617880479, + "learning_rate": 2.5774737698728458e-06, + "loss": 0.5571, + "step": 21899 + }, + { + "epoch": 0.6712026480323648, + "grad_norm": 3.645745604572711, + "learning_rate": 2.57703960874287e-06, + "loss": 0.6288, + "step": 21900 + }, + { + "epoch": 0.671233296555106, + "grad_norm": 1.9545345209624816, + "learning_rate": 2.576605471486945e-06, + "loss": 0.6484, + "step": 21901 + }, + { + "epoch": 0.6712639450778473, + "grad_norm": 1.6276567273471303, + "learning_rate": 2.5761713581093444e-06, + "loss": 0.5062, + "step": 21902 + }, + { + "epoch": 0.6712945936005884, + "grad_norm": 1.6065327537768939, + "learning_rate": 2.5757372686143478e-06, + "loss": 0.4489, + "step": 21903 + }, + { + "epoch": 0.6713252421233297, + "grad_norm": 0.7984993989342111, + "learning_rate": 2.5753032030062337e-06, + "loss": 0.4155, + "step": 21904 + }, + { + "epoch": 0.6713558906460708, + "grad_norm": 1.8221992718575355, + "learning_rate": 2.5748691612892757e-06, + "loss": 0.5992, + "step": 21905 + }, + { + "epoch": 0.6713865391688121, + "grad_norm": 1.8647864344174885, + "learning_rate": 2.5744351434677544e-06, + "loss": 0.5969, + "step": 21906 + }, + { + "epoch": 0.6714171876915532, + "grad_norm": 1.9217019290394433, + "learning_rate": 2.5740011495459403e-06, + "loss": 0.6361, + "step": 21907 + }, + { + "epoch": 0.6714478362142945, + "grad_norm": 2.012290958673959, + "learning_rate": 2.5735671795281177e-06, + "loss": 0.6455, + "step": 21908 + }, + { + "epoch": 0.6714784847370356, + "grad_norm": 1.7381381027309362, + "learning_rate": 2.5731332334185577e-06, + "loss": 0.5262, + "step": 21909 + }, + { + "epoch": 0.6715091332597769, + "grad_norm": 1.8029549867903272, + "learning_rate": 2.572699311221536e-06, + "loss": 0.5802, + "step": 21910 + }, + { + "epoch": 0.671539781782518, + "grad_norm": 1.8351874621120834, + "learning_rate": 2.5722654129413283e-06, + "loss": 0.5704, + "step": 21911 + }, + { + "epoch": 0.6715704303052593, + "grad_norm": 1.8284005645106853, + "learning_rate": 2.571831538582213e-06, + "loss": 0.5624, + "step": 21912 + }, + { + "epoch": 0.6716010788280005, + "grad_norm": 1.54251728377942, + "learning_rate": 2.5713976881484605e-06, + "loss": 0.6091, + "step": 21913 + }, + { + 
"epoch": 0.6716317273507417, + "grad_norm": 1.9981018748090298, + "learning_rate": 2.5709638616443483e-06, + "loss": 0.6748, + "step": 21914 + }, + { + "epoch": 0.6716623758734829, + "grad_norm": 1.8924053741133615, + "learning_rate": 2.570530059074151e-06, + "loss": 0.5713, + "step": 21915 + }, + { + "epoch": 0.6716930243962241, + "grad_norm": 1.7168698797398514, + "learning_rate": 2.570096280442144e-06, + "loss": 0.495, + "step": 21916 + }, + { + "epoch": 0.6717236729189653, + "grad_norm": 1.860782264003547, + "learning_rate": 2.5696625257526e-06, + "loss": 0.6243, + "step": 21917 + }, + { + "epoch": 0.6717543214417065, + "grad_norm": 1.8420742739547686, + "learning_rate": 2.5692287950097894e-06, + "loss": 0.5588, + "step": 21918 + }, + { + "epoch": 0.6717849699644477, + "grad_norm": 1.6570962263445632, + "learning_rate": 2.5687950882179935e-06, + "loss": 0.6347, + "step": 21919 + }, + { + "epoch": 0.671815618487189, + "grad_norm": 1.614873060272562, + "learning_rate": 2.568361405381481e-06, + "loss": 0.5794, + "step": 21920 + }, + { + "epoch": 0.6718462670099301, + "grad_norm": 2.212505760566257, + "learning_rate": 2.567927746504524e-06, + "loss": 0.6895, + "step": 21921 + }, + { + "epoch": 0.6718769155326714, + "grad_norm": 1.7268893555945721, + "learning_rate": 2.5674941115913975e-06, + "loss": 0.5638, + "step": 21922 + }, + { + "epoch": 0.6719075640554125, + "grad_norm": 1.7601334237996766, + "learning_rate": 2.567060500646373e-06, + "loss": 0.5723, + "step": 21923 + }, + { + "epoch": 0.6719382125781538, + "grad_norm": 1.7266141926152652, + "learning_rate": 2.5666269136737277e-06, + "loss": 0.4938, + "step": 21924 + }, + { + "epoch": 0.6719688611008949, + "grad_norm": 1.9226254990942597, + "learning_rate": 2.5661933506777266e-06, + "loss": 0.6764, + "step": 21925 + }, + { + "epoch": 0.6719995096236362, + "grad_norm": 1.8141679737465424, + "learning_rate": 2.5657598116626454e-06, + "loss": 0.6433, + "step": 21926 + }, + { + "epoch": 0.6720301581463773, + "grad_norm": 0.8086088084452044, + "learning_rate": 2.5653262966327572e-06, + "loss": 0.4317, + "step": 21927 + }, + { + "epoch": 0.6720608066691185, + "grad_norm": 1.7959608925773034, + "learning_rate": 2.564892805592333e-06, + "loss": 0.601, + "step": 21928 + }, + { + "epoch": 0.6720914551918598, + "grad_norm": 1.5622466208524484, + "learning_rate": 2.5644593385456386e-06, + "loss": 0.5499, + "step": 21929 + }, + { + "epoch": 0.6721221037146009, + "grad_norm": 1.731772284566023, + "learning_rate": 2.5640258954969533e-06, + "loss": 0.6798, + "step": 21930 + }, + { + "epoch": 0.6721527522373422, + "grad_norm": 2.296413594512218, + "learning_rate": 2.5635924764505437e-06, + "loss": 0.5822, + "step": 21931 + }, + { + "epoch": 0.6721834007600833, + "grad_norm": 0.8034605355480093, + "learning_rate": 2.5631590814106793e-06, + "loss": 0.4248, + "step": 21932 + }, + { + "epoch": 0.6722140492828246, + "grad_norm": 1.6969182952929858, + "learning_rate": 2.5627257103816315e-06, + "loss": 0.6081, + "step": 21933 + }, + { + "epoch": 0.6722446978055657, + "grad_norm": 1.8445635089485406, + "learning_rate": 2.5622923633676715e-06, + "loss": 0.5847, + "step": 21934 + }, + { + "epoch": 0.672275346328307, + "grad_norm": 1.7179222575493207, + "learning_rate": 2.5618590403730702e-06, + "loss": 0.6071, + "step": 21935 + }, + { + "epoch": 0.6723059948510481, + "grad_norm": 2.0032999469554795, + "learning_rate": 2.5614257414020936e-06, + "loss": 0.5755, + "step": 21936 + }, + { + "epoch": 0.6723366433737894, + "grad_norm": 2.0283694070231837, + 
"learning_rate": 2.5609924664590136e-06, + "loss": 0.6743, + "step": 21937 + }, + { + "epoch": 0.6723672918965306, + "grad_norm": 2.0668126375988045, + "learning_rate": 2.5605592155481007e-06, + "loss": 0.6338, + "step": 21938 + }, + { + "epoch": 0.6723979404192718, + "grad_norm": 1.762075339836358, + "learning_rate": 2.5601259886736217e-06, + "loss": 0.6147, + "step": 21939 + }, + { + "epoch": 0.672428588942013, + "grad_norm": 1.568636138283126, + "learning_rate": 2.559692785839842e-06, + "loss": 0.6027, + "step": 21940 + }, + { + "epoch": 0.6724592374647542, + "grad_norm": 1.8362376182781641, + "learning_rate": 2.5592596070510375e-06, + "loss": 0.6439, + "step": 21941 + }, + { + "epoch": 0.6724898859874954, + "grad_norm": 1.8905281181163054, + "learning_rate": 2.5588264523114703e-06, + "loss": 0.622, + "step": 21942 + }, + { + "epoch": 0.6725205345102366, + "grad_norm": 1.6738712440667407, + "learning_rate": 2.5583933216254133e-06, + "loss": 0.6024, + "step": 21943 + }, + { + "epoch": 0.6725511830329778, + "grad_norm": 1.6723485446675543, + "learning_rate": 2.5579602149971282e-06, + "loss": 0.6113, + "step": 21944 + }, + { + "epoch": 0.672581831555719, + "grad_norm": 2.0493299496644752, + "learning_rate": 2.5575271324308876e-06, + "loss": 0.6983, + "step": 21945 + }, + { + "epoch": 0.6726124800784602, + "grad_norm": 0.8417890240295134, + "learning_rate": 2.557094073930958e-06, + "loss": 0.4407, + "step": 21946 + }, + { + "epoch": 0.6726431286012015, + "grad_norm": 0.8206764915655789, + "learning_rate": 2.5566610395016047e-06, + "loss": 0.4167, + "step": 21947 + }, + { + "epoch": 0.6726737771239426, + "grad_norm": 2.2465667462565646, + "learning_rate": 2.556228029147094e-06, + "loss": 0.7125, + "step": 21948 + }, + { + "epoch": 0.6727044256466839, + "grad_norm": 1.9427167534310872, + "learning_rate": 2.555795042871696e-06, + "loss": 0.6277, + "step": 21949 + }, + { + "epoch": 0.672735074169425, + "grad_norm": 1.7112264239765542, + "learning_rate": 2.555362080679675e-06, + "loss": 0.6309, + "step": 21950 + }, + { + "epoch": 0.6727657226921663, + "grad_norm": 0.8581160025634121, + "learning_rate": 2.5549291425752954e-06, + "loss": 0.4173, + "step": 21951 + }, + { + "epoch": 0.6727963712149074, + "grad_norm": 1.9581992368919272, + "learning_rate": 2.5544962285628243e-06, + "loss": 0.6703, + "step": 21952 + }, + { + "epoch": 0.6728270197376487, + "grad_norm": 1.8823653278945431, + "learning_rate": 2.5540633386465276e-06, + "loss": 0.5881, + "step": 21953 + }, + { + "epoch": 0.6728576682603898, + "grad_norm": 1.6890734662174618, + "learning_rate": 2.5536304728306725e-06, + "loss": 0.5574, + "step": 21954 + }, + { + "epoch": 0.6728883167831311, + "grad_norm": 1.8283079434683398, + "learning_rate": 2.5531976311195205e-06, + "loss": 0.5617, + "step": 21955 + }, + { + "epoch": 0.6729189653058723, + "grad_norm": 1.7165327548866605, + "learning_rate": 2.5527648135173377e-06, + "loss": 0.5926, + "step": 21956 + }, + { + "epoch": 0.6729496138286135, + "grad_norm": 1.5935656336942148, + "learning_rate": 2.552332020028392e-06, + "loss": 0.5066, + "step": 21957 + }, + { + "epoch": 0.6729802623513547, + "grad_norm": 1.923294886522952, + "learning_rate": 2.5518992506569453e-06, + "loss": 0.6412, + "step": 21958 + }, + { + "epoch": 0.6730109108740958, + "grad_norm": 1.8470455412374873, + "learning_rate": 2.5514665054072572e-06, + "loss": 0.6086, + "step": 21959 + }, + { + "epoch": 0.6730415593968371, + "grad_norm": 0.8396268736377983, + "learning_rate": 2.5510337842835997e-06, + "loss": 0.4249, + "step": 
21960 + }, + { + "epoch": 0.6730722079195782, + "grad_norm": 1.917951873290679, + "learning_rate": 2.550601087290232e-06, + "loss": 0.6001, + "step": 21961 + }, + { + "epoch": 0.6731028564423195, + "grad_norm": 1.9054428031318809, + "learning_rate": 2.55016841443142e-06, + "loss": 0.666, + "step": 21962 + }, + { + "epoch": 0.6731335049650606, + "grad_norm": 0.7940558485948119, + "learning_rate": 2.549735765711423e-06, + "loss": 0.4101, + "step": 21963 + }, + { + "epoch": 0.6731641534878019, + "grad_norm": 1.8395948706401002, + "learning_rate": 2.549303141134507e-06, + "loss": 0.5742, + "step": 21964 + }, + { + "epoch": 0.673194802010543, + "grad_norm": 1.7751141692246515, + "learning_rate": 2.5488705407049353e-06, + "loss": 0.558, + "step": 21965 + }, + { + "epoch": 0.6732254505332843, + "grad_norm": 1.7869892564363152, + "learning_rate": 2.5484379644269687e-06, + "loss": 0.535, + "step": 21966 + }, + { + "epoch": 0.6732560990560255, + "grad_norm": 1.9732129368334572, + "learning_rate": 2.5480054123048693e-06, + "loss": 0.6254, + "step": 21967 + }, + { + "epoch": 0.6732867475787667, + "grad_norm": 1.8208703598368792, + "learning_rate": 2.5475728843429017e-06, + "loss": 0.6145, + "step": 21968 + }, + { + "epoch": 0.6733173961015079, + "grad_norm": 0.8907257950865834, + "learning_rate": 2.547140380545324e-06, + "loss": 0.4225, + "step": 21969 + }, + { + "epoch": 0.6733480446242491, + "grad_norm": 0.7630576650292407, + "learning_rate": 2.5467079009164e-06, + "loss": 0.415, + "step": 21970 + }, + { + "epoch": 0.6733786931469903, + "grad_norm": 1.8956995868263384, + "learning_rate": 2.5462754454603927e-06, + "loss": 0.5791, + "step": 21971 + }, + { + "epoch": 0.6734093416697315, + "grad_norm": 1.9890094127808922, + "learning_rate": 2.545843014181559e-06, + "loss": 0.6573, + "step": 21972 + }, + { + "epoch": 0.6734399901924727, + "grad_norm": 1.746363059300152, + "learning_rate": 2.5454106070841644e-06, + "loss": 0.661, + "step": 21973 + }, + { + "epoch": 0.673470638715214, + "grad_norm": 2.0227253345738725, + "learning_rate": 2.544978224172465e-06, + "loss": 0.6704, + "step": 21974 + }, + { + "epoch": 0.6735012872379551, + "grad_norm": 1.934998485217486, + "learning_rate": 2.544545865450724e-06, + "loss": 0.6374, + "step": 21975 + }, + { + "epoch": 0.6735319357606964, + "grad_norm": 1.880762513322935, + "learning_rate": 2.544113530923201e-06, + "loss": 0.583, + "step": 21976 + }, + { + "epoch": 0.6735625842834375, + "grad_norm": 1.7564375148150078, + "learning_rate": 2.543681220594155e-06, + "loss": 0.626, + "step": 21977 + }, + { + "epoch": 0.6735932328061788, + "grad_norm": 1.6981162803711463, + "learning_rate": 2.5432489344678467e-06, + "loss": 0.6203, + "step": 21978 + }, + { + "epoch": 0.6736238813289199, + "grad_norm": 1.7867794161544253, + "learning_rate": 2.5428166725485372e-06, + "loss": 0.4911, + "step": 21979 + }, + { + "epoch": 0.6736545298516612, + "grad_norm": 1.743619820273211, + "learning_rate": 2.5423844348404812e-06, + "loss": 0.6741, + "step": 21980 + }, + { + "epoch": 0.6736851783744023, + "grad_norm": 1.7023725847891213, + "learning_rate": 2.54195222134794e-06, + "loss": 0.5049, + "step": 21981 + }, + { + "epoch": 0.6737158268971436, + "grad_norm": 1.7807619432402915, + "learning_rate": 2.5415200320751754e-06, + "loss": 0.4861, + "step": 21982 + }, + { + "epoch": 0.6737464754198847, + "grad_norm": 1.7909344957933433, + "learning_rate": 2.54108786702644e-06, + "loss": 0.6492, + "step": 21983 + }, + { + "epoch": 0.673777123942626, + "grad_norm": 2.008849518874982, + 
"learning_rate": 2.5406557262059973e-06, + "loss": 0.5535, + "step": 21984 + }, + { + "epoch": 0.6738077724653672, + "grad_norm": 2.067839332877294, + "learning_rate": 2.540223609618101e-06, + "loss": 0.7079, + "step": 21985 + }, + { + "epoch": 0.6738384209881084, + "grad_norm": 1.8518073417622969, + "learning_rate": 2.5397915172670105e-06, + "loss": 0.6253, + "step": 21986 + }, + { + "epoch": 0.6738690695108496, + "grad_norm": 1.7019415602676187, + "learning_rate": 2.539359449156986e-06, + "loss": 0.6252, + "step": 21987 + }, + { + "epoch": 0.6738997180335908, + "grad_norm": 0.8067789477080879, + "learning_rate": 2.5389274052922807e-06, + "loss": 0.4148, + "step": 21988 + }, + { + "epoch": 0.673930366556332, + "grad_norm": 0.7716236386776563, + "learning_rate": 2.5384953856771533e-06, + "loss": 0.3981, + "step": 21989 + }, + { + "epoch": 0.6739610150790731, + "grad_norm": 1.9458136032018638, + "learning_rate": 2.5380633903158623e-06, + "loss": 0.6361, + "step": 21990 + }, + { + "epoch": 0.6739916636018144, + "grad_norm": 1.798462749379286, + "learning_rate": 2.537631419212661e-06, + "loss": 0.5658, + "step": 21991 + }, + { + "epoch": 0.6740223121245555, + "grad_norm": 1.668874527365498, + "learning_rate": 2.5371994723718075e-06, + "loss": 0.4943, + "step": 21992 + }, + { + "epoch": 0.6740529606472968, + "grad_norm": 1.6243519823658343, + "learning_rate": 2.536767549797559e-06, + "loss": 0.5593, + "step": 21993 + }, + { + "epoch": 0.674083609170038, + "grad_norm": 1.6357927505825443, + "learning_rate": 2.5363356514941684e-06, + "loss": 0.5116, + "step": 21994 + }, + { + "epoch": 0.6741142576927792, + "grad_norm": 1.667509655954575, + "learning_rate": 2.535903777465895e-06, + "loss": 0.5375, + "step": 21995 + }, + { + "epoch": 0.6741449062155204, + "grad_norm": 1.928533001696845, + "learning_rate": 2.5354719277169906e-06, + "loss": 0.6367, + "step": 21996 + }, + { + "epoch": 0.6741755547382616, + "grad_norm": 1.6458476407630978, + "learning_rate": 2.5350401022517114e-06, + "loss": 0.5612, + "step": 21997 + }, + { + "epoch": 0.6742062032610028, + "grad_norm": 1.7248888283210804, + "learning_rate": 2.534608301074315e-06, + "loss": 0.5647, + "step": 21998 + }, + { + "epoch": 0.674236851783744, + "grad_norm": 1.833608815682671, + "learning_rate": 2.5341765241890516e-06, + "loss": 0.6231, + "step": 21999 + }, + { + "epoch": 0.6742675003064852, + "grad_norm": 2.056935215892492, + "learning_rate": 2.5337447716001773e-06, + "loss": 0.6201, + "step": 22000 + }, + { + "epoch": 0.6742981488292265, + "grad_norm": 0.7726058123581973, + "learning_rate": 2.5333130433119495e-06, + "loss": 0.4003, + "step": 22001 + }, + { + "epoch": 0.6743287973519676, + "grad_norm": 1.744829798403336, + "learning_rate": 2.532881339328617e-06, + "loss": 0.6156, + "step": 22002 + }, + { + "epoch": 0.6743594458747089, + "grad_norm": 1.768267581573039, + "learning_rate": 2.5324496596544383e-06, + "loss": 0.5585, + "step": 22003 + }, + { + "epoch": 0.67439009439745, + "grad_norm": 1.7981711123019792, + "learning_rate": 2.5320180042936627e-06, + "loss": 0.5524, + "step": 22004 + }, + { + "epoch": 0.6744207429201913, + "grad_norm": 1.9201505753591572, + "learning_rate": 2.531586373250544e-06, + "loss": 0.6634, + "step": 22005 + }, + { + "epoch": 0.6744513914429324, + "grad_norm": 1.7953128252414206, + "learning_rate": 2.5311547665293397e-06, + "loss": 0.626, + "step": 22006 + }, + { + "epoch": 0.6744820399656737, + "grad_norm": 1.69380549520526, + "learning_rate": 2.5307231841342962e-06, + "loss": 0.5719, + "step": 22007 + }, + { 
+ "epoch": 0.6745126884884148, + "grad_norm": 2.047989368468826, + "learning_rate": 2.5302916260696698e-06, + "loss": 0.5965, + "step": 22008 + }, + { + "epoch": 0.6745433370111561, + "grad_norm": 0.7756901564407175, + "learning_rate": 2.5298600923397133e-06, + "loss": 0.4064, + "step": 22009 + }, + { + "epoch": 0.6745739855338972, + "grad_norm": 1.6977713746422718, + "learning_rate": 2.529428582948675e-06, + "loss": 0.5605, + "step": 22010 + }, + { + "epoch": 0.6746046340566385, + "grad_norm": 1.9214024580164872, + "learning_rate": 2.52899709790081e-06, + "loss": 0.6069, + "step": 22011 + }, + { + "epoch": 0.6746352825793797, + "grad_norm": 1.771880994702041, + "learning_rate": 2.52856563720037e-06, + "loss": 0.5991, + "step": 22012 + }, + { + "epoch": 0.6746659311021209, + "grad_norm": 1.6042754789718547, + "learning_rate": 2.5281342008516035e-06, + "loss": 0.5938, + "step": 22013 + }, + { + "epoch": 0.6746965796248621, + "grad_norm": 1.783096978249992, + "learning_rate": 2.527702788858765e-06, + "loss": 0.6362, + "step": 22014 + }, + { + "epoch": 0.6747272281476033, + "grad_norm": 1.5443983258809009, + "learning_rate": 2.5272714012260996e-06, + "loss": 0.6128, + "step": 22015 + }, + { + "epoch": 0.6747578766703445, + "grad_norm": 1.9207471583709572, + "learning_rate": 2.5268400379578663e-06, + "loss": 0.5684, + "step": 22016 + }, + { + "epoch": 0.6747885251930857, + "grad_norm": 1.640156251600711, + "learning_rate": 2.5264086990583097e-06, + "loss": 0.5838, + "step": 22017 + }, + { + "epoch": 0.6748191737158269, + "grad_norm": 1.7537072965391685, + "learning_rate": 2.5259773845316798e-06, + "loss": 0.6502, + "step": 22018 + }, + { + "epoch": 0.6748498222385682, + "grad_norm": 1.9163771678006485, + "learning_rate": 2.5255460943822273e-06, + "loss": 0.5698, + "step": 22019 + }, + { + "epoch": 0.6748804707613093, + "grad_norm": 1.7984643514693546, + "learning_rate": 2.5251148286142045e-06, + "loss": 0.6137, + "step": 22020 + }, + { + "epoch": 0.6749111192840505, + "grad_norm": 2.118959282846533, + "learning_rate": 2.524683587231857e-06, + "loss": 0.6293, + "step": 22021 + }, + { + "epoch": 0.6749417678067917, + "grad_norm": 1.8396650050394208, + "learning_rate": 2.524252370239435e-06, + "loss": 0.6162, + "step": 22022 + }, + { + "epoch": 0.6749724163295329, + "grad_norm": 1.818390254814153, + "learning_rate": 2.523821177641188e-06, + "loss": 0.56, + "step": 22023 + }, + { + "epoch": 0.6750030648522741, + "grad_norm": 1.71975946452933, + "learning_rate": 2.5233900094413668e-06, + "loss": 0.5728, + "step": 22024 + }, + { + "epoch": 0.6750337133750153, + "grad_norm": 1.816062358246703, + "learning_rate": 2.522958865644217e-06, + "loss": 0.685, + "step": 22025 + }, + { + "epoch": 0.6750643618977565, + "grad_norm": 1.86497256057978, + "learning_rate": 2.5225277462539833e-06, + "loss": 0.5896, + "step": 22026 + }, + { + "epoch": 0.6750950104204977, + "grad_norm": 2.1859079521898273, + "learning_rate": 2.5220966512749213e-06, + "loss": 0.6117, + "step": 22027 + }, + { + "epoch": 0.675125658943239, + "grad_norm": 0.7942845018760125, + "learning_rate": 2.5216655807112756e-06, + "loss": 0.3988, + "step": 22028 + }, + { + "epoch": 0.6751563074659801, + "grad_norm": 1.8026373983130357, + "learning_rate": 2.521234534567291e-06, + "loss": 0.678, + "step": 22029 + }, + { + "epoch": 0.6751869559887214, + "grad_norm": 1.9113389016904545, + "learning_rate": 2.5208035128472164e-06, + "loss": 0.6249, + "step": 22030 + }, + { + "epoch": 0.6752176045114625, + "grad_norm": 1.8482889574189352, + "learning_rate": 
2.5203725155553012e-06, + "loss": 0.5571, + "step": 22031 + }, + { + "epoch": 0.6752482530342038, + "grad_norm": 1.744212614608669, + "learning_rate": 2.519941542695788e-06, + "loss": 0.6495, + "step": 22032 + }, + { + "epoch": 0.6752789015569449, + "grad_norm": 1.6013121594049038, + "learning_rate": 2.5195105942729257e-06, + "loss": 0.5642, + "step": 22033 + }, + { + "epoch": 0.6753095500796862, + "grad_norm": 1.8442364199279158, + "learning_rate": 2.51907967029096e-06, + "loss": 0.6535, + "step": 22034 + }, + { + "epoch": 0.6753401986024273, + "grad_norm": 1.8236808295678821, + "learning_rate": 2.5186487707541384e-06, + "loss": 0.521, + "step": 22035 + }, + { + "epoch": 0.6753708471251686, + "grad_norm": 0.7855488195542449, + "learning_rate": 2.5182178956667057e-06, + "loss": 0.4026, + "step": 22036 + }, + { + "epoch": 0.6754014956479097, + "grad_norm": 2.152508678634075, + "learning_rate": 2.517787045032904e-06, + "loss": 0.6398, + "step": 22037 + }, + { + "epoch": 0.675432144170651, + "grad_norm": 2.238977441445674, + "learning_rate": 2.5173562188569843e-06, + "loss": 0.6481, + "step": 22038 + }, + { + "epoch": 0.6754627926933922, + "grad_norm": 1.5775328215614073, + "learning_rate": 2.5169254171431903e-06, + "loss": 0.5647, + "step": 22039 + }, + { + "epoch": 0.6754934412161334, + "grad_norm": 1.8381236889399983, + "learning_rate": 2.5164946398957624e-06, + "loss": 0.5756, + "step": 22040 + }, + { + "epoch": 0.6755240897388746, + "grad_norm": 1.8259173256394408, + "learning_rate": 2.516063887118949e-06, + "loss": 0.5604, + "step": 22041 + }, + { + "epoch": 0.6755547382616158, + "grad_norm": 1.9378104216900172, + "learning_rate": 2.5156331588169937e-06, + "loss": 0.7405, + "step": 22042 + }, + { + "epoch": 0.675585386784357, + "grad_norm": 1.8142434507990528, + "learning_rate": 2.5152024549941417e-06, + "loss": 0.609, + "step": 22043 + }, + { + "epoch": 0.6756160353070982, + "grad_norm": 1.7103094278164852, + "learning_rate": 2.5147717756546343e-06, + "loss": 0.5777, + "step": 22044 + }, + { + "epoch": 0.6756466838298394, + "grad_norm": 0.7488709324031041, + "learning_rate": 2.5143411208027165e-06, + "loss": 0.3906, + "step": 22045 + }, + { + "epoch": 0.6756773323525807, + "grad_norm": 1.727000553374951, + "learning_rate": 2.513910490442633e-06, + "loss": 0.5723, + "step": 22046 + }, + { + "epoch": 0.6757079808753218, + "grad_norm": 1.7380795915869947, + "learning_rate": 2.513479884578626e-06, + "loss": 0.5012, + "step": 22047 + }, + { + "epoch": 0.6757386293980631, + "grad_norm": 1.781741889434329, + "learning_rate": 2.513049303214936e-06, + "loss": 0.6193, + "step": 22048 + }, + { + "epoch": 0.6757692779208042, + "grad_norm": 2.0001254274557287, + "learning_rate": 2.512618746355807e-06, + "loss": 0.6248, + "step": 22049 + }, + { + "epoch": 0.6757999264435455, + "grad_norm": 1.6088558313734234, + "learning_rate": 2.5121882140054834e-06, + "loss": 0.5171, + "step": 22050 + }, + { + "epoch": 0.6758305749662866, + "grad_norm": 1.6921988619909871, + "learning_rate": 2.5117577061682063e-06, + "loss": 0.5618, + "step": 22051 + }, + { + "epoch": 0.6758612234890278, + "grad_norm": 0.8090760005167188, + "learning_rate": 2.5113272228482157e-06, + "loss": 0.398, + "step": 22052 + }, + { + "epoch": 0.675891872011769, + "grad_norm": 1.8660226683915262, + "learning_rate": 2.5108967640497544e-06, + "loss": 0.6747, + "step": 22053 + }, + { + "epoch": 0.6759225205345102, + "grad_norm": 1.9559821458287452, + "learning_rate": 2.5104663297770664e-06, + "loss": 0.7632, + "step": 22054 + }, + { + "epoch": 
0.6759531690572514, + "grad_norm": 1.8428902615537972, + "learning_rate": 2.5100359200343903e-06, + "loss": 0.6684, + "step": 22055 + }, + { + "epoch": 0.6759838175799926, + "grad_norm": 1.7959299149241832, + "learning_rate": 2.509605534825964e-06, + "loss": 0.5788, + "step": 22056 + }, + { + "epoch": 0.6760144661027339, + "grad_norm": 1.9109250883684143, + "learning_rate": 2.5091751741560353e-06, + "loss": 0.6477, + "step": 22057 + }, + { + "epoch": 0.676045114625475, + "grad_norm": 1.7365681467417304, + "learning_rate": 2.508744838028841e-06, + "loss": 0.5427, + "step": 22058 + }, + { + "epoch": 0.6760757631482163, + "grad_norm": 2.6837703758205858, + "learning_rate": 2.5083145264486193e-06, + "loss": 0.6179, + "step": 22059 + }, + { + "epoch": 0.6761064116709574, + "grad_norm": 1.6799421456503187, + "learning_rate": 2.507884239419612e-06, + "loss": 0.5791, + "step": 22060 + }, + { + "epoch": 0.6761370601936987, + "grad_norm": 1.689407016518336, + "learning_rate": 2.5074539769460588e-06, + "loss": 0.5889, + "step": 22061 + }, + { + "epoch": 0.6761677087164398, + "grad_norm": 1.9320180968067533, + "learning_rate": 2.507023739032201e-06, + "loss": 0.5798, + "step": 22062 + }, + { + "epoch": 0.6761983572391811, + "grad_norm": 1.9984276282087916, + "learning_rate": 2.506593525682275e-06, + "loss": 0.7045, + "step": 22063 + }, + { + "epoch": 0.6762290057619222, + "grad_norm": 1.8898638717275371, + "learning_rate": 2.50616333690052e-06, + "loss": 0.5904, + "step": 22064 + }, + { + "epoch": 0.6762596542846635, + "grad_norm": 1.8660846505929718, + "learning_rate": 2.505733172691178e-06, + "loss": 0.598, + "step": 22065 + }, + { + "epoch": 0.6762903028074047, + "grad_norm": 1.868771955267229, + "learning_rate": 2.5053030330584858e-06, + "loss": 0.6468, + "step": 22066 + }, + { + "epoch": 0.6763209513301459, + "grad_norm": 1.9824600795829737, + "learning_rate": 2.5048729180066765e-06, + "loss": 0.5818, + "step": 22067 + }, + { + "epoch": 0.6763515998528871, + "grad_norm": 1.7375657057958829, + "learning_rate": 2.5044428275399968e-06, + "loss": 0.5795, + "step": 22068 + }, + { + "epoch": 0.6763822483756283, + "grad_norm": 0.8060976843638097, + "learning_rate": 2.5040127616626784e-06, + "loss": 0.3988, + "step": 22069 + }, + { + "epoch": 0.6764128968983695, + "grad_norm": 1.7626286079486162, + "learning_rate": 2.503582720378964e-06, + "loss": 0.6005, + "step": 22070 + }, + { + "epoch": 0.6764435454211107, + "grad_norm": 1.9860987371420806, + "learning_rate": 2.503152703693085e-06, + "loss": 0.6833, + "step": 22071 + }, + { + "epoch": 0.6764741939438519, + "grad_norm": 1.7534994614931465, + "learning_rate": 2.5027227116092806e-06, + "loss": 0.6217, + "step": 22072 + }, + { + "epoch": 0.6765048424665931, + "grad_norm": 0.7816517765231794, + "learning_rate": 2.5022927441317912e-06, + "loss": 0.4233, + "step": 22073 + }, + { + "epoch": 0.6765354909893343, + "grad_norm": 1.6894639128356337, + "learning_rate": 2.501862801264848e-06, + "loss": 0.5551, + "step": 22074 + }, + { + "epoch": 0.6765661395120756, + "grad_norm": 1.7753436380034229, + "learning_rate": 2.50143288301269e-06, + "loss": 0.6699, + "step": 22075 + }, + { + "epoch": 0.6765967880348167, + "grad_norm": 2.0099948673788375, + "learning_rate": 2.5010029893795546e-06, + "loss": 0.6368, + "step": 22076 + }, + { + "epoch": 0.676627436557558, + "grad_norm": 0.8019653405595906, + "learning_rate": 2.500573120369675e-06, + "loss": 0.4247, + "step": 22077 + }, + { + "epoch": 0.6766580850802991, + "grad_norm": 1.747480086691618, + "learning_rate": 
2.5001432759872867e-06, + "loss": 0.5674, + "step": 22078 + }, + { + "epoch": 0.6766887336030404, + "grad_norm": 1.8679074043556319, + "learning_rate": 2.4997134562366293e-06, + "loss": 0.6528, + "step": 22079 + }, + { + "epoch": 0.6767193821257815, + "grad_norm": 1.5584687130582588, + "learning_rate": 2.499283661121933e-06, + "loss": 0.5563, + "step": 22080 + }, + { + "epoch": 0.6767500306485228, + "grad_norm": 1.5223239879881023, + "learning_rate": 2.4988538906474357e-06, + "loss": 0.5508, + "step": 22081 + }, + { + "epoch": 0.6767806791712639, + "grad_norm": 0.7779617398240641, + "learning_rate": 2.4984241448173703e-06, + "loss": 0.4025, + "step": 22082 + }, + { + "epoch": 0.6768113276940051, + "grad_norm": 0.8173260351741337, + "learning_rate": 2.497994423635971e-06, + "loss": 0.4093, + "step": 22083 + }, + { + "epoch": 0.6768419762167464, + "grad_norm": 1.7163915218027708, + "learning_rate": 2.497564727107475e-06, + "loss": 0.5568, + "step": 22084 + }, + { + "epoch": 0.6768726247394875, + "grad_norm": 1.887957864404645, + "learning_rate": 2.4971350552361117e-06, + "loss": 0.6688, + "step": 22085 + }, + { + "epoch": 0.6769032732622288, + "grad_norm": 0.8432043998399672, + "learning_rate": 2.4967054080261173e-06, + "loss": 0.3969, + "step": 22086 + }, + { + "epoch": 0.6769339217849699, + "grad_norm": 0.7648287428436569, + "learning_rate": 2.496275785481727e-06, + "loss": 0.4099, + "step": 22087 + }, + { + "epoch": 0.6769645703077112, + "grad_norm": 1.752907445709292, + "learning_rate": 2.4958461876071704e-06, + "loss": 0.5929, + "step": 22088 + }, + { + "epoch": 0.6769952188304523, + "grad_norm": 1.7999728621026339, + "learning_rate": 2.4954166144066815e-06, + "loss": 0.6159, + "step": 22089 + }, + { + "epoch": 0.6770258673531936, + "grad_norm": 1.7493690404375586, + "learning_rate": 2.4949870658844953e-06, + "loss": 0.6099, + "step": 22090 + }, + { + "epoch": 0.6770565158759347, + "grad_norm": 0.7669097274596365, + "learning_rate": 2.4945575420448417e-06, + "loss": 0.3879, + "step": 22091 + }, + { + "epoch": 0.677087164398676, + "grad_norm": 2.014728729997503, + "learning_rate": 2.4941280428919543e-06, + "loss": 0.5879, + "step": 22092 + }, + { + "epoch": 0.6771178129214172, + "grad_norm": 1.7060726266757602, + "learning_rate": 2.493698568430063e-06, + "loss": 0.6367, + "step": 22093 + }, + { + "epoch": 0.6771484614441584, + "grad_norm": 1.6189614110320956, + "learning_rate": 2.493269118663401e-06, + "loss": 0.6123, + "step": 22094 + }, + { + "epoch": 0.6771791099668996, + "grad_norm": 1.6321311016761795, + "learning_rate": 2.492839693596202e-06, + "loss": 0.5136, + "step": 22095 + }, + { + "epoch": 0.6772097584896408, + "grad_norm": 1.632080681059797, + "learning_rate": 2.4924102932326927e-06, + "loss": 0.6574, + "step": 22096 + }, + { + "epoch": 0.677240407012382, + "grad_norm": 1.9015499063103012, + "learning_rate": 2.4919809175771066e-06, + "loss": 0.6719, + "step": 22097 + }, + { + "epoch": 0.6772710555351232, + "grad_norm": 1.908982424571469, + "learning_rate": 2.4915515666336758e-06, + "loss": 0.6364, + "step": 22098 + }, + { + "epoch": 0.6773017040578644, + "grad_norm": 1.783508269838168, + "learning_rate": 2.4911222404066274e-06, + "loss": 0.5161, + "step": 22099 + }, + { + "epoch": 0.6773323525806056, + "grad_norm": 1.8623668903991484, + "learning_rate": 2.4906929389001954e-06, + "loss": 0.6528, + "step": 22100 + }, + { + "epoch": 0.6773630011033468, + "grad_norm": 1.7167873136875864, + "learning_rate": 2.490263662118606e-06, + "loss": 0.557, + "step": 22101 + }, + { + 
"epoch": 0.6773936496260881, + "grad_norm": 1.8382905570412085, + "learning_rate": 2.489834410066091e-06, + "loss": 0.636, + "step": 22102 + }, + { + "epoch": 0.6774242981488292, + "grad_norm": 1.8862979669580024, + "learning_rate": 2.4894051827468817e-06, + "loss": 0.6665, + "step": 22103 + }, + { + "epoch": 0.6774549466715705, + "grad_norm": 1.9609740972579988, + "learning_rate": 2.4889759801652035e-06, + "loss": 0.6797, + "step": 22104 + }, + { + "epoch": 0.6774855951943116, + "grad_norm": 1.8368681250083, + "learning_rate": 2.488546802325288e-06, + "loss": 0.6336, + "step": 22105 + }, + { + "epoch": 0.6775162437170529, + "grad_norm": 1.7470984772480713, + "learning_rate": 2.488117649231365e-06, + "loss": 0.5296, + "step": 22106 + }, + { + "epoch": 0.677546892239794, + "grad_norm": 1.7362799265591538, + "learning_rate": 2.487688520887659e-06, + "loss": 0.571, + "step": 22107 + }, + { + "epoch": 0.6775775407625353, + "grad_norm": 1.7540845193846548, + "learning_rate": 2.487259417298401e-06, + "loss": 0.6543, + "step": 22108 + }, + { + "epoch": 0.6776081892852764, + "grad_norm": 0.8371459219132994, + "learning_rate": 2.4868303384678216e-06, + "loss": 0.4256, + "step": 22109 + }, + { + "epoch": 0.6776388378080177, + "grad_norm": 0.8115067220318264, + "learning_rate": 2.4864012844001435e-06, + "loss": 0.3952, + "step": 22110 + }, + { + "epoch": 0.6776694863307589, + "grad_norm": 1.5158694276749647, + "learning_rate": 2.4859722550995978e-06, + "loss": 0.5191, + "step": 22111 + }, + { + "epoch": 0.6777001348535001, + "grad_norm": 1.9880012088998145, + "learning_rate": 2.4855432505704095e-06, + "loss": 0.5315, + "step": 22112 + }, + { + "epoch": 0.6777307833762413, + "grad_norm": 1.868485396358371, + "learning_rate": 2.4851142708168075e-06, + "loss": 0.7636, + "step": 22113 + }, + { + "epoch": 0.6777614318989824, + "grad_norm": 1.8907334968607514, + "learning_rate": 2.484685315843019e-06, + "loss": 0.5232, + "step": 22114 + }, + { + "epoch": 0.6777920804217237, + "grad_norm": 1.7485869927768716, + "learning_rate": 2.484256385653268e-06, + "loss": 0.6323, + "step": 22115 + }, + { + "epoch": 0.6778227289444648, + "grad_norm": 1.7298194336127577, + "learning_rate": 2.483827480251783e-06, + "loss": 0.5326, + "step": 22116 + }, + { + "epoch": 0.6778533774672061, + "grad_norm": 1.6576609121666575, + "learning_rate": 2.483398599642791e-06, + "loss": 0.5979, + "step": 22117 + }, + { + "epoch": 0.6778840259899472, + "grad_norm": 2.066107630132374, + "learning_rate": 2.482969743830515e-06, + "loss": 0.6512, + "step": 22118 + }, + { + "epoch": 0.6779146745126885, + "grad_norm": 1.8733261701378106, + "learning_rate": 2.4825409128191818e-06, + "loss": 0.5617, + "step": 22119 + }, + { + "epoch": 0.6779453230354296, + "grad_norm": 0.8162664724014713, + "learning_rate": 2.482112106613019e-06, + "loss": 0.4314, + "step": 22120 + }, + { + "epoch": 0.6779759715581709, + "grad_norm": 1.6904906918916192, + "learning_rate": 2.4816833252162482e-06, + "loss": 0.5716, + "step": 22121 + }, + { + "epoch": 0.6780066200809121, + "grad_norm": 1.7598058837512092, + "learning_rate": 2.4812545686330976e-06, + "loss": 0.5503, + "step": 22122 + }, + { + "epoch": 0.6780372686036533, + "grad_norm": 1.74460951512267, + "learning_rate": 2.4808258368677863e-06, + "loss": 0.5931, + "step": 22123 + }, + { + "epoch": 0.6780679171263945, + "grad_norm": 0.7987961618432551, + "learning_rate": 2.4803971299245467e-06, + "loss": 0.4463, + "step": 22124 + }, + { + "epoch": 0.6780985656491357, + "grad_norm": 2.0578070309257295, + 
"learning_rate": 2.4799684478075985e-06, + "loss": 0.6937, + "step": 22125 + }, + { + "epoch": 0.6781292141718769, + "grad_norm": 1.724140416593605, + "learning_rate": 2.4795397905211644e-06, + "loss": 0.5566, + "step": 22126 + }, + { + "epoch": 0.6781598626946181, + "grad_norm": 1.6569814584316498, + "learning_rate": 2.479111158069469e-06, + "loss": 0.6147, + "step": 22127 + }, + { + "epoch": 0.6781905112173593, + "grad_norm": 1.8652322923203428, + "learning_rate": 2.478682550456739e-06, + "loss": 0.5568, + "step": 22128 + }, + { + "epoch": 0.6782211597401006, + "grad_norm": 1.7562727199283639, + "learning_rate": 2.4782539676871932e-06, + "loss": 0.6106, + "step": 22129 + }, + { + "epoch": 0.6782518082628417, + "grad_norm": 1.9674329733800457, + "learning_rate": 2.477825409765056e-06, + "loss": 0.6464, + "step": 22130 + }, + { + "epoch": 0.678282456785583, + "grad_norm": 1.7609015193463387, + "learning_rate": 2.4773968766945516e-06, + "loss": 0.4894, + "step": 22131 + }, + { + "epoch": 0.6783131053083241, + "grad_norm": 1.8381741950187307, + "learning_rate": 2.4769683684799003e-06, + "loss": 0.6387, + "step": 22132 + }, + { + "epoch": 0.6783437538310654, + "grad_norm": 1.8447949878205432, + "learning_rate": 2.4765398851253262e-06, + "loss": 0.6503, + "step": 22133 + }, + { + "epoch": 0.6783744023538065, + "grad_norm": 2.0512527477399907, + "learning_rate": 2.4761114266350473e-06, + "loss": 0.5745, + "step": 22134 + }, + { + "epoch": 0.6784050508765478, + "grad_norm": 0.7728596933506172, + "learning_rate": 2.4756829930132915e-06, + "loss": 0.4137, + "step": 22135 + }, + { + "epoch": 0.6784356993992889, + "grad_norm": 1.6678879999512009, + "learning_rate": 2.475254584264277e-06, + "loss": 0.5677, + "step": 22136 + }, + { + "epoch": 0.6784663479220302, + "grad_norm": 1.9777058977560575, + "learning_rate": 2.4748262003922234e-06, + "loss": 0.5545, + "step": 22137 + }, + { + "epoch": 0.6784969964447713, + "grad_norm": 1.7026074488158234, + "learning_rate": 2.4743978414013535e-06, + "loss": 0.603, + "step": 22138 + }, + { + "epoch": 0.6785276449675126, + "grad_norm": 1.6801830897177548, + "learning_rate": 2.4739695072958898e-06, + "loss": 0.6452, + "step": 22139 + }, + { + "epoch": 0.6785582934902538, + "grad_norm": 1.8703800312876238, + "learning_rate": 2.4735411980800483e-06, + "loss": 0.5528, + "step": 22140 + }, + { + "epoch": 0.678588942012995, + "grad_norm": 1.7392470008773173, + "learning_rate": 2.4731129137580524e-06, + "loss": 0.6357, + "step": 22141 + }, + { + "epoch": 0.6786195905357362, + "grad_norm": 1.8981226545623282, + "learning_rate": 2.4726846543341215e-06, + "loss": 0.5771, + "step": 22142 + }, + { + "epoch": 0.6786502390584774, + "grad_norm": 1.8702221508595274, + "learning_rate": 2.472256419812477e-06, + "loss": 0.5195, + "step": 22143 + }, + { + "epoch": 0.6786808875812186, + "grad_norm": 1.8064998316176055, + "learning_rate": 2.471828210197337e-06, + "loss": 0.5495, + "step": 22144 + }, + { + "epoch": 0.6787115361039597, + "grad_norm": 1.8772201800343025, + "learning_rate": 2.4714000254929183e-06, + "loss": 0.5648, + "step": 22145 + }, + { + "epoch": 0.678742184626701, + "grad_norm": 1.9992299959960729, + "learning_rate": 2.470971865703442e-06, + "loss": 0.5965, + "step": 22146 + }, + { + "epoch": 0.6787728331494421, + "grad_norm": 1.8047285711506706, + "learning_rate": 2.4705437308331292e-06, + "loss": 0.5655, + "step": 22147 + }, + { + "epoch": 0.6788034816721834, + "grad_norm": 1.5659417653963477, + "learning_rate": 2.4701156208861944e-06, + "loss": 0.5697, + "step": 
22148 + }, + { + "epoch": 0.6788341301949246, + "grad_norm": 1.8171345159650765, + "learning_rate": 2.469687535866858e-06, + "loss": 0.6165, + "step": 22149 + }, + { + "epoch": 0.6788647787176658, + "grad_norm": 1.8767404531931196, + "learning_rate": 2.469259475779337e-06, + "loss": 0.5543, + "step": 22150 + }, + { + "epoch": 0.678895427240407, + "grad_norm": 0.7693886146977718, + "learning_rate": 2.468831440627852e-06, + "loss": 0.4028, + "step": 22151 + }, + { + "epoch": 0.6789260757631482, + "grad_norm": 1.9425997138046938, + "learning_rate": 2.468403430416618e-06, + "loss": 0.7137, + "step": 22152 + }, + { + "epoch": 0.6789567242858894, + "grad_norm": 1.889810633354062, + "learning_rate": 2.46797544514985e-06, + "loss": 0.6084, + "step": 22153 + }, + { + "epoch": 0.6789873728086306, + "grad_norm": 1.8735649544367616, + "learning_rate": 2.467547484831771e-06, + "loss": 0.5964, + "step": 22154 + }, + { + "epoch": 0.6790180213313718, + "grad_norm": 1.7670100799629056, + "learning_rate": 2.4671195494665946e-06, + "loss": 0.6537, + "step": 22155 + }, + { + "epoch": 0.679048669854113, + "grad_norm": 1.8914525558118886, + "learning_rate": 2.4666916390585354e-06, + "loss": 0.5405, + "step": 22156 + }, + { + "epoch": 0.6790793183768542, + "grad_norm": 0.7858442734599289, + "learning_rate": 2.4662637536118116e-06, + "loss": 0.4219, + "step": 22157 + }, + { + "epoch": 0.6791099668995955, + "grad_norm": 1.7408161181308224, + "learning_rate": 2.4658358931306415e-06, + "loss": 0.5804, + "step": 22158 + }, + { + "epoch": 0.6791406154223366, + "grad_norm": 1.6513006548860312, + "learning_rate": 2.465408057619237e-06, + "loss": 0.5612, + "step": 22159 + }, + { + "epoch": 0.6791712639450779, + "grad_norm": 1.8863853910613464, + "learning_rate": 2.4649802470818146e-06, + "loss": 0.4851, + "step": 22160 + }, + { + "epoch": 0.679201912467819, + "grad_norm": 1.8346147307218903, + "learning_rate": 2.464552461522591e-06, + "loss": 0.5619, + "step": 22161 + }, + { + "epoch": 0.6792325609905603, + "grad_norm": 1.9031294327252037, + "learning_rate": 2.4641247009457827e-06, + "loss": 0.6813, + "step": 22162 + }, + { + "epoch": 0.6792632095133014, + "grad_norm": 2.217629267967749, + "learning_rate": 2.463696965355602e-06, + "loss": 0.608, + "step": 22163 + }, + { + "epoch": 0.6792938580360427, + "grad_norm": 1.923058910538259, + "learning_rate": 2.463269254756261e-06, + "loss": 0.6651, + "step": 22164 + }, + { + "epoch": 0.6793245065587838, + "grad_norm": 1.69221645485473, + "learning_rate": 2.4628415691519804e-06, + "loss": 0.6327, + "step": 22165 + }, + { + "epoch": 0.6793551550815251, + "grad_norm": 1.874401608547081, + "learning_rate": 2.462413908546971e-06, + "loss": 0.6427, + "step": 22166 + }, + { + "epoch": 0.6793858036042663, + "grad_norm": 1.905702482927558, + "learning_rate": 2.4619862729454447e-06, + "loss": 0.7194, + "step": 22167 + }, + { + "epoch": 0.6794164521270075, + "grad_norm": 1.8816313688511732, + "learning_rate": 2.4615586623516174e-06, + "loss": 0.6297, + "step": 22168 + }, + { + "epoch": 0.6794471006497487, + "grad_norm": 0.8022574955674248, + "learning_rate": 2.4611310767697015e-06, + "loss": 0.4402, + "step": 22169 + }, + { + "epoch": 0.6794777491724899, + "grad_norm": 1.7754856880850667, + "learning_rate": 2.4607035162039135e-06, + "loss": 0.6289, + "step": 22170 + }, + { + "epoch": 0.6795083976952311, + "grad_norm": 1.6635511302103165, + "learning_rate": 2.460275980658461e-06, + "loss": 0.4871, + "step": 22171 + }, + { + "epoch": 0.6795390462179723, + "grad_norm": 1.9718407802621662, + 
"learning_rate": 2.459848470137559e-06, + "loss": 0.6062, + "step": 22172 + }, + { + "epoch": 0.6795696947407135, + "grad_norm": 0.7934360335861526, + "learning_rate": 2.459420984645422e-06, + "loss": 0.42, + "step": 22173 + }, + { + "epoch": 0.6796003432634548, + "grad_norm": 1.9232770217701098, + "learning_rate": 2.45899352418626e-06, + "loss": 0.6292, + "step": 22174 + }, + { + "epoch": 0.6796309917861959, + "grad_norm": 1.7733673900068936, + "learning_rate": 2.458566088764281e-06, + "loss": 0.5646, + "step": 22175 + }, + { + "epoch": 0.679661640308937, + "grad_norm": 1.738369382794623, + "learning_rate": 2.458138678383705e-06, + "loss": 0.5543, + "step": 22176 + }, + { + "epoch": 0.6796922888316783, + "grad_norm": 2.0731817479149104, + "learning_rate": 2.457711293048736e-06, + "loss": 0.6398, + "step": 22177 + }, + { + "epoch": 0.6797229373544195, + "grad_norm": 1.6565864114388784, + "learning_rate": 2.4572839327635904e-06, + "loss": 0.5056, + "step": 22178 + }, + { + "epoch": 0.6797535858771607, + "grad_norm": 1.734399557264204, + "learning_rate": 2.4568565975324755e-06, + "loss": 0.5991, + "step": 22179 + }, + { + "epoch": 0.6797842343999019, + "grad_norm": 1.790018878186368, + "learning_rate": 2.456429287359603e-06, + "loss": 0.5684, + "step": 22180 + }, + { + "epoch": 0.6798148829226431, + "grad_norm": 1.844725449838903, + "learning_rate": 2.456002002249185e-06, + "loss": 0.6354, + "step": 22181 + }, + { + "epoch": 0.6798455314453843, + "grad_norm": 0.7957103545131919, + "learning_rate": 2.4555747422054287e-06, + "loss": 0.4133, + "step": 22182 + }, + { + "epoch": 0.6798761799681255, + "grad_norm": 1.8553679596765407, + "learning_rate": 2.4551475072325453e-06, + "loss": 0.6316, + "step": 22183 + }, + { + "epoch": 0.6799068284908667, + "grad_norm": 1.9003486393871305, + "learning_rate": 2.454720297334747e-06, + "loss": 0.5024, + "step": 22184 + }, + { + "epoch": 0.679937477013608, + "grad_norm": 1.8460047279337624, + "learning_rate": 2.45429311251624e-06, + "loss": 0.5515, + "step": 22185 + }, + { + "epoch": 0.6799681255363491, + "grad_norm": 1.7374583750125874, + "learning_rate": 2.453865952781231e-06, + "loss": 0.5526, + "step": 22186 + }, + { + "epoch": 0.6799987740590904, + "grad_norm": 2.015970552868417, + "learning_rate": 2.453438818133936e-06, + "loss": 0.6712, + "step": 22187 + }, + { + "epoch": 0.6800294225818315, + "grad_norm": 1.638159502543959, + "learning_rate": 2.4530117085785576e-06, + "loss": 0.5866, + "step": 22188 + }, + { + "epoch": 0.6800600711045728, + "grad_norm": 1.7931041542761101, + "learning_rate": 2.452584624119309e-06, + "loss": 0.5403, + "step": 22189 + }, + { + "epoch": 0.6800907196273139, + "grad_norm": 1.6268489743831958, + "learning_rate": 2.4521575647603936e-06, + "loss": 0.5284, + "step": 22190 + }, + { + "epoch": 0.6801213681500552, + "grad_norm": 1.8987654282219573, + "learning_rate": 2.451730530506022e-06, + "loss": 0.671, + "step": 22191 + }, + { + "epoch": 0.6801520166727963, + "grad_norm": 2.128792108919214, + "learning_rate": 2.451303521360403e-06, + "loss": 0.5668, + "step": 22192 + }, + { + "epoch": 0.6801826651955376, + "grad_norm": 1.8819524192426453, + "learning_rate": 2.4508765373277412e-06, + "loss": 0.689, + "step": 22193 + }, + { + "epoch": 0.6802133137182788, + "grad_norm": 1.8171934257178568, + "learning_rate": 2.450449578412244e-06, + "loss": 0.5777, + "step": 22194 + }, + { + "epoch": 0.68024396224102, + "grad_norm": 1.898806861425785, + "learning_rate": 2.4500226446181217e-06, + "loss": 0.6558, + "step": 22195 + }, + { + 
"epoch": 0.6802746107637612, + "grad_norm": 1.7621078338129077, + "learning_rate": 2.4495957359495774e-06, + "loss": 0.5027, + "step": 22196 + }, + { + "epoch": 0.6803052592865024, + "grad_norm": 1.6972569643945594, + "learning_rate": 2.449168852410821e-06, + "loss": 0.5983, + "step": 22197 + }, + { + "epoch": 0.6803359078092436, + "grad_norm": 1.8395556146026724, + "learning_rate": 2.4487419940060538e-06, + "loss": 0.6523, + "step": 22198 + }, + { + "epoch": 0.6803665563319848, + "grad_norm": 0.8011180085235149, + "learning_rate": 2.448315160739485e-06, + "loss": 0.4189, + "step": 22199 + }, + { + "epoch": 0.680397204854726, + "grad_norm": 1.7942916027602, + "learning_rate": 2.447888352615321e-06, + "loss": 0.5953, + "step": 22200 + }, + { + "epoch": 0.6804278533774673, + "grad_norm": 1.467756757501893, + "learning_rate": 2.447461569637765e-06, + "loss": 0.6225, + "step": 22201 + }, + { + "epoch": 0.6804585019002084, + "grad_norm": 1.89217818181874, + "learning_rate": 2.447034811811023e-06, + "loss": 0.6725, + "step": 22202 + }, + { + "epoch": 0.6804891504229497, + "grad_norm": 1.6851482656104524, + "learning_rate": 2.446608079139302e-06, + "loss": 0.5535, + "step": 22203 + }, + { + "epoch": 0.6805197989456908, + "grad_norm": 1.7687199485048901, + "learning_rate": 2.446181371626803e-06, + "loss": 0.5385, + "step": 22204 + }, + { + "epoch": 0.6805504474684321, + "grad_norm": 1.701336599163847, + "learning_rate": 2.445754689277732e-06, + "loss": 0.6224, + "step": 22205 + }, + { + "epoch": 0.6805810959911732, + "grad_norm": 1.676134041568258, + "learning_rate": 2.4453280320962964e-06, + "loss": 0.5972, + "step": 22206 + }, + { + "epoch": 0.6806117445139144, + "grad_norm": 0.7614474829017344, + "learning_rate": 2.4449014000866948e-06, + "loss": 0.4265, + "step": 22207 + }, + { + "epoch": 0.6806423930366556, + "grad_norm": 1.884995930807032, + "learning_rate": 2.4444747932531354e-06, + "loss": 0.6409, + "step": 22208 + }, + { + "epoch": 0.6806730415593968, + "grad_norm": 1.7821299870731342, + "learning_rate": 2.4440482115998182e-06, + "loss": 0.6095, + "step": 22209 + }, + { + "epoch": 0.680703690082138, + "grad_norm": 1.9894505833824903, + "learning_rate": 2.443621655130947e-06, + "loss": 0.7023, + "step": 22210 + }, + { + "epoch": 0.6807343386048792, + "grad_norm": 1.8128382533163276, + "learning_rate": 2.4431951238507285e-06, + "loss": 0.6203, + "step": 22211 + }, + { + "epoch": 0.6807649871276205, + "grad_norm": 1.99812256441558, + "learning_rate": 2.442768617763361e-06, + "loss": 0.5075, + "step": 22212 + }, + { + "epoch": 0.6807956356503616, + "grad_norm": 1.844600551018593, + "learning_rate": 2.4423421368730477e-06, + "loss": 0.6227, + "step": 22213 + }, + { + "epoch": 0.6808262841731029, + "grad_norm": 0.7897634462775404, + "learning_rate": 2.441915681183994e-06, + "loss": 0.4189, + "step": 22214 + }, + { + "epoch": 0.680856932695844, + "grad_norm": 1.771017399367015, + "learning_rate": 2.441489250700398e-06, + "loss": 0.5153, + "step": 22215 + }, + { + "epoch": 0.6808875812185853, + "grad_norm": 0.7492555038192007, + "learning_rate": 2.4410628454264625e-06, + "loss": 0.3984, + "step": 22216 + }, + { + "epoch": 0.6809182297413264, + "grad_norm": 1.9418524259895613, + "learning_rate": 2.4406364653663917e-06, + "loss": 0.6545, + "step": 22217 + }, + { + "epoch": 0.6809488782640677, + "grad_norm": 0.7981082887418481, + "learning_rate": 2.4402101105243824e-06, + "loss": 0.4406, + "step": 22218 + }, + { + "epoch": 0.6809795267868088, + "grad_norm": 1.8217417649970782, + "learning_rate": 
2.4397837809046405e-06, + "loss": 0.6029, + "step": 22219 + }, + { + "epoch": 0.6810101753095501, + "grad_norm": 1.6062527762473762, + "learning_rate": 2.4393574765113616e-06, + "loss": 0.5339, + "step": 22220 + }, + { + "epoch": 0.6810408238322913, + "grad_norm": 2.0538918722021893, + "learning_rate": 2.438931197348749e-06, + "loss": 0.624, + "step": 22221 + }, + { + "epoch": 0.6810714723550325, + "grad_norm": 0.7941970888992014, + "learning_rate": 2.438504943421004e-06, + "loss": 0.4095, + "step": 22222 + }, + { + "epoch": 0.6811021208777737, + "grad_norm": 1.7219589255839378, + "learning_rate": 2.4380787147323236e-06, + "loss": 0.5963, + "step": 22223 + }, + { + "epoch": 0.6811327694005149, + "grad_norm": 0.7928901371943714, + "learning_rate": 2.437652511286909e-06, + "loss": 0.4114, + "step": 22224 + }, + { + "epoch": 0.6811634179232561, + "grad_norm": 1.9320117576876665, + "learning_rate": 2.4372263330889616e-06, + "loss": 0.7193, + "step": 22225 + }, + { + "epoch": 0.6811940664459973, + "grad_norm": 1.7655380087186971, + "learning_rate": 2.436800180142677e-06, + "loss": 0.5802, + "step": 22226 + }, + { + "epoch": 0.6812247149687385, + "grad_norm": 1.9372267179740679, + "learning_rate": 2.4363740524522567e-06, + "loss": 0.6096, + "step": 22227 + }, + { + "epoch": 0.6812553634914797, + "grad_norm": 1.6594207010686344, + "learning_rate": 2.4359479500218995e-06, + "loss": 0.5729, + "step": 22228 + }, + { + "epoch": 0.6812860120142209, + "grad_norm": 1.8227799056880223, + "learning_rate": 2.4355218728558022e-06, + "loss": 0.5489, + "step": 22229 + }, + { + "epoch": 0.6813166605369622, + "grad_norm": 1.812710335630218, + "learning_rate": 2.435095820958166e-06, + "loss": 0.5649, + "step": 22230 + }, + { + "epoch": 0.6813473090597033, + "grad_norm": 1.6352853880570686, + "learning_rate": 2.4346697943331826e-06, + "loss": 0.5808, + "step": 22231 + }, + { + "epoch": 0.6813779575824446, + "grad_norm": 1.7402821105081752, + "learning_rate": 2.434243792985058e-06, + "loss": 0.6709, + "step": 22232 + }, + { + "epoch": 0.6814086061051857, + "grad_norm": 1.7239010494643723, + "learning_rate": 2.433817816917986e-06, + "loss": 0.6207, + "step": 22233 + }, + { + "epoch": 0.681439254627927, + "grad_norm": 0.8160889919563656, + "learning_rate": 2.4333918661361616e-06, + "loss": 0.4172, + "step": 22234 + }, + { + "epoch": 0.6814699031506681, + "grad_norm": 0.7866031256817667, + "learning_rate": 2.432965940643784e-06, + "loss": 0.3952, + "step": 22235 + }, + { + "epoch": 0.6815005516734094, + "grad_norm": 1.8063846625234488, + "learning_rate": 2.432540040445052e-06, + "loss": 0.6178, + "step": 22236 + }, + { + "epoch": 0.6815312001961505, + "grad_norm": 1.835095630615003, + "learning_rate": 2.4321141655441573e-06, + "loss": 0.5641, + "step": 22237 + }, + { + "epoch": 0.6815618487188917, + "grad_norm": 1.5702001879565612, + "learning_rate": 2.4316883159452985e-06, + "loss": 0.5037, + "step": 22238 + }, + { + "epoch": 0.681592497241633, + "grad_norm": 2.00234479749754, + "learning_rate": 2.4312624916526744e-06, + "loss": 0.6448, + "step": 22239 + }, + { + "epoch": 0.6816231457643741, + "grad_norm": 1.7969986596863012, + "learning_rate": 2.4308366926704763e-06, + "loss": 0.5536, + "step": 22240 + }, + { + "epoch": 0.6816537942871154, + "grad_norm": 1.7085951992369948, + "learning_rate": 2.4304109190029036e-06, + "loss": 0.5857, + "step": 22241 + }, + { + "epoch": 0.6816844428098565, + "grad_norm": 1.670086737786867, + "learning_rate": 2.4299851706541473e-06, + "loss": 0.6525, + "step": 22242 + }, + { + 
"epoch": 0.6817150913325978, + "grad_norm": 0.7894654861289839, + "learning_rate": 2.4295594476284044e-06, + "loss": 0.4089, + "step": 22243 + }, + { + "epoch": 0.6817457398553389, + "grad_norm": 0.8538657006831514, + "learning_rate": 2.429133749929873e-06, + "loss": 0.4145, + "step": 22244 + }, + { + "epoch": 0.6817763883780802, + "grad_norm": 1.8143979409286075, + "learning_rate": 2.4287080775627413e-06, + "loss": 0.544, + "step": 22245 + }, + { + "epoch": 0.6818070369008213, + "grad_norm": 1.6148656193888766, + "learning_rate": 2.4282824305312075e-06, + "loss": 0.5494, + "step": 22246 + }, + { + "epoch": 0.6818376854235626, + "grad_norm": 1.8759464961244685, + "learning_rate": 2.4278568088394674e-06, + "loss": 0.5683, + "step": 22247 + }, + { + "epoch": 0.6818683339463038, + "grad_norm": 2.08920755973338, + "learning_rate": 2.4274312124917094e-06, + "loss": 0.5781, + "step": 22248 + }, + { + "epoch": 0.681898982469045, + "grad_norm": 1.740416682365453, + "learning_rate": 2.427005641492132e-06, + "loss": 0.5516, + "step": 22249 + }, + { + "epoch": 0.6819296309917862, + "grad_norm": 1.9573003446009967, + "learning_rate": 2.4265800958449227e-06, + "loss": 0.5769, + "step": 22250 + }, + { + "epoch": 0.6819602795145274, + "grad_norm": 2.1292911765830618, + "learning_rate": 2.426154575554282e-06, + "loss": 0.5921, + "step": 22251 + }, + { + "epoch": 0.6819909280372686, + "grad_norm": 1.7427049435073831, + "learning_rate": 2.4257290806243983e-06, + "loss": 0.5538, + "step": 22252 + }, + { + "epoch": 0.6820215765600098, + "grad_norm": 1.6239078806641416, + "learning_rate": 2.4253036110594634e-06, + "loss": 0.4877, + "step": 22253 + }, + { + "epoch": 0.682052225082751, + "grad_norm": 1.8485831709297138, + "learning_rate": 2.4248781668636704e-06, + "loss": 0.6511, + "step": 22254 + }, + { + "epoch": 0.6820828736054922, + "grad_norm": 1.8250894401735789, + "learning_rate": 2.424452748041214e-06, + "loss": 0.5939, + "step": 22255 + }, + { + "epoch": 0.6821135221282334, + "grad_norm": 1.7092632932335532, + "learning_rate": 2.424027354596281e-06, + "loss": 0.756, + "step": 22256 + }, + { + "epoch": 0.6821441706509747, + "grad_norm": 0.7807651935704306, + "learning_rate": 2.4236019865330664e-06, + "loss": 0.4075, + "step": 22257 + }, + { + "epoch": 0.6821748191737158, + "grad_norm": 1.872412313551224, + "learning_rate": 2.4231766438557604e-06, + "loss": 0.5371, + "step": 22258 + }, + { + "epoch": 0.6822054676964571, + "grad_norm": 1.620149861148656, + "learning_rate": 2.4227513265685558e-06, + "loss": 0.506, + "step": 22259 + }, + { + "epoch": 0.6822361162191982, + "grad_norm": 1.82639062986331, + "learning_rate": 2.4223260346756416e-06, + "loss": 0.6671, + "step": 22260 + }, + { + "epoch": 0.6822667647419395, + "grad_norm": 1.7956408390668304, + "learning_rate": 2.421900768181205e-06, + "loss": 0.5934, + "step": 22261 + }, + { + "epoch": 0.6822974132646806, + "grad_norm": 1.6631240003709593, + "learning_rate": 2.421475527089444e-06, + "loss": 0.5538, + "step": 22262 + }, + { + "epoch": 0.6823280617874219, + "grad_norm": 1.6648961586979663, + "learning_rate": 2.421050311404544e-06, + "loss": 0.6072, + "step": 22263 + }, + { + "epoch": 0.682358710310163, + "grad_norm": 1.7343461245227494, + "learning_rate": 2.4206251211306935e-06, + "loss": 0.593, + "step": 22264 + }, + { + "epoch": 0.6823893588329043, + "grad_norm": 0.8253904709752574, + "learning_rate": 2.4201999562720835e-06, + "loss": 0.4307, + "step": 22265 + }, + { + "epoch": 0.6824200073556455, + "grad_norm": 1.5758305889722146, + 
"learning_rate": 2.419774816832905e-06, + "loss": 0.5019, + "step": 22266 + }, + { + "epoch": 0.6824506558783867, + "grad_norm": 1.8432791793923686, + "learning_rate": 2.4193497028173435e-06, + "loss": 0.6453, + "step": 22267 + }, + { + "epoch": 0.6824813044011279, + "grad_norm": 1.7911598149937076, + "learning_rate": 2.4189246142295904e-06, + "loss": 0.5541, + "step": 22268 + }, + { + "epoch": 0.682511952923869, + "grad_norm": 1.6477006882084353, + "learning_rate": 2.418499551073833e-06, + "loss": 0.6113, + "step": 22269 + }, + { + "epoch": 0.6825426014466103, + "grad_norm": 1.680157107062543, + "learning_rate": 2.4180745133542617e-06, + "loss": 0.5501, + "step": 22270 + }, + { + "epoch": 0.6825732499693514, + "grad_norm": 1.8355406994929595, + "learning_rate": 2.4176495010750626e-06, + "loss": 0.6198, + "step": 22271 + }, + { + "epoch": 0.6826038984920927, + "grad_norm": 3.231379786524138, + "learning_rate": 2.4172245142404207e-06, + "loss": 0.5283, + "step": 22272 + }, + { + "epoch": 0.6826345470148338, + "grad_norm": 1.664699709322351, + "learning_rate": 2.4167995528545296e-06, + "loss": 0.5577, + "step": 22273 + }, + { + "epoch": 0.6826651955375751, + "grad_norm": 1.760246530601096, + "learning_rate": 2.416374616921574e-06, + "loss": 0.5816, + "step": 22274 + }, + { + "epoch": 0.6826958440603162, + "grad_norm": 1.8094408684618897, + "learning_rate": 2.415949706445738e-06, + "loss": 0.6038, + "step": 22275 + }, + { + "epoch": 0.6827264925830575, + "grad_norm": 2.0335548421167613, + "learning_rate": 2.415524821431211e-06, + "loss": 0.5978, + "step": 22276 + }, + { + "epoch": 0.6827571411057987, + "grad_norm": 1.9697060201132164, + "learning_rate": 2.415099961882179e-06, + "loss": 0.5678, + "step": 22277 + }, + { + "epoch": 0.6827877896285399, + "grad_norm": 1.7553524839060062, + "learning_rate": 2.4146751278028306e-06, + "loss": 0.571, + "step": 22278 + }, + { + "epoch": 0.6828184381512811, + "grad_norm": 1.7548102298664046, + "learning_rate": 2.4142503191973475e-06, + "loss": 0.4923, + "step": 22279 + }, + { + "epoch": 0.6828490866740223, + "grad_norm": 1.9597265270741464, + "learning_rate": 2.4138255360699183e-06, + "loss": 0.5729, + "step": 22280 + }, + { + "epoch": 0.6828797351967635, + "grad_norm": 1.5578953463435201, + "learning_rate": 2.4134007784247287e-06, + "loss": 0.5547, + "step": 22281 + }, + { + "epoch": 0.6829103837195047, + "grad_norm": 1.9079732307645603, + "learning_rate": 2.4129760462659634e-06, + "loss": 0.6653, + "step": 22282 + }, + { + "epoch": 0.6829410322422459, + "grad_norm": 1.6988644535471387, + "learning_rate": 2.4125513395978034e-06, + "loss": 0.609, + "step": 22283 + }, + { + "epoch": 0.6829716807649872, + "grad_norm": 1.847402008572745, + "learning_rate": 2.4121266584244407e-06, + "loss": 0.5745, + "step": 22284 + }, + { + "epoch": 0.6830023292877283, + "grad_norm": 2.171856052079096, + "learning_rate": 2.411702002750056e-06, + "loss": 0.6097, + "step": 22285 + }, + { + "epoch": 0.6830329778104696, + "grad_norm": 0.8457899208061878, + "learning_rate": 2.4112773725788324e-06, + "loss": 0.4064, + "step": 22286 + }, + { + "epoch": 0.6830636263332107, + "grad_norm": 1.7775220615029685, + "learning_rate": 2.4108527679149548e-06, + "loss": 0.5981, + "step": 22287 + }, + { + "epoch": 0.683094274855952, + "grad_norm": 0.8240279509742976, + "learning_rate": 2.4104281887626075e-06, + "loss": 0.4237, + "step": 22288 + }, + { + "epoch": 0.6831249233786931, + "grad_norm": 2.2080907072916487, + "learning_rate": 2.4100036351259754e-06, + "loss": 0.7361, + "step": 22289 
+ }, + { + "epoch": 0.6831555719014344, + "grad_norm": 2.013668914186918, + "learning_rate": 2.4095791070092385e-06, + "loss": 0.5794, + "step": 22290 + }, + { + "epoch": 0.6831862204241755, + "grad_norm": 0.8190901673119516, + "learning_rate": 2.4091546044165816e-06, + "loss": 0.421, + "step": 22291 + }, + { + "epoch": 0.6832168689469168, + "grad_norm": 1.8633861768510511, + "learning_rate": 2.4087301273521883e-06, + "loss": 0.6616, + "step": 22292 + }, + { + "epoch": 0.683247517469658, + "grad_norm": 1.7959739682240812, + "learning_rate": 2.408305675820241e-06, + "loss": 0.5957, + "step": 22293 + }, + { + "epoch": 0.6832781659923992, + "grad_norm": 1.9509577237175915, + "learning_rate": 2.407881249824919e-06, + "loss": 0.6521, + "step": 22294 + }, + { + "epoch": 0.6833088145151404, + "grad_norm": 1.7575808568006295, + "learning_rate": 2.407456849370406e-06, + "loss": 0.647, + "step": 22295 + }, + { + "epoch": 0.6833394630378816, + "grad_norm": 0.8068847751724794, + "learning_rate": 2.407032474460884e-06, + "loss": 0.4098, + "step": 22296 + }, + { + "epoch": 0.6833701115606228, + "grad_norm": 1.9522072282157636, + "learning_rate": 2.406608125100536e-06, + "loss": 0.6454, + "step": 22297 + }, + { + "epoch": 0.683400760083364, + "grad_norm": 1.8871347536179863, + "learning_rate": 2.4061838012935405e-06, + "loss": 0.6322, + "step": 22298 + }, + { + "epoch": 0.6834314086061052, + "grad_norm": 2.04932085795346, + "learning_rate": 2.40575950304408e-06, + "loss": 0.7057, + "step": 22299 + }, + { + "epoch": 0.6834620571288463, + "grad_norm": 2.011622822042383, + "learning_rate": 2.405335230356336e-06, + "loss": 0.5762, + "step": 22300 + }, + { + "epoch": 0.6834927056515876, + "grad_norm": 1.8664330855839608, + "learning_rate": 2.404910983234488e-06, + "loss": 0.6121, + "step": 22301 + }, + { + "epoch": 0.6835233541743287, + "grad_norm": 1.921329649782291, + "learning_rate": 2.404486761682712e-06, + "loss": 0.5825, + "step": 22302 + }, + { + "epoch": 0.68355400269707, + "grad_norm": 1.7916304569403554, + "learning_rate": 2.4040625657051965e-06, + "loss": 0.5501, + "step": 22303 + }, + { + "epoch": 0.6835846512198112, + "grad_norm": 1.9717251965397486, + "learning_rate": 2.403638395306114e-06, + "loss": 0.5527, + "step": 22304 + }, + { + "epoch": 0.6836152997425524, + "grad_norm": 1.7142858448901184, + "learning_rate": 2.4032142504896494e-06, + "loss": 0.6304, + "step": 22305 + }, + { + "epoch": 0.6836459482652936, + "grad_norm": 1.6777824877763337, + "learning_rate": 2.4027901312599773e-06, + "loss": 0.5436, + "step": 22306 + }, + { + "epoch": 0.6836765967880348, + "grad_norm": 1.7477590811972374, + "learning_rate": 2.4023660376212783e-06, + "loss": 0.5608, + "step": 22307 + }, + { + "epoch": 0.683707245310776, + "grad_norm": 1.7178681614839721, + "learning_rate": 2.401941969577733e-06, + "loss": 0.5517, + "step": 22308 + }, + { + "epoch": 0.6837378938335172, + "grad_norm": 1.5584984496207375, + "learning_rate": 2.4015179271335167e-06, + "loss": 0.5157, + "step": 22309 + }, + { + "epoch": 0.6837685423562584, + "grad_norm": 0.8096446901814351, + "learning_rate": 2.4010939102928086e-06, + "loss": 0.4073, + "step": 22310 + }, + { + "epoch": 0.6837991908789997, + "grad_norm": 2.2077505418897982, + "learning_rate": 2.4006699190597895e-06, + "loss": 0.6668, + "step": 22311 + }, + { + "epoch": 0.6838298394017408, + "grad_norm": 1.6429154301797786, + "learning_rate": 2.400245953438635e-06, + "loss": 0.6816, + "step": 22312 + }, + { + "epoch": 0.6838604879244821, + "grad_norm": 0.8204543265073959, + 
"learning_rate": 2.399822013433518e-06, + "loss": 0.4087, + "step": 22313 + }, + { + "epoch": 0.6838911364472232, + "grad_norm": 1.5623885950547876, + "learning_rate": 2.3993980990486238e-06, + "loss": 0.5447, + "step": 22314 + }, + { + "epoch": 0.6839217849699645, + "grad_norm": 1.8765534408005808, + "learning_rate": 2.3989742102881234e-06, + "loss": 0.6768, + "step": 22315 + }, + { + "epoch": 0.6839524334927056, + "grad_norm": 1.959605939595533, + "learning_rate": 2.398550347156198e-06, + "loss": 0.4711, + "step": 22316 + }, + { + "epoch": 0.6839830820154469, + "grad_norm": 1.8073413812207373, + "learning_rate": 2.39812650965702e-06, + "loss": 0.5391, + "step": 22317 + }, + { + "epoch": 0.684013730538188, + "grad_norm": 1.8444616650546446, + "learning_rate": 2.3977026977947666e-06, + "loss": 0.5964, + "step": 22318 + }, + { + "epoch": 0.6840443790609293, + "grad_norm": 0.7871803155026944, + "learning_rate": 2.397278911573617e-06, + "loss": 0.4304, + "step": 22319 + }, + { + "epoch": 0.6840750275836704, + "grad_norm": 1.5467307070249445, + "learning_rate": 2.3968551509977413e-06, + "loss": 0.4944, + "step": 22320 + }, + { + "epoch": 0.6841056761064117, + "grad_norm": 1.827863717100983, + "learning_rate": 2.396431416071318e-06, + "loss": 0.6288, + "step": 22321 + }, + { + "epoch": 0.6841363246291529, + "grad_norm": 2.0226536096582466, + "learning_rate": 2.396007706798525e-06, + "loss": 0.5542, + "step": 22322 + }, + { + "epoch": 0.6841669731518941, + "grad_norm": 2.0733353257254783, + "learning_rate": 2.3955840231835314e-06, + "loss": 0.6582, + "step": 22323 + }, + { + "epoch": 0.6841976216746353, + "grad_norm": 1.627903007154448, + "learning_rate": 2.395160365230515e-06, + "loss": 0.4836, + "step": 22324 + }, + { + "epoch": 0.6842282701973765, + "grad_norm": 1.8912043627270017, + "learning_rate": 2.3947367329436523e-06, + "loss": 0.5373, + "step": 22325 + }, + { + "epoch": 0.6842589187201177, + "grad_norm": 0.8185199553590968, + "learning_rate": 2.394313126327113e-06, + "loss": 0.3966, + "step": 22326 + }, + { + "epoch": 0.6842895672428589, + "grad_norm": 1.5190609825915984, + "learning_rate": 2.3938895453850753e-06, + "loss": 0.5288, + "step": 22327 + }, + { + "epoch": 0.6843202157656001, + "grad_norm": 0.7770081951277756, + "learning_rate": 2.393465990121708e-06, + "loss": 0.395, + "step": 22328 + }, + { + "epoch": 0.6843508642883414, + "grad_norm": 0.8351762381097857, + "learning_rate": 2.3930424605411885e-06, + "loss": 0.3987, + "step": 22329 + }, + { + "epoch": 0.6843815128110825, + "grad_norm": 1.6722980728531636, + "learning_rate": 2.392618956647689e-06, + "loss": 0.574, + "step": 22330 + }, + { + "epoch": 0.6844121613338237, + "grad_norm": 1.6315401548888309, + "learning_rate": 2.3921954784453814e-06, + "loss": 0.5275, + "step": 22331 + }, + { + "epoch": 0.6844428098565649, + "grad_norm": 1.8028967325507421, + "learning_rate": 2.3917720259384386e-06, + "loss": 0.5887, + "step": 22332 + }, + { + "epoch": 0.6844734583793061, + "grad_norm": 1.8405465081022363, + "learning_rate": 2.3913485991310352e-06, + "loss": 0.6591, + "step": 22333 + }, + { + "epoch": 0.6845041069020473, + "grad_norm": 1.7991910972522738, + "learning_rate": 2.3909251980273397e-06, + "loss": 0.5747, + "step": 22334 + }, + { + "epoch": 0.6845347554247885, + "grad_norm": 1.6838994946786106, + "learning_rate": 2.3905018226315256e-06, + "loss": 0.5195, + "step": 22335 + }, + { + "epoch": 0.6845654039475297, + "grad_norm": 1.7724477868339057, + "learning_rate": 2.3900784729477672e-06, + "loss": 0.5355, + "step": 22336 
+ }, + { + "epoch": 0.6845960524702709, + "grad_norm": 1.9335789312593858, + "learning_rate": 2.3896551489802307e-06, + "loss": 0.6376, + "step": 22337 + }, + { + "epoch": 0.6846267009930121, + "grad_norm": 1.7966128891863211, + "learning_rate": 2.389231850733092e-06, + "loss": 0.6615, + "step": 22338 + }, + { + "epoch": 0.6846573495157533, + "grad_norm": 2.0402309326212187, + "learning_rate": 2.388808578210518e-06, + "loss": 0.6216, + "step": 22339 + }, + { + "epoch": 0.6846879980384946, + "grad_norm": 0.8092609999597371, + "learning_rate": 2.3883853314166815e-06, + "loss": 0.422, + "step": 22340 + }, + { + "epoch": 0.6847186465612357, + "grad_norm": 1.759853295726998, + "learning_rate": 2.3879621103557545e-06, + "loss": 0.5798, + "step": 22341 + }, + { + "epoch": 0.684749295083977, + "grad_norm": 0.7934517008062152, + "learning_rate": 2.387538915031903e-06, + "loss": 0.4172, + "step": 22342 + }, + { + "epoch": 0.6847799436067181, + "grad_norm": 2.013485097012287, + "learning_rate": 2.3871157454492987e-06, + "loss": 0.6065, + "step": 22343 + }, + { + "epoch": 0.6848105921294594, + "grad_norm": 0.8279849538835156, + "learning_rate": 2.386692601612114e-06, + "loss": 0.4029, + "step": 22344 + }, + { + "epoch": 0.6848412406522005, + "grad_norm": 1.5022711658478052, + "learning_rate": 2.386269483524513e-06, + "loss": 0.5106, + "step": 22345 + }, + { + "epoch": 0.6848718891749418, + "grad_norm": 0.7714693215930606, + "learning_rate": 2.3858463911906704e-06, + "loss": 0.4291, + "step": 22346 + }, + { + "epoch": 0.684902537697683, + "grad_norm": 1.5825099185603957, + "learning_rate": 2.3854233246147494e-06, + "loss": 0.5416, + "step": 22347 + }, + { + "epoch": 0.6849331862204242, + "grad_norm": 1.7057848162713212, + "learning_rate": 2.3850002838009216e-06, + "loss": 0.6212, + "step": 22348 + }, + { + "epoch": 0.6849638347431654, + "grad_norm": 1.5769677770304684, + "learning_rate": 2.3845772687533576e-06, + "loss": 0.4725, + "step": 22349 + }, + { + "epoch": 0.6849944832659066, + "grad_norm": 1.657184699416461, + "learning_rate": 2.384154279476221e-06, + "loss": 0.5547, + "step": 22350 + }, + { + "epoch": 0.6850251317886478, + "grad_norm": 1.8640011794604823, + "learning_rate": 2.383731315973681e-06, + "loss": 0.5701, + "step": 22351 + }, + { + "epoch": 0.685055780311389, + "grad_norm": 1.6647859603653576, + "learning_rate": 2.383308378249907e-06, + "loss": 0.5822, + "step": 22352 + }, + { + "epoch": 0.6850864288341302, + "grad_norm": 0.7806836680637119, + "learning_rate": 2.3828854663090646e-06, + "loss": 0.41, + "step": 22353 + }, + { + "epoch": 0.6851170773568714, + "grad_norm": 3.4838867053064733, + "learning_rate": 2.3824625801553203e-06, + "loss": 0.5286, + "step": 22354 + }, + { + "epoch": 0.6851477258796126, + "grad_norm": 1.8183788978581765, + "learning_rate": 2.382039719792844e-06, + "loss": 0.6056, + "step": 22355 + }, + { + "epoch": 0.6851783744023539, + "grad_norm": 1.5806201271231428, + "learning_rate": 2.3816168852257986e-06, + "loss": 0.6393, + "step": 22356 + }, + { + "epoch": 0.685209022925095, + "grad_norm": 2.1979025952612834, + "learning_rate": 2.381194076458354e-06, + "loss": 0.6812, + "step": 22357 + }, + { + "epoch": 0.6852396714478363, + "grad_norm": 1.586139695047709, + "learning_rate": 2.3807712934946703e-06, + "loss": 0.5543, + "step": 22358 + }, + { + "epoch": 0.6852703199705774, + "grad_norm": 1.831877145973323, + "learning_rate": 2.3803485363389205e-06, + "loss": 0.6391, + "step": 22359 + }, + { + "epoch": 0.6853009684933187, + "grad_norm": 1.8777156297821678, + 
"learning_rate": 2.3799258049952674e-06, + "loss": 0.5242, + "step": 22360 + }, + { + "epoch": 0.6853316170160598, + "grad_norm": 1.7389181815547101, + "learning_rate": 2.3795030994678736e-06, + "loss": 0.6451, + "step": 22361 + }, + { + "epoch": 0.685362265538801, + "grad_norm": 1.906009917289436, + "learning_rate": 2.3790804197609062e-06, + "loss": 0.599, + "step": 22362 + }, + { + "epoch": 0.6853929140615422, + "grad_norm": 1.8601540865417627, + "learning_rate": 2.378657765878532e-06, + "loss": 0.4652, + "step": 22363 + }, + { + "epoch": 0.6854235625842834, + "grad_norm": 0.7769799627448892, + "learning_rate": 2.378235137824912e-06, + "loss": 0.4201, + "step": 22364 + }, + { + "epoch": 0.6854542111070246, + "grad_norm": 1.8764915632754926, + "learning_rate": 2.3778125356042112e-06, + "loss": 0.5877, + "step": 22365 + }, + { + "epoch": 0.6854848596297658, + "grad_norm": 1.7429084191561472, + "learning_rate": 2.3773899592205966e-06, + "loss": 0.5945, + "step": 22366 + }, + { + "epoch": 0.6855155081525071, + "grad_norm": 1.9718923149256424, + "learning_rate": 2.3769674086782284e-06, + "loss": 0.6565, + "step": 22367 + }, + { + "epoch": 0.6855461566752482, + "grad_norm": 1.7730518131737805, + "learning_rate": 2.3765448839812727e-06, + "loss": 0.6085, + "step": 22368 + }, + { + "epoch": 0.6855768051979895, + "grad_norm": 1.7873185926620843, + "learning_rate": 2.376122385133888e-06, + "loss": 0.5805, + "step": 22369 + }, + { + "epoch": 0.6856074537207306, + "grad_norm": 0.7701636489860314, + "learning_rate": 2.3756999121402446e-06, + "loss": 0.3906, + "step": 22370 + }, + { + "epoch": 0.6856381022434719, + "grad_norm": 0.775479906829987, + "learning_rate": 2.3752774650045014e-06, + "loss": 0.4121, + "step": 22371 + }, + { + "epoch": 0.685668750766213, + "grad_norm": 1.8425390941669084, + "learning_rate": 2.3748550437308187e-06, + "loss": 0.5428, + "step": 22372 + }, + { + "epoch": 0.6856993992889543, + "grad_norm": 1.5840656791641639, + "learning_rate": 2.3744326483233615e-06, + "loss": 0.5468, + "step": 22373 + }, + { + "epoch": 0.6857300478116954, + "grad_norm": 0.8041577763839958, + "learning_rate": 2.3740102787862925e-06, + "loss": 0.426, + "step": 22374 + }, + { + "epoch": 0.6857606963344367, + "grad_norm": 1.7620813303617762, + "learning_rate": 2.3735879351237706e-06, + "loss": 0.6238, + "step": 22375 + }, + { + "epoch": 0.6857913448571779, + "grad_norm": 0.8037887000604473, + "learning_rate": 2.3731656173399585e-06, + "loss": 0.3987, + "step": 22376 + }, + { + "epoch": 0.6858219933799191, + "grad_norm": 1.7991406440334399, + "learning_rate": 2.372743325439018e-06, + "loss": 0.5078, + "step": 22377 + }, + { + "epoch": 0.6858526419026603, + "grad_norm": 1.8042395367305044, + "learning_rate": 2.372321059425111e-06, + "loss": 0.6502, + "step": 22378 + }, + { + "epoch": 0.6858832904254015, + "grad_norm": 0.808089270085057, + "learning_rate": 2.3718988193023977e-06, + "loss": 0.4202, + "step": 22379 + }, + { + "epoch": 0.6859139389481427, + "grad_norm": 1.627298049458244, + "learning_rate": 2.3714766050750355e-06, + "loss": 0.6328, + "step": 22380 + }, + { + "epoch": 0.6859445874708839, + "grad_norm": 1.9237504169518693, + "learning_rate": 2.3710544167471867e-06, + "loss": 0.6056, + "step": 22381 + }, + { + "epoch": 0.6859752359936251, + "grad_norm": 1.8361849141954292, + "learning_rate": 2.3706322543230136e-06, + "loss": 0.533, + "step": 22382 + }, + { + "epoch": 0.6860058845163663, + "grad_norm": 1.7270607030563327, + "learning_rate": 2.3702101178066718e-06, + "loss": 0.5443, + "step": 
22383 + }, + { + "epoch": 0.6860365330391075, + "grad_norm": 1.6408255529055178, + "learning_rate": 2.3697880072023223e-06, + "loss": 0.544, + "step": 22384 + }, + { + "epoch": 0.6860671815618488, + "grad_norm": 1.7321293934829274, + "learning_rate": 2.369365922514125e-06, + "loss": 0.5875, + "step": 22385 + }, + { + "epoch": 0.6860978300845899, + "grad_norm": 1.7429254570295059, + "learning_rate": 2.3689438637462393e-06, + "loss": 0.6495, + "step": 22386 + }, + { + "epoch": 0.6861284786073312, + "grad_norm": 2.334482067066235, + "learning_rate": 2.368521830902822e-06, + "loss": 0.6696, + "step": 22387 + }, + { + "epoch": 0.6861591271300723, + "grad_norm": 1.9906116497754738, + "learning_rate": 2.3680998239880315e-06, + "loss": 0.5439, + "step": 22388 + }, + { + "epoch": 0.6861897756528136, + "grad_norm": 0.8092853058935727, + "learning_rate": 2.367677843006029e-06, + "loss": 0.4021, + "step": 22389 + }, + { + "epoch": 0.6862204241755547, + "grad_norm": 1.7861538563022767, + "learning_rate": 2.3672558879609707e-06, + "loss": 0.6016, + "step": 22390 + }, + { + "epoch": 0.686251072698296, + "grad_norm": 1.9343429630359705, + "learning_rate": 2.3668339588570115e-06, + "loss": 0.6298, + "step": 22391 + }, + { + "epoch": 0.6862817212210371, + "grad_norm": 1.8662078861535334, + "learning_rate": 2.366412055698311e-06, + "loss": 0.6323, + "step": 22392 + }, + { + "epoch": 0.6863123697437783, + "grad_norm": 0.7637519861638581, + "learning_rate": 2.365990178489028e-06, + "loss": 0.4056, + "step": 22393 + }, + { + "epoch": 0.6863430182665196, + "grad_norm": 1.9705062714241839, + "learning_rate": 2.3655683272333163e-06, + "loss": 0.6624, + "step": 22394 + }, + { + "epoch": 0.6863736667892607, + "grad_norm": 2.096557520692855, + "learning_rate": 2.365146501935334e-06, + "loss": 0.5811, + "step": 22395 + }, + { + "epoch": 0.686404315312002, + "grad_norm": 1.7225215514279402, + "learning_rate": 2.364724702599237e-06, + "loss": 0.5467, + "step": 22396 + }, + { + "epoch": 0.6864349638347431, + "grad_norm": 2.0225214172614123, + "learning_rate": 2.364302929229184e-06, + "loss": 0.6509, + "step": 22397 + }, + { + "epoch": 0.6864656123574844, + "grad_norm": 1.8872231502639811, + "learning_rate": 2.3638811818293284e-06, + "loss": 0.5792, + "step": 22398 + }, + { + "epoch": 0.6864962608802255, + "grad_norm": 0.7565121453633376, + "learning_rate": 2.363459460403822e-06, + "loss": 0.3957, + "step": 22399 + }, + { + "epoch": 0.6865269094029668, + "grad_norm": 1.7540268304651048, + "learning_rate": 2.363037764956828e-06, + "loss": 0.5962, + "step": 22400 + }, + { + "epoch": 0.6865575579257079, + "grad_norm": 1.7308131297356557, + "learning_rate": 2.362616095492498e-06, + "loss": 0.5881, + "step": 22401 + }, + { + "epoch": 0.6865882064484492, + "grad_norm": 1.7730220915460913, + "learning_rate": 2.3621944520149842e-06, + "loss": 0.5077, + "step": 22402 + }, + { + "epoch": 0.6866188549711904, + "grad_norm": 1.8077390035345442, + "learning_rate": 2.3617728345284434e-06, + "loss": 0.611, + "step": 22403 + }, + { + "epoch": 0.6866495034939316, + "grad_norm": 1.9252105761845941, + "learning_rate": 2.36135124303703e-06, + "loss": 0.6702, + "step": 22404 + }, + { + "epoch": 0.6866801520166728, + "grad_norm": 1.8152504372683478, + "learning_rate": 2.3609296775448998e-06, + "loss": 0.5784, + "step": 22405 + }, + { + "epoch": 0.686710800539414, + "grad_norm": 0.7875107133459092, + "learning_rate": 2.360508138056203e-06, + "loss": 0.415, + "step": 22406 + }, + { + "epoch": 0.6867414490621552, + "grad_norm": 1.9695100239231789, 
+ "learning_rate": 2.360086624575094e-06, + "loss": 0.677, + "step": 22407 + }, + { + "epoch": 0.6867720975848964, + "grad_norm": 1.7023042732721145, + "learning_rate": 2.3596651371057293e-06, + "loss": 0.619, + "step": 22408 + }, + { + "epoch": 0.6868027461076376, + "grad_norm": 1.9099007498177307, + "learning_rate": 2.35924367565226e-06, + "loss": 0.6311, + "step": 22409 + }, + { + "epoch": 0.6868333946303788, + "grad_norm": 1.759253457607473, + "learning_rate": 2.3588222402188343e-06, + "loss": 0.5953, + "step": 22410 + }, + { + "epoch": 0.68686404315312, + "grad_norm": 2.0708033116537417, + "learning_rate": 2.3584008308096127e-06, + "loss": 0.5623, + "step": 22411 + }, + { + "epoch": 0.6868946916758613, + "grad_norm": 1.803633566949237, + "learning_rate": 2.3579794474287416e-06, + "loss": 0.6378, + "step": 22412 + }, + { + "epoch": 0.6869253401986024, + "grad_norm": 1.7825112753163745, + "learning_rate": 2.357558090080377e-06, + "loss": 0.5986, + "step": 22413 + }, + { + "epoch": 0.6869559887213437, + "grad_norm": 1.7037992395545514, + "learning_rate": 2.3571367587686667e-06, + "loss": 0.6189, + "step": 22414 + }, + { + "epoch": 0.6869866372440848, + "grad_norm": 1.6982726183706771, + "learning_rate": 2.3567154534977643e-06, + "loss": 0.4602, + "step": 22415 + }, + { + "epoch": 0.6870172857668261, + "grad_norm": 2.1321794981116455, + "learning_rate": 2.3562941742718227e-06, + "loss": 0.59, + "step": 22416 + }, + { + "epoch": 0.6870479342895672, + "grad_norm": 1.8592989457508384, + "learning_rate": 2.355872921094989e-06, + "loss": 0.5591, + "step": 22417 + }, + { + "epoch": 0.6870785828123085, + "grad_norm": 1.8967240295678458, + "learning_rate": 2.3554516939714156e-06, + "loss": 0.6404, + "step": 22418 + }, + { + "epoch": 0.6871092313350496, + "grad_norm": 2.111119678280495, + "learning_rate": 2.355030492905256e-06, + "loss": 0.6247, + "step": 22419 + }, + { + "epoch": 0.6871398798577909, + "grad_norm": 1.857120546582853, + "learning_rate": 2.354609317900657e-06, + "loss": 0.5576, + "step": 22420 + }, + { + "epoch": 0.687170528380532, + "grad_norm": 1.8938049007788422, + "learning_rate": 2.354188168961766e-06, + "loss": 0.5967, + "step": 22421 + }, + { + "epoch": 0.6872011769032733, + "grad_norm": 1.8139650468496622, + "learning_rate": 2.353767046092739e-06, + "loss": 0.6057, + "step": 22422 + }, + { + "epoch": 0.6872318254260145, + "grad_norm": 1.7209369667373127, + "learning_rate": 2.3533459492977208e-06, + "loss": 0.6037, + "step": 22423 + }, + { + "epoch": 0.6872624739487556, + "grad_norm": 1.6729079330848433, + "learning_rate": 2.352924878580864e-06, + "loss": 0.5916, + "step": 22424 + }, + { + "epoch": 0.6872931224714969, + "grad_norm": 1.759001620283047, + "learning_rate": 2.3525038339463143e-06, + "loss": 0.606, + "step": 22425 + }, + { + "epoch": 0.687323770994238, + "grad_norm": 0.7814331069906137, + "learning_rate": 2.352082815398221e-06, + "loss": 0.4233, + "step": 22426 + }, + { + "epoch": 0.6873544195169793, + "grad_norm": 1.7841353878334094, + "learning_rate": 2.3516618229407356e-06, + "loss": 0.5944, + "step": 22427 + }, + { + "epoch": 0.6873850680397204, + "grad_norm": 1.8154298629650765, + "learning_rate": 2.3512408565780013e-06, + "loss": 0.6394, + "step": 22428 + }, + { + "epoch": 0.6874157165624617, + "grad_norm": 1.9147283494053524, + "learning_rate": 2.3508199163141694e-06, + "loss": 0.6092, + "step": 22429 + }, + { + "epoch": 0.6874463650852028, + "grad_norm": 2.0327313006677077, + "learning_rate": 2.350399002153388e-06, + "loss": 0.6174, + "step": 22430 + }, + 
{ + "epoch": 0.6874770136079441, + "grad_norm": 0.8047836200839621, + "learning_rate": 2.3499781140998016e-06, + "loss": 0.4175, + "step": 22431 + }, + { + "epoch": 0.6875076621306853, + "grad_norm": 1.849572584148264, + "learning_rate": 2.3495572521575603e-06, + "loss": 0.4879, + "step": 22432 + }, + { + "epoch": 0.6875383106534265, + "grad_norm": 1.6870239811541294, + "learning_rate": 2.3491364163308083e-06, + "loss": 0.5699, + "step": 22433 + }, + { + "epoch": 0.6875689591761677, + "grad_norm": 1.7708839135015435, + "learning_rate": 2.3487156066236934e-06, + "loss": 0.6292, + "step": 22434 + }, + { + "epoch": 0.6875996076989089, + "grad_norm": 2.051204854538234, + "learning_rate": 2.3482948230403637e-06, + "loss": 0.6079, + "step": 22435 + }, + { + "epoch": 0.6876302562216501, + "grad_norm": 2.1939917709860306, + "learning_rate": 2.3478740655849612e-06, + "loss": 0.7016, + "step": 22436 + }, + { + "epoch": 0.6876609047443913, + "grad_norm": 1.736869946021907, + "learning_rate": 2.3474533342616344e-06, + "loss": 0.5352, + "step": 22437 + }, + { + "epoch": 0.6876915532671325, + "grad_norm": 0.7806009840830443, + "learning_rate": 2.3470326290745302e-06, + "loss": 0.3938, + "step": 22438 + }, + { + "epoch": 0.6877222017898738, + "grad_norm": 1.5776082136567424, + "learning_rate": 2.346611950027791e-06, + "loss": 0.678, + "step": 22439 + }, + { + "epoch": 0.6877528503126149, + "grad_norm": 1.8755889056061403, + "learning_rate": 2.3461912971255635e-06, + "loss": 0.5864, + "step": 22440 + }, + { + "epoch": 0.6877834988353562, + "grad_norm": 1.8213233285378483, + "learning_rate": 2.345770670371993e-06, + "loss": 0.6021, + "step": 22441 + }, + { + "epoch": 0.6878141473580973, + "grad_norm": 1.9977931871851817, + "learning_rate": 2.345350069771222e-06, + "loss": 0.7208, + "step": 22442 + }, + { + "epoch": 0.6878447958808386, + "grad_norm": 0.7982134106219315, + "learning_rate": 2.344929495327398e-06, + "loss": 0.388, + "step": 22443 + }, + { + "epoch": 0.6878754444035797, + "grad_norm": 0.8922470092321991, + "learning_rate": 2.3445089470446604e-06, + "loss": 0.3907, + "step": 22444 + }, + { + "epoch": 0.687906092926321, + "grad_norm": 2.0490193774453105, + "learning_rate": 2.344088424927156e-06, + "loss": 0.6164, + "step": 22445 + }, + { + "epoch": 0.6879367414490621, + "grad_norm": 1.7466348462742003, + "learning_rate": 2.3436679289790297e-06, + "loss": 0.5297, + "step": 22446 + }, + { + "epoch": 0.6879673899718034, + "grad_norm": 1.7248317886700573, + "learning_rate": 2.3432474592044214e-06, + "loss": 0.5623, + "step": 22447 + }, + { + "epoch": 0.6879980384945446, + "grad_norm": 1.5950563320409903, + "learning_rate": 2.342827015607475e-06, + "loss": 0.5707, + "step": 22448 + }, + { + "epoch": 0.6880286870172858, + "grad_norm": 2.1403586687858054, + "learning_rate": 2.342406598192336e-06, + "loss": 0.5824, + "step": 22449 + }, + { + "epoch": 0.688059335540027, + "grad_norm": 1.8533169726919065, + "learning_rate": 2.3419862069631433e-06, + "loss": 0.5705, + "step": 22450 + }, + { + "epoch": 0.6880899840627682, + "grad_norm": 0.7536098814122162, + "learning_rate": 2.34156584192404e-06, + "loss": 0.3966, + "step": 22451 + }, + { + "epoch": 0.6881206325855094, + "grad_norm": 1.912909756592003, + "learning_rate": 2.341145503079171e-06, + "loss": 0.6032, + "step": 22452 + }, + { + "epoch": 0.6881512811082506, + "grad_norm": 1.786453809587702, + "learning_rate": 2.3407251904326733e-06, + "loss": 0.5205, + "step": 22453 + }, + { + "epoch": 0.6881819296309918, + "grad_norm": 1.80997719286639, + 
"learning_rate": 2.3403049039886932e-06, + "loss": 0.5992, + "step": 22454 + }, + { + "epoch": 0.688212578153733, + "grad_norm": 1.817801833827858, + "learning_rate": 2.339884643751367e-06, + "loss": 0.4829, + "step": 22455 + }, + { + "epoch": 0.6882432266764742, + "grad_norm": 1.631450054821589, + "learning_rate": 2.339464409724838e-06, + "loss": 0.5598, + "step": 22456 + }, + { + "epoch": 0.6882738751992153, + "grad_norm": 5.310960165414161, + "learning_rate": 2.339044201913249e-06, + "loss": 0.6192, + "step": 22457 + }, + { + "epoch": 0.6883045237219566, + "grad_norm": 1.6902626725541583, + "learning_rate": 2.3386240203207365e-06, + "loss": 0.5599, + "step": 22458 + }, + { + "epoch": 0.6883351722446978, + "grad_norm": 1.8232602440572843, + "learning_rate": 2.338203864951443e-06, + "loss": 0.5807, + "step": 22459 + }, + { + "epoch": 0.688365820767439, + "grad_norm": 1.8680348728095213, + "learning_rate": 2.337783735809509e-06, + "loss": 0.6024, + "step": 22460 + }, + { + "epoch": 0.6883964692901802, + "grad_norm": 0.8165929038802157, + "learning_rate": 2.3373636328990713e-06, + "loss": 0.4089, + "step": 22461 + }, + { + "epoch": 0.6884271178129214, + "grad_norm": 1.935523008944774, + "learning_rate": 2.336943556224271e-06, + "loss": 0.5363, + "step": 22462 + }, + { + "epoch": 0.6884577663356626, + "grad_norm": 2.009717576912026, + "learning_rate": 2.336523505789249e-06, + "loss": 0.5163, + "step": 22463 + }, + { + "epoch": 0.6884884148584038, + "grad_norm": 1.9726905322163881, + "learning_rate": 2.3361034815981406e-06, + "loss": 0.71, + "step": 22464 + }, + { + "epoch": 0.688519063381145, + "grad_norm": 1.850519276971451, + "learning_rate": 2.335683483655088e-06, + "loss": 0.6395, + "step": 22465 + }, + { + "epoch": 0.6885497119038863, + "grad_norm": 1.8059405130993276, + "learning_rate": 2.3352635119642252e-06, + "loss": 0.521, + "step": 22466 + }, + { + "epoch": 0.6885803604266274, + "grad_norm": 1.855986710634367, + "learning_rate": 2.3348435665296937e-06, + "loss": 0.7158, + "step": 22467 + }, + { + "epoch": 0.6886110089493687, + "grad_norm": 1.6966962665066758, + "learning_rate": 2.334423647355632e-06, + "loss": 0.6351, + "step": 22468 + }, + { + "epoch": 0.6886416574721098, + "grad_norm": 1.6757045864433058, + "learning_rate": 2.3340037544461745e-06, + "loss": 0.5478, + "step": 22469 + }, + { + "epoch": 0.6886723059948511, + "grad_norm": 1.826166617770269, + "learning_rate": 2.3335838878054602e-06, + "loss": 0.5509, + "step": 22470 + }, + { + "epoch": 0.6887029545175922, + "grad_norm": 0.8187218380552955, + "learning_rate": 2.3331640474376277e-06, + "loss": 0.4235, + "step": 22471 + }, + { + "epoch": 0.6887336030403335, + "grad_norm": 1.8565521433664272, + "learning_rate": 2.3327442333468104e-06, + "loss": 0.5594, + "step": 22472 + }, + { + "epoch": 0.6887642515630746, + "grad_norm": 0.8728225253586057, + "learning_rate": 2.3323244455371465e-06, + "loss": 0.4199, + "step": 22473 + }, + { + "epoch": 0.6887949000858159, + "grad_norm": 1.793709752146349, + "learning_rate": 2.3319046840127742e-06, + "loss": 0.5836, + "step": 22474 + }, + { + "epoch": 0.688825548608557, + "grad_norm": 0.7709096192054176, + "learning_rate": 2.3314849487778258e-06, + "loss": 0.3783, + "step": 22475 + }, + { + "epoch": 0.6888561971312983, + "grad_norm": 1.9360567359822718, + "learning_rate": 2.3310652398364415e-06, + "loss": 0.625, + "step": 22476 + }, + { + "epoch": 0.6888868456540395, + "grad_norm": 1.6596774773992888, + "learning_rate": 2.330645557192752e-06, + "loss": 0.6373, + "step": 22477 + }, + { + 
"epoch": 0.6889174941767807, + "grad_norm": 0.8384980444270728, + "learning_rate": 2.3302259008508942e-06, + "loss": 0.4084, + "step": 22478 + }, + { + "epoch": 0.6889481426995219, + "grad_norm": 1.8147657398324029, + "learning_rate": 2.329806270815006e-06, + "loss": 0.6204, + "step": 22479 + }, + { + "epoch": 0.6889787912222631, + "grad_norm": 1.6385035591502237, + "learning_rate": 2.3293866670892185e-06, + "loss": 0.4804, + "step": 22480 + }, + { + "epoch": 0.6890094397450043, + "grad_norm": 1.7175008052548888, + "learning_rate": 2.3289670896776666e-06, + "loss": 0.6561, + "step": 22481 + }, + { + "epoch": 0.6890400882677455, + "grad_norm": 1.6940363592817873, + "learning_rate": 2.3285475385844876e-06, + "loss": 0.5853, + "step": 22482 + }, + { + "epoch": 0.6890707367904867, + "grad_norm": 1.868271182583457, + "learning_rate": 2.328128013813811e-06, + "loss": 0.6017, + "step": 22483 + }, + { + "epoch": 0.689101385313228, + "grad_norm": 0.8183053355853165, + "learning_rate": 2.3277085153697755e-06, + "loss": 0.3976, + "step": 22484 + }, + { + "epoch": 0.6891320338359691, + "grad_norm": 1.7488988139161536, + "learning_rate": 2.3272890432565077e-06, + "loss": 0.567, + "step": 22485 + }, + { + "epoch": 0.6891626823587104, + "grad_norm": 1.9826775527413838, + "learning_rate": 2.326869597478148e-06, + "loss": 0.7275, + "step": 22486 + }, + { + "epoch": 0.6891933308814515, + "grad_norm": 0.8062299968532429, + "learning_rate": 2.3264501780388267e-06, + "loss": 0.3991, + "step": 22487 + }, + { + "epoch": 0.6892239794041927, + "grad_norm": 0.8270905101960299, + "learning_rate": 2.3260307849426733e-06, + "loss": 0.4054, + "step": 22488 + }, + { + "epoch": 0.6892546279269339, + "grad_norm": 1.7996461014041158, + "learning_rate": 2.325611418193823e-06, + "loss": 0.6655, + "step": 22489 + }, + { + "epoch": 0.6892852764496751, + "grad_norm": 1.8873366164224021, + "learning_rate": 2.3251920777964098e-06, + "loss": 0.593, + "step": 22490 + }, + { + "epoch": 0.6893159249724163, + "grad_norm": 1.9387852434443702, + "learning_rate": 2.3247727637545612e-06, + "loss": 0.6096, + "step": 22491 + }, + { + "epoch": 0.6893465734951575, + "grad_norm": 0.873816186685427, + "learning_rate": 2.324353476072412e-06, + "loss": 0.4067, + "step": 22492 + }, + { + "epoch": 0.6893772220178987, + "grad_norm": 1.8688325664186578, + "learning_rate": 2.3239342147540932e-06, + "loss": 0.6524, + "step": 22493 + }, + { + "epoch": 0.6894078705406399, + "grad_norm": 0.7768644016524807, + "learning_rate": 2.3235149798037344e-06, + "loss": 0.4069, + "step": 22494 + }, + { + "epoch": 0.6894385190633812, + "grad_norm": 0.793235314985987, + "learning_rate": 2.3230957712254686e-06, + "loss": 0.4092, + "step": 22495 + }, + { + "epoch": 0.6894691675861223, + "grad_norm": 1.7840434605442654, + "learning_rate": 2.3226765890234216e-06, + "loss": 0.5846, + "step": 22496 + }, + { + "epoch": 0.6894998161088636, + "grad_norm": 1.6827022898259334, + "learning_rate": 2.3222574332017305e-06, + "loss": 0.6559, + "step": 22497 + }, + { + "epoch": 0.6895304646316047, + "grad_norm": 1.9796285144907024, + "learning_rate": 2.3218383037645227e-06, + "loss": 0.6967, + "step": 22498 + }, + { + "epoch": 0.689561113154346, + "grad_norm": 1.813413697873661, + "learning_rate": 2.3214192007159246e-06, + "loss": 0.6358, + "step": 22499 + }, + { + "epoch": 0.6895917616770871, + "grad_norm": 1.4792831180778974, + "learning_rate": 2.3210001240600694e-06, + "loss": 0.5441, + "step": 22500 + }, + { + "epoch": 0.6896224101998284, + "grad_norm": 1.742678549422185, + 
"learning_rate": 2.3205810738010866e-06, + "loss": 0.5656, + "step": 22501 + }, + { + "epoch": 0.6896530587225695, + "grad_norm": 1.9239093402227325, + "learning_rate": 2.3201620499431027e-06, + "loss": 0.6149, + "step": 22502 + }, + { + "epoch": 0.6896837072453108, + "grad_norm": 1.7555644928450793, + "learning_rate": 2.3197430524902477e-06, + "loss": 0.5548, + "step": 22503 + }, + { + "epoch": 0.689714355768052, + "grad_norm": 0.7814050069134002, + "learning_rate": 2.3193240814466493e-06, + "loss": 0.3991, + "step": 22504 + }, + { + "epoch": 0.6897450042907932, + "grad_norm": 1.873544373218666, + "learning_rate": 2.3189051368164393e-06, + "loss": 0.5563, + "step": 22505 + }, + { + "epoch": 0.6897756528135344, + "grad_norm": 2.0045601021756214, + "learning_rate": 2.318486218603743e-06, + "loss": 0.649, + "step": 22506 + }, + { + "epoch": 0.6898063013362756, + "grad_norm": 1.6784134118680345, + "learning_rate": 2.3180673268126842e-06, + "loss": 0.6594, + "step": 22507 + }, + { + "epoch": 0.6898369498590168, + "grad_norm": 1.9037569365493985, + "learning_rate": 2.317648461447398e-06, + "loss": 0.5585, + "step": 22508 + }, + { + "epoch": 0.689867598381758, + "grad_norm": 1.4929087375436947, + "learning_rate": 2.317229622512008e-06, + "loss": 0.5315, + "step": 22509 + }, + { + "epoch": 0.6898982469044992, + "grad_norm": 1.6310787204028123, + "learning_rate": 2.3168108100106383e-06, + "loss": 0.5581, + "step": 22510 + }, + { + "epoch": 0.6899288954272405, + "grad_norm": 1.7477228763589627, + "learning_rate": 2.316392023947419e-06, + "loss": 0.6035, + "step": 22511 + }, + { + "epoch": 0.6899595439499816, + "grad_norm": 1.647114780260168, + "learning_rate": 2.3159732643264752e-06, + "loss": 0.5345, + "step": 22512 + }, + { + "epoch": 0.6899901924727229, + "grad_norm": 1.9874241643000723, + "learning_rate": 2.3155545311519364e-06, + "loss": 0.6812, + "step": 22513 + }, + { + "epoch": 0.690020840995464, + "grad_norm": 2.1436212614317203, + "learning_rate": 2.3151358244279227e-06, + "loss": 0.6464, + "step": 22514 + }, + { + "epoch": 0.6900514895182053, + "grad_norm": 2.025237767823266, + "learning_rate": 2.3147171441585633e-06, + "loss": 0.6254, + "step": 22515 + }, + { + "epoch": 0.6900821380409464, + "grad_norm": 2.364702235149817, + "learning_rate": 2.3142984903479847e-06, + "loss": 0.5352, + "step": 22516 + }, + { + "epoch": 0.6901127865636877, + "grad_norm": 1.5831365010011376, + "learning_rate": 2.31387986300031e-06, + "loss": 0.5312, + "step": 22517 + }, + { + "epoch": 0.6901434350864288, + "grad_norm": 1.8211016065004721, + "learning_rate": 2.3134612621196606e-06, + "loss": 0.5746, + "step": 22518 + }, + { + "epoch": 0.69017408360917, + "grad_norm": 1.7959317033955258, + "learning_rate": 2.3130426877101686e-06, + "loss": 0.6026, + "step": 22519 + }, + { + "epoch": 0.6902047321319112, + "grad_norm": 1.699286327626318, + "learning_rate": 2.3126241397759547e-06, + "loss": 0.5297, + "step": 22520 + }, + { + "epoch": 0.6902353806546524, + "grad_norm": 1.9700634871519105, + "learning_rate": 2.3122056183211406e-06, + "loss": 0.6386, + "step": 22521 + }, + { + "epoch": 0.6902660291773937, + "grad_norm": 1.710705628527412, + "learning_rate": 2.311787123349852e-06, + "loss": 0.5394, + "step": 22522 + }, + { + "epoch": 0.6902966777001348, + "grad_norm": 1.8942950513418269, + "learning_rate": 2.3113686548662128e-06, + "loss": 0.5512, + "step": 22523 + }, + { + "epoch": 0.6903273262228761, + "grad_norm": 1.7307059473919983, + "learning_rate": 2.3109502128743483e-06, + "loss": 0.6345, + "step": 22524 + 
}, + { + "epoch": 0.6903579747456172, + "grad_norm": 2.0832263598334033, + "learning_rate": 2.3105317973783774e-06, + "loss": 0.7406, + "step": 22525 + }, + { + "epoch": 0.6903886232683585, + "grad_norm": 1.6562445077609445, + "learning_rate": 2.310113408382425e-06, + "loss": 0.5748, + "step": 22526 + }, + { + "epoch": 0.6904192717910996, + "grad_norm": 1.9002404366272452, + "learning_rate": 2.309695045890615e-06, + "loss": 0.6275, + "step": 22527 + }, + { + "epoch": 0.6904499203138409, + "grad_norm": 1.9212888225579032, + "learning_rate": 2.3092767099070683e-06, + "loss": 0.6157, + "step": 22528 + }, + { + "epoch": 0.690480568836582, + "grad_norm": 1.7926532308297372, + "learning_rate": 2.308858400435905e-06, + "loss": 0.5729, + "step": 22529 + }, + { + "epoch": 0.6905112173593233, + "grad_norm": 1.7065383792957711, + "learning_rate": 2.3084401174812476e-06, + "loss": 0.5743, + "step": 22530 + }, + { + "epoch": 0.6905418658820645, + "grad_norm": 1.5945857932207081, + "learning_rate": 2.308021861047219e-06, + "loss": 0.4798, + "step": 22531 + }, + { + "epoch": 0.6905725144048057, + "grad_norm": 2.080234933378633, + "learning_rate": 2.3076036311379413e-06, + "loss": 0.5843, + "step": 22532 + }, + { + "epoch": 0.6906031629275469, + "grad_norm": 1.7941855314952186, + "learning_rate": 2.3071854277575324e-06, + "loss": 0.5794, + "step": 22533 + }, + { + "epoch": 0.6906338114502881, + "grad_norm": 0.8070991633093357, + "learning_rate": 2.306767250910114e-06, + "loss": 0.4153, + "step": 22534 + }, + { + "epoch": 0.6906644599730293, + "grad_norm": 1.1784848998164064, + "learning_rate": 2.3063491005998095e-06, + "loss": 0.4075, + "step": 22535 + }, + { + "epoch": 0.6906951084957705, + "grad_norm": 1.502737661388871, + "learning_rate": 2.3059309768307364e-06, + "loss": 0.5053, + "step": 22536 + }, + { + "epoch": 0.6907257570185117, + "grad_norm": 1.64045966682889, + "learning_rate": 2.3055128796070105e-06, + "loss": 0.5199, + "step": 22537 + }, + { + "epoch": 0.690756405541253, + "grad_norm": 1.7957961661039172, + "learning_rate": 2.3050948089327594e-06, + "loss": 0.5883, + "step": 22538 + }, + { + "epoch": 0.6907870540639941, + "grad_norm": 1.7079198453503956, + "learning_rate": 2.304676764812097e-06, + "loss": 0.539, + "step": 22539 + }, + { + "epoch": 0.6908177025867354, + "grad_norm": 1.7325549570836798, + "learning_rate": 2.3042587472491463e-06, + "loss": 0.5384, + "step": 22540 + }, + { + "epoch": 0.6908483511094765, + "grad_norm": 1.6204794955597233, + "learning_rate": 2.3038407562480213e-06, + "loss": 0.6346, + "step": 22541 + }, + { + "epoch": 0.6908789996322178, + "grad_norm": 1.84512897574316, + "learning_rate": 2.3034227918128438e-06, + "loss": 0.516, + "step": 22542 + }, + { + "epoch": 0.6909096481549589, + "grad_norm": 1.7954658295934738, + "learning_rate": 2.303004853947733e-06, + "loss": 0.534, + "step": 22543 + }, + { + "epoch": 0.6909402966777002, + "grad_norm": 1.7698919279030467, + "learning_rate": 2.302586942656803e-06, + "loss": 0.5646, + "step": 22544 + }, + { + "epoch": 0.6909709452004413, + "grad_norm": 1.794791322069092, + "learning_rate": 2.3021690579441754e-06, + "loss": 0.5741, + "step": 22545 + }, + { + "epoch": 0.6910015937231826, + "grad_norm": 1.6437300581038956, + "learning_rate": 2.3017511998139667e-06, + "loss": 0.5001, + "step": 22546 + }, + { + "epoch": 0.6910322422459237, + "grad_norm": 0.8367157045292523, + "learning_rate": 2.301333368270295e-06, + "loss": 0.4057, + "step": 22547 + }, + { + "epoch": 0.691062890768665, + "grad_norm": 1.758185070219384, + 
"learning_rate": 2.300915563317272e-06, + "loss": 0.6214, + "step": 22548 + }, + { + "epoch": 0.6910935392914062, + "grad_norm": 0.832153281579671, + "learning_rate": 2.300497784959022e-06, + "loss": 0.4176, + "step": 22549 + }, + { + "epoch": 0.6911241878141473, + "grad_norm": 1.6773463661958068, + "learning_rate": 2.3000800331996564e-06, + "loss": 0.6381, + "step": 22550 + }, + { + "epoch": 0.6911548363368886, + "grad_norm": 1.6023648094925773, + "learning_rate": 2.299662308043295e-06, + "loss": 0.52, + "step": 22551 + }, + { + "epoch": 0.6911854848596297, + "grad_norm": 1.8985424414643088, + "learning_rate": 2.2992446094940496e-06, + "loss": 0.5656, + "step": 22552 + }, + { + "epoch": 0.691216133382371, + "grad_norm": 1.8894825288091168, + "learning_rate": 2.2988269375560383e-06, + "loss": 0.5728, + "step": 22553 + }, + { + "epoch": 0.6912467819051121, + "grad_norm": 0.7895204561239173, + "learning_rate": 2.298409292233378e-06, + "loss": 0.4226, + "step": 22554 + }, + { + "epoch": 0.6912774304278534, + "grad_norm": 1.7564663685799677, + "learning_rate": 2.2979916735301804e-06, + "loss": 0.6189, + "step": 22555 + }, + { + "epoch": 0.6913080789505945, + "grad_norm": 1.7119583296987761, + "learning_rate": 2.297574081450563e-06, + "loss": 0.5118, + "step": 22556 + }, + { + "epoch": 0.6913387274733358, + "grad_norm": 1.795595431972577, + "learning_rate": 2.29715651599864e-06, + "loss": 0.5789, + "step": 22557 + }, + { + "epoch": 0.691369375996077, + "grad_norm": 2.008725350243916, + "learning_rate": 2.2967389771785243e-06, + "loss": 0.5663, + "step": 22558 + }, + { + "epoch": 0.6914000245188182, + "grad_norm": 1.955651979236864, + "learning_rate": 2.296321464994331e-06, + "loss": 0.7268, + "step": 22559 + }, + { + "epoch": 0.6914306730415594, + "grad_norm": 1.9513312472849313, + "learning_rate": 2.295903979450176e-06, + "loss": 0.6361, + "step": 22560 + }, + { + "epoch": 0.6914613215643006, + "grad_norm": 1.8781535758856847, + "learning_rate": 2.295486520550169e-06, + "loss": 0.6576, + "step": 22561 + }, + { + "epoch": 0.6914919700870418, + "grad_norm": 0.809148340117448, + "learning_rate": 2.2950690882984274e-06, + "loss": 0.403, + "step": 22562 + }, + { + "epoch": 0.691522618609783, + "grad_norm": 1.7551593770065141, + "learning_rate": 2.29465168269906e-06, + "loss": 0.4907, + "step": 22563 + }, + { + "epoch": 0.6915532671325242, + "grad_norm": 1.8369185333067541, + "learning_rate": 2.294234303756182e-06, + "loss": 0.6288, + "step": 22564 + }, + { + "epoch": 0.6915839156552654, + "grad_norm": 1.772410090301964, + "learning_rate": 2.293816951473908e-06, + "loss": 0.6254, + "step": 22565 + }, + { + "epoch": 0.6916145641780066, + "grad_norm": 1.714754775551, + "learning_rate": 2.293399625856345e-06, + "loss": 0.5771, + "step": 22566 + }, + { + "epoch": 0.6916452127007479, + "grad_norm": 1.6867937395198942, + "learning_rate": 2.2929823269076085e-06, + "loss": 0.5665, + "step": 22567 + }, + { + "epoch": 0.691675861223489, + "grad_norm": 0.8481244387317944, + "learning_rate": 2.292565054631812e-06, + "loss": 0.4251, + "step": 22568 + }, + { + "epoch": 0.6917065097462303, + "grad_norm": 0.7572814929336024, + "learning_rate": 2.2921478090330624e-06, + "loss": 0.4202, + "step": 22569 + }, + { + "epoch": 0.6917371582689714, + "grad_norm": 1.6910858527940171, + "learning_rate": 2.2917305901154737e-06, + "loss": 0.5878, + "step": 22570 + }, + { + "epoch": 0.6917678067917127, + "grad_norm": 1.7683428099945318, + "learning_rate": 2.2913133978831582e-06, + "loss": 0.6253, + "step": 22571 + }, + { + 
"epoch": 0.6917984553144538, + "grad_norm": 1.8382730625600052, + "learning_rate": 2.290896232340223e-06, + "loss": 0.6473, + "step": 22572 + }, + { + "epoch": 0.6918291038371951, + "grad_norm": 0.8121226695063752, + "learning_rate": 2.2904790934907817e-06, + "loss": 0.414, + "step": 22573 + }, + { + "epoch": 0.6918597523599362, + "grad_norm": 1.7246467993873222, + "learning_rate": 2.290061981338942e-06, + "loss": 0.6019, + "step": 22574 + }, + { + "epoch": 0.6918904008826775, + "grad_norm": 2.0979793014583565, + "learning_rate": 2.2896448958888145e-06, + "loss": 0.6285, + "step": 22575 + }, + { + "epoch": 0.6919210494054187, + "grad_norm": 1.8527312401501703, + "learning_rate": 2.2892278371445107e-06, + "loss": 0.6132, + "step": 22576 + }, + { + "epoch": 0.6919516979281599, + "grad_norm": 0.788401210911544, + "learning_rate": 2.2888108051101377e-06, + "loss": 0.4163, + "step": 22577 + }, + { + "epoch": 0.6919823464509011, + "grad_norm": 1.7423808160615157, + "learning_rate": 2.2883937997898053e-06, + "loss": 0.6057, + "step": 22578 + }, + { + "epoch": 0.6920129949736423, + "grad_norm": 1.7497680413894303, + "learning_rate": 2.287976821187624e-06, + "loss": 0.6041, + "step": 22579 + }, + { + "epoch": 0.6920436434963835, + "grad_norm": 1.7763840116917056, + "learning_rate": 2.2875598693076995e-06, + "loss": 0.6459, + "step": 22580 + }, + { + "epoch": 0.6920742920191246, + "grad_norm": 0.7771884284183314, + "learning_rate": 2.287142944154144e-06, + "loss": 0.4022, + "step": 22581 + }, + { + "epoch": 0.6921049405418659, + "grad_norm": 1.7181799100592479, + "learning_rate": 2.28672604573106e-06, + "loss": 0.5808, + "step": 22582 + }, + { + "epoch": 0.692135589064607, + "grad_norm": 1.7175747078199795, + "learning_rate": 2.2863091740425597e-06, + "loss": 0.5342, + "step": 22583 + }, + { + "epoch": 0.6921662375873483, + "grad_norm": 1.7484939815471645, + "learning_rate": 2.285892329092751e-06, + "loss": 0.5607, + "step": 22584 + }, + { + "epoch": 0.6921968861100894, + "grad_norm": 1.9202842115417398, + "learning_rate": 2.2854755108857376e-06, + "loss": 0.6378, + "step": 22585 + }, + { + "epoch": 0.6922275346328307, + "grad_norm": 1.7405264035283554, + "learning_rate": 2.2850587194256284e-06, + "loss": 0.5404, + "step": 22586 + }, + { + "epoch": 0.6922581831555719, + "grad_norm": 1.6843856909926629, + "learning_rate": 2.2846419547165323e-06, + "loss": 0.6507, + "step": 22587 + }, + { + "epoch": 0.6922888316783131, + "grad_norm": 2.0782925110037604, + "learning_rate": 2.2842252167625517e-06, + "loss": 0.5748, + "step": 22588 + }, + { + "epoch": 0.6923194802010543, + "grad_norm": 1.9236849927518394, + "learning_rate": 2.283808505567795e-06, + "loss": 0.6127, + "step": 22589 + }, + { + "epoch": 0.6923501287237955, + "grad_norm": 1.6641395665328065, + "learning_rate": 2.2833918211363705e-06, + "loss": 0.5664, + "step": 22590 + }, + { + "epoch": 0.6923807772465367, + "grad_norm": 1.8556739595339222, + "learning_rate": 2.2829751634723786e-06, + "loss": 0.5993, + "step": 22591 + }, + { + "epoch": 0.6924114257692779, + "grad_norm": 0.8108482915555532, + "learning_rate": 2.28255853257993e-06, + "loss": 0.4183, + "step": 22592 + }, + { + "epoch": 0.6924420742920191, + "grad_norm": 1.6746334373144345, + "learning_rate": 2.2821419284631235e-06, + "loss": 0.6007, + "step": 22593 + }, + { + "epoch": 0.6924727228147604, + "grad_norm": 1.7021663390649122, + "learning_rate": 2.2817253511260722e-06, + "loss": 0.5325, + "step": 22594 + }, + { + "epoch": 0.6925033713375015, + "grad_norm": 2.1887623562741783, + 
"learning_rate": 2.281308800572876e-06, + "loss": 0.6452, + "step": 22595 + }, + { + "epoch": 0.6925340198602428, + "grad_norm": 1.5875202730378228, + "learning_rate": 2.2808922768076387e-06, + "loss": 0.5503, + "step": 22596 + }, + { + "epoch": 0.6925646683829839, + "grad_norm": 2.095826894610963, + "learning_rate": 2.2804757798344646e-06, + "loss": 0.6192, + "step": 22597 + }, + { + "epoch": 0.6925953169057252, + "grad_norm": 0.8261079260378317, + "learning_rate": 2.2800593096574607e-06, + "loss": 0.4209, + "step": 22598 + }, + { + "epoch": 0.6926259654284663, + "grad_norm": 1.8965844606766822, + "learning_rate": 2.2796428662807262e-06, + "loss": 0.6149, + "step": 22599 + }, + { + "epoch": 0.6926566139512076, + "grad_norm": 1.6993036423893244, + "learning_rate": 2.279226449708367e-06, + "loss": 0.6211, + "step": 22600 + }, + { + "epoch": 0.6926872624739487, + "grad_norm": 0.8279904138585094, + "learning_rate": 2.2788100599444873e-06, + "loss": 0.4137, + "step": 22601 + }, + { + "epoch": 0.69271791099669, + "grad_norm": 0.8677685526562499, + "learning_rate": 2.278393696993187e-06, + "loss": 0.4202, + "step": 22602 + }, + { + "epoch": 0.6927485595194312, + "grad_norm": 1.876123332827385, + "learning_rate": 2.2779773608585713e-06, + "loss": 0.6263, + "step": 22603 + }, + { + "epoch": 0.6927792080421724, + "grad_norm": 1.906571028162815, + "learning_rate": 2.2775610515447373e-06, + "loss": 0.5832, + "step": 22604 + }, + { + "epoch": 0.6928098565649136, + "grad_norm": 1.8972194316872917, + "learning_rate": 2.2771447690557948e-06, + "loss": 0.62, + "step": 22605 + }, + { + "epoch": 0.6928405050876548, + "grad_norm": 1.8016416867698604, + "learning_rate": 2.2767285133958415e-06, + "loss": 0.5671, + "step": 22606 + }, + { + "epoch": 0.692871153610396, + "grad_norm": 1.8340066585072257, + "learning_rate": 2.2763122845689772e-06, + "loss": 0.5831, + "step": 22607 + }, + { + "epoch": 0.6929018021331372, + "grad_norm": 1.945553530799004, + "learning_rate": 2.2758960825793045e-06, + "loss": 0.559, + "step": 22608 + }, + { + "epoch": 0.6929324506558784, + "grad_norm": 1.8015636443530856, + "learning_rate": 2.275479907430927e-06, + "loss": 0.4807, + "step": 22609 + }, + { + "epoch": 0.6929630991786196, + "grad_norm": 1.7293335792477815, + "learning_rate": 2.2750637591279413e-06, + "loss": 0.647, + "step": 22610 + }, + { + "epoch": 0.6929937477013608, + "grad_norm": 1.8597661905894598, + "learning_rate": 2.2746476376744493e-06, + "loss": 0.6343, + "step": 22611 + }, + { + "epoch": 0.693024396224102, + "grad_norm": 0.7778626188195052, + "learning_rate": 2.274231543074551e-06, + "loss": 0.4059, + "step": 22612 + }, + { + "epoch": 0.6930550447468432, + "grad_norm": 1.7927153614733753, + "learning_rate": 2.2738154753323495e-06, + "loss": 0.6894, + "step": 22613 + }, + { + "epoch": 0.6930856932695844, + "grad_norm": 1.6228023846247923, + "learning_rate": 2.273399434451941e-06, + "loss": 0.5718, + "step": 22614 + }, + { + "epoch": 0.6931163417923256, + "grad_norm": 0.7623345737414766, + "learning_rate": 2.272983420437422e-06, + "loss": 0.4068, + "step": 22615 + }, + { + "epoch": 0.6931469903150668, + "grad_norm": 1.7989029758557527, + "learning_rate": 2.272567433292899e-06, + "loss": 0.6458, + "step": 22616 + }, + { + "epoch": 0.693177638837808, + "grad_norm": 1.6449937246524173, + "learning_rate": 2.2721514730224664e-06, + "loss": 0.5792, + "step": 22617 + }, + { + "epoch": 0.6932082873605492, + "grad_norm": 1.7237873221878086, + "learning_rate": 2.2717355396302214e-06, + "loss": 0.6701, + "step": 22618 + }, 
+ { + "epoch": 0.6932389358832904, + "grad_norm": 1.7454598023730603, + "learning_rate": 2.271319633120265e-06, + "loss": 0.5628, + "step": 22619 + }, + { + "epoch": 0.6932695844060316, + "grad_norm": 1.5354961632052873, + "learning_rate": 2.270903753496694e-06, + "loss": 0.508, + "step": 22620 + }, + { + "epoch": 0.6933002329287729, + "grad_norm": 1.7841098802150326, + "learning_rate": 2.2704879007636077e-06, + "loss": 0.589, + "step": 22621 + }, + { + "epoch": 0.693330881451514, + "grad_norm": 1.6619428709952326, + "learning_rate": 2.2700720749251016e-06, + "loss": 0.6064, + "step": 22622 + }, + { + "epoch": 0.6933615299742553, + "grad_norm": 2.168277972484202, + "learning_rate": 2.2696562759852738e-06, + "loss": 0.6684, + "step": 22623 + }, + { + "epoch": 0.6933921784969964, + "grad_norm": 1.928854136179048, + "learning_rate": 2.2692405039482223e-06, + "loss": 0.6333, + "step": 22624 + }, + { + "epoch": 0.6934228270197377, + "grad_norm": 1.6421399516049275, + "learning_rate": 2.2688247588180433e-06, + "loss": 0.5939, + "step": 22625 + }, + { + "epoch": 0.6934534755424788, + "grad_norm": 0.8016450394654123, + "learning_rate": 2.2684090405988315e-06, + "loss": 0.3972, + "step": 22626 + }, + { + "epoch": 0.6934841240652201, + "grad_norm": 1.802669088032458, + "learning_rate": 2.2679933492946837e-06, + "loss": 0.5838, + "step": 22627 + }, + { + "epoch": 0.6935147725879612, + "grad_norm": 1.7533636705502424, + "learning_rate": 2.267577684909698e-06, + "loss": 0.6453, + "step": 22628 + }, + { + "epoch": 0.6935454211107025, + "grad_norm": 0.752706468523437, + "learning_rate": 2.267162047447967e-06, + "loss": 0.4138, + "step": 22629 + }, + { + "epoch": 0.6935760696334436, + "grad_norm": 1.5503067624298514, + "learning_rate": 2.266746436913588e-06, + "loss": 0.5683, + "step": 22630 + }, + { + "epoch": 0.6936067181561849, + "grad_norm": 1.8234729146985962, + "learning_rate": 2.2663308533106555e-06, + "loss": 0.5511, + "step": 22631 + }, + { + "epoch": 0.6936373666789261, + "grad_norm": 1.8244538190560413, + "learning_rate": 2.265915296643266e-06, + "loss": 0.5259, + "step": 22632 + }, + { + "epoch": 0.6936680152016673, + "grad_norm": 1.593017136761153, + "learning_rate": 2.2654997669155125e-06, + "loss": 0.5041, + "step": 22633 + }, + { + "epoch": 0.6936986637244085, + "grad_norm": 0.8357134558135895, + "learning_rate": 2.2650842641314864e-06, + "loss": 0.3915, + "step": 22634 + }, + { + "epoch": 0.6937293122471497, + "grad_norm": 1.978088398349308, + "learning_rate": 2.2646687882952884e-06, + "loss": 0.6603, + "step": 22635 + }, + { + "epoch": 0.6937599607698909, + "grad_norm": 1.7791274050131123, + "learning_rate": 2.2642533394110082e-06, + "loss": 0.5579, + "step": 22636 + }, + { + "epoch": 0.6937906092926321, + "grad_norm": 1.6671891499524227, + "learning_rate": 2.2638379174827385e-06, + "loss": 0.5565, + "step": 22637 + }, + { + "epoch": 0.6938212578153733, + "grad_norm": 1.659110174146202, + "learning_rate": 2.2634225225145733e-06, + "loss": 0.5448, + "step": 22638 + }, + { + "epoch": 0.6938519063381146, + "grad_norm": 1.7662429658312313, + "learning_rate": 2.2630071545106064e-06, + "loss": 0.6212, + "step": 22639 + }, + { + "epoch": 0.6938825548608557, + "grad_norm": 1.8068662893492715, + "learning_rate": 2.2625918134749326e-06, + "loss": 0.6044, + "step": 22640 + }, + { + "epoch": 0.693913203383597, + "grad_norm": 0.8353423815460664, + "learning_rate": 2.2621764994116395e-06, + "loss": 0.4164, + "step": 22641 + }, + { + "epoch": 0.6939438519063381, + "grad_norm": 1.7336133779358742, + 
"learning_rate": 2.2617612123248223e-06, + "loss": 0.6193, + "step": 22642 + }, + { + "epoch": 0.6939745004290793, + "grad_norm": 1.6743386422138877, + "learning_rate": 2.2613459522185744e-06, + "loss": 0.6251, + "step": 22643 + }, + { + "epoch": 0.6940051489518205, + "grad_norm": 0.8269170670888853, + "learning_rate": 2.2609307190969852e-06, + "loss": 0.4188, + "step": 22644 + }, + { + "epoch": 0.6940357974745617, + "grad_norm": 1.8657178910566885, + "learning_rate": 2.260515512964143e-06, + "loss": 0.6419, + "step": 22645 + }, + { + "epoch": 0.6940664459973029, + "grad_norm": 1.8695355742110906, + "learning_rate": 2.260100333824146e-06, + "loss": 0.5991, + "step": 22646 + }, + { + "epoch": 0.6940970945200441, + "grad_norm": 1.7641895510759291, + "learning_rate": 2.2596851816810815e-06, + "loss": 0.582, + "step": 22647 + }, + { + "epoch": 0.6941277430427854, + "grad_norm": 1.6561623689121385, + "learning_rate": 2.259270056539038e-06, + "loss": 0.6336, + "step": 22648 + }, + { + "epoch": 0.6941583915655265, + "grad_norm": 1.821987022017556, + "learning_rate": 2.258854958402108e-06, + "loss": 0.5776, + "step": 22649 + }, + { + "epoch": 0.6941890400882678, + "grad_norm": 1.9645841500672292, + "learning_rate": 2.2584398872743817e-06, + "loss": 0.6411, + "step": 22650 + }, + { + "epoch": 0.6942196886110089, + "grad_norm": 1.8211714374928254, + "learning_rate": 2.25802484315995e-06, + "loss": 0.6504, + "step": 22651 + }, + { + "epoch": 0.6942503371337502, + "grad_norm": 0.7738056874246824, + "learning_rate": 2.2576098260629e-06, + "loss": 0.3697, + "step": 22652 + }, + { + "epoch": 0.6942809856564913, + "grad_norm": 1.7910251280310048, + "learning_rate": 2.2571948359873213e-06, + "loss": 0.6154, + "step": 22653 + }, + { + "epoch": 0.6943116341792326, + "grad_norm": 1.8041761347534284, + "learning_rate": 2.256779872937306e-06, + "loss": 0.5645, + "step": 22654 + }, + { + "epoch": 0.6943422827019737, + "grad_norm": 1.7892888620401886, + "learning_rate": 2.25636493691694e-06, + "loss": 0.5656, + "step": 22655 + }, + { + "epoch": 0.694372931224715, + "grad_norm": 2.121312162604777, + "learning_rate": 2.2559500279303087e-06, + "loss": 0.6214, + "step": 22656 + }, + { + "epoch": 0.6944035797474561, + "grad_norm": 0.7937624917633198, + "learning_rate": 2.2555351459815076e-06, + "loss": 0.4195, + "step": 22657 + }, + { + "epoch": 0.6944342282701974, + "grad_norm": 1.716439271149059, + "learning_rate": 2.2551202910746196e-06, + "loss": 0.6947, + "step": 22658 + }, + { + "epoch": 0.6944648767929386, + "grad_norm": 1.7648421824874578, + "learning_rate": 2.254705463213735e-06, + "loss": 0.5575, + "step": 22659 + }, + { + "epoch": 0.6944955253156798, + "grad_norm": 1.9491891711354292, + "learning_rate": 2.254290662402938e-06, + "loss": 0.6359, + "step": 22660 + }, + { + "epoch": 0.694526173838421, + "grad_norm": 0.7874866576181201, + "learning_rate": 2.2538758886463174e-06, + "loss": 0.4193, + "step": 22661 + }, + { + "epoch": 0.6945568223611622, + "grad_norm": 1.5674967467570387, + "learning_rate": 2.253461141947963e-06, + "loss": 0.6543, + "step": 22662 + }, + { + "epoch": 0.6945874708839034, + "grad_norm": 1.8810233885752468, + "learning_rate": 2.253046422311956e-06, + "loss": 0.5452, + "step": 22663 + }, + { + "epoch": 0.6946181194066446, + "grad_norm": 1.8349405784235446, + "learning_rate": 2.252631729742386e-06, + "loss": 0.5781, + "step": 22664 + }, + { + "epoch": 0.6946487679293858, + "grad_norm": 1.5934756317614658, + "learning_rate": 2.25221706424334e-06, + "loss": 0.6246, + "step": 22665 + }, + { 
+ "epoch": 0.694679416452127, + "grad_norm": 2.0752784331415497, + "learning_rate": 2.2518024258189004e-06, + "loss": 0.6834, + "step": 22666 + }, + { + "epoch": 0.6947100649748682, + "grad_norm": 1.9264257453483267, + "learning_rate": 2.251387814473155e-06, + "loss": 0.6458, + "step": 22667 + }, + { + "epoch": 0.6947407134976095, + "grad_norm": 1.7618399484231493, + "learning_rate": 2.2509732302101906e-06, + "loss": 0.5459, + "step": 22668 + }, + { + "epoch": 0.6947713620203506, + "grad_norm": 1.771438676225829, + "learning_rate": 2.2505586730340884e-06, + "loss": 0.5259, + "step": 22669 + }, + { + "epoch": 0.6948020105430919, + "grad_norm": 1.6453847489640145, + "learning_rate": 2.2501441429489366e-06, + "loss": 0.5208, + "step": 22670 + }, + { + "epoch": 0.694832659065833, + "grad_norm": 1.8343607487126112, + "learning_rate": 2.2497296399588166e-06, + "loss": 0.5956, + "step": 22671 + }, + { + "epoch": 0.6948633075885743, + "grad_norm": 1.8216515208727393, + "learning_rate": 2.249315164067814e-06, + "loss": 0.6269, + "step": 22672 + }, + { + "epoch": 0.6948939561113154, + "grad_norm": 1.8489297733244867, + "learning_rate": 2.2489007152800146e-06, + "loss": 0.5755, + "step": 22673 + }, + { + "epoch": 0.6949246046340566, + "grad_norm": 1.7566955510724316, + "learning_rate": 2.248486293599499e-06, + "loss": 0.5238, + "step": 22674 + }, + { + "epoch": 0.6949552531567978, + "grad_norm": 1.8114618345149243, + "learning_rate": 2.2480718990303517e-06, + "loss": 0.5384, + "step": 22675 + }, + { + "epoch": 0.694985901679539, + "grad_norm": 1.7331874161075855, + "learning_rate": 2.247657531576658e-06, + "loss": 0.5937, + "step": 22676 + }, + { + "epoch": 0.6950165502022803, + "grad_norm": 1.9323862099562008, + "learning_rate": 2.247243191242497e-06, + "loss": 0.6342, + "step": 22677 + }, + { + "epoch": 0.6950471987250214, + "grad_norm": 1.7213345587455207, + "learning_rate": 2.246828878031955e-06, + "loss": 0.5682, + "step": 22678 + }, + { + "epoch": 0.6950778472477627, + "grad_norm": 0.8383703173268262, + "learning_rate": 2.2464145919491105e-06, + "loss": 0.406, + "step": 22679 + }, + { + "epoch": 0.6951084957705038, + "grad_norm": 1.7517496963718013, + "learning_rate": 2.246000332998047e-06, + "loss": 0.5371, + "step": 22680 + }, + { + "epoch": 0.6951391442932451, + "grad_norm": 0.7997288727195855, + "learning_rate": 2.2455861011828494e-06, + "loss": 0.4231, + "step": 22681 + }, + { + "epoch": 0.6951697928159862, + "grad_norm": 1.7690568244471108, + "learning_rate": 2.245171896507595e-06, + "loss": 0.5908, + "step": 22682 + }, + { + "epoch": 0.6952004413387275, + "grad_norm": 1.8629745175594992, + "learning_rate": 2.2447577189763662e-06, + "loss": 0.6035, + "step": 22683 + }, + { + "epoch": 0.6952310898614686, + "grad_norm": 2.06222697497315, + "learning_rate": 2.244343568593247e-06, + "loss": 0.6371, + "step": 22684 + }, + { + "epoch": 0.6952617383842099, + "grad_norm": 1.9334741083035445, + "learning_rate": 2.2439294453623135e-06, + "loss": 0.6775, + "step": 22685 + }, + { + "epoch": 0.695292386906951, + "grad_norm": 0.776645138295172, + "learning_rate": 2.2435153492876484e-06, + "loss": 0.4236, + "step": 22686 + }, + { + "epoch": 0.6953230354296923, + "grad_norm": 1.8604267742319989, + "learning_rate": 2.2431012803733337e-06, + "loss": 0.6027, + "step": 22687 + }, + { + "epoch": 0.6953536839524335, + "grad_norm": 1.7036184232418234, + "learning_rate": 2.2426872386234457e-06, + "loss": 0.5826, + "step": 22688 + }, + { + "epoch": 0.6953843324751747, + "grad_norm": 1.6598706011540547, + 
"learning_rate": 2.2422732240420674e-06, + "loss": 0.5836, + "step": 22689 + }, + { + "epoch": 0.6954149809979159, + "grad_norm": 1.9631350034967645, + "learning_rate": 2.2418592366332753e-06, + "loss": 0.5556, + "step": 22690 + }, + { + "epoch": 0.6954456295206571, + "grad_norm": 1.7232815021459154, + "learning_rate": 2.2414452764011495e-06, + "loss": 0.6181, + "step": 22691 + }, + { + "epoch": 0.6954762780433983, + "grad_norm": 1.54472745791131, + "learning_rate": 2.241031343349771e-06, + "loss": 0.5643, + "step": 22692 + }, + { + "epoch": 0.6955069265661395, + "grad_norm": 1.8436264282110053, + "learning_rate": 2.2406174374832147e-06, + "loss": 0.6398, + "step": 22693 + }, + { + "epoch": 0.6955375750888807, + "grad_norm": 2.121062696649265, + "learning_rate": 2.240203558805561e-06, + "loss": 0.5968, + "step": 22694 + }, + { + "epoch": 0.695568223611622, + "grad_norm": 1.8117802831669787, + "learning_rate": 2.2397897073208897e-06, + "loss": 0.6508, + "step": 22695 + }, + { + "epoch": 0.6955988721343631, + "grad_norm": 1.654248887502437, + "learning_rate": 2.2393758830332744e-06, + "loss": 0.5191, + "step": 22696 + }, + { + "epoch": 0.6956295206571044, + "grad_norm": 1.6107779046937623, + "learning_rate": 2.238962085946795e-06, + "loss": 0.593, + "step": 22697 + }, + { + "epoch": 0.6956601691798455, + "grad_norm": 1.905383713486433, + "learning_rate": 2.238548316065531e-06, + "loss": 0.6113, + "step": 22698 + }, + { + "epoch": 0.6956908177025868, + "grad_norm": 1.7197223957693888, + "learning_rate": 2.2381345733935545e-06, + "loss": 0.5169, + "step": 22699 + }, + { + "epoch": 0.6957214662253279, + "grad_norm": 1.7151689073220668, + "learning_rate": 2.2377208579349464e-06, + "loss": 0.6721, + "step": 22700 + }, + { + "epoch": 0.6957521147480692, + "grad_norm": 2.097835753650382, + "learning_rate": 2.23730716969378e-06, + "loss": 0.5936, + "step": 22701 + }, + { + "epoch": 0.6957827632708103, + "grad_norm": 1.732974981736827, + "learning_rate": 2.2368935086741326e-06, + "loss": 0.6306, + "step": 22702 + }, + { + "epoch": 0.6958134117935516, + "grad_norm": 1.9231303612826194, + "learning_rate": 2.2364798748800826e-06, + "loss": 0.6081, + "step": 22703 + }, + { + "epoch": 0.6958440603162928, + "grad_norm": 1.8258437386031807, + "learning_rate": 2.2360662683157016e-06, + "loss": 0.5605, + "step": 22704 + }, + { + "epoch": 0.6958747088390339, + "grad_norm": 1.6560632121595371, + "learning_rate": 2.2356526889850666e-06, + "loss": 0.5819, + "step": 22705 + }, + { + "epoch": 0.6959053573617752, + "grad_norm": 0.8341753684466227, + "learning_rate": 2.235239136892255e-06, + "loss": 0.4012, + "step": 22706 + }, + { + "epoch": 0.6959360058845163, + "grad_norm": 1.68154254539331, + "learning_rate": 2.234825612041338e-06, + "loss": 0.5645, + "step": 22707 + }, + { + "epoch": 0.6959666544072576, + "grad_norm": 2.0806554370173123, + "learning_rate": 2.2344121144363912e-06, + "loss": 0.666, + "step": 22708 + }, + { + "epoch": 0.6959973029299987, + "grad_norm": 2.0713953785459056, + "learning_rate": 2.2339986440814916e-06, + "loss": 0.566, + "step": 22709 + }, + { + "epoch": 0.69602795145274, + "grad_norm": 2.1515477270071752, + "learning_rate": 2.233585200980709e-06, + "loss": 0.606, + "step": 22710 + }, + { + "epoch": 0.6960585999754811, + "grad_norm": 0.7613720648788914, + "learning_rate": 2.233171785138121e-06, + "loss": 0.4167, + "step": 22711 + }, + { + "epoch": 0.6960892484982224, + "grad_norm": 1.7095880714447762, + "learning_rate": 2.2327583965577965e-06, + "loss": 0.5829, + "step": 22712 + }, + { 
+ "epoch": 0.6961198970209636, + "grad_norm": 1.5722560474482359, + "learning_rate": 2.232345035243814e-06, + "loss": 0.5675, + "step": 22713 + }, + { + "epoch": 0.6961505455437048, + "grad_norm": 1.7790501003809076, + "learning_rate": 2.2319317012002452e-06, + "loss": 0.5937, + "step": 22714 + }, + { + "epoch": 0.696181194066446, + "grad_norm": 1.7534837423858534, + "learning_rate": 2.231518394431159e-06, + "loss": 0.533, + "step": 22715 + }, + { + "epoch": 0.6962118425891872, + "grad_norm": 1.7243798507160466, + "learning_rate": 2.2311051149406303e-06, + "loss": 0.4857, + "step": 22716 + }, + { + "epoch": 0.6962424911119284, + "grad_norm": 0.7659002287199648, + "learning_rate": 2.2306918627327335e-06, + "loss": 0.4099, + "step": 22717 + }, + { + "epoch": 0.6962731396346696, + "grad_norm": 1.8882552679141367, + "learning_rate": 2.2302786378115367e-06, + "loss": 0.6842, + "step": 22718 + }, + { + "epoch": 0.6963037881574108, + "grad_norm": 1.870043302013146, + "learning_rate": 2.2298654401811126e-06, + "loss": 0.6108, + "step": 22719 + }, + { + "epoch": 0.696334436680152, + "grad_norm": 1.4933574843436779, + "learning_rate": 2.2294522698455332e-06, + "loss": 0.5043, + "step": 22720 + }, + { + "epoch": 0.6963650852028932, + "grad_norm": 1.771406932755389, + "learning_rate": 2.229039126808872e-06, + "loss": 0.6223, + "step": 22721 + }, + { + "epoch": 0.6963957337256345, + "grad_norm": 1.7527099865794793, + "learning_rate": 2.2286260110751968e-06, + "loss": 0.5839, + "step": 22722 + }, + { + "epoch": 0.6964263822483756, + "grad_norm": 2.253457472582216, + "learning_rate": 2.2282129226485767e-06, + "loss": 0.6067, + "step": 22723 + }, + { + "epoch": 0.6964570307711169, + "grad_norm": 1.640746400165096, + "learning_rate": 2.227799861533084e-06, + "loss": 0.5411, + "step": 22724 + }, + { + "epoch": 0.696487679293858, + "grad_norm": 1.8934266441338747, + "learning_rate": 2.2273868277327896e-06, + "loss": 0.4968, + "step": 22725 + }, + { + "epoch": 0.6965183278165993, + "grad_norm": 0.8517234113488077, + "learning_rate": 2.2269738212517617e-06, + "loss": 0.421, + "step": 22726 + }, + { + "epoch": 0.6965489763393404, + "grad_norm": 1.813645140866868, + "learning_rate": 2.2265608420940694e-06, + "loss": 0.5121, + "step": 22727 + }, + { + "epoch": 0.6965796248620817, + "grad_norm": 1.8072546296521188, + "learning_rate": 2.2261478902637847e-06, + "loss": 0.5825, + "step": 22728 + }, + { + "epoch": 0.6966102733848228, + "grad_norm": 1.9057027683401109, + "learning_rate": 2.225734965764973e-06, + "loss": 0.6537, + "step": 22729 + }, + { + "epoch": 0.6966409219075641, + "grad_norm": 1.856903807998441, + "learning_rate": 2.2253220686017056e-06, + "loss": 0.5652, + "step": 22730 + }, + { + "epoch": 0.6966715704303053, + "grad_norm": 1.8510589556309136, + "learning_rate": 2.224909198778047e-06, + "loss": 0.5693, + "step": 22731 + }, + { + "epoch": 0.6967022189530465, + "grad_norm": 1.908035749774656, + "learning_rate": 2.2244963562980713e-06, + "loss": 0.6508, + "step": 22732 + }, + { + "epoch": 0.6967328674757877, + "grad_norm": 1.8153259755874758, + "learning_rate": 2.2240835411658435e-06, + "loss": 0.5841, + "step": 22733 + }, + { + "epoch": 0.6967635159985289, + "grad_norm": 1.5648843569217916, + "learning_rate": 2.2236707533854285e-06, + "loss": 0.5325, + "step": 22734 + }, + { + "epoch": 0.6967941645212701, + "grad_norm": 0.790027964159758, + "learning_rate": 2.2232579929608962e-06, + "loss": 0.4009, + "step": 22735 + }, + { + "epoch": 0.6968248130440112, + "grad_norm": 1.6495338999817106, + 
"learning_rate": 2.222845259896315e-06, + "loss": 0.5924, + "step": 22736 + }, + { + "epoch": 0.6968554615667525, + "grad_norm": 1.6466108004052802, + "learning_rate": 2.2224325541957483e-06, + "loss": 0.5868, + "step": 22737 + }, + { + "epoch": 0.6968861100894936, + "grad_norm": 1.6484808313440018, + "learning_rate": 2.2220198758632645e-06, + "loss": 0.6193, + "step": 22738 + }, + { + "epoch": 0.6969167586122349, + "grad_norm": 1.954945602041418, + "learning_rate": 2.221607224902929e-06, + "loss": 0.6372, + "step": 22739 + }, + { + "epoch": 0.696947407134976, + "grad_norm": 1.9510339321760652, + "learning_rate": 2.221194601318811e-06, + "loss": 0.601, + "step": 22740 + }, + { + "epoch": 0.6969780556577173, + "grad_norm": 2.0019834368964435, + "learning_rate": 2.2207820051149735e-06, + "loss": 0.5957, + "step": 22741 + }, + { + "epoch": 0.6970087041804585, + "grad_norm": 1.8811930810086088, + "learning_rate": 2.220369436295478e-06, + "loss": 0.618, + "step": 22742 + }, + { + "epoch": 0.6970393527031997, + "grad_norm": 1.8418987512819864, + "learning_rate": 2.219956894864397e-06, + "loss": 0.5764, + "step": 22743 + }, + { + "epoch": 0.6970700012259409, + "grad_norm": 1.9985676025438108, + "learning_rate": 2.219544380825793e-06, + "loss": 0.5414, + "step": 22744 + }, + { + "epoch": 0.6971006497486821, + "grad_norm": 1.804564089171388, + "learning_rate": 2.219131894183727e-06, + "loss": 0.544, + "step": 22745 + }, + { + "epoch": 0.6971312982714233, + "grad_norm": 0.7912240729562265, + "learning_rate": 2.2187194349422666e-06, + "loss": 0.4091, + "step": 22746 + }, + { + "epoch": 0.6971619467941645, + "grad_norm": 1.6753592497794894, + "learning_rate": 2.2183070031054748e-06, + "loss": 0.5762, + "step": 22747 + }, + { + "epoch": 0.6971925953169057, + "grad_norm": 0.7738619675542164, + "learning_rate": 2.2178945986774176e-06, + "loss": 0.4124, + "step": 22748 + }, + { + "epoch": 0.697223243839647, + "grad_norm": 1.8993907887277857, + "learning_rate": 2.217482221662155e-06, + "loss": 0.5993, + "step": 22749 + }, + { + "epoch": 0.6972538923623881, + "grad_norm": 0.7918723031128372, + "learning_rate": 2.217069872063752e-06, + "loss": 0.4187, + "step": 22750 + }, + { + "epoch": 0.6972845408851294, + "grad_norm": 1.7807998890923225, + "learning_rate": 2.2166575498862734e-06, + "loss": 0.5926, + "step": 22751 + }, + { + "epoch": 0.6973151894078705, + "grad_norm": 1.6735376156640178, + "learning_rate": 2.2162452551337804e-06, + "loss": 0.634, + "step": 22752 + }, + { + "epoch": 0.6973458379306118, + "grad_norm": 1.7778379796144674, + "learning_rate": 2.215832987810331e-06, + "loss": 0.5243, + "step": 22753 + }, + { + "epoch": 0.6973764864533529, + "grad_norm": 1.9309465262079186, + "learning_rate": 2.215420747919996e-06, + "loss": 0.6395, + "step": 22754 + }, + { + "epoch": 0.6974071349760942, + "grad_norm": 1.912839853521682, + "learning_rate": 2.2150085354668317e-06, + "loss": 0.6173, + "step": 22755 + }, + { + "epoch": 0.6974377834988353, + "grad_norm": 2.040587561838959, + "learning_rate": 2.2145963504548995e-06, + "loss": 0.6048, + "step": 22756 + }, + { + "epoch": 0.6974684320215766, + "grad_norm": 1.9787630159630296, + "learning_rate": 2.2141841928882624e-06, + "loss": 0.5499, + "step": 22757 + }, + { + "epoch": 0.6974990805443178, + "grad_norm": 1.698183969315406, + "learning_rate": 2.2137720627709812e-06, + "loss": 0.5717, + "step": 22758 + }, + { + "epoch": 0.697529729067059, + "grad_norm": 1.713249326644944, + "learning_rate": 2.213359960107118e-06, + "loss": 0.5807, + "step": 22759 + }, + { 
+ "epoch": 0.6975603775898002, + "grad_norm": 1.825007990761892, + "learning_rate": 2.212947884900731e-06, + "loss": 0.5077, + "step": 22760 + }, + { + "epoch": 0.6975910261125414, + "grad_norm": 2.0452219675201517, + "learning_rate": 2.2125358371558815e-06, + "loss": 0.6348, + "step": 22761 + }, + { + "epoch": 0.6976216746352826, + "grad_norm": 1.7695152649745485, + "learning_rate": 2.212123816876631e-06, + "loss": 0.5634, + "step": 22762 + }, + { + "epoch": 0.6976523231580238, + "grad_norm": 1.921414460955554, + "learning_rate": 2.211711824067038e-06, + "loss": 0.6035, + "step": 22763 + }, + { + "epoch": 0.697682971680765, + "grad_norm": 0.8407865940180815, + "learning_rate": 2.2112998587311584e-06, + "loss": 0.4127, + "step": 22764 + }, + { + "epoch": 0.6977136202035062, + "grad_norm": 0.7628555456237724, + "learning_rate": 2.210887920873058e-06, + "loss": 0.3862, + "step": 22765 + }, + { + "epoch": 0.6977442687262474, + "grad_norm": 0.8046118608114142, + "learning_rate": 2.2104760104967915e-06, + "loss": 0.413, + "step": 22766 + }, + { + "epoch": 0.6977749172489885, + "grad_norm": 1.8975308713000456, + "learning_rate": 2.21006412760642e-06, + "loss": 0.5717, + "step": 22767 + }, + { + "epoch": 0.6978055657717298, + "grad_norm": 1.7045921918410976, + "learning_rate": 2.2096522722059987e-06, + "loss": 0.532, + "step": 22768 + }, + { + "epoch": 0.697836214294471, + "grad_norm": 0.774868717773979, + "learning_rate": 2.2092404442995872e-06, + "loss": 0.4076, + "step": 22769 + }, + { + "epoch": 0.6978668628172122, + "grad_norm": 1.816148028984821, + "learning_rate": 2.208828643891246e-06, + "loss": 0.5481, + "step": 22770 + }, + { + "epoch": 0.6978975113399534, + "grad_norm": 1.8890256504726286, + "learning_rate": 2.208416870985028e-06, + "loss": 0.5646, + "step": 22771 + }, + { + "epoch": 0.6979281598626946, + "grad_norm": 1.8271360431834327, + "learning_rate": 2.2080051255849933e-06, + "loss": 0.5985, + "step": 22772 + }, + { + "epoch": 0.6979588083854358, + "grad_norm": 2.027523184884522, + "learning_rate": 2.2075934076952e-06, + "loss": 0.5249, + "step": 22773 + }, + { + "epoch": 0.697989456908177, + "grad_norm": 1.8175086889661691, + "learning_rate": 2.2071817173197014e-06, + "loss": 0.6252, + "step": 22774 + }, + { + "epoch": 0.6980201054309182, + "grad_norm": 1.942028261370505, + "learning_rate": 2.2067700544625577e-06, + "loss": 0.6057, + "step": 22775 + }, + { + "epoch": 0.6980507539536595, + "grad_norm": 1.798231624447754, + "learning_rate": 2.2063584191278213e-06, + "loss": 0.6566, + "step": 22776 + }, + { + "epoch": 0.6980814024764006, + "grad_norm": 1.9198527492034814, + "learning_rate": 2.205946811319551e-06, + "loss": 0.603, + "step": 22777 + }, + { + "epoch": 0.6981120509991419, + "grad_norm": 1.9965721758888757, + "learning_rate": 2.205535231041803e-06, + "loss": 0.4617, + "step": 22778 + }, + { + "epoch": 0.698142699521883, + "grad_norm": 1.806696046319959, + "learning_rate": 2.2051236782986295e-06, + "loss": 0.5842, + "step": 22779 + }, + { + "epoch": 0.6981733480446243, + "grad_norm": 2.1467026465985333, + "learning_rate": 2.2047121530940873e-06, + "loss": 0.6567, + "step": 22780 + }, + { + "epoch": 0.6982039965673654, + "grad_norm": 0.8458617864157207, + "learning_rate": 2.204300655432234e-06, + "loss": 0.414, + "step": 22781 + }, + { + "epoch": 0.6982346450901067, + "grad_norm": 1.7415506446299638, + "learning_rate": 2.2038891853171213e-06, + "loss": 0.5118, + "step": 22782 + }, + { + "epoch": 0.6982652936128478, + "grad_norm": 0.8464637945193016, + "learning_rate": 
2.2034777427527998e-06, + "loss": 0.4188, + "step": 22783 + }, + { + "epoch": 0.6982959421355891, + "grad_norm": 1.948735299107204, + "learning_rate": 2.2030663277433316e-06, + "loss": 0.6413, + "step": 22784 + }, + { + "epoch": 0.6983265906583302, + "grad_norm": 1.734808233986996, + "learning_rate": 2.2026549402927644e-06, + "loss": 0.5326, + "step": 22785 + }, + { + "epoch": 0.6983572391810715, + "grad_norm": 2.1337665775461745, + "learning_rate": 2.202243580405156e-06, + "loss": 0.6219, + "step": 22786 + }, + { + "epoch": 0.6983878877038127, + "grad_norm": 1.6640277792677984, + "learning_rate": 2.2018322480845554e-06, + "loss": 0.5029, + "step": 22787 + }, + { + "epoch": 0.6984185362265539, + "grad_norm": 1.6717636955247908, + "learning_rate": 2.201420943335018e-06, + "loss": 0.5846, + "step": 22788 + }, + { + "epoch": 0.6984491847492951, + "grad_norm": 1.9617789097042704, + "learning_rate": 2.2010096661605973e-06, + "loss": 0.6087, + "step": 22789 + }, + { + "epoch": 0.6984798332720363, + "grad_norm": 0.7609535884097675, + "learning_rate": 2.200598416565343e-06, + "loss": 0.3948, + "step": 22790 + }, + { + "epoch": 0.6985104817947775, + "grad_norm": 1.534927025310699, + "learning_rate": 2.2001871945533087e-06, + "loss": 0.5362, + "step": 22791 + }, + { + "epoch": 0.6985411303175187, + "grad_norm": 0.7666575372418257, + "learning_rate": 2.1997760001285485e-06, + "loss": 0.4086, + "step": 22792 + }, + { + "epoch": 0.6985717788402599, + "grad_norm": 1.9846606634989685, + "learning_rate": 2.19936483329511e-06, + "loss": 0.5925, + "step": 22793 + }, + { + "epoch": 0.6986024273630012, + "grad_norm": 1.7599367989698274, + "learning_rate": 2.198953694057046e-06, + "loss": 0.6261, + "step": 22794 + }, + { + "epoch": 0.6986330758857423, + "grad_norm": 1.8581076928347187, + "learning_rate": 2.1985425824184096e-06, + "loss": 0.522, + "step": 22795 + }, + { + "epoch": 0.6986637244084836, + "grad_norm": 1.7631258913580707, + "learning_rate": 2.1981314983832484e-06, + "loss": 0.6898, + "step": 22796 + }, + { + "epoch": 0.6986943729312247, + "grad_norm": 0.7845632317507744, + "learning_rate": 2.1977204419556163e-06, + "loss": 0.3825, + "step": 22797 + }, + { + "epoch": 0.6987250214539659, + "grad_norm": 1.9051408135488765, + "learning_rate": 2.19730941313956e-06, + "loss": 0.6434, + "step": 22798 + }, + { + "epoch": 0.6987556699767071, + "grad_norm": 1.6824930968218572, + "learning_rate": 2.1968984119391308e-06, + "loss": 0.6344, + "step": 22799 + }, + { + "epoch": 0.6987863184994483, + "grad_norm": 1.99882363910122, + "learning_rate": 2.1964874383583805e-06, + "loss": 0.5499, + "step": 22800 + }, + { + "epoch": 0.6988169670221895, + "grad_norm": 1.6865221778283968, + "learning_rate": 2.1960764924013554e-06, + "loss": 0.6166, + "step": 22801 + }, + { + "epoch": 0.6988476155449307, + "grad_norm": 1.8120804023590837, + "learning_rate": 2.1956655740721056e-06, + "loss": 0.6318, + "step": 22802 + }, + { + "epoch": 0.698878264067672, + "grad_norm": 1.9035693241579745, + "learning_rate": 2.1952546833746825e-06, + "loss": 0.6658, + "step": 22803 + }, + { + "epoch": 0.6989089125904131, + "grad_norm": 1.7341017643967376, + "learning_rate": 2.1948438203131306e-06, + "loss": 0.5472, + "step": 22804 + }, + { + "epoch": 0.6989395611131544, + "grad_norm": 1.8826041870392827, + "learning_rate": 2.194432984891501e-06, + "loss": 0.6313, + "step": 22805 + }, + { + "epoch": 0.6989702096358955, + "grad_norm": 1.8707814030444583, + "learning_rate": 2.194022177113842e-06, + "loss": 0.6512, + "step": 22806 + }, + { + 
"epoch": 0.6990008581586368, + "grad_norm": 1.8058903111811595, + "learning_rate": 2.193611396984199e-06, + "loss": 0.5684, + "step": 22807 + }, + { + "epoch": 0.6990315066813779, + "grad_norm": 0.7754403119270175, + "learning_rate": 2.193200644506622e-06, + "loss": 0.4143, + "step": 22808 + }, + { + "epoch": 0.6990621552041192, + "grad_norm": 2.041276512887243, + "learning_rate": 2.1927899196851564e-06, + "loss": 0.5135, + "step": 22809 + }, + { + "epoch": 0.6990928037268603, + "grad_norm": 1.817845996600563, + "learning_rate": 2.19237922252385e-06, + "loss": 0.5416, + "step": 22810 + }, + { + "epoch": 0.6991234522496016, + "grad_norm": 1.825876497125441, + "learning_rate": 2.1919685530267503e-06, + "loss": 0.5917, + "step": 22811 + }, + { + "epoch": 0.6991541007723427, + "grad_norm": 1.7872732480227917, + "learning_rate": 2.191557911197902e-06, + "loss": 0.6586, + "step": 22812 + }, + { + "epoch": 0.699184749295084, + "grad_norm": 1.466533039813173, + "learning_rate": 2.1911472970413517e-06, + "loss": 0.5443, + "step": 22813 + }, + { + "epoch": 0.6992153978178252, + "grad_norm": 1.6660468353410591, + "learning_rate": 2.1907367105611475e-06, + "loss": 0.5688, + "step": 22814 + }, + { + "epoch": 0.6992460463405664, + "grad_norm": 2.131347852356924, + "learning_rate": 2.1903261517613324e-06, + "loss": 0.5956, + "step": 22815 + }, + { + "epoch": 0.6992766948633076, + "grad_norm": 1.930047738184302, + "learning_rate": 2.1899156206459515e-06, + "loss": 0.6469, + "step": 22816 + }, + { + "epoch": 0.6993073433860488, + "grad_norm": 1.7815727493065772, + "learning_rate": 2.1895051172190535e-06, + "loss": 0.5402, + "step": 22817 + }, + { + "epoch": 0.69933799190879, + "grad_norm": 1.8081894569291301, + "learning_rate": 2.1890946414846785e-06, + "loss": 0.6676, + "step": 22818 + }, + { + "epoch": 0.6993686404315312, + "grad_norm": 1.916404578729651, + "learning_rate": 2.188684193446875e-06, + "loss": 0.5721, + "step": 22819 + }, + { + "epoch": 0.6993992889542724, + "grad_norm": 1.8205411879015394, + "learning_rate": 2.188273773109684e-06, + "loss": 0.6207, + "step": 22820 + }, + { + "epoch": 0.6994299374770137, + "grad_norm": 1.7567555139999587, + "learning_rate": 2.1878633804771506e-06, + "loss": 0.5843, + "step": 22821 + }, + { + "epoch": 0.6994605859997548, + "grad_norm": 1.6996554407557818, + "learning_rate": 2.18745301555332e-06, + "loss": 0.5956, + "step": 22822 + }, + { + "epoch": 0.6994912345224961, + "grad_norm": 1.630153539136616, + "learning_rate": 2.187042678342234e-06, + "loss": 0.5888, + "step": 22823 + }, + { + "epoch": 0.6995218830452372, + "grad_norm": 1.8721714796553732, + "learning_rate": 2.186632368847935e-06, + "loss": 0.6644, + "step": 22824 + }, + { + "epoch": 0.6995525315679785, + "grad_norm": 1.6866660847725916, + "learning_rate": 2.18622208707447e-06, + "loss": 0.5754, + "step": 22825 + }, + { + "epoch": 0.6995831800907196, + "grad_norm": 1.7190413016277915, + "learning_rate": 2.185811833025876e-06, + "loss": 0.635, + "step": 22826 + }, + { + "epoch": 0.6996138286134609, + "grad_norm": 1.8593800784998835, + "learning_rate": 2.1854016067062006e-06, + "loss": 0.5452, + "step": 22827 + }, + { + "epoch": 0.699644477136202, + "grad_norm": 1.9735014624734837, + "learning_rate": 2.184991408119481e-06, + "loss": 0.6409, + "step": 22828 + }, + { + "epoch": 0.6996751256589432, + "grad_norm": 1.6858127484942012, + "learning_rate": 2.184581237269761e-06, + "loss": 0.5719, + "step": 22829 + }, + { + "epoch": 0.6997057741816844, + "grad_norm": 1.8479834439629965, + "learning_rate": 
2.184171094161085e-06, + "loss": 0.5962, + "step": 22830 + }, + { + "epoch": 0.6997364227044256, + "grad_norm": 1.5837593275078599, + "learning_rate": 2.1837609787974894e-06, + "loss": 0.4814, + "step": 22831 + }, + { + "epoch": 0.6997670712271669, + "grad_norm": 0.7877135209156009, + "learning_rate": 2.1833508911830175e-06, + "loss": 0.4021, + "step": 22832 + }, + { + "epoch": 0.699797719749908, + "grad_norm": 1.7889283692332174, + "learning_rate": 2.182940831321712e-06, + "loss": 0.6213, + "step": 22833 + }, + { + "epoch": 0.6998283682726493, + "grad_norm": 1.7901466118777165, + "learning_rate": 2.1825307992176094e-06, + "loss": 0.5926, + "step": 22834 + }, + { + "epoch": 0.6998590167953904, + "grad_norm": 0.8086512738728966, + "learning_rate": 2.1821207948747513e-06, + "loss": 0.4028, + "step": 22835 + }, + { + "epoch": 0.6998896653181317, + "grad_norm": 2.237638422239824, + "learning_rate": 2.18171081829718e-06, + "loss": 0.6438, + "step": 22836 + }, + { + "epoch": 0.6999203138408728, + "grad_norm": 1.9411643396430616, + "learning_rate": 2.1813008694889314e-06, + "loss": 0.6401, + "step": 22837 + }, + { + "epoch": 0.6999509623636141, + "grad_norm": 1.8326164784280388, + "learning_rate": 2.1808909484540486e-06, + "loss": 0.6374, + "step": 22838 + }, + { + "epoch": 0.6999816108863552, + "grad_norm": 1.8894427279900063, + "learning_rate": 2.180481055196565e-06, + "loss": 0.5453, + "step": 22839 + }, + { + "epoch": 0.7000122594090965, + "grad_norm": 1.7441155550521523, + "learning_rate": 2.180071189720526e-06, + "loss": 0.6182, + "step": 22840 + }, + { + "epoch": 0.7000429079318377, + "grad_norm": 1.7962695078713702, + "learning_rate": 2.1796613520299677e-06, + "loss": 0.6549, + "step": 22841 + }, + { + "epoch": 0.7000735564545789, + "grad_norm": 1.9243105020348978, + "learning_rate": 2.179251542128925e-06, + "loss": 0.6341, + "step": 22842 + }, + { + "epoch": 0.7001042049773201, + "grad_norm": 1.7425602216453338, + "learning_rate": 2.1788417600214398e-06, + "loss": 0.6027, + "step": 22843 + }, + { + "epoch": 0.7001348535000613, + "grad_norm": 0.7651811404263347, + "learning_rate": 2.1784320057115493e-06, + "loss": 0.4005, + "step": 22844 + }, + { + "epoch": 0.7001655020228025, + "grad_norm": 1.658776934239972, + "learning_rate": 2.1780222792032885e-06, + "loss": 0.5156, + "step": 22845 + }, + { + "epoch": 0.7001961505455437, + "grad_norm": 1.868534022962396, + "learning_rate": 2.1776125805006964e-06, + "loss": 0.6287, + "step": 22846 + }, + { + "epoch": 0.7002267990682849, + "grad_norm": 1.7766689901380281, + "learning_rate": 2.17720290960781e-06, + "loss": 0.6203, + "step": 22847 + }, + { + "epoch": 0.7002574475910261, + "grad_norm": 1.5896669772298289, + "learning_rate": 2.176793266528667e-06, + "loss": 0.5279, + "step": 22848 + }, + { + "epoch": 0.7002880961137673, + "grad_norm": 1.9243705657999028, + "learning_rate": 2.1763836512673025e-06, + "loss": 0.6688, + "step": 22849 + }, + { + "epoch": 0.7003187446365086, + "grad_norm": 1.7575782943816662, + "learning_rate": 2.1759740638277486e-06, + "loss": 0.5872, + "step": 22850 + }, + { + "epoch": 0.7003493931592497, + "grad_norm": 0.8421903470649282, + "learning_rate": 2.175564504214049e-06, + "loss": 0.4158, + "step": 22851 + }, + { + "epoch": 0.700380041681991, + "grad_norm": 1.7481724202051054, + "learning_rate": 2.175154972430234e-06, + "loss": 0.5706, + "step": 22852 + }, + { + "epoch": 0.7004106902047321, + "grad_norm": 1.8352275649620942, + "learning_rate": 2.1747454684803387e-06, + "loss": 0.6131, + "step": 22853 + }, + { + 
"epoch": 0.7004413387274734, + "grad_norm": 1.7912724505150674, + "learning_rate": 2.174335992368399e-06, + "loss": 0.5914, + "step": 22854 + }, + { + "epoch": 0.7004719872502145, + "grad_norm": 1.528940232842247, + "learning_rate": 2.1739265440984513e-06, + "loss": 0.4765, + "step": 22855 + }, + { + "epoch": 0.7005026357729558, + "grad_norm": 1.8947855832374714, + "learning_rate": 2.1735171236745275e-06, + "loss": 0.5326, + "step": 22856 + }, + { + "epoch": 0.700533284295697, + "grad_norm": 2.1338439018242488, + "learning_rate": 2.1731077311006616e-06, + "loss": 0.612, + "step": 22857 + }, + { + "epoch": 0.7005639328184382, + "grad_norm": 1.690758972339081, + "learning_rate": 2.172698366380889e-06, + "loss": 0.6014, + "step": 22858 + }, + { + "epoch": 0.7005945813411794, + "grad_norm": 1.7883530876840397, + "learning_rate": 2.1722890295192446e-06, + "loss": 0.5154, + "step": 22859 + }, + { + "epoch": 0.7006252298639205, + "grad_norm": 1.700017963954818, + "learning_rate": 2.17187972051976e-06, + "loss": 0.6822, + "step": 22860 + }, + { + "epoch": 0.7006558783866618, + "grad_norm": 0.7996697664759217, + "learning_rate": 2.1714704393864638e-06, + "loss": 0.3974, + "step": 22861 + }, + { + "epoch": 0.7006865269094029, + "grad_norm": 1.8366087322110007, + "learning_rate": 2.1710611861233977e-06, + "loss": 0.6067, + "step": 22862 + }, + { + "epoch": 0.7007171754321442, + "grad_norm": 1.7931876560837638, + "learning_rate": 2.170651960734589e-06, + "loss": 0.5436, + "step": 22863 + }, + { + "epoch": 0.7007478239548853, + "grad_norm": 1.6379114701091328, + "learning_rate": 2.1702427632240684e-06, + "loss": 0.5937, + "step": 22864 + }, + { + "epoch": 0.7007784724776266, + "grad_norm": 1.7831950318526826, + "learning_rate": 2.1698335935958705e-06, + "loss": 0.6468, + "step": 22865 + }, + { + "epoch": 0.7008091210003677, + "grad_norm": 1.79030272152828, + "learning_rate": 2.169424451854026e-06, + "loss": 0.5411, + "step": 22866 + }, + { + "epoch": 0.700839769523109, + "grad_norm": 2.113494158350961, + "learning_rate": 2.1690153380025685e-06, + "loss": 0.7013, + "step": 22867 + }, + { + "epoch": 0.7008704180458502, + "grad_norm": 1.8340857840946971, + "learning_rate": 2.168606252045525e-06, + "loss": 0.5973, + "step": 22868 + }, + { + "epoch": 0.7009010665685914, + "grad_norm": 1.6826603734500611, + "learning_rate": 2.1681971939869295e-06, + "loss": 0.5058, + "step": 22869 + }, + { + "epoch": 0.7009317150913326, + "grad_norm": 1.9567708236914139, + "learning_rate": 2.1677881638308124e-06, + "loss": 0.5329, + "step": 22870 + }, + { + "epoch": 0.7009623636140738, + "grad_norm": 1.7942772449448061, + "learning_rate": 2.167379161581204e-06, + "loss": 0.5672, + "step": 22871 + }, + { + "epoch": 0.700993012136815, + "grad_norm": 1.7437657502290347, + "learning_rate": 2.1669701872421313e-06, + "loss": 0.5345, + "step": 22872 + }, + { + "epoch": 0.7010236606595562, + "grad_norm": 1.6893520483426028, + "learning_rate": 2.166561240817626e-06, + "loss": 0.5599, + "step": 22873 + }, + { + "epoch": 0.7010543091822974, + "grad_norm": 0.816870341227516, + "learning_rate": 2.1661523223117176e-06, + "loss": 0.4177, + "step": 22874 + }, + { + "epoch": 0.7010849577050386, + "grad_norm": 1.8958724044205084, + "learning_rate": 2.1657434317284377e-06, + "loss": 0.6346, + "step": 22875 + }, + { + "epoch": 0.7011156062277798, + "grad_norm": 1.6315668614809629, + "learning_rate": 2.165334569071811e-06, + "loss": 0.6127, + "step": 22876 + }, + { + "epoch": 0.7011462547505211, + "grad_norm": 1.6672914520897721, + 
"learning_rate": 2.164925734345868e-06, + "loss": 0.5376, + "step": 22877 + }, + { + "epoch": 0.7011769032732622, + "grad_norm": 1.7546671704668273, + "learning_rate": 2.164516927554639e-06, + "loss": 0.6237, + "step": 22878 + }, + { + "epoch": 0.7012075517960035, + "grad_norm": 0.790261564945738, + "learning_rate": 2.16410814870215e-06, + "loss": 0.4233, + "step": 22879 + }, + { + "epoch": 0.7012382003187446, + "grad_norm": 1.6109330344568775, + "learning_rate": 2.163699397792426e-06, + "loss": 0.482, + "step": 22880 + }, + { + "epoch": 0.7012688488414859, + "grad_norm": 1.6583647696367196, + "learning_rate": 2.1632906748295006e-06, + "loss": 0.587, + "step": 22881 + }, + { + "epoch": 0.701299497364227, + "grad_norm": 1.8287979716482585, + "learning_rate": 2.1628819798173983e-06, + "loss": 0.6658, + "step": 22882 + }, + { + "epoch": 0.7013301458869683, + "grad_norm": 1.5255846849870978, + "learning_rate": 2.1624733127601437e-06, + "loss": 0.5815, + "step": 22883 + }, + { + "epoch": 0.7013607944097094, + "grad_norm": 1.9656176514827004, + "learning_rate": 2.1620646736617658e-06, + "loss": 0.6345, + "step": 22884 + }, + { + "epoch": 0.7013914429324507, + "grad_norm": 1.778494733646985, + "learning_rate": 2.1616560625262904e-06, + "loss": 0.6138, + "step": 22885 + }, + { + "epoch": 0.7014220914551919, + "grad_norm": 1.9031310208519212, + "learning_rate": 2.1612474793577458e-06, + "loss": 0.7343, + "step": 22886 + }, + { + "epoch": 0.7014527399779331, + "grad_norm": 0.773258860959156, + "learning_rate": 2.160838924160155e-06, + "loss": 0.4001, + "step": 22887 + }, + { + "epoch": 0.7014833885006743, + "grad_norm": 0.8448531467611754, + "learning_rate": 2.160430396937544e-06, + "loss": 0.3998, + "step": 22888 + }, + { + "epoch": 0.7015140370234155, + "grad_norm": 1.8033121385579465, + "learning_rate": 2.1600218976939413e-06, + "loss": 0.4359, + "step": 22889 + }, + { + "epoch": 0.7015446855461567, + "grad_norm": 2.2273253803067568, + "learning_rate": 2.159613426433369e-06, + "loss": 0.5795, + "step": 22890 + }, + { + "epoch": 0.7015753340688978, + "grad_norm": 1.8330283538189287, + "learning_rate": 2.1592049831598487e-06, + "loss": 0.5802, + "step": 22891 + }, + { + "epoch": 0.7016059825916391, + "grad_norm": 1.758519038416269, + "learning_rate": 2.1587965678774125e-06, + "loss": 0.5424, + "step": 22892 + }, + { + "epoch": 0.7016366311143802, + "grad_norm": 0.7665960750603136, + "learning_rate": 2.1583881805900786e-06, + "loss": 0.416, + "step": 22893 + }, + { + "epoch": 0.7016672796371215, + "grad_norm": 0.7790105960489352, + "learning_rate": 2.157979821301875e-06, + "loss": 0.4157, + "step": 22894 + }, + { + "epoch": 0.7016979281598626, + "grad_norm": 1.669737988840252, + "learning_rate": 2.1575714900168217e-06, + "loss": 0.4718, + "step": 22895 + }, + { + "epoch": 0.7017285766826039, + "grad_norm": 0.7940229093296726, + "learning_rate": 2.157163186738943e-06, + "loss": 0.4152, + "step": 22896 + }, + { + "epoch": 0.7017592252053451, + "grad_norm": 1.7536108265169046, + "learning_rate": 2.156754911472265e-06, + "loss": 0.4579, + "step": 22897 + }, + { + "epoch": 0.7017898737280863, + "grad_norm": 1.9590651622140762, + "learning_rate": 2.156346664220807e-06, + "loss": 0.7166, + "step": 22898 + }, + { + "epoch": 0.7018205222508275, + "grad_norm": 1.8733200877264609, + "learning_rate": 2.155938444988593e-06, + "loss": 0.5969, + "step": 22899 + }, + { + "epoch": 0.7018511707735687, + "grad_norm": 2.104462614324675, + "learning_rate": 2.1555302537796463e-06, + "loss": 0.582, + "step": 22900 + }, + 
{ + "epoch": 0.7018818192963099, + "grad_norm": 2.0967730286428568, + "learning_rate": 2.1551220905979864e-06, + "loss": 0.5648, + "step": 22901 + }, + { + "epoch": 0.7019124678190511, + "grad_norm": 2.2330272120235444, + "learning_rate": 2.154713955447636e-06, + "loss": 0.6272, + "step": 22902 + }, + { + "epoch": 0.7019431163417923, + "grad_norm": 1.788533712009177, + "learning_rate": 2.154305848332619e-06, + "loss": 0.5882, + "step": 22903 + }, + { + "epoch": 0.7019737648645336, + "grad_norm": 1.761771632968035, + "learning_rate": 2.153897769256953e-06, + "loss": 0.5097, + "step": 22904 + }, + { + "epoch": 0.7020044133872747, + "grad_norm": 1.8510177401685808, + "learning_rate": 2.1534897182246623e-06, + "loss": 0.6007, + "step": 22905 + }, + { + "epoch": 0.702035061910016, + "grad_norm": 1.8260070383184794, + "learning_rate": 2.1530816952397636e-06, + "loss": 0.5765, + "step": 22906 + }, + { + "epoch": 0.7020657104327571, + "grad_norm": 1.9877132828748383, + "learning_rate": 2.15267370030628e-06, + "loss": 0.6102, + "step": 22907 + }, + { + "epoch": 0.7020963589554984, + "grad_norm": 0.7977309077679469, + "learning_rate": 2.152265733428232e-06, + "loss": 0.4125, + "step": 22908 + }, + { + "epoch": 0.7021270074782395, + "grad_norm": 1.9407940595329995, + "learning_rate": 2.151857794609637e-06, + "loss": 0.6104, + "step": 22909 + }, + { + "epoch": 0.7021576560009808, + "grad_norm": 2.2003345429847405, + "learning_rate": 2.1514498838545157e-06, + "loss": 0.6446, + "step": 22910 + }, + { + "epoch": 0.7021883045237219, + "grad_norm": 1.8522361567687546, + "learning_rate": 2.1510420011668892e-06, + "loss": 0.6196, + "step": 22911 + }, + { + "epoch": 0.7022189530464632, + "grad_norm": 1.8829303493151297, + "learning_rate": 2.1506341465507728e-06, + "loss": 0.5937, + "step": 22912 + }, + { + "epoch": 0.7022496015692044, + "grad_norm": 1.9096254871394969, + "learning_rate": 2.150226320010188e-06, + "loss": 0.5452, + "step": 22913 + }, + { + "epoch": 0.7022802500919456, + "grad_norm": 1.8406368103034785, + "learning_rate": 2.1498185215491534e-06, + "loss": 0.4593, + "step": 22914 + }, + { + "epoch": 0.7023108986146868, + "grad_norm": 1.960815962208564, + "learning_rate": 2.149410751171685e-06, + "loss": 0.5762, + "step": 22915 + }, + { + "epoch": 0.702341547137428, + "grad_norm": 2.2369324001366384, + "learning_rate": 2.1490030088818032e-06, + "loss": 0.6002, + "step": 22916 + }, + { + "epoch": 0.7023721956601692, + "grad_norm": 1.9587593702823163, + "learning_rate": 2.1485952946835227e-06, + "loss": 0.5857, + "step": 22917 + }, + { + "epoch": 0.7024028441829104, + "grad_norm": 1.7965881619414312, + "learning_rate": 2.148187608580862e-06, + "loss": 0.5896, + "step": 22918 + }, + { + "epoch": 0.7024334927056516, + "grad_norm": 1.8881646049746232, + "learning_rate": 2.1477799505778407e-06, + "loss": 0.5313, + "step": 22919 + }, + { + "epoch": 0.7024641412283928, + "grad_norm": 0.8067728944009381, + "learning_rate": 2.147372320678471e-06, + "loss": 0.4044, + "step": 22920 + }, + { + "epoch": 0.702494789751134, + "grad_norm": 1.7524009799997067, + "learning_rate": 2.146964718886772e-06, + "loss": 0.5574, + "step": 22921 + }, + { + "epoch": 0.7025254382738751, + "grad_norm": 1.913124479297782, + "learning_rate": 2.1465571452067614e-06, + "loss": 0.5694, + "step": 22922 + }, + { + "epoch": 0.7025560867966164, + "grad_norm": 1.9814365838206247, + "learning_rate": 2.1461495996424513e-06, + "loss": 0.5693, + "step": 22923 + }, + { + "epoch": 0.7025867353193576, + "grad_norm": 1.8710827562452912, + 
"learning_rate": 2.145742082197862e-06, + "loss": 0.5704, + "step": 22924 + }, + { + "epoch": 0.7026173838420988, + "grad_norm": 1.758573387729174, + "learning_rate": 2.1453345928770037e-06, + "loss": 0.5319, + "step": 22925 + }, + { + "epoch": 0.70264803236484, + "grad_norm": 1.9516368331735248, + "learning_rate": 2.144927131683894e-06, + "loss": 0.6699, + "step": 22926 + }, + { + "epoch": 0.7026786808875812, + "grad_norm": 1.652742969715886, + "learning_rate": 2.14451969862255e-06, + "loss": 0.5031, + "step": 22927 + }, + { + "epoch": 0.7027093294103224, + "grad_norm": 1.6394081152289748, + "learning_rate": 2.1441122936969814e-06, + "loss": 0.5477, + "step": 22928 + }, + { + "epoch": 0.7027399779330636, + "grad_norm": 0.8815962671151183, + "learning_rate": 2.1437049169112062e-06, + "loss": 0.4057, + "step": 22929 + }, + { + "epoch": 0.7027706264558048, + "grad_norm": 1.844575239375595, + "learning_rate": 2.1432975682692387e-06, + "loss": 0.667, + "step": 22930 + }, + { + "epoch": 0.702801274978546, + "grad_norm": 0.8105891039199201, + "learning_rate": 2.142890247775089e-06, + "loss": 0.392, + "step": 22931 + }, + { + "epoch": 0.7028319235012872, + "grad_norm": 1.8117837435710626, + "learning_rate": 2.142482955432773e-06, + "loss": 0.5374, + "step": 22932 + }, + { + "epoch": 0.7028625720240285, + "grad_norm": 1.9867641355217522, + "learning_rate": 2.142075691246305e-06, + "loss": 0.6654, + "step": 22933 + }, + { + "epoch": 0.7028932205467696, + "grad_norm": 1.7240760602650602, + "learning_rate": 2.1416684552196947e-06, + "loss": 0.5474, + "step": 22934 + }, + { + "epoch": 0.7029238690695109, + "grad_norm": 1.8685906619621486, + "learning_rate": 2.141261247356959e-06, + "loss": 0.6028, + "step": 22935 + }, + { + "epoch": 0.702954517592252, + "grad_norm": 1.7561506927704738, + "learning_rate": 2.1408540676621054e-06, + "loss": 0.6104, + "step": 22936 + }, + { + "epoch": 0.7029851661149933, + "grad_norm": 1.9246743455471522, + "learning_rate": 2.140446916139148e-06, + "loss": 0.5835, + "step": 22937 + }, + { + "epoch": 0.7030158146377344, + "grad_norm": 0.8025274581537983, + "learning_rate": 2.140039792792101e-06, + "loss": 0.3926, + "step": 22938 + }, + { + "epoch": 0.7030464631604757, + "grad_norm": 1.8710950424238089, + "learning_rate": 2.1396326976249716e-06, + "loss": 0.5267, + "step": 22939 + }, + { + "epoch": 0.7030771116832168, + "grad_norm": 1.8835115746599718, + "learning_rate": 2.139225630641773e-06, + "loss": 0.5523, + "step": 22940 + }, + { + "epoch": 0.7031077602059581, + "grad_norm": 1.558887077615837, + "learning_rate": 2.1388185918465183e-06, + "loss": 0.5008, + "step": 22941 + }, + { + "epoch": 0.7031384087286993, + "grad_norm": 2.06479060588471, + "learning_rate": 2.1384115812432138e-06, + "loss": 0.66, + "step": 22942 + }, + { + "epoch": 0.7031690572514405, + "grad_norm": 1.914756334135295, + "learning_rate": 2.138004598835872e-06, + "loss": 0.5795, + "step": 22943 + }, + { + "epoch": 0.7031997057741817, + "grad_norm": 0.881552445998556, + "learning_rate": 2.1375976446285057e-06, + "loss": 0.4157, + "step": 22944 + }, + { + "epoch": 0.7032303542969229, + "grad_norm": 1.911998914268049, + "learning_rate": 2.13719071862512e-06, + "loss": 0.6829, + "step": 22945 + }, + { + "epoch": 0.7032610028196641, + "grad_norm": 1.711631991950411, + "learning_rate": 2.1367838208297287e-06, + "loss": 0.5529, + "step": 22946 + }, + { + "epoch": 0.7032916513424053, + "grad_norm": 1.7275847312005883, + "learning_rate": 2.1363769512463357e-06, + "loss": 0.5938, + "step": 22947 + }, + { + 
"epoch": 0.7033222998651465, + "grad_norm": 0.829339768330126, + "learning_rate": 2.1359701098789558e-06, + "loss": 0.4057, + "step": 22948 + }, + { + "epoch": 0.7033529483878878, + "grad_norm": 1.6883800414961039, + "learning_rate": 2.1355632967315965e-06, + "loss": 0.5829, + "step": 22949 + }, + { + "epoch": 0.7033835969106289, + "grad_norm": 1.9134709624792579, + "learning_rate": 2.1351565118082624e-06, + "loss": 0.5884, + "step": 22950 + }, + { + "epoch": 0.7034142454333702, + "grad_norm": 0.8175530589386361, + "learning_rate": 2.1347497551129644e-06, + "loss": 0.4326, + "step": 22951 + }, + { + "epoch": 0.7034448939561113, + "grad_norm": 1.806773540509796, + "learning_rate": 2.1343430266497116e-06, + "loss": 0.5825, + "step": 22952 + }, + { + "epoch": 0.7034755424788525, + "grad_norm": 1.87534184550844, + "learning_rate": 2.1339363264225084e-06, + "loss": 0.6405, + "step": 22953 + }, + { + "epoch": 0.7035061910015937, + "grad_norm": 1.9055823780859293, + "learning_rate": 2.133529654435364e-06, + "loss": 0.6532, + "step": 22954 + }, + { + "epoch": 0.7035368395243349, + "grad_norm": 1.687605232706679, + "learning_rate": 2.1331230106922857e-06, + "loss": 0.5945, + "step": 22955 + }, + { + "epoch": 0.7035674880470761, + "grad_norm": 2.006210710544652, + "learning_rate": 2.1327163951972814e-06, + "loss": 0.6575, + "step": 22956 + }, + { + "epoch": 0.7035981365698173, + "grad_norm": 1.8211196514189854, + "learning_rate": 2.132309807954356e-06, + "loss": 0.6355, + "step": 22957 + }, + { + "epoch": 0.7036287850925586, + "grad_norm": 1.972770015127312, + "learning_rate": 2.131903248967512e-06, + "loss": 0.6067, + "step": 22958 + }, + { + "epoch": 0.7036594336152997, + "grad_norm": 1.8001282863257348, + "learning_rate": 2.131496718240763e-06, + "loss": 0.5903, + "step": 22959 + }, + { + "epoch": 0.703690082138041, + "grad_norm": 1.9337598019337037, + "learning_rate": 2.131090215778111e-06, + "loss": 0.6655, + "step": 22960 + }, + { + "epoch": 0.7037207306607821, + "grad_norm": 1.9334070585682799, + "learning_rate": 2.130683741583559e-06, + "loss": 0.6162, + "step": 22961 + }, + { + "epoch": 0.7037513791835234, + "grad_norm": 1.8729443894310724, + "learning_rate": 2.1302772956611144e-06, + "loss": 0.571, + "step": 22962 + }, + { + "epoch": 0.7037820277062645, + "grad_norm": 1.7341702411304112, + "learning_rate": 2.129870878014784e-06, + "loss": 0.5315, + "step": 22963 + }, + { + "epoch": 0.7038126762290058, + "grad_norm": 1.848741226477364, + "learning_rate": 2.1294644886485677e-06, + "loss": 0.5628, + "step": 22964 + }, + { + "epoch": 0.7038433247517469, + "grad_norm": 1.678894298873829, + "learning_rate": 2.129058127566473e-06, + "loss": 0.5886, + "step": 22965 + }, + { + "epoch": 0.7038739732744882, + "grad_norm": 1.9060063099945697, + "learning_rate": 2.128651794772503e-06, + "loss": 0.6609, + "step": 22966 + }, + { + "epoch": 0.7039046217972293, + "grad_norm": 1.6458540291827846, + "learning_rate": 2.1282454902706625e-06, + "loss": 0.5426, + "step": 22967 + }, + { + "epoch": 0.7039352703199706, + "grad_norm": 1.5716953835208802, + "learning_rate": 2.1278392140649547e-06, + "loss": 0.5543, + "step": 22968 + }, + { + "epoch": 0.7039659188427118, + "grad_norm": 1.7861671386989344, + "learning_rate": 2.1274329661593795e-06, + "loss": 0.5748, + "step": 22969 + }, + { + "epoch": 0.703996567365453, + "grad_norm": 1.595204140642166, + "learning_rate": 2.127026746557943e-06, + "loss": 0.5996, + "step": 22970 + }, + { + "epoch": 0.7040272158881942, + "grad_norm": 1.8643102885749794, + 
"learning_rate": 2.1266205552646485e-06, + "loss": 0.5649, + "step": 22971 + }, + { + "epoch": 0.7040578644109354, + "grad_norm": 1.7469489872150041, + "learning_rate": 2.1262143922834953e-06, + "loss": 0.5767, + "step": 22972 + }, + { + "epoch": 0.7040885129336766, + "grad_norm": 1.8836797934674026, + "learning_rate": 2.1258082576184868e-06, + "loss": 0.674, + "step": 22973 + }, + { + "epoch": 0.7041191614564178, + "grad_norm": 1.556026580495227, + "learning_rate": 2.125402151273625e-06, + "loss": 0.5498, + "step": 22974 + }, + { + "epoch": 0.704149809979159, + "grad_norm": 1.9211724335543336, + "learning_rate": 2.124996073252913e-06, + "loss": 0.7022, + "step": 22975 + }, + { + "epoch": 0.7041804585019003, + "grad_norm": 1.7577954728751737, + "learning_rate": 2.1245900235603507e-06, + "loss": 0.5313, + "step": 22976 + }, + { + "epoch": 0.7042111070246414, + "grad_norm": 1.8271679697675585, + "learning_rate": 2.124184002199934e-06, + "loss": 0.6957, + "step": 22977 + }, + { + "epoch": 0.7042417555473827, + "grad_norm": 1.625532793472091, + "learning_rate": 2.1237780091756726e-06, + "loss": 0.5593, + "step": 22978 + }, + { + "epoch": 0.7042724040701238, + "grad_norm": 1.978091423909617, + "learning_rate": 2.123372044491562e-06, + "loss": 0.6423, + "step": 22979 + }, + { + "epoch": 0.7043030525928651, + "grad_norm": 1.8050569544628843, + "learning_rate": 2.1229661081516017e-06, + "loss": 0.6086, + "step": 22980 + }, + { + "epoch": 0.7043337011156062, + "grad_norm": 1.784351238605012, + "learning_rate": 2.1225602001597918e-06, + "loss": 0.5697, + "step": 22981 + }, + { + "epoch": 0.7043643496383475, + "grad_norm": 1.941977993627292, + "learning_rate": 2.122154320520134e-06, + "loss": 0.6627, + "step": 22982 + }, + { + "epoch": 0.7043949981610886, + "grad_norm": 1.8324185899023746, + "learning_rate": 2.1217484692366245e-06, + "loss": 0.5855, + "step": 22983 + }, + { + "epoch": 0.7044256466838298, + "grad_norm": 1.8202477413814213, + "learning_rate": 2.121342646313264e-06, + "loss": 0.5951, + "step": 22984 + }, + { + "epoch": 0.704456295206571, + "grad_norm": 1.7911638803622156, + "learning_rate": 2.1209368517540506e-06, + "loss": 0.6304, + "step": 22985 + }, + { + "epoch": 0.7044869437293122, + "grad_norm": 0.8615692926877834, + "learning_rate": 2.120531085562985e-06, + "loss": 0.4104, + "step": 22986 + }, + { + "epoch": 0.7045175922520535, + "grad_norm": 1.846191260433675, + "learning_rate": 2.120125347744063e-06, + "loss": 0.5224, + "step": 22987 + }, + { + "epoch": 0.7045482407747946, + "grad_norm": 0.8114149181022915, + "learning_rate": 2.1197196383012795e-06, + "loss": 0.4251, + "step": 22988 + }, + { + "epoch": 0.7045788892975359, + "grad_norm": 1.7063857863620326, + "learning_rate": 2.119313957238639e-06, + "loss": 0.5435, + "step": 22989 + }, + { + "epoch": 0.704609537820277, + "grad_norm": 2.0700587021548387, + "learning_rate": 2.1189083045601355e-06, + "loss": 0.5799, + "step": 22990 + }, + { + "epoch": 0.7046401863430183, + "grad_norm": 1.8027389562243508, + "learning_rate": 2.118502680269763e-06, + "loss": 0.5359, + "step": 22991 + }, + { + "epoch": 0.7046708348657594, + "grad_norm": 1.6984267469528294, + "learning_rate": 2.1180970843715215e-06, + "loss": 0.4667, + "step": 22992 + }, + { + "epoch": 0.7047014833885007, + "grad_norm": 2.173482520559876, + "learning_rate": 2.1176915168694067e-06, + "loss": 0.5699, + "step": 22993 + }, + { + "epoch": 0.7047321319112418, + "grad_norm": 1.7756580966586528, + "learning_rate": 2.1172859777674164e-06, + "loss": 0.5713, + "step": 22994 + }, 
+ { + "epoch": 0.7047627804339831, + "grad_norm": 1.7598363358336255, + "learning_rate": 2.116880467069543e-06, + "loss": 0.5445, + "step": 22995 + }, + { + "epoch": 0.7047934289567243, + "grad_norm": 2.0507682973915156, + "learning_rate": 2.1164749847797843e-06, + "loss": 0.6033, + "step": 22996 + }, + { + "epoch": 0.7048240774794655, + "grad_norm": 0.817161014236857, + "learning_rate": 2.1160695309021373e-06, + "loss": 0.4168, + "step": 22997 + }, + { + "epoch": 0.7048547260022067, + "grad_norm": 1.8181569644818394, + "learning_rate": 2.1156641054405952e-06, + "loss": 0.5885, + "step": 22998 + }, + { + "epoch": 0.7048853745249479, + "grad_norm": 1.7871289911054078, + "learning_rate": 2.1152587083991486e-06, + "loss": 0.5713, + "step": 22999 + }, + { + "epoch": 0.7049160230476891, + "grad_norm": 1.729620803355952, + "learning_rate": 2.1148533397818e-06, + "loss": 0.5208, + "step": 23000 + }, + { + "epoch": 0.7049466715704303, + "grad_norm": 1.775719617736804, + "learning_rate": 2.114447999592538e-06, + "loss": 0.5159, + "step": 23001 + }, + { + "epoch": 0.7049773200931715, + "grad_norm": 1.6914915705690847, + "learning_rate": 2.114042687835359e-06, + "loss": 0.5785, + "step": 23002 + }, + { + "epoch": 0.7050079686159128, + "grad_norm": 1.8374455214141834, + "learning_rate": 2.113637404514255e-06, + "loss": 0.5518, + "step": 23003 + }, + { + "epoch": 0.7050386171386539, + "grad_norm": 1.6849209551704016, + "learning_rate": 2.11323214963322e-06, + "loss": 0.5896, + "step": 23004 + }, + { + "epoch": 0.7050692656613952, + "grad_norm": 2.0016544931355154, + "learning_rate": 2.1128269231962485e-06, + "loss": 0.5276, + "step": 23005 + }, + { + "epoch": 0.7050999141841363, + "grad_norm": 1.5727680956667736, + "learning_rate": 2.11242172520733e-06, + "loss": 0.5606, + "step": 23006 + }, + { + "epoch": 0.7051305627068776, + "grad_norm": 1.9601973795645307, + "learning_rate": 2.1120165556704603e-06, + "loss": 0.5607, + "step": 23007 + }, + { + "epoch": 0.7051612112296187, + "grad_norm": 1.8087568383769073, + "learning_rate": 2.1116114145896314e-06, + "loss": 0.6205, + "step": 23008 + }, + { + "epoch": 0.70519185975236, + "grad_norm": 1.7463750564372802, + "learning_rate": 2.1112063019688343e-06, + "loss": 0.522, + "step": 23009 + }, + { + "epoch": 0.7052225082751011, + "grad_norm": 1.8133049876468341, + "learning_rate": 2.1108012178120575e-06, + "loss": 0.6414, + "step": 23010 + }, + { + "epoch": 0.7052531567978424, + "grad_norm": 1.9451404110099002, + "learning_rate": 2.1103961621232988e-06, + "loss": 0.5286, + "step": 23011 + }, + { + "epoch": 0.7052838053205835, + "grad_norm": 0.8152326005092515, + "learning_rate": 2.1099911349065437e-06, + "loss": 0.4427, + "step": 23012 + }, + { + "epoch": 0.7053144538433248, + "grad_norm": 1.8377202335262772, + "learning_rate": 2.1095861361657883e-06, + "loss": 0.6013, + "step": 23013 + }, + { + "epoch": 0.705345102366066, + "grad_norm": 0.8083145225453342, + "learning_rate": 2.1091811659050177e-06, + "loss": 0.407, + "step": 23014 + }, + { + "epoch": 0.7053757508888071, + "grad_norm": 1.6090987114739654, + "learning_rate": 2.1087762241282245e-06, + "loss": 0.5815, + "step": 23015 + }, + { + "epoch": 0.7054063994115484, + "grad_norm": 0.7888832506453851, + "learning_rate": 2.1083713108394015e-06, + "loss": 0.4173, + "step": 23016 + }, + { + "epoch": 0.7054370479342895, + "grad_norm": 2.079028296933661, + "learning_rate": 2.1079664260425337e-06, + "loss": 0.6214, + "step": 23017 + }, + { + "epoch": 0.7054676964570308, + "grad_norm": 1.846843336188525, + 
"learning_rate": 2.1075615697416123e-06, + "loss": 0.478, + "step": 23018 + }, + { + "epoch": 0.7054983449797719, + "grad_norm": 1.5658641168835674, + "learning_rate": 2.1071567419406293e-06, + "loss": 0.5731, + "step": 23019 + }, + { + "epoch": 0.7055289935025132, + "grad_norm": 1.8353568855652829, + "learning_rate": 2.1067519426435683e-06, + "loss": 0.6859, + "step": 23020 + }, + { + "epoch": 0.7055596420252543, + "grad_norm": 0.80848501658511, + "learning_rate": 2.106347171854423e-06, + "loss": 0.4177, + "step": 23021 + }, + { + "epoch": 0.7055902905479956, + "grad_norm": 0.7704352950711029, + "learning_rate": 2.105942429577178e-06, + "loss": 0.3964, + "step": 23022 + }, + { + "epoch": 0.7056209390707368, + "grad_norm": 1.964796615475596, + "learning_rate": 2.1055377158158224e-06, + "loss": 0.527, + "step": 23023 + }, + { + "epoch": 0.705651587593478, + "grad_norm": 0.792859513113005, + "learning_rate": 2.105133030574346e-06, + "loss": 0.4023, + "step": 23024 + }, + { + "epoch": 0.7056822361162192, + "grad_norm": 2.1946699349829517, + "learning_rate": 2.1047283738567326e-06, + "loss": 0.6898, + "step": 23025 + }, + { + "epoch": 0.7057128846389604, + "grad_norm": 1.880997588140535, + "learning_rate": 2.104323745666972e-06, + "loss": 0.5579, + "step": 23026 + }, + { + "epoch": 0.7057435331617016, + "grad_norm": 1.942802588793614, + "learning_rate": 2.1039191460090515e-06, + "loss": 0.6172, + "step": 23027 + }, + { + "epoch": 0.7057741816844428, + "grad_norm": 1.7598867324781466, + "learning_rate": 2.1035145748869553e-06, + "loss": 0.5833, + "step": 23028 + }, + { + "epoch": 0.705804830207184, + "grad_norm": 1.8742257073753117, + "learning_rate": 2.1031100323046703e-06, + "loss": 0.5628, + "step": 23029 + }, + { + "epoch": 0.7058354787299252, + "grad_norm": 1.6835346765321342, + "learning_rate": 2.102705518266186e-06, + "loss": 0.6024, + "step": 23030 + }, + { + "epoch": 0.7058661272526664, + "grad_norm": 1.6425801571231124, + "learning_rate": 2.1023010327754833e-06, + "loss": 0.5752, + "step": 23031 + }, + { + "epoch": 0.7058967757754077, + "grad_norm": 1.7581500465043862, + "learning_rate": 2.101896575836552e-06, + "loss": 0.6412, + "step": 23032 + }, + { + "epoch": 0.7059274242981488, + "grad_norm": 1.914765434586774, + "learning_rate": 2.1014921474533732e-06, + "loss": 0.609, + "step": 23033 + }, + { + "epoch": 0.7059580728208901, + "grad_norm": 1.5775598138004083, + "learning_rate": 2.101087747629934e-06, + "loss": 0.5104, + "step": 23034 + }, + { + "epoch": 0.7059887213436312, + "grad_norm": 1.7848234172906712, + "learning_rate": 2.1006833763702206e-06, + "loss": 0.5696, + "step": 23035 + }, + { + "epoch": 0.7060193698663725, + "grad_norm": 0.8358694846881417, + "learning_rate": 2.1002790336782143e-06, + "loss": 0.408, + "step": 23036 + }, + { + "epoch": 0.7060500183891136, + "grad_norm": 0.8070630018689796, + "learning_rate": 2.0998747195579007e-06, + "loss": 0.4109, + "step": 23037 + }, + { + "epoch": 0.7060806669118549, + "grad_norm": 0.79351854530496, + "learning_rate": 2.099470434013265e-06, + "loss": 0.4127, + "step": 23038 + }, + { + "epoch": 0.706111315434596, + "grad_norm": 2.350727468154294, + "learning_rate": 2.099066177048287e-06, + "loss": 0.7355, + "step": 23039 + }, + { + "epoch": 0.7061419639573373, + "grad_norm": 1.9598480238633567, + "learning_rate": 2.098661948666953e-06, + "loss": 0.5842, + "step": 23040 + }, + { + "epoch": 0.7061726124800785, + "grad_norm": 1.9441739228916988, + "learning_rate": 2.0982577488732464e-06, + "loss": 0.5363, + "step": 23041 + }, + { + 
"epoch": 0.7062032610028197, + "grad_norm": 1.7703585583794408, + "learning_rate": 2.097853577671147e-06, + "loss": 0.6332, + "step": 23042 + }, + { + "epoch": 0.7062339095255609, + "grad_norm": 1.9157751853066574, + "learning_rate": 2.0974494350646408e-06, + "loss": 0.5943, + "step": 23043 + }, + { + "epoch": 0.7062645580483021, + "grad_norm": 1.7046093373003355, + "learning_rate": 2.0970453210577058e-06, + "loss": 0.5652, + "step": 23044 + }, + { + "epoch": 0.7062952065710433, + "grad_norm": 0.7382758893278049, + "learning_rate": 2.0966412356543263e-06, + "loss": 0.3968, + "step": 23045 + }, + { + "epoch": 0.7063258550937844, + "grad_norm": 1.8924903987739563, + "learning_rate": 2.096237178858485e-06, + "loss": 0.6088, + "step": 23046 + }, + { + "epoch": 0.7063565036165257, + "grad_norm": 1.768148211787123, + "learning_rate": 2.095833150674161e-06, + "loss": 0.6123, + "step": 23047 + }, + { + "epoch": 0.7063871521392668, + "grad_norm": 1.6840560745460567, + "learning_rate": 2.0954291511053347e-06, + "loss": 0.5883, + "step": 23048 + }, + { + "epoch": 0.7064178006620081, + "grad_norm": 1.7373568209221801, + "learning_rate": 2.0950251801559906e-06, + "loss": 0.5328, + "step": 23049 + }, + { + "epoch": 0.7064484491847492, + "grad_norm": 1.7629643684415286, + "learning_rate": 2.094621237830105e-06, + "loss": 0.6562, + "step": 23050 + }, + { + "epoch": 0.7064790977074905, + "grad_norm": 1.7102956313912845, + "learning_rate": 2.0942173241316594e-06, + "loss": 0.5882, + "step": 23051 + }, + { + "epoch": 0.7065097462302317, + "grad_norm": 1.8719737842070487, + "learning_rate": 2.0938134390646357e-06, + "loss": 0.5604, + "step": 23052 + }, + { + "epoch": 0.7065403947529729, + "grad_norm": 1.9777397088917423, + "learning_rate": 2.09340958263301e-06, + "loss": 0.6284, + "step": 23053 + }, + { + "epoch": 0.7065710432757141, + "grad_norm": 0.8351522014244928, + "learning_rate": 2.0930057548407658e-06, + "loss": 0.4184, + "step": 23054 + }, + { + "epoch": 0.7066016917984553, + "grad_norm": 1.934247717397186, + "learning_rate": 2.0926019556918774e-06, + "loss": 0.6468, + "step": 23055 + }, + { + "epoch": 0.7066323403211965, + "grad_norm": 1.9530772089947357, + "learning_rate": 2.0921981851903255e-06, + "loss": 0.5708, + "step": 23056 + }, + { + "epoch": 0.7066629888439377, + "grad_norm": 0.7918230034740881, + "learning_rate": 2.0917944433400912e-06, + "loss": 0.4154, + "step": 23057 + }, + { + "epoch": 0.7066936373666789, + "grad_norm": 1.7990458231239965, + "learning_rate": 2.0913907301451485e-06, + "loss": 0.5628, + "step": 23058 + }, + { + "epoch": 0.7067242858894202, + "grad_norm": 2.262269476729392, + "learning_rate": 2.0909870456094765e-06, + "loss": 0.6364, + "step": 23059 + }, + { + "epoch": 0.7067549344121613, + "grad_norm": 1.8874386888073706, + "learning_rate": 2.090583389737056e-06, + "loss": 0.6444, + "step": 23060 + }, + { + "epoch": 0.7067855829349026, + "grad_norm": 0.7858245774390359, + "learning_rate": 2.09017976253186e-06, + "loss": 0.4025, + "step": 23061 + }, + { + "epoch": 0.7068162314576437, + "grad_norm": 1.8818648681997006, + "learning_rate": 2.089776163997867e-06, + "loss": 0.5667, + "step": 23062 + }, + { + "epoch": 0.706846879980385, + "grad_norm": 1.9360859425219188, + "learning_rate": 2.089372594139056e-06, + "loss": 0.6234, + "step": 23063 + }, + { + "epoch": 0.7068775285031261, + "grad_norm": 1.9693666232981577, + "learning_rate": 2.0889690529593993e-06, + "loss": 0.6262, + "step": 23064 + }, + { + "epoch": 0.7069081770258674, + "grad_norm": 1.6617098481380927, + 
"learning_rate": 2.0885655404628774e-06, + "loss": 0.5369, + "step": 23065 + }, + { + "epoch": 0.7069388255486085, + "grad_norm": 2.137089813451657, + "learning_rate": 2.088162056653462e-06, + "loss": 0.7773, + "step": 23066 + }, + { + "epoch": 0.7069694740713498, + "grad_norm": 0.7978527097727065, + "learning_rate": 2.0877586015351315e-06, + "loss": 0.4145, + "step": 23067 + }, + { + "epoch": 0.707000122594091, + "grad_norm": 1.798026961149208, + "learning_rate": 2.0873551751118624e-06, + "loss": 0.5369, + "step": 23068 + }, + { + "epoch": 0.7070307711168322, + "grad_norm": 0.7888570796459055, + "learning_rate": 2.086951777387626e-06, + "loss": 0.4134, + "step": 23069 + }, + { + "epoch": 0.7070614196395734, + "grad_norm": 2.084629303462946, + "learning_rate": 2.086548408366399e-06, + "loss": 0.5646, + "step": 23070 + }, + { + "epoch": 0.7070920681623146, + "grad_norm": 1.760221172853829, + "learning_rate": 2.0861450680521576e-06, + "loss": 0.4771, + "step": 23071 + }, + { + "epoch": 0.7071227166850558, + "grad_norm": 1.8665809632162313, + "learning_rate": 2.085741756448873e-06, + "loss": 0.6105, + "step": 23072 + }, + { + "epoch": 0.707153365207797, + "grad_norm": 1.955731122128193, + "learning_rate": 2.0853384735605227e-06, + "loss": 0.6563, + "step": 23073 + }, + { + "epoch": 0.7071840137305382, + "grad_norm": 1.690737157053022, + "learning_rate": 2.084935219391074e-06, + "loss": 0.5196, + "step": 23074 + }, + { + "epoch": 0.7072146622532794, + "grad_norm": 0.792576969024837, + "learning_rate": 2.0845319939445074e-06, + "loss": 0.4055, + "step": 23075 + }, + { + "epoch": 0.7072453107760206, + "grad_norm": 1.8097756697680842, + "learning_rate": 2.0841287972247935e-06, + "loss": 0.5286, + "step": 23076 + }, + { + "epoch": 0.7072759592987617, + "grad_norm": 1.6572095449480602, + "learning_rate": 2.083725629235903e-06, + "loss": 0.5832, + "step": 23077 + }, + { + "epoch": 0.707306607821503, + "grad_norm": 1.8374975229303327, + "learning_rate": 2.0833224899818105e-06, + "loss": 0.5616, + "step": 23078 + }, + { + "epoch": 0.7073372563442442, + "grad_norm": 1.8357112318048383, + "learning_rate": 2.082919379466489e-06, + "loss": 0.6563, + "step": 23079 + }, + { + "epoch": 0.7073679048669854, + "grad_norm": 1.608549918790453, + "learning_rate": 2.0825162976939077e-06, + "loss": 0.5594, + "step": 23080 + }, + { + "epoch": 0.7073985533897266, + "grad_norm": 1.7388456035980044, + "learning_rate": 2.0821132446680393e-06, + "loss": 0.5242, + "step": 23081 + }, + { + "epoch": 0.7074292019124678, + "grad_norm": 1.7893924271007955, + "learning_rate": 2.081710220392856e-06, + "loss": 0.6639, + "step": 23082 + }, + { + "epoch": 0.707459850435209, + "grad_norm": 1.9324463499409998, + "learning_rate": 2.0813072248723303e-06, + "loss": 0.6037, + "step": 23083 + }, + { + "epoch": 0.7074904989579502, + "grad_norm": 1.9954900973828027, + "learning_rate": 2.0809042581104318e-06, + "loss": 0.6007, + "step": 23084 + }, + { + "epoch": 0.7075211474806914, + "grad_norm": 0.7623381891495501, + "learning_rate": 2.0805013201111264e-06, + "loss": 0.3988, + "step": 23085 + }, + { + "epoch": 0.7075517960034327, + "grad_norm": 1.8916877090949993, + "learning_rate": 2.0800984108783924e-06, + "loss": 0.569, + "step": 23086 + }, + { + "epoch": 0.7075824445261738, + "grad_norm": 1.7562576896688324, + "learning_rate": 2.0796955304161954e-06, + "loss": 0.597, + "step": 23087 + }, + { + "epoch": 0.7076130930489151, + "grad_norm": 1.8925622901556964, + "learning_rate": 2.079292678728504e-06, + "loss": 0.4842, + "step": 23088 + }, + 
{ + "epoch": 0.7076437415716562, + "grad_norm": 0.8313165270919874, + "learning_rate": 2.0788898558192887e-06, + "loss": 0.4151, + "step": 23089 + }, + { + "epoch": 0.7076743900943975, + "grad_norm": 1.7596176158118264, + "learning_rate": 2.078487061692521e-06, + "loss": 0.6531, + "step": 23090 + }, + { + "epoch": 0.7077050386171386, + "grad_norm": 1.9392680482489897, + "learning_rate": 2.0780842963521665e-06, + "loss": 0.6211, + "step": 23091 + }, + { + "epoch": 0.7077356871398799, + "grad_norm": 1.8914658591610038, + "learning_rate": 2.077681559802195e-06, + "loss": 0.6311, + "step": 23092 + }, + { + "epoch": 0.707766335662621, + "grad_norm": 1.8516848582690022, + "learning_rate": 2.077278852046574e-06, + "loss": 0.6253, + "step": 23093 + }, + { + "epoch": 0.7077969841853623, + "grad_norm": 1.7600275554054214, + "learning_rate": 2.076876173089275e-06, + "loss": 0.5647, + "step": 23094 + }, + { + "epoch": 0.7078276327081034, + "grad_norm": 1.9937418673152105, + "learning_rate": 2.0764735229342623e-06, + "loss": 0.6252, + "step": 23095 + }, + { + "epoch": 0.7078582812308447, + "grad_norm": 1.807217674443791, + "learning_rate": 2.0760709015855006e-06, + "loss": 0.6004, + "step": 23096 + }, + { + "epoch": 0.7078889297535859, + "grad_norm": 1.7861807086484405, + "learning_rate": 2.075668309046964e-06, + "loss": 0.5532, + "step": 23097 + }, + { + "epoch": 0.7079195782763271, + "grad_norm": 1.6060850557527233, + "learning_rate": 2.0752657453226162e-06, + "loss": 0.6638, + "step": 23098 + }, + { + "epoch": 0.7079502267990683, + "grad_norm": 0.830443171388324, + "learning_rate": 2.0748632104164213e-06, + "loss": 0.4253, + "step": 23099 + }, + { + "epoch": 0.7079808753218095, + "grad_norm": 1.932039649850433, + "learning_rate": 2.0744607043323477e-06, + "loss": 0.5722, + "step": 23100 + }, + { + "epoch": 0.7080115238445507, + "grad_norm": 1.7926742288288722, + "learning_rate": 2.074058227074361e-06, + "loss": 0.6441, + "step": 23101 + }, + { + "epoch": 0.7080421723672919, + "grad_norm": 1.6418445278501028, + "learning_rate": 2.073655778646429e-06, + "loss": 0.4495, + "step": 23102 + }, + { + "epoch": 0.7080728208900331, + "grad_norm": 1.8028616579710783, + "learning_rate": 2.073253359052514e-06, + "loss": 0.4941, + "step": 23103 + }, + { + "epoch": 0.7081034694127744, + "grad_norm": 1.5789393770142426, + "learning_rate": 2.072850968296582e-06, + "loss": 0.5614, + "step": 23104 + }, + { + "epoch": 0.7081341179355155, + "grad_norm": 0.8539592892366691, + "learning_rate": 2.0724486063826003e-06, + "loss": 0.419, + "step": 23105 + }, + { + "epoch": 0.7081647664582568, + "grad_norm": 1.8998968718015374, + "learning_rate": 2.072046273314532e-06, + "loss": 0.6684, + "step": 23106 + }, + { + "epoch": 0.7081954149809979, + "grad_norm": 1.7904088530759625, + "learning_rate": 2.0716439690963385e-06, + "loss": 0.5817, + "step": 23107 + }, + { + "epoch": 0.7082260635037391, + "grad_norm": 1.832732381044668, + "learning_rate": 2.071241693731986e-06, + "loss": 0.5647, + "step": 23108 + }, + { + "epoch": 0.7082567120264803, + "grad_norm": 2.044890605063615, + "learning_rate": 2.0708394472254397e-06, + "loss": 0.4875, + "step": 23109 + }, + { + "epoch": 0.7082873605492215, + "grad_norm": 1.8907612002512462, + "learning_rate": 2.0704372295806622e-06, + "loss": 0.5945, + "step": 23110 + }, + { + "epoch": 0.7083180090719627, + "grad_norm": 0.8003749993598818, + "learning_rate": 2.070035040801615e-06, + "loss": 0.4266, + "step": 23111 + }, + { + "epoch": 0.7083486575947039, + "grad_norm": 1.663325577100741, + 
"learning_rate": 2.0696328808922623e-06, + "loss": 0.5584, + "step": 23112 + }, + { + "epoch": 0.7083793061174452, + "grad_norm": 1.808401612286784, + "learning_rate": 2.0692307498565685e-06, + "loss": 0.6097, + "step": 23113 + }, + { + "epoch": 0.7084099546401863, + "grad_norm": 0.7568447243152859, + "learning_rate": 2.068828647698492e-06, + "loss": 0.3964, + "step": 23114 + }, + { + "epoch": 0.7084406031629276, + "grad_norm": 1.682363647069225, + "learning_rate": 2.0684265744219965e-06, + "loss": 0.5852, + "step": 23115 + }, + { + "epoch": 0.7084712516856687, + "grad_norm": 0.7917218749738671, + "learning_rate": 2.0680245300310465e-06, + "loss": 0.4056, + "step": 23116 + }, + { + "epoch": 0.70850190020841, + "grad_norm": 0.7779953241558096, + "learning_rate": 2.0676225145296e-06, + "loss": 0.4191, + "step": 23117 + }, + { + "epoch": 0.7085325487311511, + "grad_norm": 1.956519133344867, + "learning_rate": 2.0672205279216183e-06, + "loss": 0.5749, + "step": 23118 + }, + { + "epoch": 0.7085631972538924, + "grad_norm": 0.778598351327874, + "learning_rate": 2.0668185702110633e-06, + "loss": 0.4073, + "step": 23119 + }, + { + "epoch": 0.7085938457766335, + "grad_norm": 1.9402123853272066, + "learning_rate": 2.066416641401894e-06, + "loss": 0.6848, + "step": 23120 + }, + { + "epoch": 0.7086244942993748, + "grad_norm": 1.8450211358344855, + "learning_rate": 2.066014741498075e-06, + "loss": 0.6512, + "step": 23121 + }, + { + "epoch": 0.708655142822116, + "grad_norm": 1.9035200499492937, + "learning_rate": 2.065612870503562e-06, + "loss": 0.5635, + "step": 23122 + }, + { + "epoch": 0.7086857913448572, + "grad_norm": 2.204860607623544, + "learning_rate": 2.0652110284223153e-06, + "loss": 0.571, + "step": 23123 + }, + { + "epoch": 0.7087164398675984, + "grad_norm": 0.7764919734873843, + "learning_rate": 2.064809215258298e-06, + "loss": 0.4063, + "step": 23124 + }, + { + "epoch": 0.7087470883903396, + "grad_norm": 1.6064385471735487, + "learning_rate": 2.0644074310154656e-06, + "loss": 0.5308, + "step": 23125 + }, + { + "epoch": 0.7087777369130808, + "grad_norm": 0.8009285453143085, + "learning_rate": 2.0640056756977743e-06, + "loss": 0.4031, + "step": 23126 + }, + { + "epoch": 0.708808385435822, + "grad_norm": 1.930506653809648, + "learning_rate": 2.06360394930919e-06, + "loss": 0.6068, + "step": 23127 + }, + { + "epoch": 0.7088390339585632, + "grad_norm": 0.7363956995145273, + "learning_rate": 2.063202251853666e-06, + "loss": 0.3785, + "step": 23128 + }, + { + "epoch": 0.7088696824813044, + "grad_norm": 1.9641916275597997, + "learning_rate": 2.0628005833351634e-06, + "loss": 0.5426, + "step": 23129 + }, + { + "epoch": 0.7089003310040456, + "grad_norm": 1.818038275439917, + "learning_rate": 2.062398943757636e-06, + "loss": 0.6484, + "step": 23130 + }, + { + "epoch": 0.7089309795267869, + "grad_norm": 1.9787497724996699, + "learning_rate": 2.061997333125043e-06, + "loss": 0.6583, + "step": 23131 + }, + { + "epoch": 0.708961628049528, + "grad_norm": 2.0929485926704388, + "learning_rate": 2.0615957514413446e-06, + "loss": 0.6747, + "step": 23132 + }, + { + "epoch": 0.7089922765722693, + "grad_norm": 0.8181744057870921, + "learning_rate": 2.0611941987104927e-06, + "loss": 0.4104, + "step": 23133 + }, + { + "epoch": 0.7090229250950104, + "grad_norm": 1.92289711300701, + "learning_rate": 2.0607926749364467e-06, + "loss": 0.6589, + "step": 23134 + }, + { + "epoch": 0.7090535736177517, + "grad_norm": 1.6281231703089132, + "learning_rate": 2.060391180123164e-06, + "loss": 0.5987, + "step": 23135 + }, + { + 
"epoch": 0.7090842221404928, + "grad_norm": 1.9451992125334663, + "learning_rate": 2.0599897142745995e-06, + "loss": 0.5271, + "step": 23136 + }, + { + "epoch": 0.7091148706632341, + "grad_norm": 1.8926774295401727, + "learning_rate": 2.0595882773947045e-06, + "loss": 0.5534, + "step": 23137 + }, + { + "epoch": 0.7091455191859752, + "grad_norm": 0.8000744295952524, + "learning_rate": 2.0591868694874427e-06, + "loss": 0.4012, + "step": 23138 + }, + { + "epoch": 0.7091761677087164, + "grad_norm": 1.6685587646326994, + "learning_rate": 2.058785490556763e-06, + "loss": 0.5781, + "step": 23139 + }, + { + "epoch": 0.7092068162314576, + "grad_norm": 0.7407009840762244, + "learning_rate": 2.058384140606624e-06, + "loss": 0.3979, + "step": 23140 + }, + { + "epoch": 0.7092374647541988, + "grad_norm": 1.749888191104032, + "learning_rate": 2.0579828196409774e-06, + "loss": 0.6322, + "step": 23141 + }, + { + "epoch": 0.7092681132769401, + "grad_norm": 1.822615723744601, + "learning_rate": 2.0575815276637782e-06, + "loss": 0.5424, + "step": 23142 + }, + { + "epoch": 0.7092987617996812, + "grad_norm": 1.9245687029509526, + "learning_rate": 2.0571802646789833e-06, + "loss": 0.6533, + "step": 23143 + }, + { + "epoch": 0.7093294103224225, + "grad_norm": 1.761859972032971, + "learning_rate": 2.0567790306905427e-06, + "loss": 0.5544, + "step": 23144 + }, + { + "epoch": 0.7093600588451636, + "grad_norm": 1.645960542599541, + "learning_rate": 2.056377825702411e-06, + "loss": 0.4936, + "step": 23145 + }, + { + "epoch": 0.7093907073679049, + "grad_norm": 1.9292049443552535, + "learning_rate": 2.0559766497185433e-06, + "loss": 0.4867, + "step": 23146 + }, + { + "epoch": 0.709421355890646, + "grad_norm": 1.7078184560483631, + "learning_rate": 2.055575502742889e-06, + "loss": 0.6286, + "step": 23147 + }, + { + "epoch": 0.7094520044133873, + "grad_norm": 1.7450457069972634, + "learning_rate": 2.055174384779403e-06, + "loss": 0.5955, + "step": 23148 + }, + { + "epoch": 0.7094826529361284, + "grad_norm": 1.8736763007926984, + "learning_rate": 2.054773295832039e-06, + "loss": 0.6449, + "step": 23149 + }, + { + "epoch": 0.7095133014588697, + "grad_norm": 2.0731850075460194, + "learning_rate": 2.054372235904746e-06, + "loss": 0.6191, + "step": 23150 + }, + { + "epoch": 0.7095439499816109, + "grad_norm": 1.81814277571714, + "learning_rate": 2.0539712050014783e-06, + "loss": 0.6693, + "step": 23151 + }, + { + "epoch": 0.7095745985043521, + "grad_norm": 1.8720957743422366, + "learning_rate": 2.0535702031261843e-06, + "loss": 0.5702, + "step": 23152 + }, + { + "epoch": 0.7096052470270933, + "grad_norm": 1.9442933055064453, + "learning_rate": 2.053169230282817e-06, + "loss": 0.597, + "step": 23153 + }, + { + "epoch": 0.7096358955498345, + "grad_norm": 0.789393210900366, + "learning_rate": 2.052768286475329e-06, + "loss": 0.3986, + "step": 23154 + }, + { + "epoch": 0.7096665440725757, + "grad_norm": 1.7334355550693246, + "learning_rate": 2.0523673717076676e-06, + "loss": 0.6053, + "step": 23155 + }, + { + "epoch": 0.7096971925953169, + "grad_norm": 1.6560314527027604, + "learning_rate": 2.0519664859837846e-06, + "loss": 0.5247, + "step": 23156 + }, + { + "epoch": 0.7097278411180581, + "grad_norm": 0.7822851056950269, + "learning_rate": 2.0515656293076315e-06, + "loss": 0.3993, + "step": 23157 + }, + { + "epoch": 0.7097584896407994, + "grad_norm": 1.6647308888926329, + "learning_rate": 2.0511648016831554e-06, + "loss": 0.5528, + "step": 23158 + }, + { + "epoch": 0.7097891381635405, + "grad_norm": 1.5432128614422795, + 
"learning_rate": 2.0507640031143083e-06, + "loss": 0.4284, + "step": 23159 + }, + { + "epoch": 0.7098197866862818, + "grad_norm": 1.6896835181758598, + "learning_rate": 2.0503632336050367e-06, + "loss": 0.5359, + "step": 23160 + }, + { + "epoch": 0.7098504352090229, + "grad_norm": 1.9849078371685427, + "learning_rate": 2.0499624931592905e-06, + "loss": 0.6444, + "step": 23161 + }, + { + "epoch": 0.7098810837317642, + "grad_norm": 1.9069001911390897, + "learning_rate": 2.049561781781021e-06, + "loss": 0.5694, + "step": 23162 + }, + { + "epoch": 0.7099117322545053, + "grad_norm": 1.837739939898148, + "learning_rate": 2.049161099474172e-06, + "loss": 0.5805, + "step": 23163 + }, + { + "epoch": 0.7099423807772466, + "grad_norm": 1.5772748361001747, + "learning_rate": 2.0487604462426936e-06, + "loss": 0.4973, + "step": 23164 + }, + { + "epoch": 0.7099730292999877, + "grad_norm": 1.866540379169468, + "learning_rate": 2.0483598220905354e-06, + "loss": 0.5798, + "step": 23165 + }, + { + "epoch": 0.710003677822729, + "grad_norm": 0.7934504647909391, + "learning_rate": 2.0479592270216414e-06, + "loss": 0.4203, + "step": 23166 + }, + { + "epoch": 0.7100343263454701, + "grad_norm": 1.9602061101279298, + "learning_rate": 2.04755866103996e-06, + "loss": 0.6939, + "step": 23167 + }, + { + "epoch": 0.7100649748682114, + "grad_norm": 1.7206733170299586, + "learning_rate": 2.047158124149441e-06, + "loss": 0.6132, + "step": 23168 + }, + { + "epoch": 0.7100956233909526, + "grad_norm": 2.2335774047905317, + "learning_rate": 2.0467576163540263e-06, + "loss": 0.7075, + "step": 23169 + }, + { + "epoch": 0.7101262719136937, + "grad_norm": 1.8131433896918696, + "learning_rate": 2.0463571376576667e-06, + "loss": 0.6083, + "step": 23170 + }, + { + "epoch": 0.710156920436435, + "grad_norm": 1.914638527417006, + "learning_rate": 2.0459566880643038e-06, + "loss": 0.631, + "step": 23171 + }, + { + "epoch": 0.7101875689591761, + "grad_norm": 1.893826169677931, + "learning_rate": 2.0455562675778855e-06, + "loss": 0.6189, + "step": 23172 + }, + { + "epoch": 0.7102182174819174, + "grad_norm": 1.6723086651360786, + "learning_rate": 2.0451558762023595e-06, + "loss": 0.5312, + "step": 23173 + }, + { + "epoch": 0.7102488660046585, + "grad_norm": 1.796790006127883, + "learning_rate": 2.0447555139416658e-06, + "loss": 0.5808, + "step": 23174 + }, + { + "epoch": 0.7102795145273998, + "grad_norm": 1.8192548794642127, + "learning_rate": 2.044355180799753e-06, + "loss": 0.4888, + "step": 23175 + }, + { + "epoch": 0.7103101630501409, + "grad_norm": 1.6735035163356977, + "learning_rate": 2.0439548767805667e-06, + "loss": 0.5945, + "step": 23176 + }, + { + "epoch": 0.7103408115728822, + "grad_norm": 1.8930143561970412, + "learning_rate": 2.043554601888047e-06, + "loss": 0.5755, + "step": 23177 + }, + { + "epoch": 0.7103714600956234, + "grad_norm": 2.030660695651665, + "learning_rate": 2.0431543561261408e-06, + "loss": 0.6805, + "step": 23178 + }, + { + "epoch": 0.7104021086183646, + "grad_norm": 1.993492418331314, + "learning_rate": 2.0427541394987926e-06, + "loss": 0.6145, + "step": 23179 + }, + { + "epoch": 0.7104327571411058, + "grad_norm": 1.7600315729837783, + "learning_rate": 2.042353952009943e-06, + "loss": 0.5128, + "step": 23180 + }, + { + "epoch": 0.710463405663847, + "grad_norm": 1.7035849806483185, + "learning_rate": 2.041953793663538e-06, + "loss": 0.5614, + "step": 23181 + }, + { + "epoch": 0.7104940541865882, + "grad_norm": 0.8189867868671072, + "learning_rate": 2.041553664463516e-06, + "loss": 0.4114, + "step": 23182 + }, 
+ { + "epoch": 0.7105247027093294, + "grad_norm": 1.9204910279974103, + "learning_rate": 2.0411535644138266e-06, + "loss": 0.6105, + "step": 23183 + }, + { + "epoch": 0.7105553512320706, + "grad_norm": 1.7527416661685, + "learning_rate": 2.0407534935184076e-06, + "loss": 0.6723, + "step": 23184 + }, + { + "epoch": 0.7105859997548118, + "grad_norm": 1.6263599174012555, + "learning_rate": 2.0403534517811996e-06, + "loss": 0.5312, + "step": 23185 + }, + { + "epoch": 0.710616648277553, + "grad_norm": 1.80847171948466, + "learning_rate": 2.0399534392061464e-06, + "loss": 0.5627, + "step": 23186 + }, + { + "epoch": 0.7106472968002943, + "grad_norm": 1.8430310773708591, + "learning_rate": 2.039553455797192e-06, + "loss": 0.6474, + "step": 23187 + }, + { + "epoch": 0.7106779453230354, + "grad_norm": 1.679985900642099, + "learning_rate": 2.039153501558272e-06, + "loss": 0.5268, + "step": 23188 + }, + { + "epoch": 0.7107085938457767, + "grad_norm": 1.5885242675986984, + "learning_rate": 2.0387535764933306e-06, + "loss": 0.5505, + "step": 23189 + }, + { + "epoch": 0.7107392423685178, + "grad_norm": 1.9557106959595256, + "learning_rate": 2.03835368060631e-06, + "loss": 0.6419, + "step": 23190 + }, + { + "epoch": 0.7107698908912591, + "grad_norm": 1.8963269334692765, + "learning_rate": 2.0379538139011455e-06, + "loss": 0.6224, + "step": 23191 + }, + { + "epoch": 0.7108005394140002, + "grad_norm": 1.8285070850039957, + "learning_rate": 2.0375539763817824e-06, + "loss": 0.6846, + "step": 23192 + }, + { + "epoch": 0.7108311879367415, + "grad_norm": 1.80799589333928, + "learning_rate": 2.0371541680521543e-06, + "loss": 0.569, + "step": 23193 + }, + { + "epoch": 0.7108618364594826, + "grad_norm": 1.8382165440192817, + "learning_rate": 2.0367543889162083e-06, + "loss": 0.5946, + "step": 23194 + }, + { + "epoch": 0.7108924849822239, + "grad_norm": 1.7503421344968126, + "learning_rate": 2.036354638977879e-06, + "loss": 0.5525, + "step": 23195 + }, + { + "epoch": 0.710923133504965, + "grad_norm": 0.741462528928418, + "learning_rate": 2.0359549182411043e-06, + "loss": 0.3885, + "step": 23196 + }, + { + "epoch": 0.7109537820277063, + "grad_norm": 1.813378512615143, + "learning_rate": 2.035555226709824e-06, + "loss": 0.6694, + "step": 23197 + }, + { + "epoch": 0.7109844305504475, + "grad_norm": 1.81326211752348, + "learning_rate": 2.0351555643879777e-06, + "loss": 0.6232, + "step": 23198 + }, + { + "epoch": 0.7110150790731887, + "grad_norm": 1.9063627528310345, + "learning_rate": 2.0347559312795013e-06, + "loss": 0.5124, + "step": 23199 + }, + { + "epoch": 0.7110457275959299, + "grad_norm": 1.9142095310344636, + "learning_rate": 2.034356327388333e-06, + "loss": 0.5934, + "step": 23200 + }, + { + "epoch": 0.711076376118671, + "grad_norm": 0.7819214686144125, + "learning_rate": 2.0339567527184107e-06, + "loss": 0.3911, + "step": 23201 + }, + { + "epoch": 0.7111070246414123, + "grad_norm": 0.7906792676282863, + "learning_rate": 2.033557207273673e-06, + "loss": 0.385, + "step": 23202 + }, + { + "epoch": 0.7111376731641534, + "grad_norm": 2.053966264292855, + "learning_rate": 2.0331576910580554e-06, + "loss": 0.5016, + "step": 23203 + }, + { + "epoch": 0.7111683216868947, + "grad_norm": 1.9865966310690195, + "learning_rate": 2.0327582040754916e-06, + "loss": 0.6175, + "step": 23204 + }, + { + "epoch": 0.7111989702096359, + "grad_norm": 0.7861561695085215, + "learning_rate": 2.0323587463299217e-06, + "loss": 0.4177, + "step": 23205 + }, + { + "epoch": 0.7112296187323771, + "grad_norm": 0.7530975010034575, + 
"learning_rate": 2.031959317825281e-06, + "loss": 0.4252, + "step": 23206 + }, + { + "epoch": 0.7112602672551183, + "grad_norm": 1.8617482462544541, + "learning_rate": 2.031559918565504e-06, + "loss": 0.6183, + "step": 23207 + }, + { + "epoch": 0.7112909157778595, + "grad_norm": 1.669764139160423, + "learning_rate": 2.0311605485545255e-06, + "loss": 0.646, + "step": 23208 + }, + { + "epoch": 0.7113215643006007, + "grad_norm": 1.926613452757321, + "learning_rate": 2.0307612077962822e-06, + "loss": 0.6316, + "step": 23209 + }, + { + "epoch": 0.7113522128233419, + "grad_norm": 1.8370129563931323, + "learning_rate": 2.03036189629471e-06, + "loss": 0.5157, + "step": 23210 + }, + { + "epoch": 0.7113828613460831, + "grad_norm": 1.843855052315008, + "learning_rate": 2.029962614053742e-06, + "loss": 0.5878, + "step": 23211 + }, + { + "epoch": 0.7114135098688243, + "grad_norm": 0.7721483127143135, + "learning_rate": 2.029563361077309e-06, + "loss": 0.4036, + "step": 23212 + }, + { + "epoch": 0.7114441583915655, + "grad_norm": 1.9330775013761665, + "learning_rate": 2.0291641373693515e-06, + "loss": 0.5395, + "step": 23213 + }, + { + "epoch": 0.7114748069143068, + "grad_norm": 1.7547894777779736, + "learning_rate": 2.0287649429337997e-06, + "loss": 0.5794, + "step": 23214 + }, + { + "epoch": 0.7115054554370479, + "grad_norm": 1.7995478985326505, + "learning_rate": 2.0283657777745856e-06, + "loss": 0.5509, + "step": 23215 + }, + { + "epoch": 0.7115361039597892, + "grad_norm": 1.134026565195404, + "learning_rate": 2.027966641895644e-06, + "loss": 0.4196, + "step": 23216 + }, + { + "epoch": 0.7115667524825303, + "grad_norm": 1.5900753669685623, + "learning_rate": 2.027567535300909e-06, + "loss": 0.5207, + "step": 23217 + }, + { + "epoch": 0.7115974010052716, + "grad_norm": 2.1522510583980416, + "learning_rate": 2.0271684579943096e-06, + "loss": 0.5635, + "step": 23218 + }, + { + "epoch": 0.7116280495280127, + "grad_norm": 1.8424589205458002, + "learning_rate": 2.02676940997978e-06, + "loss": 0.6783, + "step": 23219 + }, + { + "epoch": 0.711658698050754, + "grad_norm": 0.7795880901777574, + "learning_rate": 2.026370391261253e-06, + "loss": 0.3968, + "step": 23220 + }, + { + "epoch": 0.7116893465734951, + "grad_norm": 1.8132896275171264, + "learning_rate": 2.0259714018426606e-06, + "loss": 0.5079, + "step": 23221 + }, + { + "epoch": 0.7117199950962364, + "grad_norm": 1.8085784903781739, + "learning_rate": 2.0255724417279325e-06, + "loss": 0.5423, + "step": 23222 + }, + { + "epoch": 0.7117506436189776, + "grad_norm": 1.8484245115875961, + "learning_rate": 2.0251735109209975e-06, + "loss": 0.6005, + "step": 23223 + }, + { + "epoch": 0.7117812921417188, + "grad_norm": 1.731796425280219, + "learning_rate": 2.024774609425792e-06, + "loss": 0.5993, + "step": 23224 + }, + { + "epoch": 0.71181194066446, + "grad_norm": 1.759676922775562, + "learning_rate": 2.0243757372462435e-06, + "loss": 0.5655, + "step": 23225 + }, + { + "epoch": 0.7118425891872012, + "grad_norm": 1.7457918105718693, + "learning_rate": 2.0239768943862808e-06, + "loss": 0.5425, + "step": 23226 + }, + { + "epoch": 0.7118732377099424, + "grad_norm": 1.7989246224827218, + "learning_rate": 2.0235780808498346e-06, + "loss": 0.6469, + "step": 23227 + }, + { + "epoch": 0.7119038862326836, + "grad_norm": 1.7984859512654205, + "learning_rate": 2.0231792966408357e-06, + "loss": 0.5771, + "step": 23228 + }, + { + "epoch": 0.7119345347554248, + "grad_norm": 2.03598468565875, + "learning_rate": 2.0227805417632148e-06, + "loss": 0.5547, + "step": 23229 + }, + 
{ + "epoch": 0.711965183278166, + "grad_norm": 1.97317167103064, + "learning_rate": 2.0223818162208965e-06, + "loss": 0.7136, + "step": 23230 + }, + { + "epoch": 0.7119958318009072, + "grad_norm": 1.8363949449234256, + "learning_rate": 2.021983120017812e-06, + "loss": 0.4943, + "step": 23231 + }, + { + "epoch": 0.7120264803236483, + "grad_norm": 1.704586102759324, + "learning_rate": 2.021584453157892e-06, + "loss": 0.5443, + "step": 23232 + }, + { + "epoch": 0.7120571288463896, + "grad_norm": 2.000977980579568, + "learning_rate": 2.0211858156450627e-06, + "loss": 0.6015, + "step": 23233 + }, + { + "epoch": 0.7120877773691308, + "grad_norm": 2.0531933904660047, + "learning_rate": 2.0207872074832476e-06, + "loss": 0.6989, + "step": 23234 + }, + { + "epoch": 0.712118425891872, + "grad_norm": 1.7672094565366918, + "learning_rate": 2.020388628676382e-06, + "loss": 0.6638, + "step": 23235 + }, + { + "epoch": 0.7121490744146132, + "grad_norm": 1.9205215295061129, + "learning_rate": 2.019990079228388e-06, + "loss": 0.6313, + "step": 23236 + }, + { + "epoch": 0.7121797229373544, + "grad_norm": 1.673349418749099, + "learning_rate": 2.0195915591431957e-06, + "loss": 0.5676, + "step": 23237 + }, + { + "epoch": 0.7122103714600956, + "grad_norm": 1.9775920982808437, + "learning_rate": 2.019193068424729e-06, + "loss": 0.6444, + "step": 23238 + }, + { + "epoch": 0.7122410199828368, + "grad_norm": 1.9035595076049148, + "learning_rate": 2.0187946070769153e-06, + "loss": 0.603, + "step": 23239 + }, + { + "epoch": 0.712271668505578, + "grad_norm": 1.7485033093009892, + "learning_rate": 2.0183961751036834e-06, + "loss": 0.681, + "step": 23240 + }, + { + "epoch": 0.7123023170283193, + "grad_norm": 1.9211488310790117, + "learning_rate": 2.017997772508955e-06, + "loss": 0.624, + "step": 23241 + }, + { + "epoch": 0.7123329655510604, + "grad_norm": 1.77618846942375, + "learning_rate": 2.0175993992966568e-06, + "loss": 0.6015, + "step": 23242 + }, + { + "epoch": 0.7123636140738017, + "grad_norm": 1.8344989892782266, + "learning_rate": 2.017201055470717e-06, + "loss": 0.4658, + "step": 23243 + }, + { + "epoch": 0.7123942625965428, + "grad_norm": 0.7709583230353391, + "learning_rate": 2.0168027410350587e-06, + "loss": 0.4141, + "step": 23244 + }, + { + "epoch": 0.7124249111192841, + "grad_norm": 1.9836064102788107, + "learning_rate": 2.0164044559936023e-06, + "loss": 0.6754, + "step": 23245 + }, + { + "epoch": 0.7124555596420252, + "grad_norm": 1.822708645863138, + "learning_rate": 2.01600620035028e-06, + "loss": 0.6081, + "step": 23246 + }, + { + "epoch": 0.7124862081647665, + "grad_norm": 1.9150249661500391, + "learning_rate": 2.0156079741090107e-06, + "loss": 0.6781, + "step": 23247 + }, + { + "epoch": 0.7125168566875076, + "grad_norm": 2.1050798818460748, + "learning_rate": 2.0152097772737204e-06, + "loss": 0.6887, + "step": 23248 + }, + { + "epoch": 0.7125475052102489, + "grad_norm": 1.749519945656159, + "learning_rate": 2.0148116098483313e-06, + "loss": 0.585, + "step": 23249 + }, + { + "epoch": 0.71257815373299, + "grad_norm": 2.0277936522150655, + "learning_rate": 2.0144134718367665e-06, + "loss": 0.7063, + "step": 23250 + }, + { + "epoch": 0.7126088022557313, + "grad_norm": 1.7220518357324859, + "learning_rate": 2.014015363242951e-06, + "loss": 0.5419, + "step": 23251 + }, + { + "epoch": 0.7126394507784725, + "grad_norm": 2.053326516818585, + "learning_rate": 2.0136172840708053e-06, + "loss": 0.6323, + "step": 23252 + }, + { + "epoch": 0.7126700993012137, + "grad_norm": 1.831034467211026, + "learning_rate": 
2.013219234324252e-06, + "loss": 0.585, + "step": 23253 + }, + { + "epoch": 0.7127007478239549, + "grad_norm": 1.8477501268133436, + "learning_rate": 2.0128212140072156e-06, + "loss": 0.6194, + "step": 23254 + }, + { + "epoch": 0.7127313963466961, + "grad_norm": 1.745016911522144, + "learning_rate": 2.012423223123614e-06, + "loss": 0.4997, + "step": 23255 + }, + { + "epoch": 0.7127620448694373, + "grad_norm": 0.8866469623908368, + "learning_rate": 2.0120252616773735e-06, + "loss": 0.4057, + "step": 23256 + }, + { + "epoch": 0.7127926933921785, + "grad_norm": 1.9812305212617811, + "learning_rate": 2.0116273296724098e-06, + "loss": 0.5915, + "step": 23257 + }, + { + "epoch": 0.7128233419149197, + "grad_norm": 1.9834392682562378, + "learning_rate": 2.011229427112647e-06, + "loss": 0.6918, + "step": 23258 + }, + { + "epoch": 0.712853990437661, + "grad_norm": 1.6573062213386651, + "learning_rate": 2.0108315540020072e-06, + "loss": 0.5457, + "step": 23259 + }, + { + "epoch": 0.7128846389604021, + "grad_norm": 0.7976743034158658, + "learning_rate": 2.0104337103444074e-06, + "loss": 0.4184, + "step": 23260 + }, + { + "epoch": 0.7129152874831434, + "grad_norm": 1.949858662204934, + "learning_rate": 2.010035896143769e-06, + "loss": 0.6274, + "step": 23261 + }, + { + "epoch": 0.7129459360058845, + "grad_norm": 1.8816516011569815, + "learning_rate": 2.0096381114040136e-06, + "loss": 0.6153, + "step": 23262 + }, + { + "epoch": 0.7129765845286257, + "grad_norm": 1.7003423227615702, + "learning_rate": 2.009240356129057e-06, + "loss": 0.5242, + "step": 23263 + }, + { + "epoch": 0.7130072330513669, + "grad_norm": 1.6958874316421089, + "learning_rate": 2.0088426303228208e-06, + "loss": 0.5709, + "step": 23264 + }, + { + "epoch": 0.7130378815741081, + "grad_norm": 1.8113155263152403, + "learning_rate": 2.0084449339892247e-06, + "loss": 0.6166, + "step": 23265 + }, + { + "epoch": 0.7130685300968493, + "grad_norm": 1.9920679656169442, + "learning_rate": 2.0080472671321847e-06, + "loss": 0.6041, + "step": 23266 + }, + { + "epoch": 0.7130991786195905, + "grad_norm": 1.8763011573232755, + "learning_rate": 2.0076496297556224e-06, + "loss": 0.5891, + "step": 23267 + }, + { + "epoch": 0.7131298271423318, + "grad_norm": 1.833979792971873, + "learning_rate": 2.0072520218634524e-06, + "loss": 0.5116, + "step": 23268 + }, + { + "epoch": 0.7131604756650729, + "grad_norm": 1.7371687384222518, + "learning_rate": 2.006854443459594e-06, + "loss": 0.586, + "step": 23269 + }, + { + "epoch": 0.7131911241878142, + "grad_norm": 1.748712267805885, + "learning_rate": 2.006456894547966e-06, + "loss": 0.601, + "step": 23270 + }, + { + "epoch": 0.7132217727105553, + "grad_norm": 1.9644600263217997, + "learning_rate": 2.0060593751324817e-06, + "loss": 0.5573, + "step": 23271 + }, + { + "epoch": 0.7132524212332966, + "grad_norm": 0.7674639736385244, + "learning_rate": 2.0056618852170613e-06, + "loss": 0.3994, + "step": 23272 + }, + { + "epoch": 0.7132830697560377, + "grad_norm": 0.7797343873010857, + "learning_rate": 2.0052644248056217e-06, + "loss": 0.3992, + "step": 23273 + }, + { + "epoch": 0.713313718278779, + "grad_norm": 0.7656722512361187, + "learning_rate": 2.0048669939020766e-06, + "loss": 0.4146, + "step": 23274 + }, + { + "epoch": 0.7133443668015201, + "grad_norm": 1.7725987311760976, + "learning_rate": 2.0044695925103435e-06, + "loss": 0.5184, + "step": 23275 + }, + { + "epoch": 0.7133750153242614, + "grad_norm": 1.6583014086306354, + "learning_rate": 2.004072220634339e-06, + "loss": 0.5616, + "step": 23276 + }, + { + 
"epoch": 0.7134056638470025, + "grad_norm": 1.9790204092019381, + "learning_rate": 2.0036748782779764e-06, + "loss": 0.5467, + "step": 23277 + }, + { + "epoch": 0.7134363123697438, + "grad_norm": 1.880158328329981, + "learning_rate": 2.0032775654451736e-06, + "loss": 0.5566, + "step": 23278 + }, + { + "epoch": 0.713466960892485, + "grad_norm": 1.8335615193778667, + "learning_rate": 2.0028802821398415e-06, + "loss": 0.5596, + "step": 23279 + }, + { + "epoch": 0.7134976094152262, + "grad_norm": 2.0484062924388375, + "learning_rate": 2.0024830283658968e-06, + "loss": 0.6299, + "step": 23280 + }, + { + "epoch": 0.7135282579379674, + "grad_norm": 1.811417136248714, + "learning_rate": 2.002085804127256e-06, + "loss": 0.6985, + "step": 23281 + }, + { + "epoch": 0.7135589064607086, + "grad_norm": 1.6933601369455704, + "learning_rate": 2.0016886094278286e-06, + "loss": 0.5099, + "step": 23282 + }, + { + "epoch": 0.7135895549834498, + "grad_norm": 1.8787047168633457, + "learning_rate": 2.001291444271531e-06, + "loss": 0.6303, + "step": 23283 + }, + { + "epoch": 0.713620203506191, + "grad_norm": 1.767553047273297, + "learning_rate": 2.000894308662277e-06, + "loss": 0.5036, + "step": 23284 + }, + { + "epoch": 0.7136508520289322, + "grad_norm": 1.6917730715679942, + "learning_rate": 2.000497202603978e-06, + "loss": 0.6275, + "step": 23285 + }, + { + "epoch": 0.7136815005516735, + "grad_norm": 1.846686415469187, + "learning_rate": 2.000100126100547e-06, + "loss": 0.5923, + "step": 23286 + }, + { + "epoch": 0.7137121490744146, + "grad_norm": 1.9752737141464252, + "learning_rate": 1.9997030791558985e-06, + "loss": 0.5705, + "step": 23287 + }, + { + "epoch": 0.7137427975971559, + "grad_norm": 1.9393220808153748, + "learning_rate": 1.999306061773942e-06, + "loss": 0.5521, + "step": 23288 + }, + { + "epoch": 0.713773446119897, + "grad_norm": 1.8488336626011974, + "learning_rate": 1.998909073958592e-06, + "loss": 0.4911, + "step": 23289 + }, + { + "epoch": 0.7138040946426383, + "grad_norm": 0.8278111880524146, + "learning_rate": 1.9985121157137553e-06, + "loss": 0.4104, + "step": 23290 + }, + { + "epoch": 0.7138347431653794, + "grad_norm": 1.7415034311591493, + "learning_rate": 1.99811518704335e-06, + "loss": 0.6282, + "step": 23291 + }, + { + "epoch": 0.7138653916881207, + "grad_norm": 1.824106577766883, + "learning_rate": 1.997718287951285e-06, + "loss": 0.6019, + "step": 23292 + }, + { + "epoch": 0.7138960402108618, + "grad_norm": 1.826273417130726, + "learning_rate": 1.9973214184414667e-06, + "loss": 0.5643, + "step": 23293 + }, + { + "epoch": 0.713926688733603, + "grad_norm": 1.8011917530411377, + "learning_rate": 1.9969245785178093e-06, + "loss": 0.502, + "step": 23294 + }, + { + "epoch": 0.7139573372563442, + "grad_norm": 1.664763143619634, + "learning_rate": 1.9965277681842244e-06, + "loss": 0.5626, + "step": 23295 + }, + { + "epoch": 0.7139879857790854, + "grad_norm": 1.7369037733717112, + "learning_rate": 1.996130987444618e-06, + "loss": 0.6152, + "step": 23296 + }, + { + "epoch": 0.7140186343018267, + "grad_norm": 0.7856049009062752, + "learning_rate": 1.995734236302901e-06, + "loss": 0.4222, + "step": 23297 + }, + { + "epoch": 0.7140492828245678, + "grad_norm": 1.8926803638192857, + "learning_rate": 1.9953375147629854e-06, + "loss": 0.5695, + "step": 23298 + }, + { + "epoch": 0.7140799313473091, + "grad_norm": 1.8938694325282754, + "learning_rate": 1.994940822828776e-06, + "loss": 0.5688, + "step": 23299 + }, + { + "epoch": 0.7141105798700502, + "grad_norm": 1.8877162730560928, + "learning_rate": 
1.994544160504186e-06, + "loss": 0.6354, + "step": 23300 + }, + { + "epoch": 0.7141412283927915, + "grad_norm": 0.8111548911866543, + "learning_rate": 1.9941475277931187e-06, + "loss": 0.4128, + "step": 23301 + }, + { + "epoch": 0.7141718769155326, + "grad_norm": 0.7956280002695207, + "learning_rate": 1.993750924699486e-06, + "loss": 0.4119, + "step": 23302 + }, + { + "epoch": 0.7142025254382739, + "grad_norm": 1.6518949463107642, + "learning_rate": 1.9933543512271954e-06, + "loss": 0.5282, + "step": 23303 + }, + { + "epoch": 0.714233173961015, + "grad_norm": 1.7392295723977285, + "learning_rate": 1.992957807380152e-06, + "loss": 0.4968, + "step": 23304 + }, + { + "epoch": 0.7142638224837563, + "grad_norm": 1.7803419685033894, + "learning_rate": 1.992561293162265e-06, + "loss": 0.5861, + "step": 23305 + }, + { + "epoch": 0.7142944710064975, + "grad_norm": 0.7413307159600009, + "learning_rate": 1.992164808577443e-06, + "loss": 0.3756, + "step": 23306 + }, + { + "epoch": 0.7143251195292387, + "grad_norm": 2.083529657632229, + "learning_rate": 1.9917683536295886e-06, + "loss": 0.6345, + "step": 23307 + }, + { + "epoch": 0.7143557680519799, + "grad_norm": 1.8929858790119476, + "learning_rate": 1.9913719283226123e-06, + "loss": 0.6431, + "step": 23308 + }, + { + "epoch": 0.7143864165747211, + "grad_norm": 2.231567208228349, + "learning_rate": 1.9909755326604145e-06, + "loss": 0.7115, + "step": 23309 + }, + { + "epoch": 0.7144170650974623, + "grad_norm": 1.8992278564803313, + "learning_rate": 1.9905791666469084e-06, + "loss": 0.534, + "step": 23310 + }, + { + "epoch": 0.7144477136202035, + "grad_norm": 1.9460394984446057, + "learning_rate": 1.9901828302859954e-06, + "loss": 0.6162, + "step": 23311 + }, + { + "epoch": 0.7144783621429447, + "grad_norm": 0.753063967675235, + "learning_rate": 1.9897865235815795e-06, + "loss": 0.3996, + "step": 23312 + }, + { + "epoch": 0.714509010665686, + "grad_norm": 1.9659262119286498, + "learning_rate": 1.9893902465375677e-06, + "loss": 0.5742, + "step": 23313 + }, + { + "epoch": 0.7145396591884271, + "grad_norm": 0.7811014559043296, + "learning_rate": 1.9889939991578648e-06, + "loss": 0.3981, + "step": 23314 + }, + { + "epoch": 0.7145703077111684, + "grad_norm": 1.9098290218640677, + "learning_rate": 1.9885977814463734e-06, + "loss": 0.7683, + "step": 23315 + }, + { + "epoch": 0.7146009562339095, + "grad_norm": 1.9530918013540146, + "learning_rate": 1.9882015934069985e-06, + "loss": 0.6211, + "step": 23316 + }, + { + "epoch": 0.7146316047566508, + "grad_norm": 0.7636438934362739, + "learning_rate": 1.9878054350436452e-06, + "loss": 0.4027, + "step": 23317 + }, + { + "epoch": 0.7146622532793919, + "grad_norm": 1.8688191675435883, + "learning_rate": 1.9874093063602146e-06, + "loss": 0.5034, + "step": 23318 + }, + { + "epoch": 0.7146929018021332, + "grad_norm": 1.6894839723152153, + "learning_rate": 1.9870132073606124e-06, + "loss": 0.598, + "step": 23319 + }, + { + "epoch": 0.7147235503248743, + "grad_norm": 1.9181662301183557, + "learning_rate": 1.9866171380487365e-06, + "loss": 0.6312, + "step": 23320 + }, + { + "epoch": 0.7147541988476156, + "grad_norm": 1.9480577579496423, + "learning_rate": 1.9862210984284964e-06, + "loss": 0.6254, + "step": 23321 + }, + { + "epoch": 0.7147848473703567, + "grad_norm": 1.8742222188504112, + "learning_rate": 1.9858250885037907e-06, + "loss": 0.6424, + "step": 23322 + }, + { + "epoch": 0.714815495893098, + "grad_norm": 1.8561428736324697, + "learning_rate": 1.98542910827852e-06, + "loss": 0.6004, + "step": 23323 + }, + { + 
"epoch": 0.7148461444158392, + "grad_norm": 1.94343565529399, + "learning_rate": 1.985033157756587e-06, + "loss": 0.5391, + "step": 23324 + }, + { + "epoch": 0.7148767929385803, + "grad_norm": 0.7661520033823499, + "learning_rate": 1.9846372369418964e-06, + "loss": 0.4044, + "step": 23325 + }, + { + "epoch": 0.7149074414613216, + "grad_norm": 0.772490230512227, + "learning_rate": 1.984241345838345e-06, + "loss": 0.4004, + "step": 23326 + }, + { + "epoch": 0.7149380899840627, + "grad_norm": 1.8587464355268453, + "learning_rate": 1.9838454844498344e-06, + "loss": 0.5574, + "step": 23327 + }, + { + "epoch": 0.714968738506804, + "grad_norm": 0.7714659733416755, + "learning_rate": 1.9834496527802665e-06, + "loss": 0.3912, + "step": 23328 + }, + { + "epoch": 0.7149993870295451, + "grad_norm": 2.1082762743626384, + "learning_rate": 1.9830538508335425e-06, + "loss": 0.5728, + "step": 23329 + }, + { + "epoch": 0.7150300355522864, + "grad_norm": 1.6078727164391955, + "learning_rate": 1.982658078613561e-06, + "loss": 0.56, + "step": 23330 + }, + { + "epoch": 0.7150606840750275, + "grad_norm": 0.7793734754064203, + "learning_rate": 1.9822623361242176e-06, + "loss": 0.4116, + "step": 23331 + }, + { + "epoch": 0.7150913325977688, + "grad_norm": 1.7201150136038346, + "learning_rate": 1.9818666233694196e-06, + "loss": 0.5566, + "step": 23332 + }, + { + "epoch": 0.71512198112051, + "grad_norm": 1.9297898079941411, + "learning_rate": 1.981470940353062e-06, + "loss": 0.6049, + "step": 23333 + }, + { + "epoch": 0.7151526296432512, + "grad_norm": 1.7959315948516288, + "learning_rate": 1.981075287079041e-06, + "loss": 0.5935, + "step": 23334 + }, + { + "epoch": 0.7151832781659924, + "grad_norm": 1.8447233721484027, + "learning_rate": 1.9806796635512583e-06, + "loss": 0.6628, + "step": 23335 + }, + { + "epoch": 0.7152139266887336, + "grad_norm": 1.820395241071523, + "learning_rate": 1.980284069773611e-06, + "loss": 0.6365, + "step": 23336 + }, + { + "epoch": 0.7152445752114748, + "grad_norm": 1.6883864954100911, + "learning_rate": 1.9798885057499994e-06, + "loss": 0.5788, + "step": 23337 + }, + { + "epoch": 0.715275223734216, + "grad_norm": 0.830413644464335, + "learning_rate": 1.9794929714843176e-06, + "loss": 0.3929, + "step": 23338 + }, + { + "epoch": 0.7153058722569572, + "grad_norm": 1.772269173798396, + "learning_rate": 1.9790974669804637e-06, + "loss": 0.6292, + "step": 23339 + }, + { + "epoch": 0.7153365207796984, + "grad_norm": 1.7868437940309123, + "learning_rate": 1.9787019922423376e-06, + "loss": 0.6314, + "step": 23340 + }, + { + "epoch": 0.7153671693024396, + "grad_norm": 1.8360323783783068, + "learning_rate": 1.978306547273834e-06, + "loss": 0.5432, + "step": 23341 + }, + { + "epoch": 0.7153978178251809, + "grad_norm": 1.6737900894293776, + "learning_rate": 1.977911132078845e-06, + "loss": 0.5623, + "step": 23342 + }, + { + "epoch": 0.715428466347922, + "grad_norm": 0.7998283269502207, + "learning_rate": 1.977515746661275e-06, + "loss": 0.3806, + "step": 23343 + }, + { + "epoch": 0.7154591148706633, + "grad_norm": 1.7791515091281547, + "learning_rate": 1.9771203910250155e-06, + "loss": 0.6713, + "step": 23344 + }, + { + "epoch": 0.7154897633934044, + "grad_norm": 1.8991100151673335, + "learning_rate": 1.97672506517396e-06, + "loss": 0.5525, + "step": 23345 + }, + { + "epoch": 0.7155204119161457, + "grad_norm": 0.787574995057761, + "learning_rate": 1.9763297691120065e-06, + "loss": 0.4083, + "step": 23346 + }, + { + "epoch": 0.7155510604388868, + "grad_norm": 1.719637710080032, + "learning_rate": 
1.975934502843049e-06, + "loss": 0.5777, + "step": 23347 + }, + { + "epoch": 0.7155817089616281, + "grad_norm": 2.0016167462835224, + "learning_rate": 1.9755392663709842e-06, + "loss": 0.6875, + "step": 23348 + }, + { + "epoch": 0.7156123574843692, + "grad_norm": 1.9551911119989187, + "learning_rate": 1.975144059699704e-06, + "loss": 0.6659, + "step": 23349 + }, + { + "epoch": 0.7156430060071105, + "grad_norm": 0.7852424592760139, + "learning_rate": 1.9747488828331022e-06, + "loss": 0.409, + "step": 23350 + }, + { + "epoch": 0.7156736545298517, + "grad_norm": 1.7843629970746768, + "learning_rate": 1.9743537357750763e-06, + "loss": 0.5489, + "step": 23351 + }, + { + "epoch": 0.7157043030525929, + "grad_norm": 1.9282581811448924, + "learning_rate": 1.9739586185295172e-06, + "loss": 0.6578, + "step": 23352 + }, + { + "epoch": 0.7157349515753341, + "grad_norm": 1.9940129022156443, + "learning_rate": 1.973563531100316e-06, + "loss": 0.6153, + "step": 23353 + }, + { + "epoch": 0.7157656000980753, + "grad_norm": 1.9284827280931887, + "learning_rate": 1.9731684734913675e-06, + "loss": 0.6727, + "step": 23354 + }, + { + "epoch": 0.7157962486208165, + "grad_norm": 1.6365468767597429, + "learning_rate": 1.972773445706565e-06, + "loss": 0.646, + "step": 23355 + }, + { + "epoch": 0.7158268971435576, + "grad_norm": 0.763062875557492, + "learning_rate": 1.9723784477498014e-06, + "loss": 0.3872, + "step": 23356 + }, + { + "epoch": 0.7158575456662989, + "grad_norm": 1.9895488531382757, + "learning_rate": 1.9719834796249666e-06, + "loss": 0.5992, + "step": 23357 + }, + { + "epoch": 0.71588819418904, + "grad_norm": 0.8245664497970027, + "learning_rate": 1.9715885413359525e-06, + "loss": 0.4137, + "step": 23358 + }, + { + "epoch": 0.7159188427117813, + "grad_norm": 2.016771639976629, + "learning_rate": 1.971193632886654e-06, + "loss": 0.6849, + "step": 23359 + }, + { + "epoch": 0.7159494912345225, + "grad_norm": 1.825129721190977, + "learning_rate": 1.9707987542809585e-06, + "loss": 0.5842, + "step": 23360 + }, + { + "epoch": 0.7159801397572637, + "grad_norm": 2.008653689263666, + "learning_rate": 1.970403905522755e-06, + "loss": 0.5303, + "step": 23361 + }, + { + "epoch": 0.7160107882800049, + "grad_norm": 0.8045440983810871, + "learning_rate": 1.970009086615941e-06, + "loss": 0.4291, + "step": 23362 + }, + { + "epoch": 0.7160414368027461, + "grad_norm": 2.0962076099943734, + "learning_rate": 1.9696142975644008e-06, + "loss": 0.6254, + "step": 23363 + }, + { + "epoch": 0.7160720853254873, + "grad_norm": 2.04235033121333, + "learning_rate": 1.9692195383720275e-06, + "loss": 0.5573, + "step": 23364 + }, + { + "epoch": 0.7161027338482285, + "grad_norm": 1.8352753027121609, + "learning_rate": 1.968824809042708e-06, + "loss": 0.5183, + "step": 23365 + }, + { + "epoch": 0.7161333823709697, + "grad_norm": 1.7593154332600092, + "learning_rate": 1.968430109580333e-06, + "loss": 0.6361, + "step": 23366 + }, + { + "epoch": 0.716164030893711, + "grad_norm": 1.7450092664514025, + "learning_rate": 1.968035439988794e-06, + "loss": 0.6034, + "step": 23367 + }, + { + "epoch": 0.7161946794164521, + "grad_norm": 1.8229805116632172, + "learning_rate": 1.9676408002719753e-06, + "loss": 0.6209, + "step": 23368 + }, + { + "epoch": 0.7162253279391934, + "grad_norm": 1.8282879220841206, + "learning_rate": 1.967246190433768e-06, + "loss": 0.6198, + "step": 23369 + }, + { + "epoch": 0.7162559764619345, + "grad_norm": 1.9555872135500743, + "learning_rate": 1.966851610478062e-06, + "loss": 0.6419, + "step": 23370 + }, + { + "epoch": 
0.7162866249846758, + "grad_norm": 1.9313966558767925, + "learning_rate": 1.9664570604087428e-06, + "loss": 0.5517, + "step": 23371 + }, + { + "epoch": 0.7163172735074169, + "grad_norm": 1.7467264356729766, + "learning_rate": 1.9660625402296948e-06, + "loss": 0.5443, + "step": 23372 + }, + { + "epoch": 0.7163479220301582, + "grad_norm": 1.9543238010255561, + "learning_rate": 1.965668049944812e-06, + "loss": 0.5791, + "step": 23373 + }, + { + "epoch": 0.7163785705528993, + "grad_norm": 0.7894441604445079, + "learning_rate": 1.9652735895579773e-06, + "loss": 0.395, + "step": 23374 + }, + { + "epoch": 0.7164092190756406, + "grad_norm": 1.906435824470127, + "learning_rate": 1.96487915907308e-06, + "loss": 0.6087, + "step": 23375 + }, + { + "epoch": 0.7164398675983817, + "grad_norm": 1.9045584960697757, + "learning_rate": 1.964484758494003e-06, + "loss": 0.5785, + "step": 23376 + }, + { + "epoch": 0.716470516121123, + "grad_norm": 1.9392067432652067, + "learning_rate": 1.9640903878246344e-06, + "loss": 0.6454, + "step": 23377 + }, + { + "epoch": 0.7165011646438642, + "grad_norm": 1.9190598875286806, + "learning_rate": 1.9636960470688613e-06, + "loss": 0.5492, + "step": 23378 + }, + { + "epoch": 0.7165318131666054, + "grad_norm": 1.7677878305413819, + "learning_rate": 1.963301736230567e-06, + "loss": 0.6287, + "step": 23379 + }, + { + "epoch": 0.7165624616893466, + "grad_norm": 1.6357825870522058, + "learning_rate": 1.9629074553136367e-06, + "loss": 0.5597, + "step": 23380 + }, + { + "epoch": 0.7165931102120878, + "grad_norm": 1.9631657649248864, + "learning_rate": 1.9625132043219584e-06, + "loss": 0.5592, + "step": 23381 + }, + { + "epoch": 0.716623758734829, + "grad_norm": 1.8472228918897065, + "learning_rate": 1.962118983259413e-06, + "loss": 0.5945, + "step": 23382 + }, + { + "epoch": 0.7166544072575702, + "grad_norm": 1.6940336183866413, + "learning_rate": 1.9617247921298865e-06, + "loss": 0.5105, + "step": 23383 + }, + { + "epoch": 0.7166850557803114, + "grad_norm": 0.8095851034491839, + "learning_rate": 1.961330630937265e-06, + "loss": 0.4251, + "step": 23384 + }, + { + "epoch": 0.7167157043030526, + "grad_norm": 1.90751798198411, + "learning_rate": 1.9609364996854285e-06, + "loss": 0.5857, + "step": 23385 + }, + { + "epoch": 0.7167463528257938, + "grad_norm": 1.7304857852433921, + "learning_rate": 1.9605423983782633e-06, + "loss": 0.567, + "step": 23386 + }, + { + "epoch": 0.716777001348535, + "grad_norm": 0.8076233905022849, + "learning_rate": 1.960148327019651e-06, + "loss": 0.3961, + "step": 23387 + }, + { + "epoch": 0.7168076498712762, + "grad_norm": 1.8644908674794871, + "learning_rate": 1.9597542856134737e-06, + "loss": 0.5745, + "step": 23388 + }, + { + "epoch": 0.7168382983940174, + "grad_norm": 1.7978567838370554, + "learning_rate": 1.959360274163618e-06, + "loss": 0.4767, + "step": 23389 + }, + { + "epoch": 0.7168689469167586, + "grad_norm": 1.8336374626899308, + "learning_rate": 1.9589662926739616e-06, + "loss": 0.5864, + "step": 23390 + }, + { + "epoch": 0.7168995954394998, + "grad_norm": 1.7170001984326235, + "learning_rate": 1.958572341148388e-06, + "loss": 0.509, + "step": 23391 + }, + { + "epoch": 0.716930243962241, + "grad_norm": 1.9791816046046133, + "learning_rate": 1.9581784195907817e-06, + "loss": 0.5979, + "step": 23392 + }, + { + "epoch": 0.7169608924849822, + "grad_norm": 2.014566128662328, + "learning_rate": 1.9577845280050194e-06, + "loss": 0.5954, + "step": 23393 + }, + { + "epoch": 0.7169915410077234, + "grad_norm": 0.8083426915414897, + "learning_rate": 
1.9573906663949845e-06, + "loss": 0.4049, + "step": 23394 + }, + { + "epoch": 0.7170221895304646, + "grad_norm": 1.6639828213640702, + "learning_rate": 1.9569968347645597e-06, + "loss": 0.5986, + "step": 23395 + }, + { + "epoch": 0.7170528380532059, + "grad_norm": 1.7108187972290727, + "learning_rate": 1.9566030331176223e-06, + "loss": 0.5239, + "step": 23396 + }, + { + "epoch": 0.717083486575947, + "grad_norm": 0.8317850466332807, + "learning_rate": 1.956209261458055e-06, + "loss": 0.4204, + "step": 23397 + }, + { + "epoch": 0.7171141350986883, + "grad_norm": 1.7488841731868194, + "learning_rate": 1.9558155197897355e-06, + "loss": 0.5785, + "step": 23398 + }, + { + "epoch": 0.7171447836214294, + "grad_norm": 1.9615033133945208, + "learning_rate": 1.9554218081165444e-06, + "loss": 0.6447, + "step": 23399 + }, + { + "epoch": 0.7171754321441707, + "grad_norm": 1.9353188284336642, + "learning_rate": 1.9550281264423626e-06, + "loss": 0.6169, + "step": 23400 + }, + { + "epoch": 0.7172060806669118, + "grad_norm": 1.8874662315497817, + "learning_rate": 1.9546344747710666e-06, + "loss": 0.5899, + "step": 23401 + }, + { + "epoch": 0.7172367291896531, + "grad_norm": 1.8342424727559135, + "learning_rate": 1.954240853106536e-06, + "loss": 0.5268, + "step": 23402 + }, + { + "epoch": 0.7172673777123942, + "grad_norm": 2.005364833134572, + "learning_rate": 1.953847261452651e-06, + "loss": 0.5924, + "step": 23403 + }, + { + "epoch": 0.7172980262351355, + "grad_norm": 2.0437668123591495, + "learning_rate": 1.953453699813287e-06, + "loss": 0.6823, + "step": 23404 + }, + { + "epoch": 0.7173286747578766, + "grad_norm": 1.8720161110526314, + "learning_rate": 1.953060168192325e-06, + "loss": 0.5212, + "step": 23405 + }, + { + "epoch": 0.7173593232806179, + "grad_norm": 1.8207635995186555, + "learning_rate": 1.9526666665936388e-06, + "loss": 0.639, + "step": 23406 + }, + { + "epoch": 0.7173899718033591, + "grad_norm": 1.9928252348289341, + "learning_rate": 1.952273195021108e-06, + "loss": 0.6076, + "step": 23407 + }, + { + "epoch": 0.7174206203261003, + "grad_norm": 0.7548936921613243, + "learning_rate": 1.95187975347861e-06, + "loss": 0.394, + "step": 23408 + }, + { + "epoch": 0.7174512688488415, + "grad_norm": 2.1028389121759274, + "learning_rate": 1.9514863419700198e-06, + "loss": 0.5552, + "step": 23409 + }, + { + "epoch": 0.7174819173715827, + "grad_norm": 1.668699232149452, + "learning_rate": 1.9510929604992147e-06, + "loss": 0.5287, + "step": 23410 + }, + { + "epoch": 0.7175125658943239, + "grad_norm": 1.6912321613554033, + "learning_rate": 1.950699609070072e-06, + "loss": 0.6225, + "step": 23411 + }, + { + "epoch": 0.7175432144170651, + "grad_norm": 1.6962714277633042, + "learning_rate": 1.950306287686465e-06, + "loss": 0.5792, + "step": 23412 + }, + { + "epoch": 0.7175738629398063, + "grad_norm": 1.7260417333895797, + "learning_rate": 1.94991299635227e-06, + "loss": 0.5775, + "step": 23413 + }, + { + "epoch": 0.7176045114625476, + "grad_norm": 1.820887249100399, + "learning_rate": 1.9495197350713645e-06, + "loss": 0.5751, + "step": 23414 + }, + { + "epoch": 0.7176351599852887, + "grad_norm": 1.9269338114418173, + "learning_rate": 1.9491265038476197e-06, + "loss": 0.6372, + "step": 23415 + }, + { + "epoch": 0.71766580850803, + "grad_norm": 1.758989996496664, + "learning_rate": 1.948733302684914e-06, + "loss": 0.5547, + "step": 23416 + }, + { + "epoch": 0.7176964570307711, + "grad_norm": 0.7967137978112949, + "learning_rate": 1.9483401315871163e-06, + "loss": 0.4077, + "step": 23417 + }, + { + "epoch": 
0.7177271055535123, + "grad_norm": 1.6645114270155472, + "learning_rate": 1.947946990558107e-06, + "loss": 0.4536, + "step": 23418 + }, + { + "epoch": 0.7177577540762535, + "grad_norm": 1.5535236490442292, + "learning_rate": 1.947553879601758e-06, + "loss": 0.5419, + "step": 23419 + }, + { + "epoch": 0.7177884025989947, + "grad_norm": 1.6611156290963447, + "learning_rate": 1.9471607987219394e-06, + "loss": 0.5271, + "step": 23420 + }, + { + "epoch": 0.7178190511217359, + "grad_norm": 0.8193492395100239, + "learning_rate": 1.946767747922526e-06, + "loss": 0.4181, + "step": 23421 + }, + { + "epoch": 0.7178496996444771, + "grad_norm": 1.8099552838043929, + "learning_rate": 1.9463747272073935e-06, + "loss": 0.6744, + "step": 23422 + }, + { + "epoch": 0.7178803481672184, + "grad_norm": 1.8954728138990922, + "learning_rate": 1.94598173658041e-06, + "loss": 0.6166, + "step": 23423 + }, + { + "epoch": 0.7179109966899595, + "grad_norm": 1.837731965097584, + "learning_rate": 1.9455887760454505e-06, + "loss": 0.5553, + "step": 23424 + }, + { + "epoch": 0.7179416452127008, + "grad_norm": 1.774026417738412, + "learning_rate": 1.9451958456063876e-06, + "loss": 0.5538, + "step": 23425 + }, + { + "epoch": 0.7179722937354419, + "grad_norm": 1.8422071352660991, + "learning_rate": 1.9448029452670902e-06, + "loss": 0.5474, + "step": 23426 + }, + { + "epoch": 0.7180029422581832, + "grad_norm": 1.7279428165957047, + "learning_rate": 1.9444100750314327e-06, + "loss": 0.5193, + "step": 23427 + }, + { + "epoch": 0.7180335907809243, + "grad_norm": 0.7959201000736668, + "learning_rate": 1.9440172349032806e-06, + "loss": 0.4019, + "step": 23428 + }, + { + "epoch": 0.7180642393036656, + "grad_norm": 2.049102765148475, + "learning_rate": 1.9436244248865123e-06, + "loss": 0.6385, + "step": 23429 + }, + { + "epoch": 0.7180948878264067, + "grad_norm": 1.9210743927941902, + "learning_rate": 1.9432316449849946e-06, + "loss": 0.5656, + "step": 23430 + }, + { + "epoch": 0.718125536349148, + "grad_norm": 1.8177082887058162, + "learning_rate": 1.9428388952025963e-06, + "loss": 0.6187, + "step": 23431 + }, + { + "epoch": 0.7181561848718891, + "grad_norm": 0.7746649717451858, + "learning_rate": 1.942446175543188e-06, + "loss": 0.4112, + "step": 23432 + }, + { + "epoch": 0.7181868333946304, + "grad_norm": 1.7713698669256102, + "learning_rate": 1.9420534860106417e-06, + "loss": 0.6275, + "step": 23433 + }, + { + "epoch": 0.7182174819173716, + "grad_norm": 1.768407047130166, + "learning_rate": 1.941660826608823e-06, + "loss": 0.5365, + "step": 23434 + }, + { + "epoch": 0.7182481304401128, + "grad_norm": 1.5464280915616382, + "learning_rate": 1.941268197341603e-06, + "loss": 0.4882, + "step": 23435 + }, + { + "epoch": 0.718278778962854, + "grad_norm": 1.6429785898513278, + "learning_rate": 1.9408755982128498e-06, + "loss": 0.4846, + "step": 23436 + }, + { + "epoch": 0.7183094274855952, + "grad_norm": 1.9568568161114293, + "learning_rate": 1.9404830292264336e-06, + "loss": 0.5862, + "step": 23437 + }, + { + "epoch": 0.7183400760083364, + "grad_norm": 1.7664867451561543, + "learning_rate": 1.9400904903862215e-06, + "loss": 0.4869, + "step": 23438 + }, + { + "epoch": 0.7183707245310776, + "grad_norm": 1.6537130597722702, + "learning_rate": 1.939697981696077e-06, + "loss": 0.5511, + "step": 23439 + }, + { + "epoch": 0.7184013730538188, + "grad_norm": 1.811177193476457, + "learning_rate": 1.9393055031598745e-06, + "loss": 0.558, + "step": 23440 + }, + { + "epoch": 0.71843202157656, + "grad_norm": 0.8142390190188472, + "learning_rate": 
1.9389130547814784e-06, + "loss": 0.4172, + "step": 23441 + }, + { + "epoch": 0.7184626700993012, + "grad_norm": 1.6647890288334868, + "learning_rate": 1.938520636564753e-06, + "loss": 0.4862, + "step": 23442 + }, + { + "epoch": 0.7184933186220425, + "grad_norm": 1.7625217367662378, + "learning_rate": 1.9381282485135676e-06, + "loss": 0.5489, + "step": 23443 + }, + { + "epoch": 0.7185239671447836, + "grad_norm": 1.9148894491867035, + "learning_rate": 1.937735890631788e-06, + "loss": 0.5776, + "step": 23444 + }, + { + "epoch": 0.7185546156675249, + "grad_norm": 2.0241076512286704, + "learning_rate": 1.937343562923281e-06, + "loss": 0.5574, + "step": 23445 + }, + { + "epoch": 0.718585264190266, + "grad_norm": 1.7505064714986507, + "learning_rate": 1.936951265391911e-06, + "loss": 0.5993, + "step": 23446 + }, + { + "epoch": 0.7186159127130073, + "grad_norm": 1.9455724993644727, + "learning_rate": 1.9365589980415427e-06, + "loss": 0.6257, + "step": 23447 + }, + { + "epoch": 0.7186465612357484, + "grad_norm": 2.1642114879911776, + "learning_rate": 1.936166760876045e-06, + "loss": 0.5766, + "step": 23448 + }, + { + "epoch": 0.7186772097584896, + "grad_norm": 0.7801737990342374, + "learning_rate": 1.93577455389928e-06, + "loss": 0.4258, + "step": 23449 + }, + { + "epoch": 0.7187078582812308, + "grad_norm": 1.846574553466598, + "learning_rate": 1.93538237711511e-06, + "loss": 0.6119, + "step": 23450 + }, + { + "epoch": 0.718738506803972, + "grad_norm": 1.6980958176768337, + "learning_rate": 1.934990230527402e-06, + "loss": 0.5204, + "step": 23451 + }, + { + "epoch": 0.7187691553267133, + "grad_norm": 0.7785230989923579, + "learning_rate": 1.9345981141400215e-06, + "loss": 0.3893, + "step": 23452 + }, + { + "epoch": 0.7187998038494544, + "grad_norm": 0.7637886848960367, + "learning_rate": 1.934206027956828e-06, + "loss": 0.4204, + "step": 23453 + }, + { + "epoch": 0.7188304523721957, + "grad_norm": 1.6999784523712138, + "learning_rate": 1.9338139719816866e-06, + "loss": 0.6216, + "step": 23454 + }, + { + "epoch": 0.7188611008949368, + "grad_norm": 1.9018324810049438, + "learning_rate": 1.933421946218461e-06, + "loss": 0.6677, + "step": 23455 + }, + { + "epoch": 0.7188917494176781, + "grad_norm": 1.782994969505866, + "learning_rate": 1.933029950671016e-06, + "loss": 0.6993, + "step": 23456 + }, + { + "epoch": 0.7189223979404192, + "grad_norm": 1.8981609379861355, + "learning_rate": 1.932637985343211e-06, + "loss": 0.6001, + "step": 23457 + }, + { + "epoch": 0.7189530464631605, + "grad_norm": 1.6593622434805566, + "learning_rate": 1.932246050238905e-06, + "loss": 0.5538, + "step": 23458 + }, + { + "epoch": 0.7189836949859016, + "grad_norm": 1.719214028204955, + "learning_rate": 1.9318541453619673e-06, + "loss": 0.6346, + "step": 23459 + }, + { + "epoch": 0.7190143435086429, + "grad_norm": 1.7789786096871132, + "learning_rate": 1.9314622707162555e-06, + "loss": 0.5294, + "step": 23460 + }, + { + "epoch": 0.7190449920313841, + "grad_norm": 2.0912299474059095, + "learning_rate": 1.9310704263056295e-06, + "loss": 0.4993, + "step": 23461 + }, + { + "epoch": 0.7190756405541253, + "grad_norm": 1.8080345972890657, + "learning_rate": 1.9306786121339522e-06, + "loss": 0.5978, + "step": 23462 + }, + { + "epoch": 0.7191062890768665, + "grad_norm": 1.7550819139705285, + "learning_rate": 1.9302868282050837e-06, + "loss": 0.6164, + "step": 23463 + }, + { + "epoch": 0.7191369375996077, + "grad_norm": 1.7033208542920772, + "learning_rate": 1.9298950745228858e-06, + "loss": 0.5806, + "step": 23464 + }, + { + "epoch": 
0.7191675861223489, + "grad_norm": 1.840714504296883, + "learning_rate": 1.929503351091216e-06, + "loss": 0.4298, + "step": 23465 + }, + { + "epoch": 0.7191982346450901, + "grad_norm": 1.7145681235281884, + "learning_rate": 1.9291116579139347e-06, + "loss": 0.543, + "step": 23466 + }, + { + "epoch": 0.7192288831678313, + "grad_norm": 1.9768924727185242, + "learning_rate": 1.9287199949949043e-06, + "loss": 0.6371, + "step": 23467 + }, + { + "epoch": 0.7192595316905726, + "grad_norm": 1.6989029757243534, + "learning_rate": 1.928328362337981e-06, + "loss": 0.552, + "step": 23468 + }, + { + "epoch": 0.7192901802133137, + "grad_norm": 1.7516344426949215, + "learning_rate": 1.9279367599470216e-06, + "loss": 0.6035, + "step": 23469 + }, + { + "epoch": 0.719320828736055, + "grad_norm": 1.9067771626066488, + "learning_rate": 1.9275451878258905e-06, + "loss": 0.6156, + "step": 23470 + }, + { + "epoch": 0.7193514772587961, + "grad_norm": 0.782437487920946, + "learning_rate": 1.9271536459784406e-06, + "loss": 0.414, + "step": 23471 + }, + { + "epoch": 0.7193821257815374, + "grad_norm": 2.0691452063985296, + "learning_rate": 1.926762134408534e-06, + "loss": 0.6138, + "step": 23472 + }, + { + "epoch": 0.7194127743042785, + "grad_norm": 1.8237918684483228, + "learning_rate": 1.926370653120025e-06, + "loss": 0.5902, + "step": 23473 + }, + { + "epoch": 0.7194434228270198, + "grad_norm": 1.8043930745997294, + "learning_rate": 1.9259792021167724e-06, + "loss": 0.6237, + "step": 23474 + }, + { + "epoch": 0.7194740713497609, + "grad_norm": 1.9472377686976934, + "learning_rate": 1.925587781402635e-06, + "loss": 0.7169, + "step": 23475 + }, + { + "epoch": 0.7195047198725022, + "grad_norm": 0.7783201773360993, + "learning_rate": 1.925196390981465e-06, + "loss": 0.4187, + "step": 23476 + }, + { + "epoch": 0.7195353683952433, + "grad_norm": 1.7164182999278974, + "learning_rate": 1.9248050308571224e-06, + "loss": 0.5592, + "step": 23477 + }, + { + "epoch": 0.7195660169179846, + "grad_norm": 1.9424133523822475, + "learning_rate": 1.924413701033465e-06, + "loss": 0.5633, + "step": 23478 + }, + { + "epoch": 0.7195966654407258, + "grad_norm": 1.574434911405477, + "learning_rate": 1.9240224015143455e-06, + "loss": 0.5208, + "step": 23479 + }, + { + "epoch": 0.7196273139634669, + "grad_norm": 1.7577811640984833, + "learning_rate": 1.9236311323036172e-06, + "loss": 0.5465, + "step": 23480 + }, + { + "epoch": 0.7196579624862082, + "grad_norm": 1.7128059423086235, + "learning_rate": 1.923239893405142e-06, + "loss": 0.6305, + "step": 23481 + }, + { + "epoch": 0.7196886110089493, + "grad_norm": 1.8771043575176163, + "learning_rate": 1.9228486848227697e-06, + "loss": 0.5232, + "step": 23482 + }, + { + "epoch": 0.7197192595316906, + "grad_norm": 0.7716247858539694, + "learning_rate": 1.922457506560358e-06, + "loss": 0.3948, + "step": 23483 + }, + { + "epoch": 0.7197499080544317, + "grad_norm": 1.8693746939283689, + "learning_rate": 1.9220663586217585e-06, + "loss": 0.618, + "step": 23484 + }, + { + "epoch": 0.719780556577173, + "grad_norm": 1.957344499495347, + "learning_rate": 1.9216752410108264e-06, + "loss": 0.6388, + "step": 23485 + }, + { + "epoch": 0.7198112050999141, + "grad_norm": 1.9149641814046827, + "learning_rate": 1.9212841537314173e-06, + "loss": 0.5367, + "step": 23486 + }, + { + "epoch": 0.7198418536226554, + "grad_norm": 1.7907778956453448, + "learning_rate": 1.9208930967873824e-06, + "loss": 0.5806, + "step": 23487 + }, + { + "epoch": 0.7198725021453966, + "grad_norm": 1.7189727377000492, + "learning_rate": 
1.920502070182575e-06, + "loss": 0.545, + "step": 23488 + }, + { + "epoch": 0.7199031506681378, + "grad_norm": 0.7976917841914367, + "learning_rate": 1.92011107392085e-06, + "loss": 0.4075, + "step": 23489 + }, + { + "epoch": 0.719933799190879, + "grad_norm": 1.874277581026874, + "learning_rate": 1.9197201080060572e-06, + "loss": 0.5649, + "step": 23490 + }, + { + "epoch": 0.7199644477136202, + "grad_norm": 2.12850531272517, + "learning_rate": 1.9193291724420503e-06, + "loss": 0.6094, + "step": 23491 + }, + { + "epoch": 0.7199950962363614, + "grad_norm": 1.8692890991139925, + "learning_rate": 1.9189382672326833e-06, + "loss": 0.6668, + "step": 23492 + }, + { + "epoch": 0.7200257447591026, + "grad_norm": 1.7001155133630694, + "learning_rate": 1.918547392381804e-06, + "loss": 0.6426, + "step": 23493 + }, + { + "epoch": 0.7200563932818438, + "grad_norm": 1.7608107067175254, + "learning_rate": 1.9181565478932675e-06, + "loss": 0.6486, + "step": 23494 + }, + { + "epoch": 0.720087041804585, + "grad_norm": 1.8641728529625101, + "learning_rate": 1.917765733770921e-06, + "loss": 0.6547, + "step": 23495 + }, + { + "epoch": 0.7201176903273262, + "grad_norm": 0.7995695284016725, + "learning_rate": 1.9173749500186174e-06, + "loss": 0.3929, + "step": 23496 + }, + { + "epoch": 0.7201483388500675, + "grad_norm": 1.9326510755507207, + "learning_rate": 1.9169841966402085e-06, + "loss": 0.5711, + "step": 23497 + }, + { + "epoch": 0.7201789873728086, + "grad_norm": 1.8898877300412193, + "learning_rate": 1.916593473639542e-06, + "loss": 0.6821, + "step": 23498 + }, + { + "epoch": 0.7202096358955499, + "grad_norm": 0.783461447689035, + "learning_rate": 1.916202781020469e-06, + "loss": 0.4185, + "step": 23499 + }, + { + "epoch": 0.720240284418291, + "grad_norm": 0.878058261926522, + "learning_rate": 1.9158121187868407e-06, + "loss": 0.4293, + "step": 23500 + }, + { + "epoch": 0.7202709329410323, + "grad_norm": 0.824728111205083, + "learning_rate": 1.9154214869425026e-06, + "loss": 0.4185, + "step": 23501 + }, + { + "epoch": 0.7203015814637734, + "grad_norm": 1.5941903909977735, + "learning_rate": 1.915030885491308e-06, + "loss": 0.6361, + "step": 23502 + }, + { + "epoch": 0.7203322299865147, + "grad_norm": 1.8136737131748462, + "learning_rate": 1.9146403144371014e-06, + "loss": 0.6089, + "step": 23503 + }, + { + "epoch": 0.7203628785092558, + "grad_norm": 1.7612845052126627, + "learning_rate": 1.9142497737837327e-06, + "loss": 0.6374, + "step": 23504 + }, + { + "epoch": 0.7203935270319971, + "grad_norm": 1.828463557515693, + "learning_rate": 1.9138592635350535e-06, + "loss": 0.5936, + "step": 23505 + }, + { + "epoch": 0.7204241755547383, + "grad_norm": 0.7961946202186473, + "learning_rate": 1.913468783694906e-06, + "loss": 0.4416, + "step": 23506 + }, + { + "epoch": 0.7204548240774795, + "grad_norm": 1.9882601234151942, + "learning_rate": 1.9130783342671406e-06, + "loss": 0.5337, + "step": 23507 + }, + { + "epoch": 0.7204854726002207, + "grad_norm": 1.821949522812847, + "learning_rate": 1.9126879152556055e-06, + "loss": 0.5428, + "step": 23508 + }, + { + "epoch": 0.7205161211229619, + "grad_norm": 1.859919208613375, + "learning_rate": 1.9122975266641446e-06, + "loss": 0.6767, + "step": 23509 + }, + { + "epoch": 0.7205467696457031, + "grad_norm": 1.717515989964583, + "learning_rate": 1.9119071684966065e-06, + "loss": 0.4766, + "step": 23510 + }, + { + "epoch": 0.7205774181684442, + "grad_norm": 0.7232193944748272, + "learning_rate": 1.911516840756839e-06, + "loss": 0.3997, + "step": 23511 + }, + { + "epoch": 
0.7206080666911855, + "grad_norm": 1.8090896714082423, + "learning_rate": 1.911126543448685e-06, + "loss": 0.4875, + "step": 23512 + }, + { + "epoch": 0.7206387152139266, + "grad_norm": 1.8168481983046005, + "learning_rate": 1.910736276575992e-06, + "loss": 0.5453, + "step": 23513 + }, + { + "epoch": 0.7206693637366679, + "grad_norm": 2.1813514235341125, + "learning_rate": 1.910346040142604e-06, + "loss": 0.5471, + "step": 23514 + }, + { + "epoch": 0.720700012259409, + "grad_norm": 1.8051290937400293, + "learning_rate": 1.9099558341523664e-06, + "loss": 0.6486, + "step": 23515 + }, + { + "epoch": 0.7207306607821503, + "grad_norm": 0.7554758429662177, + "learning_rate": 1.9095656586091273e-06, + "loss": 0.3943, + "step": 23516 + }, + { + "epoch": 0.7207613093048915, + "grad_norm": 0.8438305040648602, + "learning_rate": 1.909175513516726e-06, + "loss": 0.4053, + "step": 23517 + }, + { + "epoch": 0.7207919578276327, + "grad_norm": 1.8308917009458268, + "learning_rate": 1.908785398879009e-06, + "loss": 0.6259, + "step": 23518 + }, + { + "epoch": 0.7208226063503739, + "grad_norm": 0.7595632357197978, + "learning_rate": 1.9083953146998224e-06, + "loss": 0.3935, + "step": 23519 + }, + { + "epoch": 0.7208532548731151, + "grad_norm": 1.9041394537337015, + "learning_rate": 1.9080052609830065e-06, + "loss": 0.6283, + "step": 23520 + }, + { + "epoch": 0.7208839033958563, + "grad_norm": 1.8282586766184457, + "learning_rate": 1.907615237732405e-06, + "loss": 0.618, + "step": 23521 + }, + { + "epoch": 0.7209145519185975, + "grad_norm": 1.7842010774663848, + "learning_rate": 1.9072252449518647e-06, + "loss": 0.5747, + "step": 23522 + }, + { + "epoch": 0.7209452004413387, + "grad_norm": 1.7948999850020328, + "learning_rate": 1.9068352826452225e-06, + "loss": 0.5479, + "step": 23523 + }, + { + "epoch": 0.72097584896408, + "grad_norm": 1.9273131745777479, + "learning_rate": 1.9064453508163261e-06, + "loss": 0.6224, + "step": 23524 + }, + { + "epoch": 0.7210064974868211, + "grad_norm": 1.7159620116192786, + "learning_rate": 1.906055449469013e-06, + "loss": 0.5204, + "step": 23525 + }, + { + "epoch": 0.7210371460095624, + "grad_norm": 2.0016004411747135, + "learning_rate": 1.9056655786071277e-06, + "loss": 0.6161, + "step": 23526 + }, + { + "epoch": 0.7210677945323035, + "grad_norm": 0.7894134747739006, + "learning_rate": 1.9052757382345128e-06, + "loss": 0.4199, + "step": 23527 + }, + { + "epoch": 0.7210984430550448, + "grad_norm": 1.973791816268171, + "learning_rate": 1.9048859283550059e-06, + "loss": 0.5535, + "step": 23528 + }, + { + "epoch": 0.7211290915777859, + "grad_norm": 1.791492596047958, + "learning_rate": 1.9044961489724495e-06, + "loss": 0.5079, + "step": 23529 + }, + { + "epoch": 0.7211597401005272, + "grad_norm": 1.8065109458338544, + "learning_rate": 1.9041064000906868e-06, + "loss": 0.6046, + "step": 23530 + }, + { + "epoch": 0.7211903886232683, + "grad_norm": 1.5836912548364235, + "learning_rate": 1.9037166817135538e-06, + "loss": 0.5553, + "step": 23531 + }, + { + "epoch": 0.7212210371460096, + "grad_norm": 0.758902490620417, + "learning_rate": 1.9033269938448927e-06, + "loss": 0.4022, + "step": 23532 + }, + { + "epoch": 0.7212516856687508, + "grad_norm": 1.7773126008533982, + "learning_rate": 1.902937336488545e-06, + "loss": 0.6469, + "step": 23533 + }, + { + "epoch": 0.721282334191492, + "grad_norm": 0.8079581752886921, + "learning_rate": 1.9025477096483463e-06, + "loss": 0.3927, + "step": 23534 + }, + { + "epoch": 0.7213129827142332, + "grad_norm": 1.786550005679052, + "learning_rate": 
1.9021581133281397e-06, + "loss": 0.7192, + "step": 23535 + }, + { + "epoch": 0.7213436312369744, + "grad_norm": 2.043212642889224, + "learning_rate": 1.9017685475317576e-06, + "loss": 0.6915, + "step": 23536 + }, + { + "epoch": 0.7213742797597156, + "grad_norm": 1.7853513097821987, + "learning_rate": 1.9013790122630465e-06, + "loss": 0.687, + "step": 23537 + }, + { + "epoch": 0.7214049282824568, + "grad_norm": 0.7821446821635313, + "learning_rate": 1.900989507525841e-06, + "loss": 0.3881, + "step": 23538 + }, + { + "epoch": 0.721435576805198, + "grad_norm": 1.7095165033819655, + "learning_rate": 1.9006000333239766e-06, + "loss": 0.5447, + "step": 23539 + }, + { + "epoch": 0.7214662253279392, + "grad_norm": 1.904967821155121, + "learning_rate": 1.9002105896612932e-06, + "loss": 0.5457, + "step": 23540 + }, + { + "epoch": 0.7214968738506804, + "grad_norm": 1.7166390238705587, + "learning_rate": 1.8998211765416303e-06, + "loss": 0.5309, + "step": 23541 + }, + { + "epoch": 0.7215275223734215, + "grad_norm": 1.8342261076670938, + "learning_rate": 1.89943179396882e-06, + "loss": 0.6063, + "step": 23542 + }, + { + "epoch": 0.7215581708961628, + "grad_norm": 1.9906424902021855, + "learning_rate": 1.8990424419467019e-06, + "loss": 0.5272, + "step": 23543 + }, + { + "epoch": 0.721588819418904, + "grad_norm": 1.5847986073201632, + "learning_rate": 1.8986531204791124e-06, + "loss": 0.4668, + "step": 23544 + }, + { + "epoch": 0.7216194679416452, + "grad_norm": 0.7738400490442767, + "learning_rate": 1.8982638295698885e-06, + "loss": 0.423, + "step": 23545 + }, + { + "epoch": 0.7216501164643864, + "grad_norm": 2.154045061550098, + "learning_rate": 1.897874569222865e-06, + "loss": 0.5751, + "step": 23546 + }, + { + "epoch": 0.7216807649871276, + "grad_norm": 1.9039891104651863, + "learning_rate": 1.8974853394418752e-06, + "loss": 0.6123, + "step": 23547 + }, + { + "epoch": 0.7217114135098688, + "grad_norm": 1.8364136890412563, + "learning_rate": 1.897096140230756e-06, + "loss": 0.5486, + "step": 23548 + }, + { + "epoch": 0.72174206203261, + "grad_norm": 1.7671502080330501, + "learning_rate": 1.8967069715933444e-06, + "loss": 0.5462, + "step": 23549 + }, + { + "epoch": 0.7217727105553512, + "grad_norm": 1.8258923522492807, + "learning_rate": 1.8963178335334708e-06, + "loss": 0.6147, + "step": 23550 + }, + { + "epoch": 0.7218033590780925, + "grad_norm": 0.769452391940307, + "learning_rate": 1.8959287260549724e-06, + "loss": 0.3845, + "step": 23551 + }, + { + "epoch": 0.7218340076008336, + "grad_norm": 1.8142998215194899, + "learning_rate": 1.895539649161684e-06, + "loss": 0.5951, + "step": 23552 + }, + { + "epoch": 0.7218646561235749, + "grad_norm": 1.5497786153930262, + "learning_rate": 1.8951506028574356e-06, + "loss": 0.4747, + "step": 23553 + }, + { + "epoch": 0.721895304646316, + "grad_norm": 1.6301862071111497, + "learning_rate": 1.894761587146065e-06, + "loss": 0.6157, + "step": 23554 + }, + { + "epoch": 0.7219259531690573, + "grad_norm": 1.6731374071486254, + "learning_rate": 1.894372602031399e-06, + "loss": 0.559, + "step": 23555 + }, + { + "epoch": 0.7219566016917984, + "grad_norm": 1.9394501034187261, + "learning_rate": 1.8939836475172784e-06, + "loss": 0.6297, + "step": 23556 + }, + { + "epoch": 0.7219872502145397, + "grad_norm": 1.9870121882256164, + "learning_rate": 1.8935947236075314e-06, + "loss": 0.694, + "step": 23557 + }, + { + "epoch": 0.7220178987372808, + "grad_norm": 1.7433311633298354, + "learning_rate": 1.8932058303059887e-06, + "loss": 0.5789, + "step": 23558 + }, + { + "epoch": 
0.7220485472600221, + "grad_norm": 0.769341413695377, + "learning_rate": 1.8928169676164836e-06, + "loss": 0.4004, + "step": 23559 + }, + { + "epoch": 0.7220791957827633, + "grad_norm": 0.789461747523172, + "learning_rate": 1.8924281355428498e-06, + "loss": 0.3966, + "step": 23560 + }, + { + "epoch": 0.7221098443055045, + "grad_norm": 2.0845206704717234, + "learning_rate": 1.8920393340889148e-06, + "loss": 0.528, + "step": 23561 + }, + { + "epoch": 0.7221404928282457, + "grad_norm": 1.7376859393044635, + "learning_rate": 1.8916505632585119e-06, + "loss": 0.6202, + "step": 23562 + }, + { + "epoch": 0.7221711413509869, + "grad_norm": 0.8070436356383585, + "learning_rate": 1.8912618230554708e-06, + "loss": 0.3833, + "step": 23563 + }, + { + "epoch": 0.7222017898737281, + "grad_norm": 1.8193366107294033, + "learning_rate": 1.8908731134836244e-06, + "loss": 0.6359, + "step": 23564 + }, + { + "epoch": 0.7222324383964693, + "grad_norm": 1.7205131383001757, + "learning_rate": 1.8904844345468004e-06, + "loss": 0.5052, + "step": 23565 + }, + { + "epoch": 0.7222630869192105, + "grad_norm": 1.908856116006507, + "learning_rate": 1.8900957862488262e-06, + "loss": 0.5911, + "step": 23566 + }, + { + "epoch": 0.7222937354419517, + "grad_norm": 1.816678693021545, + "learning_rate": 1.8897071685935364e-06, + "loss": 0.6027, + "step": 23567 + }, + { + "epoch": 0.7223243839646929, + "grad_norm": 1.99305475059761, + "learning_rate": 1.8893185815847581e-06, + "loss": 0.6252, + "step": 23568 + }, + { + "epoch": 0.7223550324874342, + "grad_norm": 1.8662611468509147, + "learning_rate": 1.888930025226318e-06, + "loss": 0.6439, + "step": 23569 + }, + { + "epoch": 0.7223856810101753, + "grad_norm": 2.20779106720283, + "learning_rate": 1.8885414995220464e-06, + "loss": 0.6479, + "step": 23570 + }, + { + "epoch": 0.7224163295329166, + "grad_norm": 1.6965996852079672, + "learning_rate": 1.8881530044757718e-06, + "loss": 0.5826, + "step": 23571 + }, + { + "epoch": 0.7224469780556577, + "grad_norm": 1.7782616025204068, + "learning_rate": 1.8877645400913231e-06, + "loss": 0.5534, + "step": 23572 + }, + { + "epoch": 0.7224776265783989, + "grad_norm": 1.726411936104694, + "learning_rate": 1.887376106372525e-06, + "loss": 0.5634, + "step": 23573 + }, + { + "epoch": 0.7225082751011401, + "grad_norm": 0.8175669420513361, + "learning_rate": 1.8869877033232065e-06, + "loss": 0.4072, + "step": 23574 + }, + { + "epoch": 0.7225389236238813, + "grad_norm": 1.6004215273465234, + "learning_rate": 1.8865993309471965e-06, + "loss": 0.5822, + "step": 23575 + }, + { + "epoch": 0.7225695721466225, + "grad_norm": 2.174462247108078, + "learning_rate": 1.88621098924832e-06, + "loss": 0.5576, + "step": 23576 + }, + { + "epoch": 0.7226002206693637, + "grad_norm": 0.7816493579468372, + "learning_rate": 1.8858226782303996e-06, + "loss": 0.3926, + "step": 23577 + }, + { + "epoch": 0.722630869192105, + "grad_norm": 1.7893022455516534, + "learning_rate": 1.885434397897269e-06, + "loss": 0.6623, + "step": 23578 + }, + { + "epoch": 0.7226615177148461, + "grad_norm": 1.858636746964808, + "learning_rate": 1.8850461482527498e-06, + "loss": 0.6137, + "step": 23579 + }, + { + "epoch": 0.7226921662375874, + "grad_norm": 2.014959102851492, + "learning_rate": 1.884657929300666e-06, + "loss": 0.6366, + "step": 23580 + }, + { + "epoch": 0.7227228147603285, + "grad_norm": 1.7872554743155444, + "learning_rate": 1.8842697410448457e-06, + "loss": 0.5609, + "step": 23581 + }, + { + "epoch": 0.7227534632830698, + "grad_norm": 1.7929278320379127, + "learning_rate": 
1.8838815834891116e-06, + "loss": 0.5682, + "step": 23582 + }, + { + "epoch": 0.7227841118058109, + "grad_norm": 2.1330583121348776, + "learning_rate": 1.883493456637292e-06, + "loss": 0.5443, + "step": 23583 + }, + { + "epoch": 0.7228147603285522, + "grad_norm": 1.9067476673430586, + "learning_rate": 1.883105360493207e-06, + "loss": 0.5462, + "step": 23584 + }, + { + "epoch": 0.7228454088512933, + "grad_norm": 1.7610874203953972, + "learning_rate": 1.882717295060682e-06, + "loss": 0.6061, + "step": 23585 + }, + { + "epoch": 0.7228760573740346, + "grad_norm": 1.6435088783695408, + "learning_rate": 1.8823292603435434e-06, + "loss": 0.5941, + "step": 23586 + }, + { + "epoch": 0.7229067058967757, + "grad_norm": 1.8692435048472746, + "learning_rate": 1.8819412563456118e-06, + "loss": 0.5967, + "step": 23587 + }, + { + "epoch": 0.722937354419517, + "grad_norm": 1.6099213201950993, + "learning_rate": 1.881553283070708e-06, + "loss": 0.4693, + "step": 23588 + }, + { + "epoch": 0.7229680029422582, + "grad_norm": 1.7143783418053415, + "learning_rate": 1.88116534052266e-06, + "loss": 0.6067, + "step": 23589 + }, + { + "epoch": 0.7229986514649994, + "grad_norm": 2.1895654471685497, + "learning_rate": 1.8807774287052866e-06, + "loss": 0.6029, + "step": 23590 + }, + { + "epoch": 0.7230292999877406, + "grad_norm": 1.9175254126925068, + "learning_rate": 1.8803895476224133e-06, + "loss": 0.6015, + "step": 23591 + }, + { + "epoch": 0.7230599485104818, + "grad_norm": 1.8710855378860727, + "learning_rate": 1.8800016972778578e-06, + "loss": 0.5149, + "step": 23592 + }, + { + "epoch": 0.723090597033223, + "grad_norm": 2.025386839100477, + "learning_rate": 1.8796138776754442e-06, + "loss": 0.5428, + "step": 23593 + }, + { + "epoch": 0.7231212455559642, + "grad_norm": 2.2171967890484194, + "learning_rate": 1.879226088818995e-06, + "loss": 0.549, + "step": 23594 + }, + { + "epoch": 0.7231518940787054, + "grad_norm": 1.6651590521868171, + "learning_rate": 1.878838330712328e-06, + "loss": 0.4874, + "step": 23595 + }, + { + "epoch": 0.7231825426014467, + "grad_norm": 2.0159466194327935, + "learning_rate": 1.8784506033592658e-06, + "loss": 0.5331, + "step": 23596 + }, + { + "epoch": 0.7232131911241878, + "grad_norm": 1.7141253532012406, + "learning_rate": 1.8780629067636296e-06, + "loss": 0.4797, + "step": 23597 + }, + { + "epoch": 0.7232438396469291, + "grad_norm": 1.5561007941555325, + "learning_rate": 1.877675240929237e-06, + "loss": 0.578, + "step": 23598 + }, + { + "epoch": 0.7232744881696702, + "grad_norm": 1.791866809724895, + "learning_rate": 1.877287605859911e-06, + "loss": 0.567, + "step": 23599 + }, + { + "epoch": 0.7233051366924115, + "grad_norm": 1.8255657397526583, + "learning_rate": 1.8769000015594675e-06, + "loss": 0.4815, + "step": 23600 + }, + { + "epoch": 0.7233357852151526, + "grad_norm": 1.9380429070512997, + "learning_rate": 1.876512428031727e-06, + "loss": 0.5863, + "step": 23601 + }, + { + "epoch": 0.7233664337378939, + "grad_norm": 1.8531753076863224, + "learning_rate": 1.8761248852805103e-06, + "loss": 0.6219, + "step": 23602 + }, + { + "epoch": 0.723397082260635, + "grad_norm": 1.6391216460595268, + "learning_rate": 1.8757373733096334e-06, + "loss": 0.5537, + "step": 23603 + }, + { + "epoch": 0.7234277307833762, + "grad_norm": 1.895189498376453, + "learning_rate": 1.8753498921229146e-06, + "loss": 0.6285, + "step": 23604 + }, + { + "epoch": 0.7234583793061174, + "grad_norm": 1.6519396448652868, + "learning_rate": 1.8749624417241751e-06, + "loss": 0.6174, + "step": 23605 + }, + { + "epoch": 
0.7234890278288586, + "grad_norm": 1.743440696646368, + "learning_rate": 1.8745750221172305e-06, + "loss": 0.5124, + "step": 23606 + }, + { + "epoch": 0.7235196763515999, + "grad_norm": 2.0655312921981572, + "learning_rate": 1.874187633305894e-06, + "loss": 0.6476, + "step": 23607 + }, + { + "epoch": 0.723550324874341, + "grad_norm": 1.7823332172603725, + "learning_rate": 1.8738002752939899e-06, + "loss": 0.586, + "step": 23608 + }, + { + "epoch": 0.7235809733970823, + "grad_norm": 1.9169545895763345, + "learning_rate": 1.8734129480853302e-06, + "loss": 0.5519, + "step": 23609 + }, + { + "epoch": 0.7236116219198234, + "grad_norm": 0.7916970001660412, + "learning_rate": 1.8730256516837342e-06, + "loss": 0.4087, + "step": 23610 + }, + { + "epoch": 0.7236422704425647, + "grad_norm": 0.8076674187788606, + "learning_rate": 1.8726383860930148e-06, + "loss": 0.4173, + "step": 23611 + }, + { + "epoch": 0.7236729189653058, + "grad_norm": 1.9410132593787743, + "learning_rate": 1.8722511513169894e-06, + "loss": 0.5877, + "step": 23612 + }, + { + "epoch": 0.7237035674880471, + "grad_norm": 1.698650069915404, + "learning_rate": 1.8718639473594757e-06, + "loss": 0.5389, + "step": 23613 + }, + { + "epoch": 0.7237342160107882, + "grad_norm": 1.9348031989071006, + "learning_rate": 1.871476774224285e-06, + "loss": 0.6353, + "step": 23614 + }, + { + "epoch": 0.7237648645335295, + "grad_norm": 2.0085274441030347, + "learning_rate": 1.8710896319152339e-06, + "loss": 0.6625, + "step": 23615 + }, + { + "epoch": 0.7237955130562707, + "grad_norm": 1.8596882353255513, + "learning_rate": 1.8707025204361396e-06, + "loss": 0.5919, + "step": 23616 + }, + { + "epoch": 0.7238261615790119, + "grad_norm": 1.823003094406272, + "learning_rate": 1.8703154397908119e-06, + "loss": 0.4584, + "step": 23617 + }, + { + "epoch": 0.7238568101017531, + "grad_norm": 1.6372637314401668, + "learning_rate": 1.8699283899830667e-06, + "loss": 0.5705, + "step": 23618 + }, + { + "epoch": 0.7238874586244943, + "grad_norm": 1.9909673524770821, + "learning_rate": 1.86954137101672e-06, + "loss": 0.5902, + "step": 23619 + }, + { + "epoch": 0.7239181071472355, + "grad_norm": 1.9892246608537425, + "learning_rate": 1.869154382895581e-06, + "loss": 0.6545, + "step": 23620 + }, + { + "epoch": 0.7239487556699767, + "grad_norm": 1.883259720233067, + "learning_rate": 1.8687674256234672e-06, + "loss": 0.583, + "step": 23621 + }, + { + "epoch": 0.7239794041927179, + "grad_norm": 1.9619113581729777, + "learning_rate": 1.868380499204187e-06, + "loss": 0.6328, + "step": 23622 + }, + { + "epoch": 0.7240100527154592, + "grad_norm": 1.730612312478149, + "learning_rate": 1.8679936036415552e-06, + "loss": 0.5766, + "step": 23623 + }, + { + "epoch": 0.7240407012382003, + "grad_norm": 1.9219009120968114, + "learning_rate": 1.8676067389393854e-06, + "loss": 0.5837, + "step": 23624 + }, + { + "epoch": 0.7240713497609416, + "grad_norm": 0.7948307730618924, + "learning_rate": 1.8672199051014862e-06, + "loss": 0.4121, + "step": 23625 + }, + { + "epoch": 0.7241019982836827, + "grad_norm": 1.821016852331546, + "learning_rate": 1.866833102131671e-06, + "loss": 0.5882, + "step": 23626 + }, + { + "epoch": 0.724132646806424, + "grad_norm": 2.037493223906067, + "learning_rate": 1.8664463300337521e-06, + "loss": 0.6532, + "step": 23627 + }, + { + "epoch": 0.7241632953291651, + "grad_norm": 1.8309628924205872, + "learning_rate": 1.866059588811538e-06, + "loss": 0.5732, + "step": 23628 + }, + { + "epoch": 0.7241939438519064, + "grad_norm": 1.7931590935811126, + "learning_rate": 
1.8656728784688404e-06, + "loss": 0.5506, + "step": 23629 + }, + { + "epoch": 0.7242245923746475, + "grad_norm": 1.6972926383384812, + "learning_rate": 1.8652861990094717e-06, + "loss": 0.6424, + "step": 23630 + }, + { + "epoch": 0.7242552408973888, + "grad_norm": 1.807269060650492, + "learning_rate": 1.8648995504372386e-06, + "loss": 0.6024, + "step": 23631 + }, + { + "epoch": 0.72428588942013, + "grad_norm": 2.112082512860469, + "learning_rate": 1.8645129327559536e-06, + "loss": 0.6491, + "step": 23632 + }, + { + "epoch": 0.7243165379428712, + "grad_norm": 1.708731834026289, + "learning_rate": 1.8641263459694237e-06, + "loss": 0.5725, + "step": 23633 + }, + { + "epoch": 0.7243471864656124, + "grad_norm": 1.8717157134843863, + "learning_rate": 1.8637397900814591e-06, + "loss": 0.5537, + "step": 23634 + }, + { + "epoch": 0.7243778349883535, + "grad_norm": 2.052151666190116, + "learning_rate": 1.8633532650958702e-06, + "loss": 0.5991, + "step": 23635 + }, + { + "epoch": 0.7244084835110948, + "grad_norm": 1.7325647815142775, + "learning_rate": 1.8629667710164628e-06, + "loss": 0.6141, + "step": 23636 + }, + { + "epoch": 0.7244391320338359, + "grad_norm": 2.3123830237432728, + "learning_rate": 1.8625803078470467e-06, + "loss": 0.5923, + "step": 23637 + }, + { + "epoch": 0.7244697805565772, + "grad_norm": 1.6336184759119345, + "learning_rate": 1.8621938755914309e-06, + "loss": 0.5338, + "step": 23638 + }, + { + "epoch": 0.7245004290793183, + "grad_norm": 1.812341813967547, + "learning_rate": 1.8618074742534199e-06, + "loss": 0.582, + "step": 23639 + }, + { + "epoch": 0.7245310776020596, + "grad_norm": 1.8814005793216233, + "learning_rate": 1.861421103836823e-06, + "loss": 0.5712, + "step": 23640 + }, + { + "epoch": 0.7245617261248007, + "grad_norm": 0.7868925864291164, + "learning_rate": 1.8610347643454486e-06, + "loss": 0.3995, + "step": 23641 + }, + { + "epoch": 0.724592374647542, + "grad_norm": 1.9284383600008839, + "learning_rate": 1.8606484557831e-06, + "loss": 0.6487, + "step": 23642 + }, + { + "epoch": 0.7246230231702832, + "grad_norm": 1.6051914708197514, + "learning_rate": 1.8602621781535874e-06, + "loss": 0.494, + "step": 23643 + }, + { + "epoch": 0.7246536716930244, + "grad_norm": 1.741967422009554, + "learning_rate": 1.8598759314607129e-06, + "loss": 0.537, + "step": 23644 + }, + { + "epoch": 0.7246843202157656, + "grad_norm": 1.8079545019589545, + "learning_rate": 1.859489715708284e-06, + "loss": 0.6507, + "step": 23645 + }, + { + "epoch": 0.7247149687385068, + "grad_norm": 1.8282718971735163, + "learning_rate": 1.8591035309001087e-06, + "loss": 0.5189, + "step": 23646 + }, + { + "epoch": 0.724745617261248, + "grad_norm": 1.7932263397522004, + "learning_rate": 1.8587173770399886e-06, + "loss": 0.5697, + "step": 23647 + }, + { + "epoch": 0.7247762657839892, + "grad_norm": 2.0707977396423027, + "learning_rate": 1.8583312541317288e-06, + "loss": 0.6336, + "step": 23648 + }, + { + "epoch": 0.7248069143067304, + "grad_norm": 0.7931147362794108, + "learning_rate": 1.8579451621791377e-06, + "loss": 0.4119, + "step": 23649 + }, + { + "epoch": 0.7248375628294716, + "grad_norm": 0.7993938618776201, + "learning_rate": 1.8575591011860146e-06, + "loss": 0.4073, + "step": 23650 + }, + { + "epoch": 0.7248682113522128, + "grad_norm": 1.762246807745324, + "learning_rate": 1.8571730711561675e-06, + "loss": 0.613, + "step": 23651 + }, + { + "epoch": 0.7248988598749541, + "grad_norm": 0.7832727319857001, + "learning_rate": 1.8567870720933967e-06, + "loss": 0.3943, + "step": 23652 + }, + { + "epoch": 
0.7249295083976952, + "grad_norm": 1.7535005532221168, + "learning_rate": 1.8564011040015074e-06, + "loss": 0.5814, + "step": 23653 + }, + { + "epoch": 0.7249601569204365, + "grad_norm": 1.6686425625681625, + "learning_rate": 1.8560151668843035e-06, + "loss": 0.5206, + "step": 23654 + }, + { + "epoch": 0.7249908054431776, + "grad_norm": 0.8164883573193509, + "learning_rate": 1.855629260745585e-06, + "loss": 0.4126, + "step": 23655 + }, + { + "epoch": 0.7250214539659189, + "grad_norm": 2.088653850077777, + "learning_rate": 1.855243385589156e-06, + "loss": 0.6939, + "step": 23656 + }, + { + "epoch": 0.72505210248866, + "grad_norm": 1.9911288221847707, + "learning_rate": 1.8548575414188202e-06, + "loss": 0.6209, + "step": 23657 + }, + { + "epoch": 0.7250827510114013, + "grad_norm": 1.7852247083604722, + "learning_rate": 1.8544717282383755e-06, + "loss": 0.5933, + "step": 23658 + }, + { + "epoch": 0.7251133995341424, + "grad_norm": 0.8173694953636712, + "learning_rate": 1.8540859460516265e-06, + "loss": 0.4164, + "step": 23659 + }, + { + "epoch": 0.7251440480568837, + "grad_norm": 1.9719303541805167, + "learning_rate": 1.853700194862375e-06, + "loss": 0.5628, + "step": 23660 + }, + { + "epoch": 0.7251746965796249, + "grad_norm": 1.6003462254873972, + "learning_rate": 1.8533144746744192e-06, + "loss": 0.5413, + "step": 23661 + }, + { + "epoch": 0.7252053451023661, + "grad_norm": 0.7825712943379513, + "learning_rate": 1.8529287854915622e-06, + "loss": 0.4033, + "step": 23662 + }, + { + "epoch": 0.7252359936251073, + "grad_norm": 2.1960210005027143, + "learning_rate": 1.8525431273175998e-06, + "loss": 0.5784, + "step": 23663 + }, + { + "epoch": 0.7252666421478485, + "grad_norm": 1.7444551011308385, + "learning_rate": 1.8521575001563386e-06, + "loss": 0.6398, + "step": 23664 + }, + { + "epoch": 0.7252972906705897, + "grad_norm": 1.840040514813414, + "learning_rate": 1.851771904011575e-06, + "loss": 0.5668, + "step": 23665 + }, + { + "epoch": 0.7253279391933308, + "grad_norm": 1.8201166492484842, + "learning_rate": 1.8513863388871067e-06, + "loss": 0.6428, + "step": 23666 + }, + { + "epoch": 0.7253585877160721, + "grad_norm": 1.934954085868941, + "learning_rate": 1.8510008047867345e-06, + "loss": 0.6069, + "step": 23667 + }, + { + "epoch": 0.7253892362388132, + "grad_norm": 1.6940865444321855, + "learning_rate": 1.8506153017142587e-06, + "loss": 0.5541, + "step": 23668 + }, + { + "epoch": 0.7254198847615545, + "grad_norm": 0.8009417493664558, + "learning_rate": 1.8502298296734744e-06, + "loss": 0.4029, + "step": 23669 + }, + { + "epoch": 0.7254505332842957, + "grad_norm": 1.624727992190881, + "learning_rate": 1.8498443886681817e-06, + "loss": 0.5127, + "step": 23670 + }, + { + "epoch": 0.7254811818070369, + "grad_norm": 1.8705230498936096, + "learning_rate": 1.8494589787021777e-06, + "loss": 0.6332, + "step": 23671 + }, + { + "epoch": 0.7255118303297781, + "grad_norm": 1.7526639434017057, + "learning_rate": 1.8490735997792625e-06, + "loss": 0.6043, + "step": 23672 + }, + { + "epoch": 0.7255424788525193, + "grad_norm": 1.5582284174367746, + "learning_rate": 1.8486882519032317e-06, + "loss": 0.6058, + "step": 23673 + }, + { + "epoch": 0.7255731273752605, + "grad_norm": 1.797496843706629, + "learning_rate": 1.848302935077878e-06, + "loss": 0.5982, + "step": 23674 + }, + { + "epoch": 0.7256037758980017, + "grad_norm": 1.927694271302029, + "learning_rate": 1.8479176493070055e-06, + "loss": 0.5258, + "step": 23675 + }, + { + "epoch": 0.7256344244207429, + "grad_norm": 0.8055443586712197, + 
"learning_rate": 1.8475323945944067e-06, + "loss": 0.3955, + "step": 23676 + }, + { + "epoch": 0.7256650729434841, + "grad_norm": 2.053414580442222, + "learning_rate": 1.847147170943876e-06, + "loss": 0.5537, + "step": 23677 + }, + { + "epoch": 0.7256957214662253, + "grad_norm": 1.6238446293304238, + "learning_rate": 1.8467619783592112e-06, + "loss": 0.5503, + "step": 23678 + }, + { + "epoch": 0.7257263699889666, + "grad_norm": 1.8127300756791953, + "learning_rate": 1.8463768168442091e-06, + "loss": 0.5801, + "step": 23679 + }, + { + "epoch": 0.7257570185117077, + "grad_norm": 1.8334390392233737, + "learning_rate": 1.8459916864026611e-06, + "loss": 0.5598, + "step": 23680 + }, + { + "epoch": 0.725787667034449, + "grad_norm": 1.8628688516913747, + "learning_rate": 1.8456065870383643e-06, + "loss": 0.5955, + "step": 23681 + }, + { + "epoch": 0.7258183155571901, + "grad_norm": 1.777147371939837, + "learning_rate": 1.8452215187551132e-06, + "loss": 0.5528, + "step": 23682 + }, + { + "epoch": 0.7258489640799314, + "grad_norm": 1.777365639447432, + "learning_rate": 1.8448364815567027e-06, + "loss": 0.6461, + "step": 23683 + }, + { + "epoch": 0.7258796126026725, + "grad_norm": 1.8215694165183798, + "learning_rate": 1.844451475446926e-06, + "loss": 0.5612, + "step": 23684 + }, + { + "epoch": 0.7259102611254138, + "grad_norm": 1.832726734789226, + "learning_rate": 1.8440665004295743e-06, + "loss": 0.5975, + "step": 23685 + }, + { + "epoch": 0.7259409096481549, + "grad_norm": 1.7374041779046592, + "learning_rate": 1.8436815565084432e-06, + "loss": 0.5522, + "step": 23686 + }, + { + "epoch": 0.7259715581708962, + "grad_norm": 1.9243182998132673, + "learning_rate": 1.8432966436873268e-06, + "loss": 0.5793, + "step": 23687 + }, + { + "epoch": 0.7260022066936374, + "grad_norm": 1.81278373972721, + "learning_rate": 1.8429117619700149e-06, + "loss": 0.5869, + "step": 23688 + }, + { + "epoch": 0.7260328552163786, + "grad_norm": 1.7930993459990894, + "learning_rate": 1.8425269113603005e-06, + "loss": 0.5424, + "step": 23689 + }, + { + "epoch": 0.7260635037391198, + "grad_norm": 2.120387730676026, + "learning_rate": 1.842142091861977e-06, + "loss": 0.5182, + "step": 23690 + }, + { + "epoch": 0.726094152261861, + "grad_norm": 1.7031301097664127, + "learning_rate": 1.8417573034788367e-06, + "loss": 0.6101, + "step": 23691 + }, + { + "epoch": 0.7261248007846022, + "grad_norm": 1.9813338655680002, + "learning_rate": 1.841372546214668e-06, + "loss": 0.5597, + "step": 23692 + }, + { + "epoch": 0.7261554493073434, + "grad_norm": 1.6990132610041984, + "learning_rate": 1.8409878200732644e-06, + "loss": 0.6139, + "step": 23693 + }, + { + "epoch": 0.7261860978300846, + "grad_norm": 0.8878027399291436, + "learning_rate": 1.8406031250584171e-06, + "loss": 0.3961, + "step": 23694 + }, + { + "epoch": 0.7262167463528258, + "grad_norm": 2.0464813733657867, + "learning_rate": 1.840218461173916e-06, + "loss": 0.5515, + "step": 23695 + }, + { + "epoch": 0.726247394875567, + "grad_norm": 1.991413298996768, + "learning_rate": 1.839833828423549e-06, + "loss": 0.6184, + "step": 23696 + }, + { + "epoch": 0.7262780433983081, + "grad_norm": 1.9629058213318809, + "learning_rate": 1.8394492268111081e-06, + "loss": 0.5414, + "step": 23697 + }, + { + "epoch": 0.7263086919210494, + "grad_norm": 1.590648154938428, + "learning_rate": 1.8390646563403819e-06, + "loss": 0.4786, + "step": 23698 + }, + { + "epoch": 0.7263393404437906, + "grad_norm": 1.9829250480587828, + "learning_rate": 1.838680117015163e-06, + "loss": 0.5789, + "step": 23699 + 
}, + { + "epoch": 0.7263699889665318, + "grad_norm": 1.9462454061747982, + "learning_rate": 1.8382956088392356e-06, + "loss": 0.6104, + "step": 23700 + }, + { + "epoch": 0.726400637489273, + "grad_norm": 1.8702662538550066, + "learning_rate": 1.8379111318163905e-06, + "loss": 0.5022, + "step": 23701 + }, + { + "epoch": 0.7264312860120142, + "grad_norm": 0.8083772187652074, + "learning_rate": 1.8375266859504177e-06, + "loss": 0.4177, + "step": 23702 + }, + { + "epoch": 0.7264619345347554, + "grad_norm": 2.46004491569008, + "learning_rate": 1.8371422712451037e-06, + "loss": 0.5906, + "step": 23703 + }, + { + "epoch": 0.7264925830574966, + "grad_norm": 1.8663293790080848, + "learning_rate": 1.836757887704233e-06, + "loss": 0.6114, + "step": 23704 + }, + { + "epoch": 0.7265232315802378, + "grad_norm": 1.9367950530403173, + "learning_rate": 1.8363735353315991e-06, + "loss": 0.5907, + "step": 23705 + }, + { + "epoch": 0.726553880102979, + "grad_norm": 1.7977216258346558, + "learning_rate": 1.835989214130987e-06, + "loss": 0.5111, + "step": 23706 + }, + { + "epoch": 0.7265845286257202, + "grad_norm": 1.6629549971973105, + "learning_rate": 1.8356049241061802e-06, + "loss": 0.4909, + "step": 23707 + }, + { + "epoch": 0.7266151771484615, + "grad_norm": 1.6981717800905178, + "learning_rate": 1.8352206652609682e-06, + "loss": 0.5507, + "step": 23708 + }, + { + "epoch": 0.7266458256712026, + "grad_norm": 1.8661301489898685, + "learning_rate": 1.8348364375991368e-06, + "loss": 0.5522, + "step": 23709 + }, + { + "epoch": 0.7266764741939439, + "grad_norm": 1.8511404126520008, + "learning_rate": 1.834452241124473e-06, + "loss": 0.5741, + "step": 23710 + }, + { + "epoch": 0.726707122716685, + "grad_norm": 0.7833608833381842, + "learning_rate": 1.8340680758407597e-06, + "loss": 0.3929, + "step": 23711 + }, + { + "epoch": 0.7267377712394263, + "grad_norm": 1.659913316793096, + "learning_rate": 1.8336839417517837e-06, + "loss": 0.5595, + "step": 23712 + }, + { + "epoch": 0.7267684197621674, + "grad_norm": 1.8177841946900821, + "learning_rate": 1.833299838861332e-06, + "loss": 0.6221, + "step": 23713 + }, + { + "epoch": 0.7267990682849087, + "grad_norm": 2.047608936872808, + "learning_rate": 1.8329157671731873e-06, + "loss": 0.6541, + "step": 23714 + }, + { + "epoch": 0.7268297168076499, + "grad_norm": 1.884726393516129, + "learning_rate": 1.8325317266911297e-06, + "loss": 0.591, + "step": 23715 + }, + { + "epoch": 0.7268603653303911, + "grad_norm": 0.7761702668237423, + "learning_rate": 1.8321477174189518e-06, + "loss": 0.399, + "step": 23716 + }, + { + "epoch": 0.7268910138531323, + "grad_norm": 2.1239688074440464, + "learning_rate": 1.8317637393604304e-06, + "loss": 0.5486, + "step": 23717 + }, + { + "epoch": 0.7269216623758735, + "grad_norm": 1.691856234125103, + "learning_rate": 1.831379792519354e-06, + "loss": 0.6114, + "step": 23718 + }, + { + "epoch": 0.7269523108986147, + "grad_norm": 1.6669324116631852, + "learning_rate": 1.8309958768995007e-06, + "loss": 0.5773, + "step": 23719 + }, + { + "epoch": 0.7269829594213559, + "grad_norm": 1.9575730749305265, + "learning_rate": 1.830611992504656e-06, + "loss": 0.6861, + "step": 23720 + }, + { + "epoch": 0.7270136079440971, + "grad_norm": 1.6421773119356988, + "learning_rate": 1.8302281393386046e-06, + "loss": 0.6943, + "step": 23721 + }, + { + "epoch": 0.7270442564668383, + "grad_norm": 1.9961307851699104, + "learning_rate": 1.829844317405124e-06, + "loss": 0.6499, + "step": 23722 + }, + { + "epoch": 0.7270749049895795, + "grad_norm": 1.719638155734453, + 
"learning_rate": 1.829460526707999e-06, + "loss": 0.5283, + "step": 23723 + }, + { + "epoch": 0.7271055535123208, + "grad_norm": 2.0401879294500564, + "learning_rate": 1.8290767672510117e-06, + "loss": 0.5909, + "step": 23724 + }, + { + "epoch": 0.7271362020350619, + "grad_norm": 0.8251873691250361, + "learning_rate": 1.8286930390379416e-06, + "loss": 0.3957, + "step": 23725 + }, + { + "epoch": 0.7271668505578032, + "grad_norm": 1.7284635854409847, + "learning_rate": 1.8283093420725695e-06, + "loss": 0.5699, + "step": 23726 + }, + { + "epoch": 0.7271974990805443, + "grad_norm": 1.7124235154371021, + "learning_rate": 1.827925676358679e-06, + "loss": 0.7001, + "step": 23727 + }, + { + "epoch": 0.7272281476032855, + "grad_norm": 1.7550776444823593, + "learning_rate": 1.8275420419000466e-06, + "loss": 0.5788, + "step": 23728 + }, + { + "epoch": 0.7272587961260267, + "grad_norm": 1.8490519877437848, + "learning_rate": 1.8271584387004559e-06, + "loss": 0.6139, + "step": 23729 + }, + { + "epoch": 0.7272894446487679, + "grad_norm": 1.8213708745627641, + "learning_rate": 1.8267748667636831e-06, + "loss": 0.4918, + "step": 23730 + }, + { + "epoch": 0.7273200931715091, + "grad_norm": 2.0587085742756464, + "learning_rate": 1.8263913260935102e-06, + "loss": 0.6257, + "step": 23731 + }, + { + "epoch": 0.7273507416942503, + "grad_norm": 1.995798736711881, + "learning_rate": 1.8260078166937161e-06, + "loss": 0.6335, + "step": 23732 + }, + { + "epoch": 0.7273813902169916, + "grad_norm": 1.9232264896536388, + "learning_rate": 1.8256243385680782e-06, + "loss": 0.5547, + "step": 23733 + }, + { + "epoch": 0.7274120387397327, + "grad_norm": 1.7639603109713948, + "learning_rate": 1.8252408917203756e-06, + "loss": 0.6711, + "step": 23734 + }, + { + "epoch": 0.727442687262474, + "grad_norm": 1.6568102390797945, + "learning_rate": 1.8248574761543885e-06, + "loss": 0.4974, + "step": 23735 + }, + { + "epoch": 0.7274733357852151, + "grad_norm": 1.8783213697255392, + "learning_rate": 1.8244740918738917e-06, + "loss": 0.5718, + "step": 23736 + }, + { + "epoch": 0.7275039843079564, + "grad_norm": 1.8530967908217524, + "learning_rate": 1.8240907388826656e-06, + "loss": 0.5693, + "step": 23737 + }, + { + "epoch": 0.7275346328306975, + "grad_norm": 1.885380377954822, + "learning_rate": 1.8237074171844843e-06, + "loss": 0.5919, + "step": 23738 + }, + { + "epoch": 0.7275652813534388, + "grad_norm": 0.8567712902903787, + "learning_rate": 1.8233241267831265e-06, + "loss": 0.4084, + "step": 23739 + }, + { + "epoch": 0.7275959298761799, + "grad_norm": 1.707558166745491, + "learning_rate": 1.8229408676823707e-06, + "loss": 0.5546, + "step": 23740 + }, + { + "epoch": 0.7276265783989212, + "grad_norm": 0.7837737023220215, + "learning_rate": 1.8225576398859896e-06, + "loss": 0.4018, + "step": 23741 + }, + { + "epoch": 0.7276572269216623, + "grad_norm": 1.8373402638860483, + "learning_rate": 1.8221744433977612e-06, + "loss": 0.5569, + "step": 23742 + }, + { + "epoch": 0.7276878754444036, + "grad_norm": 1.9856191969846115, + "learning_rate": 1.8217912782214625e-06, + "loss": 0.6231, + "step": 23743 + }, + { + "epoch": 0.7277185239671448, + "grad_norm": 1.7628960700099687, + "learning_rate": 1.821408144360866e-06, + "loss": 0.5434, + "step": 23744 + }, + { + "epoch": 0.727749172489886, + "grad_norm": 1.61509431373873, + "learning_rate": 1.8210250418197484e-06, + "loss": 0.4947, + "step": 23745 + }, + { + "epoch": 0.7277798210126272, + "grad_norm": 1.7427345797794458, + "learning_rate": 1.8206419706018857e-06, + "loss": 0.6228, + "step": 
23746 + }, + { + "epoch": 0.7278104695353684, + "grad_norm": 1.6304327922344528, + "learning_rate": 1.8202589307110501e-06, + "loss": 0.5466, + "step": 23747 + }, + { + "epoch": 0.7278411180581096, + "grad_norm": 1.4112944783000525, + "learning_rate": 1.8198759221510182e-06, + "loss": 0.508, + "step": 23748 + }, + { + "epoch": 0.7278717665808508, + "grad_norm": 1.777656660081527, + "learning_rate": 1.8194929449255605e-06, + "loss": 0.5766, + "step": 23749 + }, + { + "epoch": 0.727902415103592, + "grad_norm": 1.7118158064753104, + "learning_rate": 1.819109999038453e-06, + "loss": 0.6463, + "step": 23750 + }, + { + "epoch": 0.7279330636263333, + "grad_norm": 1.8562587551802279, + "learning_rate": 1.8187270844934702e-06, + "loss": 0.593, + "step": 23751 + }, + { + "epoch": 0.7279637121490744, + "grad_norm": 0.7905912232099206, + "learning_rate": 1.818344201294382e-06, + "loss": 0.4021, + "step": 23752 + }, + { + "epoch": 0.7279943606718157, + "grad_norm": 1.6248693599144586, + "learning_rate": 1.8179613494449615e-06, + "loss": 0.546, + "step": 23753 + }, + { + "epoch": 0.7280250091945568, + "grad_norm": 1.6405780011831939, + "learning_rate": 1.8175785289489844e-06, + "loss": 0.5584, + "step": 23754 + }, + { + "epoch": 0.7280556577172981, + "grad_norm": 0.7894914061740405, + "learning_rate": 1.8171957398102186e-06, + "loss": 0.4011, + "step": 23755 + }, + { + "epoch": 0.7280863062400392, + "grad_norm": 1.8688671583914354, + "learning_rate": 1.8168129820324375e-06, + "loss": 0.6049, + "step": 23756 + }, + { + "epoch": 0.7281169547627805, + "grad_norm": 1.4092334818959438, + "learning_rate": 1.816430255619414e-06, + "loss": 0.5266, + "step": 23757 + }, + { + "epoch": 0.7281476032855216, + "grad_norm": 0.7839300456444426, + "learning_rate": 1.8160475605749166e-06, + "loss": 0.4105, + "step": 23758 + }, + { + "epoch": 0.7281782518082628, + "grad_norm": 1.7062309054737061, + "learning_rate": 1.8156648969027186e-06, + "loss": 0.5302, + "step": 23759 + }, + { + "epoch": 0.728208900331004, + "grad_norm": 1.8879637990344047, + "learning_rate": 1.815282264606587e-06, + "loss": 0.6241, + "step": 23760 + }, + { + "epoch": 0.7282395488537452, + "grad_norm": 2.1046765325199313, + "learning_rate": 1.814899663690295e-06, + "loss": 0.698, + "step": 23761 + }, + { + "epoch": 0.7282701973764865, + "grad_norm": 0.797813094607992, + "learning_rate": 1.8145170941576124e-06, + "loss": 0.408, + "step": 23762 + }, + { + "epoch": 0.7283008458992276, + "grad_norm": 1.953827827739131, + "learning_rate": 1.8141345560123065e-06, + "loss": 0.6052, + "step": 23763 + }, + { + "epoch": 0.7283314944219689, + "grad_norm": 1.8312720654898318, + "learning_rate": 1.8137520492581478e-06, + "loss": 0.7026, + "step": 23764 + }, + { + "epoch": 0.72836214294471, + "grad_norm": 1.738468900790825, + "learning_rate": 1.8133695738989077e-06, + "loss": 0.6965, + "step": 23765 + }, + { + "epoch": 0.7283927914674513, + "grad_norm": 1.8893276291245196, + "learning_rate": 1.81298712993835e-06, + "loss": 0.6329, + "step": 23766 + }, + { + "epoch": 0.7284234399901924, + "grad_norm": 1.7416568363436828, + "learning_rate": 1.8126047173802463e-06, + "loss": 0.5697, + "step": 23767 + }, + { + "epoch": 0.7284540885129337, + "grad_norm": 1.8824481566401323, + "learning_rate": 1.8122223362283653e-06, + "loss": 0.6004, + "step": 23768 + }, + { + "epoch": 0.7284847370356748, + "grad_norm": 1.803499461618619, + "learning_rate": 1.811839986486471e-06, + "loss": 0.4917, + "step": 23769 + }, + { + "epoch": 0.7285153855584161, + "grad_norm": 1.680302075562044, 
+ "learning_rate": 1.8114576681583351e-06, + "loss": 0.5745, + "step": 23770 + }, + { + "epoch": 0.7285460340811573, + "grad_norm": 1.8219266741265359, + "learning_rate": 1.8110753812477195e-06, + "loss": 0.575, + "step": 23771 + }, + { + "epoch": 0.7285766826038985, + "grad_norm": 1.6863654545952722, + "learning_rate": 1.8106931257583975e-06, + "loss": 0.5889, + "step": 23772 + }, + { + "epoch": 0.7286073311266397, + "grad_norm": 0.8283719788781881, + "learning_rate": 1.8103109016941317e-06, + "loss": 0.4023, + "step": 23773 + }, + { + "epoch": 0.7286379796493809, + "grad_norm": 1.6349446265014338, + "learning_rate": 1.8099287090586876e-06, + "loss": 0.5225, + "step": 23774 + }, + { + "epoch": 0.7286686281721221, + "grad_norm": 1.615926896130914, + "learning_rate": 1.8095465478558317e-06, + "loss": 0.5632, + "step": 23775 + }, + { + "epoch": 0.7286992766948633, + "grad_norm": 0.7925078946746684, + "learning_rate": 1.8091644180893313e-06, + "loss": 0.4165, + "step": 23776 + }, + { + "epoch": 0.7287299252176045, + "grad_norm": 1.7651059880382887, + "learning_rate": 1.8087823197629495e-06, + "loss": 0.5314, + "step": 23777 + }, + { + "epoch": 0.7287605737403458, + "grad_norm": 1.7541025645307073, + "learning_rate": 1.8084002528804518e-06, + "loss": 0.508, + "step": 23778 + }, + { + "epoch": 0.7287912222630869, + "grad_norm": 1.62310584319389, + "learning_rate": 1.8080182174456024e-06, + "loss": 0.5494, + "step": 23779 + }, + { + "epoch": 0.7288218707858282, + "grad_norm": 1.8021184367559537, + "learning_rate": 1.8076362134621683e-06, + "loss": 0.5042, + "step": 23780 + }, + { + "epoch": 0.7288525193085693, + "grad_norm": 1.7998237185338497, + "learning_rate": 1.8072542409339117e-06, + "loss": 0.591, + "step": 23781 + }, + { + "epoch": 0.7288831678313106, + "grad_norm": 1.8080418495893762, + "learning_rate": 1.8068722998645939e-06, + "loss": 0.5872, + "step": 23782 + }, + { + "epoch": 0.7289138163540517, + "grad_norm": 1.9278855401149362, + "learning_rate": 1.80649039025798e-06, + "loss": 0.6841, + "step": 23783 + }, + { + "epoch": 0.728944464876793, + "grad_norm": 1.7388436620195689, + "learning_rate": 1.8061085121178357e-06, + "loss": 0.5463, + "step": 23784 + }, + { + "epoch": 0.7289751133995341, + "grad_norm": 1.6696312419797377, + "learning_rate": 1.8057266654479195e-06, + "loss": 0.5672, + "step": 23785 + }, + { + "epoch": 0.7290057619222754, + "grad_norm": 1.7996785218783526, + "learning_rate": 1.8053448502519954e-06, + "loss": 0.6141, + "step": 23786 + }, + { + "epoch": 0.7290364104450165, + "grad_norm": 0.8468008916562215, + "learning_rate": 1.804963066533828e-06, + "loss": 0.4249, + "step": 23787 + }, + { + "epoch": 0.7290670589677578, + "grad_norm": 1.764157388161216, + "learning_rate": 1.8045813142971752e-06, + "loss": 0.6003, + "step": 23788 + }, + { + "epoch": 0.729097707490499, + "grad_norm": 1.8480699905925029, + "learning_rate": 1.8041995935458023e-06, + "loss": 0.5736, + "step": 23789 + }, + { + "epoch": 0.7291283560132401, + "grad_norm": 2.1672603956672187, + "learning_rate": 1.8038179042834648e-06, + "loss": 0.6116, + "step": 23790 + }, + { + "epoch": 0.7291590045359814, + "grad_norm": 2.0849853039387916, + "learning_rate": 1.8034362465139304e-06, + "loss": 0.6255, + "step": 23791 + }, + { + "epoch": 0.7291896530587225, + "grad_norm": 1.9314191052056495, + "learning_rate": 1.803054620240957e-06, + "loss": 0.5229, + "step": 23792 + }, + { + "epoch": 0.7292203015814638, + "grad_norm": 1.7842759365630572, + "learning_rate": 1.8026730254683023e-06, + "loss": 0.5642, + "step": 
23793 + }, + { + "epoch": 0.7292509501042049, + "grad_norm": 1.9073529200099781, + "learning_rate": 1.802291462199729e-06, + "loss": 0.622, + "step": 23794 + }, + { + "epoch": 0.7292815986269462, + "grad_norm": 1.8071910677856036, + "learning_rate": 1.801909930438997e-06, + "loss": 0.554, + "step": 23795 + }, + { + "epoch": 0.7293122471496873, + "grad_norm": 1.9364046495710492, + "learning_rate": 1.8015284301898633e-06, + "loss": 0.6473, + "step": 23796 + }, + { + "epoch": 0.7293428956724286, + "grad_norm": 2.0052171303856414, + "learning_rate": 1.8011469614560883e-06, + "loss": 0.582, + "step": 23797 + }, + { + "epoch": 0.7293735441951698, + "grad_norm": 1.8695102752151385, + "learning_rate": 1.8007655242414313e-06, + "loss": 0.5416, + "step": 23798 + }, + { + "epoch": 0.729404192717911, + "grad_norm": 1.981807023645212, + "learning_rate": 1.8003841185496513e-06, + "loss": 0.564, + "step": 23799 + }, + { + "epoch": 0.7294348412406522, + "grad_norm": 1.7899694967073805, + "learning_rate": 1.8000027443845052e-06, + "loss": 0.6276, + "step": 23800 + }, + { + "epoch": 0.7294654897633934, + "grad_norm": 1.9744174204402782, + "learning_rate": 1.7996214017497477e-06, + "loss": 0.6386, + "step": 23801 + }, + { + "epoch": 0.7294961382861346, + "grad_norm": 2.0244735661260775, + "learning_rate": 1.7992400906491426e-06, + "loss": 0.5874, + "step": 23802 + }, + { + "epoch": 0.7295267868088758, + "grad_norm": 1.9244274319224235, + "learning_rate": 1.7988588110864436e-06, + "loss": 0.5931, + "step": 23803 + }, + { + "epoch": 0.729557435331617, + "grad_norm": 1.7147003321526666, + "learning_rate": 1.7984775630654067e-06, + "loss": 0.5641, + "step": 23804 + }, + { + "epoch": 0.7295880838543582, + "grad_norm": 1.8070246448667648, + "learning_rate": 1.798096346589789e-06, + "loss": 0.6718, + "step": 23805 + }, + { + "epoch": 0.7296187323770994, + "grad_norm": 1.7083325176276196, + "learning_rate": 1.7977151616633475e-06, + "loss": 0.6485, + "step": 23806 + }, + { + "epoch": 0.7296493808998407, + "grad_norm": 1.9564739593157445, + "learning_rate": 1.7973340082898395e-06, + "loss": 0.6055, + "step": 23807 + }, + { + "epoch": 0.7296800294225818, + "grad_norm": 1.8272141537864455, + "learning_rate": 1.7969528864730168e-06, + "loss": 0.5752, + "step": 23808 + }, + { + "epoch": 0.7297106779453231, + "grad_norm": 1.9213144903871913, + "learning_rate": 1.7965717962166374e-06, + "loss": 0.5975, + "step": 23809 + }, + { + "epoch": 0.7297413264680642, + "grad_norm": 1.8017352118516068, + "learning_rate": 1.7961907375244574e-06, + "loss": 0.6015, + "step": 23810 + }, + { + "epoch": 0.7297719749908055, + "grad_norm": 1.8971792875390205, + "learning_rate": 1.7958097104002297e-06, + "loss": 0.5927, + "step": 23811 + }, + { + "epoch": 0.7298026235135466, + "grad_norm": 1.7553504814619265, + "learning_rate": 1.795428714847705e-06, + "loss": 0.6209, + "step": 23812 + }, + { + "epoch": 0.7298332720362879, + "grad_norm": 1.6962626867223687, + "learning_rate": 1.7950477508706448e-06, + "loss": 0.5287, + "step": 23813 + }, + { + "epoch": 0.729863920559029, + "grad_norm": 1.7618262727792353, + "learning_rate": 1.7946668184727995e-06, + "loss": 0.5579, + "step": 23814 + }, + { + "epoch": 0.7298945690817703, + "grad_norm": 1.7129960209747996, + "learning_rate": 1.7942859176579203e-06, + "loss": 0.5687, + "step": 23815 + }, + { + "epoch": 0.7299252176045115, + "grad_norm": 1.8100797456996047, + "learning_rate": 1.7939050484297616e-06, + "loss": 0.5918, + "step": 23816 + }, + { + "epoch": 0.7299558661272527, + "grad_norm": 
1.8133110150812166, + "learning_rate": 1.7935242107920775e-06, + "loss": 0.504, + "step": 23817 + }, + { + "epoch": 0.7299865146499939, + "grad_norm": 1.8138094282366066, + "learning_rate": 1.7931434047486208e-06, + "loss": 0.6626, + "step": 23818 + }, + { + "epoch": 0.7300171631727351, + "grad_norm": 1.7317006790915594, + "learning_rate": 1.7927626303031414e-06, + "loss": 0.5884, + "step": 23819 + }, + { + "epoch": 0.7300478116954763, + "grad_norm": 1.8456848065689915, + "learning_rate": 1.7923818874593924e-06, + "loss": 0.5167, + "step": 23820 + }, + { + "epoch": 0.7300784602182174, + "grad_norm": 3.6076548042700582, + "learning_rate": 1.792001176221127e-06, + "loss": 0.5392, + "step": 23821 + }, + { + "epoch": 0.7301091087409587, + "grad_norm": 1.9595749536914717, + "learning_rate": 1.7916204965920946e-06, + "loss": 0.5506, + "step": 23822 + }, + { + "epoch": 0.7301397572636998, + "grad_norm": 1.9619151919099156, + "learning_rate": 1.791239848576043e-06, + "loss": 0.6443, + "step": 23823 + }, + { + "epoch": 0.7301704057864411, + "grad_norm": 1.963281292539214, + "learning_rate": 1.7908592321767298e-06, + "loss": 0.5632, + "step": 23824 + }, + { + "epoch": 0.7302010543091823, + "grad_norm": 1.423731824158883, + "learning_rate": 1.7904786473978996e-06, + "loss": 0.4993, + "step": 23825 + }, + { + "epoch": 0.7302317028319235, + "grad_norm": 1.6672462689508838, + "learning_rate": 1.7900980942433067e-06, + "loss": 0.5479, + "step": 23826 + }, + { + "epoch": 0.7302623513546647, + "grad_norm": 1.9643513306760192, + "learning_rate": 1.7897175727166966e-06, + "loss": 0.5702, + "step": 23827 + }, + { + "epoch": 0.7302929998774059, + "grad_norm": 1.753856356271015, + "learning_rate": 1.7893370828218204e-06, + "loss": 0.6209, + "step": 23828 + }, + { + "epoch": 0.7303236484001471, + "grad_norm": 0.7800120868802147, + "learning_rate": 1.7889566245624296e-06, + "loss": 0.3917, + "step": 23829 + }, + { + "epoch": 0.7303542969228883, + "grad_norm": 1.9483753418090246, + "learning_rate": 1.788576197942269e-06, + "loss": 0.5413, + "step": 23830 + }, + { + "epoch": 0.7303849454456295, + "grad_norm": 0.8267175276459704, + "learning_rate": 1.788195802965088e-06, + "loss": 0.4071, + "step": 23831 + }, + { + "epoch": 0.7304155939683707, + "grad_norm": 0.8229609254751508, + "learning_rate": 1.787815439634638e-06, + "loss": 0.4189, + "step": 23832 + }, + { + "epoch": 0.7304462424911119, + "grad_norm": 1.8548058384890016, + "learning_rate": 1.7874351079546642e-06, + "loss": 0.6221, + "step": 23833 + }, + { + "epoch": 0.7304768910138532, + "grad_norm": 1.84596737131439, + "learning_rate": 1.7870548079289123e-06, + "loss": 0.63, + "step": 23834 + }, + { + "epoch": 0.7305075395365943, + "grad_norm": 2.66000451799753, + "learning_rate": 1.7866745395611318e-06, + "loss": 0.6136, + "step": 23835 + }, + { + "epoch": 0.7305381880593356, + "grad_norm": 1.9481267129973876, + "learning_rate": 1.7862943028550694e-06, + "loss": 0.6728, + "step": 23836 + }, + { + "epoch": 0.7305688365820767, + "grad_norm": 1.9454152706193344, + "learning_rate": 1.785914097814473e-06, + "loss": 0.528, + "step": 23837 + }, + { + "epoch": 0.730599485104818, + "grad_norm": 1.838821335263542, + "learning_rate": 1.7855339244430852e-06, + "loss": 0.6247, + "step": 23838 + }, + { + "epoch": 0.7306301336275591, + "grad_norm": 1.7923431192890655, + "learning_rate": 1.7851537827446548e-06, + "loss": 0.6247, + "step": 23839 + }, + { + "epoch": 0.7306607821503004, + "grad_norm": 1.736034712566646, + "learning_rate": 1.7847736727229276e-06, + "loss": 
0.6184, + "step": 23840 + }, + { + "epoch": 0.7306914306730415, + "grad_norm": 1.5765000619039575, + "learning_rate": 1.7843935943816488e-06, + "loss": 0.5591, + "step": 23841 + }, + { + "epoch": 0.7307220791957828, + "grad_norm": 1.7833385368097805, + "learning_rate": 1.784013547724559e-06, + "loss": 0.5208, + "step": 23842 + }, + { + "epoch": 0.730752727718524, + "grad_norm": 0.822676123605763, + "learning_rate": 1.7836335327554099e-06, + "loss": 0.4064, + "step": 23843 + }, + { + "epoch": 0.7307833762412652, + "grad_norm": 1.9194274780041607, + "learning_rate": 1.7832535494779408e-06, + "loss": 0.5395, + "step": 23844 + }, + { + "epoch": 0.7308140247640064, + "grad_norm": 2.0588491676834737, + "learning_rate": 1.7828735978958995e-06, + "loss": 0.5473, + "step": 23845 + }, + { + "epoch": 0.7308446732867476, + "grad_norm": 1.5857973892914101, + "learning_rate": 1.782493678013026e-06, + "loss": 0.5335, + "step": 23846 + }, + { + "epoch": 0.7308753218094888, + "grad_norm": 1.7433562073695892, + "learning_rate": 1.7821137898330654e-06, + "loss": 0.5605, + "step": 23847 + }, + { + "epoch": 0.73090597033223, + "grad_norm": 0.7962291151472806, + "learning_rate": 1.7817339333597622e-06, + "loss": 0.3978, + "step": 23848 + }, + { + "epoch": 0.7309366188549712, + "grad_norm": 0.8527349644951404, + "learning_rate": 1.7813541085968573e-06, + "loss": 0.417, + "step": 23849 + }, + { + "epoch": 0.7309672673777124, + "grad_norm": 1.8340263875829854, + "learning_rate": 1.7809743155480929e-06, + "loss": 0.6177, + "step": 23850 + }, + { + "epoch": 0.7309979159004536, + "grad_norm": 1.8143092754886585, + "learning_rate": 1.7805945542172143e-06, + "loss": 0.6481, + "step": 23851 + }, + { + "epoch": 0.7310285644231947, + "grad_norm": 1.632758870857113, + "learning_rate": 1.7802148246079597e-06, + "loss": 0.5699, + "step": 23852 + }, + { + "epoch": 0.731059212945936, + "grad_norm": 1.7740028007405304, + "learning_rate": 1.7798351267240722e-06, + "loss": 0.5181, + "step": 23853 + }, + { + "epoch": 0.7310898614686772, + "grad_norm": 1.8913410604172456, + "learning_rate": 1.779455460569295e-06, + "loss": 0.5989, + "step": 23854 + }, + { + "epoch": 0.7311205099914184, + "grad_norm": 1.7741022317844741, + "learning_rate": 1.7790758261473651e-06, + "loss": 0.6335, + "step": 23855 + }, + { + "epoch": 0.7311511585141596, + "grad_norm": 1.9791574961319802, + "learning_rate": 1.778696223462027e-06, + "loss": 0.5985, + "step": 23856 + }, + { + "epoch": 0.7311818070369008, + "grad_norm": 1.7153411113734238, + "learning_rate": 1.7783166525170175e-06, + "loss": 0.5446, + "step": 23857 + }, + { + "epoch": 0.731212455559642, + "grad_norm": 1.7660807041122104, + "learning_rate": 1.7779371133160784e-06, + "loss": 0.4973, + "step": 23858 + }, + { + "epoch": 0.7312431040823832, + "grad_norm": 0.8059062766205362, + "learning_rate": 1.7775576058629512e-06, + "loss": 0.4128, + "step": 23859 + }, + { + "epoch": 0.7312737526051244, + "grad_norm": 1.7871196141458865, + "learning_rate": 1.7771781301613716e-06, + "loss": 0.61, + "step": 23860 + }, + { + "epoch": 0.7313044011278657, + "grad_norm": 2.0457232338334492, + "learning_rate": 1.7767986862150805e-06, + "loss": 0.5724, + "step": 23861 + }, + { + "epoch": 0.7313350496506068, + "grad_norm": 1.869874476643524, + "learning_rate": 1.776419274027818e-06, + "loss": 0.5776, + "step": 23862 + }, + { + "epoch": 0.7313656981733481, + "grad_norm": 0.8627962806129347, + "learning_rate": 1.7760398936033195e-06, + "loss": 0.4153, + "step": 23863 + }, + { + "epoch": 0.7313963466960892, + 
"grad_norm": 1.6932720576459883, + "learning_rate": 1.7756605449453252e-06, + "loss": 0.6483, + "step": 23864 + }, + { + "epoch": 0.7314269952188305, + "grad_norm": 1.7842339261919726, + "learning_rate": 1.7752812280575737e-06, + "loss": 0.5434, + "step": 23865 + }, + { + "epoch": 0.7314576437415716, + "grad_norm": 1.9842373808765952, + "learning_rate": 1.7749019429438003e-06, + "loss": 0.5274, + "step": 23866 + }, + { + "epoch": 0.7314882922643129, + "grad_norm": 2.0837312656855382, + "learning_rate": 1.7745226896077444e-06, + "loss": 0.5937, + "step": 23867 + }, + { + "epoch": 0.731518940787054, + "grad_norm": 0.7822548459331304, + "learning_rate": 1.7741434680531405e-06, + "loss": 0.4159, + "step": 23868 + }, + { + "epoch": 0.7315495893097953, + "grad_norm": 2.0835895808692473, + "learning_rate": 1.773764278283726e-06, + "loss": 0.5648, + "step": 23869 + }, + { + "epoch": 0.7315802378325365, + "grad_norm": 1.921924211524516, + "learning_rate": 1.7733851203032393e-06, + "loss": 0.5908, + "step": 23870 + }, + { + "epoch": 0.7316108863552777, + "grad_norm": 1.9154499408003975, + "learning_rate": 1.7730059941154133e-06, + "loss": 0.6333, + "step": 23871 + }, + { + "epoch": 0.7316415348780189, + "grad_norm": 1.850786509766599, + "learning_rate": 1.7726268997239843e-06, + "loss": 0.6277, + "step": 23872 + }, + { + "epoch": 0.7316721834007601, + "grad_norm": 1.9873510899272928, + "learning_rate": 1.7722478371326902e-06, + "loss": 0.561, + "step": 23873 + }, + { + "epoch": 0.7317028319235013, + "grad_norm": 1.8368303894997853, + "learning_rate": 1.7718688063452621e-06, + "loss": 0.6309, + "step": 23874 + }, + { + "epoch": 0.7317334804462425, + "grad_norm": 1.834014511548775, + "learning_rate": 1.7714898073654368e-06, + "loss": 0.5504, + "step": 23875 + }, + { + "epoch": 0.7317641289689837, + "grad_norm": 1.636445011852327, + "learning_rate": 1.7711108401969502e-06, + "loss": 0.652, + "step": 23876 + }, + { + "epoch": 0.731794777491725, + "grad_norm": 0.8125582678830822, + "learning_rate": 1.770731904843533e-06, + "loss": 0.4122, + "step": 23877 + }, + { + "epoch": 0.7318254260144661, + "grad_norm": 2.0787282687078883, + "learning_rate": 1.7703530013089221e-06, + "loss": 0.6228, + "step": 23878 + }, + { + "epoch": 0.7318560745372074, + "grad_norm": 1.661915450698347, + "learning_rate": 1.7699741295968476e-06, + "loss": 0.4833, + "step": 23879 + }, + { + "epoch": 0.7318867230599485, + "grad_norm": 1.809959577680124, + "learning_rate": 1.7695952897110447e-06, + "loss": 0.5638, + "step": 23880 + }, + { + "epoch": 0.7319173715826898, + "grad_norm": 2.0297936692388556, + "learning_rate": 1.7692164816552476e-06, + "loss": 0.6217, + "step": 23881 + }, + { + "epoch": 0.7319480201054309, + "grad_norm": 1.9970056797040947, + "learning_rate": 1.7688377054331858e-06, + "loss": 0.5074, + "step": 23882 + }, + { + "epoch": 0.7319786686281721, + "grad_norm": 1.8969536911514309, + "learning_rate": 1.768458961048592e-06, + "loss": 0.5357, + "step": 23883 + }, + { + "epoch": 0.7320093171509133, + "grad_norm": 1.90534349798699, + "learning_rate": 1.7680802485052011e-06, + "loss": 0.6967, + "step": 23884 + }, + { + "epoch": 0.7320399656736545, + "grad_norm": 0.7891779710067349, + "learning_rate": 1.7677015678067405e-06, + "loss": 0.4211, + "step": 23885 + }, + { + "epoch": 0.7320706141963957, + "grad_norm": 1.7615975125769983, + "learning_rate": 1.7673229189569451e-06, + "loss": 0.5926, + "step": 23886 + }, + { + "epoch": 0.7321012627191369, + "grad_norm": 1.9628145543826014, + "learning_rate": 
1.766944301959543e-06, + "loss": 0.5821, + "step": 23887 + }, + { + "epoch": 0.7321319112418782, + "grad_norm": 1.8256775497105198, + "learning_rate": 1.7665657168182655e-06, + "loss": 0.5555, + "step": 23888 + }, + { + "epoch": 0.7321625597646193, + "grad_norm": 1.7423154073781888, + "learning_rate": 1.7661871635368444e-06, + "loss": 0.5808, + "step": 23889 + }, + { + "epoch": 0.7321932082873606, + "grad_norm": 1.9206973500993323, + "learning_rate": 1.7658086421190074e-06, + "loss": 0.5713, + "step": 23890 + }, + { + "epoch": 0.7322238568101017, + "grad_norm": 1.8619465312032317, + "learning_rate": 1.7654301525684853e-06, + "loss": 0.607, + "step": 23891 + }, + { + "epoch": 0.732254505332843, + "grad_norm": 1.8351592719540613, + "learning_rate": 1.7650516948890095e-06, + "loss": 0.5142, + "step": 23892 + }, + { + "epoch": 0.7322851538555841, + "grad_norm": 1.9852553928432808, + "learning_rate": 1.764673269084305e-06, + "loss": 0.6256, + "step": 23893 + }, + { + "epoch": 0.7323158023783254, + "grad_norm": 0.8023265636437574, + "learning_rate": 1.7642948751581029e-06, + "loss": 0.4158, + "step": 23894 + }, + { + "epoch": 0.7323464509010665, + "grad_norm": 1.7098548534433013, + "learning_rate": 1.7639165131141329e-06, + "loss": 0.6484, + "step": 23895 + }, + { + "epoch": 0.7323770994238078, + "grad_norm": 1.8222362367564786, + "learning_rate": 1.7635381829561193e-06, + "loss": 0.6397, + "step": 23896 + }, + { + "epoch": 0.732407747946549, + "grad_norm": 2.1539234205490505, + "learning_rate": 1.763159884687794e-06, + "loss": 0.5628, + "step": 23897 + }, + { + "epoch": 0.7324383964692902, + "grad_norm": 0.7766743282990782, + "learning_rate": 1.7627816183128793e-06, + "loss": 0.3945, + "step": 23898 + }, + { + "epoch": 0.7324690449920314, + "grad_norm": 1.8412375615732324, + "learning_rate": 1.762403383835109e-06, + "loss": 0.5203, + "step": 23899 + }, + { + "epoch": 0.7324996935147726, + "grad_norm": 0.7395619024612202, + "learning_rate": 1.7620251812582068e-06, + "loss": 0.4135, + "step": 23900 + }, + { + "epoch": 0.7325303420375138, + "grad_norm": 1.7704711702608684, + "learning_rate": 1.7616470105858968e-06, + "loss": 0.673, + "step": 23901 + }, + { + "epoch": 0.732560990560255, + "grad_norm": 1.7703494693285735, + "learning_rate": 1.7612688718219072e-06, + "loss": 0.5922, + "step": 23902 + }, + { + "epoch": 0.7325916390829962, + "grad_norm": 1.8043693815990902, + "learning_rate": 1.7608907649699663e-06, + "loss": 0.6666, + "step": 23903 + }, + { + "epoch": 0.7326222876057374, + "grad_norm": 0.7772018258470565, + "learning_rate": 1.7605126900337953e-06, + "loss": 0.3963, + "step": 23904 + }, + { + "epoch": 0.7326529361284786, + "grad_norm": 1.799666670690649, + "learning_rate": 1.760134647017122e-06, + "loss": 0.6156, + "step": 23905 + }, + { + "epoch": 0.7326835846512199, + "grad_norm": 1.6140949310489305, + "learning_rate": 1.7597566359236712e-06, + "loss": 0.6272, + "step": 23906 + }, + { + "epoch": 0.732714233173961, + "grad_norm": 2.031949488763994, + "learning_rate": 1.7593786567571686e-06, + "loss": 0.5495, + "step": 23907 + }, + { + "epoch": 0.7327448816967023, + "grad_norm": 0.7814652379718872, + "learning_rate": 1.7590007095213369e-06, + "loss": 0.3952, + "step": 23908 + }, + { + "epoch": 0.7327755302194434, + "grad_norm": 0.7455357408279671, + "learning_rate": 1.7586227942198975e-06, + "loss": 0.4063, + "step": 23909 + }, + { + "epoch": 0.7328061787421847, + "grad_norm": 0.8200386539101083, + "learning_rate": 1.7582449108565807e-06, + "loss": 0.4391, + "step": 23910 + }, + { + 
"epoch": 0.7328368272649258, + "grad_norm": 1.8111157405092233, + "learning_rate": 1.757867059435106e-06, + "loss": 0.5881, + "step": 23911 + }, + { + "epoch": 0.7328674757876671, + "grad_norm": 1.6613911815178843, + "learning_rate": 1.7574892399591947e-06, + "loss": 0.5828, + "step": 23912 + }, + { + "epoch": 0.7328981243104082, + "grad_norm": 1.865624225553855, + "learning_rate": 1.7571114524325716e-06, + "loss": 0.5484, + "step": 23913 + }, + { + "epoch": 0.7329287728331494, + "grad_norm": 2.034839694305301, + "learning_rate": 1.756733696858961e-06, + "loss": 0.6257, + "step": 23914 + }, + { + "epoch": 0.7329594213558907, + "grad_norm": 1.778162931911195, + "learning_rate": 1.7563559732420815e-06, + "loss": 0.5896, + "step": 23915 + }, + { + "epoch": 0.7329900698786318, + "grad_norm": 1.609269725753223, + "learning_rate": 1.7559782815856563e-06, + "loss": 0.5184, + "step": 23916 + }, + { + "epoch": 0.7330207184013731, + "grad_norm": 0.8052006401208394, + "learning_rate": 1.7556006218934074e-06, + "loss": 0.3935, + "step": 23917 + }, + { + "epoch": 0.7330513669241142, + "grad_norm": 1.9692937024975452, + "learning_rate": 1.7552229941690573e-06, + "loss": 0.6815, + "step": 23918 + }, + { + "epoch": 0.7330820154468555, + "grad_norm": 0.7529404241827129, + "learning_rate": 1.754845398416325e-06, + "loss": 0.3977, + "step": 23919 + }, + { + "epoch": 0.7331126639695966, + "grad_norm": 1.8025274117917076, + "learning_rate": 1.7544678346389283e-06, + "loss": 0.5409, + "step": 23920 + }, + { + "epoch": 0.7331433124923379, + "grad_norm": 0.8070072425350245, + "learning_rate": 1.7540903028405936e-06, + "loss": 0.4223, + "step": 23921 + }, + { + "epoch": 0.733173961015079, + "grad_norm": 1.743227037485688, + "learning_rate": 1.7537128030250372e-06, + "loss": 0.5317, + "step": 23922 + }, + { + "epoch": 0.7332046095378203, + "grad_norm": 1.7211801396517799, + "learning_rate": 1.7533353351959782e-06, + "loss": 0.5993, + "step": 23923 + }, + { + "epoch": 0.7332352580605614, + "grad_norm": 1.7650799229366296, + "learning_rate": 1.7529578993571367e-06, + "loss": 0.621, + "step": 23924 + }, + { + "epoch": 0.7332659065833027, + "grad_norm": 1.6647248836319541, + "learning_rate": 1.7525804955122316e-06, + "loss": 0.5598, + "step": 23925 + }, + { + "epoch": 0.7332965551060439, + "grad_norm": 1.8863092412387978, + "learning_rate": 1.7522031236649833e-06, + "loss": 0.6417, + "step": 23926 + }, + { + "epoch": 0.7333272036287851, + "grad_norm": 1.8164990906060665, + "learning_rate": 1.7518257838191073e-06, + "loss": 0.6085, + "step": 23927 + }, + { + "epoch": 0.7333578521515263, + "grad_norm": 1.7403313993274645, + "learning_rate": 1.7514484759783223e-06, + "loss": 0.5547, + "step": 23928 + }, + { + "epoch": 0.7333885006742675, + "grad_norm": 0.809469615093356, + "learning_rate": 1.7510712001463493e-06, + "loss": 0.4155, + "step": 23929 + }, + { + "epoch": 0.7334191491970087, + "grad_norm": 0.791151092341208, + "learning_rate": 1.7506939563269021e-06, + "loss": 0.4038, + "step": 23930 + }, + { + "epoch": 0.7334497977197499, + "grad_norm": 1.7083744765314646, + "learning_rate": 1.7503167445236974e-06, + "loss": 0.5543, + "step": 23931 + }, + { + "epoch": 0.7334804462424911, + "grad_norm": 0.7917076892362213, + "learning_rate": 1.7499395647404532e-06, + "loss": 0.4138, + "step": 23932 + }, + { + "epoch": 0.7335110947652324, + "grad_norm": 1.7033490760777705, + "learning_rate": 1.7495624169808862e-06, + "loss": 0.6094, + "step": 23933 + }, + { + "epoch": 0.7335417432879735, + "grad_norm": 1.8838599001911147, + 
"learning_rate": 1.7491853012487141e-06, + "loss": 0.5649, + "step": 23934 + }, + { + "epoch": 0.7335723918107148, + "grad_norm": 1.7391887167025943, + "learning_rate": 1.7488082175476495e-06, + "loss": 0.5846, + "step": 23935 + }, + { + "epoch": 0.7336030403334559, + "grad_norm": 0.8094770966827673, + "learning_rate": 1.748431165881409e-06, + "loss": 0.4285, + "step": 23936 + }, + { + "epoch": 0.7336336888561972, + "grad_norm": 0.7658928942830058, + "learning_rate": 1.7480541462537098e-06, + "loss": 0.4098, + "step": 23937 + }, + { + "epoch": 0.7336643373789383, + "grad_norm": 1.6576275956952442, + "learning_rate": 1.7476771586682655e-06, + "loss": 0.5071, + "step": 23938 + }, + { + "epoch": 0.7336949859016796, + "grad_norm": 0.8460876374011885, + "learning_rate": 1.7473002031287867e-06, + "loss": 0.4214, + "step": 23939 + }, + { + "epoch": 0.7337256344244207, + "grad_norm": 1.9387084650443869, + "learning_rate": 1.7469232796389945e-06, + "loss": 0.5144, + "step": 23940 + }, + { + "epoch": 0.733756282947162, + "grad_norm": 1.9511956528594474, + "learning_rate": 1.7465463882025995e-06, + "loss": 0.607, + "step": 23941 + }, + { + "epoch": 0.7337869314699031, + "grad_norm": 0.7687569348190431, + "learning_rate": 1.7461695288233138e-06, + "loss": 0.393, + "step": 23942 + }, + { + "epoch": 0.7338175799926444, + "grad_norm": 0.7477933897355844, + "learning_rate": 1.7457927015048526e-06, + "loss": 0.4007, + "step": 23943 + }, + { + "epoch": 0.7338482285153856, + "grad_norm": 1.8028855895386808, + "learning_rate": 1.7454159062509286e-06, + "loss": 0.5216, + "step": 23944 + }, + { + "epoch": 0.7338788770381267, + "grad_norm": 1.9150293005629442, + "learning_rate": 1.7450391430652552e-06, + "loss": 0.6422, + "step": 23945 + }, + { + "epoch": 0.733909525560868, + "grad_norm": 1.940019492963828, + "learning_rate": 1.7446624119515432e-06, + "loss": 0.6769, + "step": 23946 + }, + { + "epoch": 0.7339401740836091, + "grad_norm": 1.6531273006012215, + "learning_rate": 1.744285712913505e-06, + "loss": 0.4697, + "step": 23947 + }, + { + "epoch": 0.7339708226063504, + "grad_norm": 1.7415879545603603, + "learning_rate": 1.7439090459548541e-06, + "loss": 0.508, + "step": 23948 + }, + { + "epoch": 0.7340014711290915, + "grad_norm": 1.9199120257820312, + "learning_rate": 1.7435324110793006e-06, + "loss": 0.6186, + "step": 23949 + }, + { + "epoch": 0.7340321196518328, + "grad_norm": 1.8769172573862363, + "learning_rate": 1.7431558082905525e-06, + "loss": 0.6181, + "step": 23950 + }, + { + "epoch": 0.7340627681745739, + "grad_norm": 1.8805924942091008, + "learning_rate": 1.7427792375923264e-06, + "loss": 0.648, + "step": 23951 + }, + { + "epoch": 0.7340934166973152, + "grad_norm": 2.083754076764416, + "learning_rate": 1.7424026989883285e-06, + "loss": 0.6045, + "step": 23952 + }, + { + "epoch": 0.7341240652200564, + "grad_norm": 0.8301593613165619, + "learning_rate": 1.7420261924822717e-06, + "loss": 0.4103, + "step": 23953 + }, + { + "epoch": 0.7341547137427976, + "grad_norm": 2.0047568906887974, + "learning_rate": 1.741649718077863e-06, + "loss": 0.6219, + "step": 23954 + }, + { + "epoch": 0.7341853622655388, + "grad_norm": 1.9497046981738713, + "learning_rate": 1.741273275778813e-06, + "loss": 0.5544, + "step": 23955 + }, + { + "epoch": 0.73421601078828, + "grad_norm": 1.7026753836319268, + "learning_rate": 1.740896865588833e-06, + "loss": 0.5145, + "step": 23956 + }, + { + "epoch": 0.7342466593110212, + "grad_norm": 0.8500631274825664, + "learning_rate": 1.7405204875116289e-06, + "loss": 0.4333, + "step": 
23957 + }, + { + "epoch": 0.7342773078337624, + "grad_norm": 0.7669424744748414, + "learning_rate": 1.7401441415509096e-06, + "loss": 0.398, + "step": 23958 + }, + { + "epoch": 0.7343079563565036, + "grad_norm": 1.7292074725778506, + "learning_rate": 1.7397678277103863e-06, + "loss": 0.6402, + "step": 23959 + }, + { + "epoch": 0.7343386048792448, + "grad_norm": 1.8067726013465015, + "learning_rate": 1.7393915459937631e-06, + "loss": 0.5214, + "step": 23960 + }, + { + "epoch": 0.734369253401986, + "grad_norm": 1.5171390853685869, + "learning_rate": 1.7390152964047492e-06, + "loss": 0.471, + "step": 23961 + }, + { + "epoch": 0.7343999019247273, + "grad_norm": 1.8667934507353925, + "learning_rate": 1.7386390789470536e-06, + "loss": 0.5966, + "step": 23962 + }, + { + "epoch": 0.7344305504474684, + "grad_norm": 1.8298625702497266, + "learning_rate": 1.73826289362438e-06, + "loss": 0.566, + "step": 23963 + }, + { + "epoch": 0.7344611989702097, + "grad_norm": 2.1395365253693726, + "learning_rate": 1.7378867404404382e-06, + "loss": 0.5898, + "step": 23964 + }, + { + "epoch": 0.7344918474929508, + "grad_norm": 1.9277274770116335, + "learning_rate": 1.737510619398931e-06, + "loss": 0.632, + "step": 23965 + }, + { + "epoch": 0.7345224960156921, + "grad_norm": 1.5825640053847125, + "learning_rate": 1.737134530503567e-06, + "loss": 0.5649, + "step": 23966 + }, + { + "epoch": 0.7345531445384332, + "grad_norm": 1.9068683324554845, + "learning_rate": 1.7367584737580528e-06, + "loss": 0.6237, + "step": 23967 + }, + { + "epoch": 0.7345837930611745, + "grad_norm": 1.8658359522900565, + "learning_rate": 1.7363824491660902e-06, + "loss": 0.5999, + "step": 23968 + }, + { + "epoch": 0.7346144415839156, + "grad_norm": 1.8036675306648564, + "learning_rate": 1.7360064567313866e-06, + "loss": 0.5782, + "step": 23969 + }, + { + "epoch": 0.7346450901066569, + "grad_norm": 1.7846515401877265, + "learning_rate": 1.7356304964576488e-06, + "loss": 0.5099, + "step": 23970 + }, + { + "epoch": 0.7346757386293981, + "grad_norm": 1.837843453904586, + "learning_rate": 1.7352545683485766e-06, + "loss": 0.5636, + "step": 23971 + }, + { + "epoch": 0.7347063871521393, + "grad_norm": 1.8776161440116013, + "learning_rate": 1.7348786724078765e-06, + "loss": 0.6361, + "step": 23972 + }, + { + "epoch": 0.7347370356748805, + "grad_norm": 1.9329097037864582, + "learning_rate": 1.734502808639254e-06, + "loss": 0.573, + "step": 23973 + }, + { + "epoch": 0.7347676841976217, + "grad_norm": 1.8280324627972608, + "learning_rate": 1.7341269770464091e-06, + "loss": 0.6634, + "step": 23974 + }, + { + "epoch": 0.7347983327203629, + "grad_norm": 0.7829820378463246, + "learning_rate": 1.733751177633049e-06, + "loss": 0.3972, + "step": 23975 + }, + { + "epoch": 0.734828981243104, + "grad_norm": 0.8054572767135662, + "learning_rate": 1.7333754104028721e-06, + "loss": 0.3984, + "step": 23976 + }, + { + "epoch": 0.7348596297658453, + "grad_norm": 1.7106397078313453, + "learning_rate": 1.732999675359583e-06, + "loss": 0.5567, + "step": 23977 + }, + { + "epoch": 0.7348902782885864, + "grad_norm": 1.9147815326629523, + "learning_rate": 1.7326239725068856e-06, + "loss": 0.6107, + "step": 23978 + }, + { + "epoch": 0.7349209268113277, + "grad_norm": 0.8143565772755553, + "learning_rate": 1.7322483018484787e-06, + "loss": 0.4, + "step": 23979 + }, + { + "epoch": 0.7349515753340689, + "grad_norm": 1.7513444933778777, + "learning_rate": 1.7318726633880655e-06, + "loss": 0.6067, + "step": 23980 + }, + { + "epoch": 0.7349822238568101, + "grad_norm": 
1.962419015742906, + "learning_rate": 1.7314970571293488e-06, + "loss": 0.6268, + "step": 23981 + }, + { + "epoch": 0.7350128723795513, + "grad_norm": 0.8132790575572494, + "learning_rate": 1.7311214830760258e-06, + "loss": 0.4038, + "step": 23982 + }, + { + "epoch": 0.7350435209022925, + "grad_norm": 1.64074826658688, + "learning_rate": 1.7307459412318013e-06, + "loss": 0.5363, + "step": 23983 + }, + { + "epoch": 0.7350741694250337, + "grad_norm": 1.65045123607994, + "learning_rate": 1.7303704316003716e-06, + "loss": 0.6531, + "step": 23984 + }, + { + "epoch": 0.7351048179477749, + "grad_norm": 2.1095754893557883, + "learning_rate": 1.7299949541854382e-06, + "loss": 0.7221, + "step": 23985 + }, + { + "epoch": 0.7351354664705161, + "grad_norm": 1.6177523716987217, + "learning_rate": 1.7296195089907037e-06, + "loss": 0.5066, + "step": 23986 + }, + { + "epoch": 0.7351661149932573, + "grad_norm": 0.7758927132053962, + "learning_rate": 1.7292440960198631e-06, + "loss": 0.3919, + "step": 23987 + }, + { + "epoch": 0.7351967635159985, + "grad_norm": 1.766678509223574, + "learning_rate": 1.728868715276617e-06, + "loss": 0.6735, + "step": 23988 + }, + { + "epoch": 0.7352274120387398, + "grad_norm": 1.7819023919369854, + "learning_rate": 1.728493366764666e-06, + "loss": 0.5443, + "step": 23989 + }, + { + "epoch": 0.7352580605614809, + "grad_norm": 2.084078246127482, + "learning_rate": 1.7281180504877053e-06, + "loss": 0.6131, + "step": 23990 + }, + { + "epoch": 0.7352887090842222, + "grad_norm": 1.7820566516198633, + "learning_rate": 1.7277427664494352e-06, + "loss": 0.5879, + "step": 23991 + }, + { + "epoch": 0.7353193576069633, + "grad_norm": 0.8434255611031117, + "learning_rate": 1.7273675146535535e-06, + "loss": 0.4169, + "step": 23992 + }, + { + "epoch": 0.7353500061297046, + "grad_norm": 1.7598520488311216, + "learning_rate": 1.726992295103756e-06, + "loss": 0.4623, + "step": 23993 + }, + { + "epoch": 0.7353806546524457, + "grad_norm": 0.7908934863581122, + "learning_rate": 1.7266171078037424e-06, + "loss": 0.4154, + "step": 23994 + }, + { + "epoch": 0.735411303175187, + "grad_norm": 1.7378730387453423, + "learning_rate": 1.7262419527572062e-06, + "loss": 0.5243, + "step": 23995 + }, + { + "epoch": 0.7354419516979281, + "grad_norm": 0.7856200985132971, + "learning_rate": 1.725866829967846e-06, + "loss": 0.4038, + "step": 23996 + }, + { + "epoch": 0.7354726002206694, + "grad_norm": 0.7895627780334575, + "learning_rate": 1.7254917394393588e-06, + "loss": 0.409, + "step": 23997 + }, + { + "epoch": 0.7355032487434106, + "grad_norm": 1.9526883377327158, + "learning_rate": 1.7251166811754384e-06, + "loss": 0.6767, + "step": 23998 + }, + { + "epoch": 0.7355338972661518, + "grad_norm": 1.8115187235056223, + "learning_rate": 1.7247416551797802e-06, + "loss": 0.5661, + "step": 23999 + }, + { + "epoch": 0.735564545788893, + "grad_norm": 1.9909806889018398, + "learning_rate": 1.7243666614560828e-06, + "loss": 0.6739, + "step": 24000 + }, + { + "epoch": 0.7355951943116342, + "grad_norm": 1.747050840700644, + "learning_rate": 1.723991700008037e-06, + "loss": 0.5206, + "step": 24001 + }, + { + "epoch": 0.7356258428343754, + "grad_norm": 2.0587624501288735, + "learning_rate": 1.7236167708393393e-06, + "loss": 0.6115, + "step": 24002 + }, + { + "epoch": 0.7356564913571166, + "grad_norm": 0.7779830030830005, + "learning_rate": 1.7232418739536854e-06, + "loss": 0.3839, + "step": 24003 + }, + { + "epoch": 0.7356871398798578, + "grad_norm": 0.8449442575238152, + "learning_rate": 1.7228670093547661e-06, + "loss": 
0.3914, + "step": 24004 + }, + { + "epoch": 0.735717788402599, + "grad_norm": 2.253668157519409, + "learning_rate": 1.7224921770462782e-06, + "loss": 0.5908, + "step": 24005 + }, + { + "epoch": 0.7357484369253402, + "grad_norm": 1.5532669170727593, + "learning_rate": 1.7221173770319105e-06, + "loss": 0.4836, + "step": 24006 + }, + { + "epoch": 0.7357790854480813, + "grad_norm": 0.8115460929199667, + "learning_rate": 1.7217426093153623e-06, + "loss": 0.3887, + "step": 24007 + }, + { + "epoch": 0.7358097339708226, + "grad_norm": 1.6741575531656194, + "learning_rate": 1.7213678739003225e-06, + "loss": 0.5156, + "step": 24008 + }, + { + "epoch": 0.7358403824935638, + "grad_norm": 1.6974744491344596, + "learning_rate": 1.7209931707904826e-06, + "loss": 0.5404, + "step": 24009 + }, + { + "epoch": 0.735871031016305, + "grad_norm": 2.071684173737199, + "learning_rate": 1.7206184999895354e-06, + "loss": 0.5753, + "step": 24010 + }, + { + "epoch": 0.7359016795390462, + "grad_norm": 1.7729865196724313, + "learning_rate": 1.7202438615011757e-06, + "loss": 0.5662, + "step": 24011 + }, + { + "epoch": 0.7359323280617874, + "grad_norm": 1.9331741222717833, + "learning_rate": 1.7198692553290903e-06, + "loss": 0.5528, + "step": 24012 + }, + { + "epoch": 0.7359629765845286, + "grad_norm": 1.9945744683101614, + "learning_rate": 1.719494681476972e-06, + "loss": 0.6243, + "step": 24013 + }, + { + "epoch": 0.7359936251072698, + "grad_norm": 2.041746480630505, + "learning_rate": 1.7191201399485141e-06, + "loss": 0.5131, + "step": 24014 + }, + { + "epoch": 0.736024273630011, + "grad_norm": 1.8335342762696252, + "learning_rate": 1.7187456307474031e-06, + "loss": 0.6535, + "step": 24015 + }, + { + "epoch": 0.7360549221527523, + "grad_norm": 2.0973456077743404, + "learning_rate": 1.7183711538773328e-06, + "loss": 0.6076, + "step": 24016 + }, + { + "epoch": 0.7360855706754934, + "grad_norm": 1.81550240222822, + "learning_rate": 1.7179967093419876e-06, + "loss": 0.555, + "step": 24017 + }, + { + "epoch": 0.7361162191982347, + "grad_norm": 2.0527760828948063, + "learning_rate": 1.717622297145064e-06, + "loss": 0.5881, + "step": 24018 + }, + { + "epoch": 0.7361468677209758, + "grad_norm": 1.7484130137949416, + "learning_rate": 1.7172479172902474e-06, + "loss": 0.5415, + "step": 24019 + }, + { + "epoch": 0.7361775162437171, + "grad_norm": 1.9609722665930376, + "learning_rate": 1.7168735697812254e-06, + "loss": 0.6109, + "step": 24020 + }, + { + "epoch": 0.7362081647664582, + "grad_norm": 1.6999722507034822, + "learning_rate": 1.7164992546216886e-06, + "loss": 0.6002, + "step": 24021 + }, + { + "epoch": 0.7362388132891995, + "grad_norm": 2.1844235287647416, + "learning_rate": 1.7161249718153266e-06, + "loss": 0.5839, + "step": 24022 + }, + { + "epoch": 0.7362694618119406, + "grad_norm": 2.0302098589960567, + "learning_rate": 1.7157507213658232e-06, + "loss": 0.5174, + "step": 24023 + }, + { + "epoch": 0.7363001103346819, + "grad_norm": 1.7459611140059905, + "learning_rate": 1.7153765032768683e-06, + "loss": 0.5682, + "step": 24024 + }, + { + "epoch": 0.736330758857423, + "grad_norm": 1.9457457788387653, + "learning_rate": 1.7150023175521496e-06, + "loss": 0.5913, + "step": 24025 + }, + { + "epoch": 0.7363614073801643, + "grad_norm": 0.7749967061392755, + "learning_rate": 1.714628164195355e-06, + "loss": 0.4088, + "step": 24026 + }, + { + "epoch": 0.7363920559029055, + "grad_norm": 1.7799738219695296, + "learning_rate": 1.7142540432101695e-06, + "loss": 0.5659, + "step": 24027 + }, + { + "epoch": 0.7364227044256467, + 
"grad_norm": 2.0174702086677736, + "learning_rate": 1.7138799546002776e-06, + "loss": 0.5648, + "step": 24028 + }, + { + "epoch": 0.7364533529483879, + "grad_norm": 1.8171373196240475, + "learning_rate": 1.7135058983693682e-06, + "loss": 0.4563, + "step": 24029 + }, + { + "epoch": 0.7364840014711291, + "grad_norm": 1.6584289140236674, + "learning_rate": 1.7131318745211272e-06, + "loss": 0.5389, + "step": 24030 + }, + { + "epoch": 0.7365146499938703, + "grad_norm": 1.7677529814753294, + "learning_rate": 1.7127578830592374e-06, + "loss": 0.5905, + "step": 24031 + }, + { + "epoch": 0.7365452985166115, + "grad_norm": 2.0339682035653945, + "learning_rate": 1.7123839239873845e-06, + "loss": 0.6401, + "step": 24032 + }, + { + "epoch": 0.7365759470393527, + "grad_norm": 1.7124312120238039, + "learning_rate": 1.7120099973092551e-06, + "loss": 0.5527, + "step": 24033 + }, + { + "epoch": 0.736606595562094, + "grad_norm": 1.8418378475027826, + "learning_rate": 1.7116361030285334e-06, + "loss": 0.5915, + "step": 24034 + }, + { + "epoch": 0.7366372440848351, + "grad_norm": 0.7913027730157475, + "learning_rate": 1.7112622411489026e-06, + "loss": 0.3941, + "step": 24035 + }, + { + "epoch": 0.7366678926075764, + "grad_norm": 1.9471344433111468, + "learning_rate": 1.7108884116740432e-06, + "loss": 0.6063, + "step": 24036 + }, + { + "epoch": 0.7366985411303175, + "grad_norm": 1.8172732173294197, + "learning_rate": 1.7105146146076452e-06, + "loss": 0.5016, + "step": 24037 + }, + { + "epoch": 0.7367291896530587, + "grad_norm": 1.790618649611728, + "learning_rate": 1.7101408499533883e-06, + "loss": 0.487, + "step": 24038 + }, + { + "epoch": 0.7367598381757999, + "grad_norm": 1.974958107456145, + "learning_rate": 1.7097671177149538e-06, + "loss": 0.5738, + "step": 24039 + }, + { + "epoch": 0.7367904866985411, + "grad_norm": 1.892182381745854, + "learning_rate": 1.7093934178960258e-06, + "loss": 0.5948, + "step": 24040 + }, + { + "epoch": 0.7368211352212823, + "grad_norm": 1.7973839637424187, + "learning_rate": 1.7090197505002877e-06, + "loss": 0.5705, + "step": 24041 + }, + { + "epoch": 0.7368517837440235, + "grad_norm": 1.8135534400605315, + "learning_rate": 1.7086461155314189e-06, + "loss": 0.5614, + "step": 24042 + }, + { + "epoch": 0.7368824322667648, + "grad_norm": 1.9153140471172598, + "learning_rate": 1.7082725129931015e-06, + "loss": 0.6053, + "step": 24043 + }, + { + "epoch": 0.7369130807895059, + "grad_norm": 2.205531654078606, + "learning_rate": 1.7078989428890176e-06, + "loss": 0.5381, + "step": 24044 + }, + { + "epoch": 0.7369437293122472, + "grad_norm": 1.7884332525940074, + "learning_rate": 1.707525405222849e-06, + "loss": 0.5215, + "step": 24045 + }, + { + "epoch": 0.7369743778349883, + "grad_norm": 2.030383379158576, + "learning_rate": 1.7071518999982756e-06, + "loss": 0.5961, + "step": 24046 + }, + { + "epoch": 0.7370050263577296, + "grad_norm": 1.952451595505761, + "learning_rate": 1.706778427218973e-06, + "loss": 0.5734, + "step": 24047 + }, + { + "epoch": 0.7370356748804707, + "grad_norm": 1.7764004705716503, + "learning_rate": 1.706404986888629e-06, + "loss": 0.6025, + "step": 24048 + }, + { + "epoch": 0.737066323403212, + "grad_norm": 1.8698515131449114, + "learning_rate": 1.7060315790109195e-06, + "loss": 0.4871, + "step": 24049 + }, + { + "epoch": 0.7370969719259531, + "grad_norm": 1.7564034519536496, + "learning_rate": 1.7056582035895213e-06, + "loss": 0.5414, + "step": 24050 + }, + { + "epoch": 0.7371276204486944, + "grad_norm": 1.7069360143454901, + "learning_rate": 
1.7052848606281164e-06, + "loss": 0.5699, + "step": 24051 + }, + { + "epoch": 0.7371582689714355, + "grad_norm": 2.064321752671944, + "learning_rate": 1.7049115501303827e-06, + "loss": 0.6963, + "step": 24052 + }, + { + "epoch": 0.7371889174941768, + "grad_norm": 1.7924722103242556, + "learning_rate": 1.7045382720999997e-06, + "loss": 0.5234, + "step": 24053 + }, + { + "epoch": 0.737219566016918, + "grad_norm": 1.853975944058435, + "learning_rate": 1.7041650265406428e-06, + "loss": 0.5774, + "step": 24054 + }, + { + "epoch": 0.7372502145396592, + "grad_norm": 1.9331853236703607, + "learning_rate": 1.7037918134559917e-06, + "loss": 0.5034, + "step": 24055 + }, + { + "epoch": 0.7372808630624004, + "grad_norm": 0.775436815961532, + "learning_rate": 1.7034186328497243e-06, + "loss": 0.3967, + "step": 24056 + }, + { + "epoch": 0.7373115115851416, + "grad_norm": 1.7969621963997657, + "learning_rate": 1.7030454847255168e-06, + "loss": 0.4372, + "step": 24057 + }, + { + "epoch": 0.7373421601078828, + "grad_norm": 0.8283680075863681, + "learning_rate": 1.7026723690870422e-06, + "loss": 0.3903, + "step": 24058 + }, + { + "epoch": 0.737372808630624, + "grad_norm": 1.5691687239531171, + "learning_rate": 1.7022992859379844e-06, + "loss": 0.4403, + "step": 24059 + }, + { + "epoch": 0.7374034571533652, + "grad_norm": 0.7721453342117082, + "learning_rate": 1.7019262352820132e-06, + "loss": 0.3965, + "step": 24060 + }, + { + "epoch": 0.7374341056761065, + "grad_norm": 2.0069182830787557, + "learning_rate": 1.7015532171228083e-06, + "loss": 0.7018, + "step": 24061 + }, + { + "epoch": 0.7374647541988476, + "grad_norm": 1.6931630841790974, + "learning_rate": 1.7011802314640418e-06, + "loss": 0.5478, + "step": 24062 + }, + { + "epoch": 0.7374954027215889, + "grad_norm": 0.8030223630219183, + "learning_rate": 1.7008072783093909e-06, + "loss": 0.3726, + "step": 24063 + }, + { + "epoch": 0.73752605124433, + "grad_norm": 1.941652870570404, + "learning_rate": 1.7004343576625315e-06, + "loss": 0.586, + "step": 24064 + }, + { + "epoch": 0.7375566997670713, + "grad_norm": 1.8595896340846658, + "learning_rate": 1.700061469527135e-06, + "loss": 0.524, + "step": 24065 + }, + { + "epoch": 0.7375873482898124, + "grad_norm": 1.8699534449343103, + "learning_rate": 1.699688613906877e-06, + "loss": 0.6213, + "step": 24066 + }, + { + "epoch": 0.7376179968125537, + "grad_norm": 1.9340189881680847, + "learning_rate": 1.6993157908054335e-06, + "loss": 0.5126, + "step": 24067 + }, + { + "epoch": 0.7376486453352948, + "grad_norm": 0.8072833266069257, + "learning_rate": 1.6989430002264757e-06, + "loss": 0.4232, + "step": 24068 + }, + { + "epoch": 0.737679293858036, + "grad_norm": 1.6774475058257272, + "learning_rate": 1.698570242173674e-06, + "loss": 0.5493, + "step": 24069 + }, + { + "epoch": 0.7377099423807773, + "grad_norm": 1.9161680577075912, + "learning_rate": 1.6981975166507076e-06, + "loss": 0.5127, + "step": 24070 + }, + { + "epoch": 0.7377405909035184, + "grad_norm": 1.826611033862438, + "learning_rate": 1.6978248236612443e-06, + "loss": 0.6271, + "step": 24071 + }, + { + "epoch": 0.7377712394262597, + "grad_norm": 0.7730443581295793, + "learning_rate": 1.6974521632089597e-06, + "loss": 0.3867, + "step": 24072 + }, + { + "epoch": 0.7378018879490008, + "grad_norm": 1.7671696136984223, + "learning_rate": 1.6970795352975216e-06, + "loss": 0.6295, + "step": 24073 + }, + { + "epoch": 0.7378325364717421, + "grad_norm": 1.978551704947063, + "learning_rate": 1.6967069399306047e-06, + "loss": 0.6516, + "step": 24074 + }, + { + 
"epoch": 0.7378631849944832, + "grad_norm": 0.7662092219454187, + "learning_rate": 1.6963343771118806e-06, + "loss": 0.419, + "step": 24075 + }, + { + "epoch": 0.7378938335172245, + "grad_norm": 1.7382233875415467, + "learning_rate": 1.6959618468450179e-06, + "loss": 0.5499, + "step": 24076 + }, + { + "epoch": 0.7379244820399656, + "grad_norm": 1.8988648795304006, + "learning_rate": 1.6955893491336884e-06, + "loss": 0.5785, + "step": 24077 + }, + { + "epoch": 0.7379551305627069, + "grad_norm": 1.7747995430997143, + "learning_rate": 1.695216883981564e-06, + "loss": 0.6275, + "step": 24078 + }, + { + "epoch": 0.737985779085448, + "grad_norm": 1.7100988118112566, + "learning_rate": 1.6948444513923118e-06, + "loss": 0.5739, + "step": 24079 + }, + { + "epoch": 0.7380164276081893, + "grad_norm": 1.8620807298716655, + "learning_rate": 1.6944720513696045e-06, + "loss": 0.5067, + "step": 24080 + }, + { + "epoch": 0.7380470761309305, + "grad_norm": 1.1763667017258288, + "learning_rate": 1.694099683917108e-06, + "loss": 0.3846, + "step": 24081 + }, + { + "epoch": 0.7380777246536717, + "grad_norm": 1.8457987459412943, + "learning_rate": 1.6937273490384936e-06, + "loss": 0.5957, + "step": 24082 + }, + { + "epoch": 0.7381083731764129, + "grad_norm": 1.908813999059978, + "learning_rate": 1.693355046737431e-06, + "loss": 0.6566, + "step": 24083 + }, + { + "epoch": 0.7381390216991541, + "grad_norm": 1.7195108906972316, + "learning_rate": 1.6929827770175849e-06, + "loss": 0.5041, + "step": 24084 + }, + { + "epoch": 0.7381696702218953, + "grad_norm": 1.9377871759166183, + "learning_rate": 1.6926105398826264e-06, + "loss": 0.5524, + "step": 24085 + }, + { + "epoch": 0.7382003187446365, + "grad_norm": 1.8402373558146645, + "learning_rate": 1.6922383353362237e-06, + "loss": 0.5621, + "step": 24086 + }, + { + "epoch": 0.7382309672673777, + "grad_norm": 1.831718349498934, + "learning_rate": 1.6918661633820415e-06, + "loss": 0.5807, + "step": 24087 + }, + { + "epoch": 0.738261615790119, + "grad_norm": 1.8658995005175902, + "learning_rate": 1.6914940240237486e-06, + "loss": 0.5153, + "step": 24088 + }, + { + "epoch": 0.7382922643128601, + "grad_norm": 1.8626079953471335, + "learning_rate": 1.6911219172650133e-06, + "loss": 0.5704, + "step": 24089 + }, + { + "epoch": 0.7383229128356014, + "grad_norm": 1.8084816188275739, + "learning_rate": 1.690749843109498e-06, + "loss": 0.586, + "step": 24090 + }, + { + "epoch": 0.7383535613583425, + "grad_norm": 1.8296876382296925, + "learning_rate": 1.690377801560874e-06, + "loss": 0.6381, + "step": 24091 + }, + { + "epoch": 0.7383842098810838, + "grad_norm": 1.5531470187579903, + "learning_rate": 1.690005792622802e-06, + "loss": 0.5967, + "step": 24092 + }, + { + "epoch": 0.7384148584038249, + "grad_norm": 1.748830631636864, + "learning_rate": 1.6896338162989494e-06, + "loss": 0.5439, + "step": 24093 + }, + { + "epoch": 0.7384455069265662, + "grad_norm": 1.6339649765912923, + "learning_rate": 1.6892618725929843e-06, + "loss": 0.6516, + "step": 24094 + }, + { + "epoch": 0.7384761554493073, + "grad_norm": 1.7267064456680499, + "learning_rate": 1.6888899615085668e-06, + "loss": 0.5277, + "step": 24095 + }, + { + "epoch": 0.7385068039720486, + "grad_norm": 0.7705694568587557, + "learning_rate": 1.688518083049364e-06, + "loss": 0.4397, + "step": 24096 + }, + { + "epoch": 0.7385374524947897, + "grad_norm": 1.7131068191721142, + "learning_rate": 1.6881462372190415e-06, + "loss": 0.5601, + "step": 24097 + }, + { + "epoch": 0.738568101017531, + "grad_norm": 1.8971957456471915, + 
"learning_rate": 1.6877744240212596e-06, + "loss": 0.5777, + "step": 24098 + }, + { + "epoch": 0.7385987495402722, + "grad_norm": 2.4292370775707477, + "learning_rate": 1.687402643459684e-06, + "loss": 0.637, + "step": 24099 + }, + { + "epoch": 0.7386293980630133, + "grad_norm": 1.6376083731974438, + "learning_rate": 1.6870308955379795e-06, + "loss": 0.5984, + "step": 24100 + }, + { + "epoch": 0.7386600465857546, + "grad_norm": 1.7752869237108555, + "learning_rate": 1.6866591802598054e-06, + "loss": 0.6569, + "step": 24101 + }, + { + "epoch": 0.7386906951084957, + "grad_norm": 1.9024997861619335, + "learning_rate": 1.6862874976288274e-06, + "loss": 0.5845, + "step": 24102 + }, + { + "epoch": 0.738721343631237, + "grad_norm": 1.832857099028389, + "learning_rate": 1.6859158476487053e-06, + "loss": 0.6093, + "step": 24103 + }, + { + "epoch": 0.7387519921539781, + "grad_norm": 1.6841789015952051, + "learning_rate": 1.6855442303231023e-06, + "loss": 0.6333, + "step": 24104 + }, + { + "epoch": 0.7387826406767194, + "grad_norm": 1.6135827727813945, + "learning_rate": 1.6851726456556816e-06, + "loss": 0.5596, + "step": 24105 + }, + { + "epoch": 0.7388132891994605, + "grad_norm": 0.7476955279542156, + "learning_rate": 1.6848010936501014e-06, + "loss": 0.3994, + "step": 24106 + }, + { + "epoch": 0.7388439377222018, + "grad_norm": 1.76420088723063, + "learning_rate": 1.6844295743100243e-06, + "loss": 0.6109, + "step": 24107 + }, + { + "epoch": 0.738874586244943, + "grad_norm": 1.7976132171673715, + "learning_rate": 1.6840580876391126e-06, + "loss": 0.5927, + "step": 24108 + }, + { + "epoch": 0.7389052347676842, + "grad_norm": 1.9294232094042851, + "learning_rate": 1.6836866336410229e-06, + "loss": 0.5287, + "step": 24109 + }, + { + "epoch": 0.7389358832904254, + "grad_norm": 1.98300269015024, + "learning_rate": 1.683315212319418e-06, + "loss": 0.6067, + "step": 24110 + }, + { + "epoch": 0.7389665318131666, + "grad_norm": 1.8494067495101383, + "learning_rate": 1.6829438236779582e-06, + "loss": 0.5443, + "step": 24111 + }, + { + "epoch": 0.7389971803359078, + "grad_norm": 0.832623443149836, + "learning_rate": 1.6825724677202998e-06, + "loss": 0.4114, + "step": 24112 + }, + { + "epoch": 0.739027828858649, + "grad_norm": 1.9059858062827255, + "learning_rate": 1.6822011444501058e-06, + "loss": 0.6145, + "step": 24113 + }, + { + "epoch": 0.7390584773813902, + "grad_norm": 1.7646214292898814, + "learning_rate": 1.6818298538710287e-06, + "loss": 0.5992, + "step": 24114 + }, + { + "epoch": 0.7390891259041314, + "grad_norm": 1.6259842981493777, + "learning_rate": 1.6814585959867353e-06, + "loss": 0.5247, + "step": 24115 + }, + { + "epoch": 0.7391197744268726, + "grad_norm": 1.7688743110206133, + "learning_rate": 1.681087370800879e-06, + "loss": 0.6135, + "step": 24116 + }, + { + "epoch": 0.7391504229496139, + "grad_norm": 1.7314199601759754, + "learning_rate": 1.680716178317116e-06, + "loss": 0.6063, + "step": 24117 + }, + { + "epoch": 0.739181071472355, + "grad_norm": 1.7613701068947547, + "learning_rate": 1.6803450185391063e-06, + "loss": 0.5136, + "step": 24118 + }, + { + "epoch": 0.7392117199950963, + "grad_norm": 1.647230965055815, + "learning_rate": 1.6799738914705078e-06, + "loss": 0.5021, + "step": 24119 + }, + { + "epoch": 0.7392423685178374, + "grad_norm": 1.8652583712323285, + "learning_rate": 1.6796027971149748e-06, + "loss": 0.6538, + "step": 24120 + }, + { + "epoch": 0.7392730170405787, + "grad_norm": 1.667351138176356, + "learning_rate": 1.6792317354761644e-06, + "loss": 0.548, + "step": 24121 + 
}, + { + "epoch": 0.7393036655633198, + "grad_norm": 1.7442880792053652, + "learning_rate": 1.6788607065577355e-06, + "loss": 0.5691, + "step": 24122 + }, + { + "epoch": 0.7393343140860611, + "grad_norm": 1.797847613227407, + "learning_rate": 1.6784897103633401e-06, + "loss": 0.6725, + "step": 24123 + }, + { + "epoch": 0.7393649626088022, + "grad_norm": 1.5774448046423182, + "learning_rate": 1.678118746896637e-06, + "loss": 0.4932, + "step": 24124 + }, + { + "epoch": 0.7393956111315435, + "grad_norm": 1.6855004215805103, + "learning_rate": 1.6777478161612781e-06, + "loss": 0.617, + "step": 24125 + }, + { + "epoch": 0.7394262596542847, + "grad_norm": 1.6431411529972224, + "learning_rate": 1.6773769181609201e-06, + "loss": 0.4547, + "step": 24126 + }, + { + "epoch": 0.7394569081770259, + "grad_norm": 0.8247397482241303, + "learning_rate": 1.6770060528992194e-06, + "loss": 0.4156, + "step": 24127 + }, + { + "epoch": 0.7394875566997671, + "grad_norm": 1.7850023007926634, + "learning_rate": 1.676635220379826e-06, + "loss": 0.5368, + "step": 24128 + }, + { + "epoch": 0.7395182052225083, + "grad_norm": 1.7732554476151263, + "learning_rate": 1.6762644206063967e-06, + "loss": 0.6028, + "step": 24129 + }, + { + "epoch": 0.7395488537452495, + "grad_norm": 1.637207421399197, + "learning_rate": 1.6758936535825853e-06, + "loss": 0.5027, + "step": 24130 + }, + { + "epoch": 0.7395795022679906, + "grad_norm": 0.8253674810815347, + "learning_rate": 1.6755229193120437e-06, + "loss": 0.4258, + "step": 24131 + }, + { + "epoch": 0.7396101507907319, + "grad_norm": 0.788935433581776, + "learning_rate": 1.6751522177984264e-06, + "loss": 0.4001, + "step": 24132 + }, + { + "epoch": 0.739640799313473, + "grad_norm": 1.9573198018727667, + "learning_rate": 1.6747815490453816e-06, + "loss": 0.6156, + "step": 24133 + }, + { + "epoch": 0.7396714478362143, + "grad_norm": 1.8655001018409787, + "learning_rate": 1.6744109130565684e-06, + "loss": 0.6395, + "step": 24134 + }, + { + "epoch": 0.7397020963589555, + "grad_norm": 0.849143879977456, + "learning_rate": 1.6740403098356357e-06, + "loss": 0.3903, + "step": 24135 + }, + { + "epoch": 0.7397327448816967, + "grad_norm": 1.8940380683949292, + "learning_rate": 1.6736697393862328e-06, + "loss": 0.5729, + "step": 24136 + }, + { + "epoch": 0.7397633934044379, + "grad_norm": 2.016673908922913, + "learning_rate": 1.673299201712013e-06, + "loss": 0.5788, + "step": 24137 + }, + { + "epoch": 0.7397940419271791, + "grad_norm": 1.9725412738233832, + "learning_rate": 1.6729286968166291e-06, + "loss": 0.5634, + "step": 24138 + }, + { + "epoch": 0.7398246904499203, + "grad_norm": 1.5970140048230188, + "learning_rate": 1.672558224703728e-06, + "loss": 0.5918, + "step": 24139 + }, + { + "epoch": 0.7398553389726615, + "grad_norm": 2.082901820620476, + "learning_rate": 1.6721877853769624e-06, + "loss": 0.6617, + "step": 24140 + }, + { + "epoch": 0.7398859874954027, + "grad_norm": 1.6171484817693647, + "learning_rate": 1.6718173788399822e-06, + "loss": 0.4959, + "step": 24141 + }, + { + "epoch": 0.739916636018144, + "grad_norm": 2.090888046654332, + "learning_rate": 1.6714470050964387e-06, + "loss": 0.6275, + "step": 24142 + }, + { + "epoch": 0.7399472845408851, + "grad_norm": 1.761833263682472, + "learning_rate": 1.6710766641499793e-06, + "loss": 0.5591, + "step": 24143 + }, + { + "epoch": 0.7399779330636264, + "grad_norm": 1.8758378617745404, + "learning_rate": 1.6707063560042497e-06, + "loss": 0.5279, + "step": 24144 + }, + { + "epoch": 0.7400085815863675, + "grad_norm": 0.7894765759511093, 
+ "learning_rate": 1.6703360806629055e-06, + "loss": 0.4071, + "step": 24145 + }, + { + "epoch": 0.7400392301091088, + "grad_norm": 1.815398128242508, + "learning_rate": 1.6699658381295919e-06, + "loss": 0.5387, + "step": 24146 + }, + { + "epoch": 0.7400698786318499, + "grad_norm": 2.1844226249953493, + "learning_rate": 1.6695956284079557e-06, + "loss": 0.6147, + "step": 24147 + }, + { + "epoch": 0.7401005271545912, + "grad_norm": 0.8129615095365459, + "learning_rate": 1.6692254515016455e-06, + "loss": 0.4131, + "step": 24148 + }, + { + "epoch": 0.7401311756773323, + "grad_norm": 1.7386662642334927, + "learning_rate": 1.668855307414311e-06, + "loss": 0.5021, + "step": 24149 + }, + { + "epoch": 0.7401618242000736, + "grad_norm": 1.78752165874345, + "learning_rate": 1.6684851961495956e-06, + "loss": 0.5586, + "step": 24150 + }, + { + "epoch": 0.7401924727228147, + "grad_norm": 1.8568503465927733, + "learning_rate": 1.6681151177111482e-06, + "loss": 0.5517, + "step": 24151 + }, + { + "epoch": 0.740223121245556, + "grad_norm": 1.9031700833934044, + "learning_rate": 1.667745072102615e-06, + "loss": 0.5708, + "step": 24152 + }, + { + "epoch": 0.7402537697682972, + "grad_norm": 1.9326690040003038, + "learning_rate": 1.6673750593276433e-06, + "loss": 0.5873, + "step": 24153 + }, + { + "epoch": 0.7402844182910384, + "grad_norm": 1.7599279858888774, + "learning_rate": 1.6670050793898785e-06, + "loss": 0.5906, + "step": 24154 + }, + { + "epoch": 0.7403150668137796, + "grad_norm": 1.8736572442021195, + "learning_rate": 1.6666351322929618e-06, + "loss": 0.5639, + "step": 24155 + }, + { + "epoch": 0.7403457153365208, + "grad_norm": 1.811254105526503, + "learning_rate": 1.6662652180405458e-06, + "loss": 0.5279, + "step": 24156 + }, + { + "epoch": 0.740376363859262, + "grad_norm": 1.8392648011322967, + "learning_rate": 1.6658953366362713e-06, + "loss": 0.6369, + "step": 24157 + }, + { + "epoch": 0.7404070123820032, + "grad_norm": 1.728125527095768, + "learning_rate": 1.6655254880837812e-06, + "loss": 0.5986, + "step": 24158 + }, + { + "epoch": 0.7404376609047444, + "grad_norm": 1.830146849712259, + "learning_rate": 1.6651556723867219e-06, + "loss": 0.5563, + "step": 24159 + }, + { + "epoch": 0.7404683094274856, + "grad_norm": 2.0322158486860062, + "learning_rate": 1.6647858895487368e-06, + "loss": 0.5548, + "step": 24160 + }, + { + "epoch": 0.7404989579502268, + "grad_norm": 1.8161469505059624, + "learning_rate": 1.6644161395734715e-06, + "loss": 0.6417, + "step": 24161 + }, + { + "epoch": 0.740529606472968, + "grad_norm": 0.7531913264220548, + "learning_rate": 1.6640464224645657e-06, + "loss": 0.3997, + "step": 24162 + }, + { + "epoch": 0.7405602549957092, + "grad_norm": 1.8980081403201592, + "learning_rate": 1.6636767382256641e-06, + "loss": 0.6282, + "step": 24163 + }, + { + "epoch": 0.7405909035184504, + "grad_norm": 1.9298081432984497, + "learning_rate": 1.6633070868604107e-06, + "loss": 0.563, + "step": 24164 + }, + { + "epoch": 0.7406215520411916, + "grad_norm": 0.849624859426378, + "learning_rate": 1.6629374683724465e-06, + "loss": 0.4132, + "step": 24165 + }, + { + "epoch": 0.7406522005639328, + "grad_norm": 0.776519284136179, + "learning_rate": 1.6625678827654102e-06, + "loss": 0.3916, + "step": 24166 + }, + { + "epoch": 0.740682849086674, + "grad_norm": 1.6652539929351595, + "learning_rate": 1.6621983300429495e-06, + "loss": 0.5366, + "step": 24167 + }, + { + "epoch": 0.7407134976094152, + "grad_norm": 1.8829882706715957, + "learning_rate": 1.6618288102087026e-06, + "loss": 0.5649, + "step": 
24168 + }, + { + "epoch": 0.7407441461321564, + "grad_norm": 1.7735075319957023, + "learning_rate": 1.6614593232663089e-06, + "loss": 0.5164, + "step": 24169 + }, + { + "epoch": 0.7407747946548976, + "grad_norm": 1.5919993477873893, + "learning_rate": 1.6610898692194106e-06, + "loss": 0.5047, + "step": 24170 + }, + { + "epoch": 0.7408054431776389, + "grad_norm": 1.8482714773652458, + "learning_rate": 1.6607204480716483e-06, + "loss": 0.609, + "step": 24171 + }, + { + "epoch": 0.74083609170038, + "grad_norm": 1.7860345546713032, + "learning_rate": 1.6603510598266631e-06, + "loss": 0.6342, + "step": 24172 + }, + { + "epoch": 0.7408667402231213, + "grad_norm": 1.9409836918173038, + "learning_rate": 1.6599817044880923e-06, + "loss": 0.6567, + "step": 24173 + }, + { + "epoch": 0.7408973887458624, + "grad_norm": 1.7290076323442716, + "learning_rate": 1.659612382059576e-06, + "loss": 0.5574, + "step": 24174 + }, + { + "epoch": 0.7409280372686037, + "grad_norm": 1.9652687153377029, + "learning_rate": 1.6592430925447557e-06, + "loss": 0.6518, + "step": 24175 + }, + { + "epoch": 0.7409586857913448, + "grad_norm": 1.8866220408075132, + "learning_rate": 1.6588738359472672e-06, + "loss": 0.6983, + "step": 24176 + }, + { + "epoch": 0.7409893343140861, + "grad_norm": 2.0017912707071934, + "learning_rate": 1.6585046122707489e-06, + "loss": 0.6626, + "step": 24177 + }, + { + "epoch": 0.7410199828368272, + "grad_norm": 0.8623277572024906, + "learning_rate": 1.658135421518839e-06, + "loss": 0.4213, + "step": 24178 + }, + { + "epoch": 0.7410506313595685, + "grad_norm": 1.8427770407992852, + "learning_rate": 1.6577662636951758e-06, + "loss": 0.6615, + "step": 24179 + }, + { + "epoch": 0.7410812798823097, + "grad_norm": 1.8423158420937706, + "learning_rate": 1.6573971388033989e-06, + "loss": 0.6999, + "step": 24180 + }, + { + "epoch": 0.7411119284050509, + "grad_norm": 0.850092647730154, + "learning_rate": 1.6570280468471412e-06, + "loss": 0.4103, + "step": 24181 + }, + { + "epoch": 0.7411425769277921, + "grad_norm": 1.720942781501406, + "learning_rate": 1.6566589878300416e-06, + "loss": 0.5811, + "step": 24182 + }, + { + "epoch": 0.7411732254505333, + "grad_norm": 1.8663969134795149, + "learning_rate": 1.6562899617557377e-06, + "loss": 0.6886, + "step": 24183 + }, + { + "epoch": 0.7412038739732745, + "grad_norm": 2.191049375239189, + "learning_rate": 1.6559209686278648e-06, + "loss": 0.6133, + "step": 24184 + }, + { + "epoch": 0.7412345224960157, + "grad_norm": 1.8652217288801451, + "learning_rate": 1.655552008450055e-06, + "loss": 0.472, + "step": 24185 + }, + { + "epoch": 0.7412651710187569, + "grad_norm": 0.814557886969701, + "learning_rate": 1.6551830812259494e-06, + "loss": 0.4112, + "step": 24186 + }, + { + "epoch": 0.7412958195414981, + "grad_norm": 1.746553246550039, + "learning_rate": 1.65481418695918e-06, + "loss": 0.5385, + "step": 24187 + }, + { + "epoch": 0.7413264680642393, + "grad_norm": 1.9104602542920937, + "learning_rate": 1.6544453256533838e-06, + "loss": 0.6537, + "step": 24188 + }, + { + "epoch": 0.7413571165869806, + "grad_norm": 1.7084233085744944, + "learning_rate": 1.654076497312192e-06, + "loss": 0.5884, + "step": 24189 + }, + { + "epoch": 0.7413877651097217, + "grad_norm": 1.9014348792551758, + "learning_rate": 1.6537077019392406e-06, + "loss": 0.5051, + "step": 24190 + }, + { + "epoch": 0.741418413632463, + "grad_norm": 1.9388751178853816, + "learning_rate": 1.653338939538165e-06, + "loss": 0.6378, + "step": 24191 + }, + { + "epoch": 0.7414490621552041, + "grad_norm": 
0.7661092399573848, + "learning_rate": 1.6529702101125955e-06, + "loss": 0.4048, + "step": 24192 + }, + { + "epoch": 0.7414797106779453, + "grad_norm": 1.7680805147287453, + "learning_rate": 1.6526015136661666e-06, + "loss": 0.6302, + "step": 24193 + }, + { + "epoch": 0.7415103592006865, + "grad_norm": 1.8353069551274914, + "learning_rate": 1.6522328502025137e-06, + "loss": 0.6561, + "step": 24194 + }, + { + "epoch": 0.7415410077234277, + "grad_norm": 1.7844305436521022, + "learning_rate": 1.6518642197252666e-06, + "loss": 0.6237, + "step": 24195 + }, + { + "epoch": 0.7415716562461689, + "grad_norm": 0.8488845638172405, + "learning_rate": 1.6514956222380552e-06, + "loss": 0.4055, + "step": 24196 + }, + { + "epoch": 0.7416023047689101, + "grad_norm": 1.8103731692631, + "learning_rate": 1.6511270577445171e-06, + "loss": 0.549, + "step": 24197 + }, + { + "epoch": 0.7416329532916514, + "grad_norm": 1.8245091793462564, + "learning_rate": 1.650758526248279e-06, + "loss": 0.521, + "step": 24198 + }, + { + "epoch": 0.7416636018143925, + "grad_norm": 1.7095690382304052, + "learning_rate": 1.6503900277529761e-06, + "loss": 0.4977, + "step": 24199 + }, + { + "epoch": 0.7416942503371338, + "grad_norm": 1.909902947475136, + "learning_rate": 1.6500215622622356e-06, + "loss": 0.6123, + "step": 24200 + }, + { + "epoch": 0.7417248988598749, + "grad_norm": 1.9255899530557103, + "learning_rate": 1.6496531297796902e-06, + "loss": 0.5994, + "step": 24201 + }, + { + "epoch": 0.7417555473826162, + "grad_norm": 0.8060436833963406, + "learning_rate": 1.649284730308971e-06, + "loss": 0.3956, + "step": 24202 + }, + { + "epoch": 0.7417861959053573, + "grad_norm": 1.7509613941438307, + "learning_rate": 1.6489163638537048e-06, + "loss": 0.6218, + "step": 24203 + }, + { + "epoch": 0.7418168444280986, + "grad_norm": 1.7223848273886915, + "learning_rate": 1.6485480304175232e-06, + "loss": 0.4976, + "step": 24204 + }, + { + "epoch": 0.7418474929508397, + "grad_norm": 1.8070522253721197, + "learning_rate": 1.648179730004057e-06, + "loss": 0.6155, + "step": 24205 + }, + { + "epoch": 0.741878141473581, + "grad_norm": 0.7933377649684685, + "learning_rate": 1.6478114626169322e-06, + "loss": 0.4157, + "step": 24206 + }, + { + "epoch": 0.7419087899963221, + "grad_norm": 0.8135516387951294, + "learning_rate": 1.6474432282597784e-06, + "loss": 0.4074, + "step": 24207 + }, + { + "epoch": 0.7419394385190634, + "grad_norm": 1.808994212163182, + "learning_rate": 1.6470750269362263e-06, + "loss": 0.6506, + "step": 24208 + }, + { + "epoch": 0.7419700870418046, + "grad_norm": 1.6566832720158342, + "learning_rate": 1.6467068586498997e-06, + "loss": 0.481, + "step": 24209 + }, + { + "epoch": 0.7420007355645458, + "grad_norm": 1.8835746118013175, + "learning_rate": 1.6463387234044303e-06, + "loss": 0.5837, + "step": 24210 + }, + { + "epoch": 0.742031384087287, + "grad_norm": 1.8479277194401922, + "learning_rate": 1.6459706212034421e-06, + "loss": 0.5806, + "step": 24211 + }, + { + "epoch": 0.7420620326100282, + "grad_norm": 1.733586150309773, + "learning_rate": 1.6456025520505631e-06, + "loss": 0.5522, + "step": 24212 + }, + { + "epoch": 0.7420926811327694, + "grad_norm": 2.0446646016276753, + "learning_rate": 1.6452345159494222e-06, + "loss": 0.6114, + "step": 24213 + }, + { + "epoch": 0.7421233296555106, + "grad_norm": 0.8094135077733344, + "learning_rate": 1.6448665129036423e-06, + "loss": 0.3921, + "step": 24214 + }, + { + "epoch": 0.7421539781782518, + "grad_norm": 1.8090437584845198, + "learning_rate": 1.6444985429168514e-06, + "loss": 
0.6164, + "step": 24215 + }, + { + "epoch": 0.742184626700993, + "grad_norm": 0.8124044411692609, + "learning_rate": 1.6441306059926765e-06, + "loss": 0.4332, + "step": 24216 + }, + { + "epoch": 0.7422152752237342, + "grad_norm": 1.5935798987002117, + "learning_rate": 1.643762702134739e-06, + "loss": 0.617, + "step": 24217 + }, + { + "epoch": 0.7422459237464755, + "grad_norm": 0.7874337301698, + "learning_rate": 1.6433948313466675e-06, + "loss": 0.4022, + "step": 24218 + }, + { + "epoch": 0.7422765722692166, + "grad_norm": 1.7265561083928682, + "learning_rate": 1.6430269936320864e-06, + "loss": 0.5677, + "step": 24219 + }, + { + "epoch": 0.7423072207919579, + "grad_norm": 1.9214615118424627, + "learning_rate": 1.6426591889946176e-06, + "loss": 0.6306, + "step": 24220 + }, + { + "epoch": 0.742337869314699, + "grad_norm": 1.9095133549993666, + "learning_rate": 1.642291417437889e-06, + "loss": 0.6937, + "step": 24221 + }, + { + "epoch": 0.7423685178374403, + "grad_norm": 1.7699191958023064, + "learning_rate": 1.6419236789655202e-06, + "loss": 0.5844, + "step": 24222 + }, + { + "epoch": 0.7423991663601814, + "grad_norm": 2.0510711373536767, + "learning_rate": 1.6415559735811365e-06, + "loss": 0.6042, + "step": 24223 + }, + { + "epoch": 0.7424298148829226, + "grad_norm": 1.9470676938955167, + "learning_rate": 1.641188301288363e-06, + "loss": 0.6174, + "step": 24224 + }, + { + "epoch": 0.7424604634056639, + "grad_norm": 1.7192546872407772, + "learning_rate": 1.6408206620908185e-06, + "loss": 0.4799, + "step": 24225 + }, + { + "epoch": 0.742491111928405, + "grad_norm": 1.657014250842389, + "learning_rate": 1.6404530559921279e-06, + "loss": 0.5462, + "step": 24226 + }, + { + "epoch": 0.7425217604511463, + "grad_norm": 1.8831344641523768, + "learning_rate": 1.640085482995914e-06, + "loss": 0.607, + "step": 24227 + }, + { + "epoch": 0.7425524089738874, + "grad_norm": 1.5866225750162803, + "learning_rate": 1.6397179431057965e-06, + "loss": 0.6203, + "step": 24228 + }, + { + "epoch": 0.7425830574966287, + "grad_norm": 1.8364506110821164, + "learning_rate": 1.6393504363253986e-06, + "loss": 0.5789, + "step": 24229 + }, + { + "epoch": 0.7426137060193698, + "grad_norm": 2.090955871066241, + "learning_rate": 1.6389829626583404e-06, + "loss": 0.5707, + "step": 24230 + }, + { + "epoch": 0.7426443545421111, + "grad_norm": 1.9213924113127523, + "learning_rate": 1.6386155221082422e-06, + "loss": 0.6005, + "step": 24231 + }, + { + "epoch": 0.7426750030648522, + "grad_norm": 1.701092093988014, + "learning_rate": 1.6382481146787272e-06, + "loss": 0.6813, + "step": 24232 + }, + { + "epoch": 0.7427056515875935, + "grad_norm": 2.0467077871549257, + "learning_rate": 1.6378807403734115e-06, + "loss": 0.6894, + "step": 24233 + }, + { + "epoch": 0.7427363001103346, + "grad_norm": 1.92579645798093, + "learning_rate": 1.6375133991959174e-06, + "loss": 0.6131, + "step": 24234 + }, + { + "epoch": 0.7427669486330759, + "grad_norm": 1.7032873296121482, + "learning_rate": 1.637146091149866e-06, + "loss": 0.5614, + "step": 24235 + }, + { + "epoch": 0.7427975971558171, + "grad_norm": 1.7497895655358036, + "learning_rate": 1.6367788162388732e-06, + "loss": 0.5153, + "step": 24236 + }, + { + "epoch": 0.7428282456785583, + "grad_norm": 1.803728666953605, + "learning_rate": 1.6364115744665588e-06, + "loss": 0.6004, + "step": 24237 + }, + { + "epoch": 0.7428588942012995, + "grad_norm": 1.8779210714489125, + "learning_rate": 1.6360443658365433e-06, + "loss": 0.5841, + "step": 24238 + }, + { + "epoch": 0.7428895427240407, + 
"grad_norm": 0.7741960833215877, + "learning_rate": 1.6356771903524416e-06, + "loss": 0.3962, + "step": 24239 + }, + { + "epoch": 0.7429201912467819, + "grad_norm": 0.7805286052390958, + "learning_rate": 1.6353100480178756e-06, + "loss": 0.3982, + "step": 24240 + }, + { + "epoch": 0.7429508397695231, + "grad_norm": 1.6680989099778234, + "learning_rate": 1.6349429388364568e-06, + "loss": 0.5704, + "step": 24241 + }, + { + "epoch": 0.7429814882922643, + "grad_norm": 1.8485027003918197, + "learning_rate": 1.6345758628118096e-06, + "loss": 0.5934, + "step": 24242 + }, + { + "epoch": 0.7430121368150056, + "grad_norm": 1.6951427507123809, + "learning_rate": 1.6342088199475475e-06, + "loss": 0.6339, + "step": 24243 + }, + { + "epoch": 0.7430427853377467, + "grad_norm": 1.3852841652959413, + "learning_rate": 1.6338418102472857e-06, + "loss": 0.5011, + "step": 24244 + }, + { + "epoch": 0.743073433860488, + "grad_norm": 2.046969934082676, + "learning_rate": 1.6334748337146417e-06, + "loss": 0.5692, + "step": 24245 + }, + { + "epoch": 0.7431040823832291, + "grad_norm": 1.9462280472359281, + "learning_rate": 1.6331078903532332e-06, + "loss": 0.6385, + "step": 24246 + }, + { + "epoch": 0.7431347309059704, + "grad_norm": 0.8118441522760563, + "learning_rate": 1.6327409801666722e-06, + "loss": 0.3911, + "step": 24247 + }, + { + "epoch": 0.7431653794287115, + "grad_norm": 1.7163216866516242, + "learning_rate": 1.632374103158576e-06, + "loss": 0.5693, + "step": 24248 + }, + { + "epoch": 0.7431960279514528, + "grad_norm": 1.9359365996464906, + "learning_rate": 1.6320072593325608e-06, + "loss": 0.5652, + "step": 24249 + }, + { + "epoch": 0.7432266764741939, + "grad_norm": 1.8867002399917792, + "learning_rate": 1.631640448692239e-06, + "loss": 0.6133, + "step": 24250 + }, + { + "epoch": 0.7432573249969352, + "grad_norm": 1.9178933052486085, + "learning_rate": 1.6312736712412264e-06, + "loss": 0.616, + "step": 24251 + }, + { + "epoch": 0.7432879735196763, + "grad_norm": 0.755938150159709, + "learning_rate": 1.6309069269831334e-06, + "loss": 0.3876, + "step": 24252 + }, + { + "epoch": 0.7433186220424176, + "grad_norm": 1.8264055534548036, + "learning_rate": 1.6305402159215799e-06, + "loss": 0.6136, + "step": 24253 + }, + { + "epoch": 0.7433492705651588, + "grad_norm": 1.6176531655592468, + "learning_rate": 1.6301735380601751e-06, + "loss": 0.6309, + "step": 24254 + }, + { + "epoch": 0.7433799190878999, + "grad_norm": 1.6189965892544331, + "learning_rate": 1.6298068934025318e-06, + "loss": 0.4862, + "step": 24255 + }, + { + "epoch": 0.7434105676106412, + "grad_norm": 1.6932536577991302, + "learning_rate": 1.6294402819522632e-06, + "loss": 0.5737, + "step": 24256 + }, + { + "epoch": 0.7434412161333823, + "grad_norm": 1.9952910788130322, + "learning_rate": 1.6290737037129834e-06, + "loss": 0.6189, + "step": 24257 + }, + { + "epoch": 0.7434718646561236, + "grad_norm": 1.860948961295285, + "learning_rate": 1.6287071586883014e-06, + "loss": 0.5657, + "step": 24258 + }, + { + "epoch": 0.7435025131788647, + "grad_norm": 1.8100401486570614, + "learning_rate": 1.6283406468818303e-06, + "loss": 0.4752, + "step": 24259 + }, + { + "epoch": 0.743533161701606, + "grad_norm": 1.971362835161339, + "learning_rate": 1.627974168297181e-06, + "loss": 0.5502, + "step": 24260 + }, + { + "epoch": 0.7435638102243471, + "grad_norm": 1.9102748785623431, + "learning_rate": 1.6276077229379672e-06, + "loss": 0.5341, + "step": 24261 + }, + { + "epoch": 0.7435944587470884, + "grad_norm": 1.9119228089750515, + "learning_rate": 
1.6272413108077973e-06, + "loss": 0.5916, + "step": 24262 + }, + { + "epoch": 0.7436251072698296, + "grad_norm": 2.0020253332496427, + "learning_rate": 1.6268749319102784e-06, + "loss": 0.6902, + "step": 24263 + }, + { + "epoch": 0.7436557557925708, + "grad_norm": 0.7740583559059347, + "learning_rate": 1.626508586249027e-06, + "loss": 0.4198, + "step": 24264 + }, + { + "epoch": 0.743686404315312, + "grad_norm": 1.8429869607187526, + "learning_rate": 1.62614227382765e-06, + "loss": 0.639, + "step": 24265 + }, + { + "epoch": 0.7437170528380532, + "grad_norm": 1.5878503753115574, + "learning_rate": 1.6257759946497542e-06, + "loss": 0.4545, + "step": 24266 + }, + { + "epoch": 0.7437477013607944, + "grad_norm": 1.5811471835547861, + "learning_rate": 1.6254097487189513e-06, + "loss": 0.5965, + "step": 24267 + }, + { + "epoch": 0.7437783498835356, + "grad_norm": 0.8636557553631324, + "learning_rate": 1.6250435360388494e-06, + "loss": 0.4073, + "step": 24268 + }, + { + "epoch": 0.7438089984062768, + "grad_norm": 1.5990888972407737, + "learning_rate": 1.624677356613059e-06, + "loss": 0.4972, + "step": 24269 + }, + { + "epoch": 0.743839646929018, + "grad_norm": 1.6947268212102808, + "learning_rate": 1.624311210445184e-06, + "loss": 0.4985, + "step": 24270 + }, + { + "epoch": 0.7438702954517592, + "grad_norm": 2.068615590142298, + "learning_rate": 1.623945097538835e-06, + "loss": 0.6969, + "step": 24271 + }, + { + "epoch": 0.7439009439745005, + "grad_norm": 1.790476618274568, + "learning_rate": 1.62357901789762e-06, + "loss": 0.5223, + "step": 24272 + }, + { + "epoch": 0.7439315924972416, + "grad_norm": 2.041739646365549, + "learning_rate": 1.6232129715251449e-06, + "loss": 0.5245, + "step": 24273 + }, + { + "epoch": 0.7439622410199829, + "grad_norm": 1.700463823864323, + "learning_rate": 1.6228469584250151e-06, + "loss": 0.5742, + "step": 24274 + }, + { + "epoch": 0.743992889542724, + "grad_norm": 1.8164953434746791, + "learning_rate": 1.6224809786008377e-06, + "loss": 0.5417, + "step": 24275 + }, + { + "epoch": 0.7440235380654653, + "grad_norm": 1.8449769484998253, + "learning_rate": 1.6221150320562212e-06, + "loss": 0.5472, + "step": 24276 + }, + { + "epoch": 0.7440541865882064, + "grad_norm": 1.6872568905991072, + "learning_rate": 1.6217491187947682e-06, + "loss": 0.5585, + "step": 24277 + }, + { + "epoch": 0.7440848351109477, + "grad_norm": 1.8650482893887061, + "learning_rate": 1.621383238820085e-06, + "loss": 0.6079, + "step": 24278 + }, + { + "epoch": 0.7441154836336888, + "grad_norm": 1.6335628796958934, + "learning_rate": 1.6210173921357775e-06, + "loss": 0.5178, + "step": 24279 + }, + { + "epoch": 0.7441461321564301, + "grad_norm": 1.8473026224228022, + "learning_rate": 1.6206515787454518e-06, + "loss": 0.5885, + "step": 24280 + }, + { + "epoch": 0.7441767806791713, + "grad_norm": 1.5967575822021365, + "learning_rate": 1.620285798652711e-06, + "loss": 0.4836, + "step": 24281 + }, + { + "epoch": 0.7442074292019125, + "grad_norm": 1.8080328839104696, + "learning_rate": 1.6199200518611553e-06, + "loss": 0.5426, + "step": 24282 + }, + { + "epoch": 0.7442380777246537, + "grad_norm": 1.8365183296230996, + "learning_rate": 1.6195543383743956e-06, + "loss": 0.529, + "step": 24283 + }, + { + "epoch": 0.7442687262473949, + "grad_norm": 1.9050641750872874, + "learning_rate": 1.619188658196032e-06, + "loss": 0.5519, + "step": 24284 + }, + { + "epoch": 0.7442993747701361, + "grad_norm": 1.6164904052314664, + "learning_rate": 1.618823011329666e-06, + "loss": 0.4712, + "step": 24285 + }, + { + "epoch": 
0.7443300232928772, + "grad_norm": 1.9913634811649283, + "learning_rate": 1.6184573977789014e-06, + "loss": 0.5691, + "step": 24286 + }, + { + "epoch": 0.7443606718156185, + "grad_norm": 0.7823933237411925, + "learning_rate": 1.618091817547342e-06, + "loss": 0.3767, + "step": 24287 + }, + { + "epoch": 0.7443913203383596, + "grad_norm": 1.7815395761422337, + "learning_rate": 1.6177262706385904e-06, + "loss": 0.6673, + "step": 24288 + }, + { + "epoch": 0.7444219688611009, + "grad_norm": 1.7998692767803333, + "learning_rate": 1.617360757056246e-06, + "loss": 0.4531, + "step": 24289 + }, + { + "epoch": 0.744452617383842, + "grad_norm": 1.8629732373673678, + "learning_rate": 1.616995276803911e-06, + "loss": 0.5383, + "step": 24290 + }, + { + "epoch": 0.7444832659065833, + "grad_norm": 1.5977757845815477, + "learning_rate": 1.616629829885189e-06, + "loss": 0.5282, + "step": 24291 + }, + { + "epoch": 0.7445139144293245, + "grad_norm": 2.042471315904534, + "learning_rate": 1.6162644163036795e-06, + "loss": 0.652, + "step": 24292 + }, + { + "epoch": 0.7445445629520657, + "grad_norm": 0.7947588268794535, + "learning_rate": 1.6158990360629783e-06, + "loss": 0.4009, + "step": 24293 + }, + { + "epoch": 0.7445752114748069, + "grad_norm": 2.0109888134488125, + "learning_rate": 1.6155336891666935e-06, + "loss": 0.5694, + "step": 24294 + }, + { + "epoch": 0.7446058599975481, + "grad_norm": 1.9199873713019242, + "learning_rate": 1.6151683756184193e-06, + "loss": 0.6809, + "step": 24295 + }, + { + "epoch": 0.7446365085202893, + "grad_norm": 0.7661132173888433, + "learning_rate": 1.6148030954217592e-06, + "loss": 0.3928, + "step": 24296 + }, + { + "epoch": 0.7446671570430305, + "grad_norm": 0.788739996449937, + "learning_rate": 1.6144378485803086e-06, + "loss": 0.4028, + "step": 24297 + }, + { + "epoch": 0.7446978055657717, + "grad_norm": 1.722439231043296, + "learning_rate": 1.6140726350976683e-06, + "loss": 0.5728, + "step": 24298 + }, + { + "epoch": 0.744728454088513, + "grad_norm": 1.938736089885582, + "learning_rate": 1.613707454977438e-06, + "loss": 0.6152, + "step": 24299 + }, + { + "epoch": 0.7447591026112541, + "grad_norm": 1.8807557888894526, + "learning_rate": 1.6133423082232131e-06, + "loss": 0.6593, + "step": 24300 + }, + { + "epoch": 0.7447897511339954, + "grad_norm": 0.7656949822209946, + "learning_rate": 1.6129771948385926e-06, + "loss": 0.3932, + "step": 24301 + }, + { + "epoch": 0.7448203996567365, + "grad_norm": 1.8890684285580719, + "learning_rate": 1.612612114827176e-06, + "loss": 0.5917, + "step": 24302 + }, + { + "epoch": 0.7448510481794778, + "grad_norm": 1.9568732743264596, + "learning_rate": 1.6122470681925594e-06, + "loss": 0.6344, + "step": 24303 + }, + { + "epoch": 0.7448816967022189, + "grad_norm": 1.8228344541528216, + "learning_rate": 1.6118820549383358e-06, + "loss": 0.6262, + "step": 24304 + }, + { + "epoch": 0.7449123452249602, + "grad_norm": 1.756614005170314, + "learning_rate": 1.611517075068108e-06, + "loss": 0.5687, + "step": 24305 + }, + { + "epoch": 0.7449429937477013, + "grad_norm": 2.001584125202152, + "learning_rate": 1.6111521285854687e-06, + "loss": 0.5759, + "step": 24306 + }, + { + "epoch": 0.7449736422704426, + "grad_norm": 0.7587031264369603, + "learning_rate": 1.6107872154940152e-06, + "loss": 0.4141, + "step": 24307 + }, + { + "epoch": 0.7450042907931838, + "grad_norm": 1.8897733405515849, + "learning_rate": 1.6104223357973414e-06, + "loss": 0.6521, + "step": 24308 + }, + { + "epoch": 0.745034939315925, + "grad_norm": 1.8540328078920265, + "learning_rate": 
1.6100574894990433e-06, + "loss": 0.5911, + "step": 24309 + }, + { + "epoch": 0.7450655878386662, + "grad_norm": 1.920341277962546, + "learning_rate": 1.6096926766027183e-06, + "loss": 0.5617, + "step": 24310 + }, + { + "epoch": 0.7450962363614074, + "grad_norm": 1.6198919915557983, + "learning_rate": 1.6093278971119569e-06, + "loss": 0.4879, + "step": 24311 + }, + { + "epoch": 0.7451268848841486, + "grad_norm": 1.87099326718208, + "learning_rate": 1.608963151030355e-06, + "loss": 0.5285, + "step": 24312 + }, + { + "epoch": 0.7451575334068898, + "grad_norm": 1.9499799268342393, + "learning_rate": 1.6085984383615084e-06, + "loss": 0.5797, + "step": 24313 + }, + { + "epoch": 0.745188181929631, + "grad_norm": 1.9535101497883527, + "learning_rate": 1.608233759109008e-06, + "loss": 0.6315, + "step": 24314 + }, + { + "epoch": 0.7452188304523722, + "grad_norm": 1.5855461272343965, + "learning_rate": 1.6078691132764478e-06, + "loss": 0.5485, + "step": 24315 + }, + { + "epoch": 0.7452494789751134, + "grad_norm": 1.785665384640759, + "learning_rate": 1.6075045008674228e-06, + "loss": 0.6612, + "step": 24316 + }, + { + "epoch": 0.7452801274978545, + "grad_norm": 1.8101738454170442, + "learning_rate": 1.6071399218855222e-06, + "loss": 0.52, + "step": 24317 + }, + { + "epoch": 0.7453107760205958, + "grad_norm": 1.9815419169184407, + "learning_rate": 1.606775376334342e-06, + "loss": 0.689, + "step": 24318 + }, + { + "epoch": 0.745341424543337, + "grad_norm": 1.7675516441538162, + "learning_rate": 1.6064108642174702e-06, + "loss": 0.5623, + "step": 24319 + }, + { + "epoch": 0.7453720730660782, + "grad_norm": 0.9760449807258725, + "learning_rate": 1.6060463855385005e-06, + "loss": 0.4102, + "step": 24320 + }, + { + "epoch": 0.7454027215888194, + "grad_norm": 1.7621810697215874, + "learning_rate": 1.6056819403010265e-06, + "loss": 0.5963, + "step": 24321 + }, + { + "epoch": 0.7454333701115606, + "grad_norm": 1.8666783806064449, + "learning_rate": 1.6053175285086341e-06, + "loss": 0.5923, + "step": 24322 + }, + { + "epoch": 0.7454640186343018, + "grad_norm": 1.8879684894781383, + "learning_rate": 1.6049531501649173e-06, + "loss": 0.5975, + "step": 24323 + }, + { + "epoch": 0.745494667157043, + "grad_norm": 1.9035263828003683, + "learning_rate": 1.6045888052734676e-06, + "loss": 0.6123, + "step": 24324 + }, + { + "epoch": 0.7455253156797842, + "grad_norm": 1.8047970217942624, + "learning_rate": 1.6042244938378709e-06, + "loss": 0.6477, + "step": 24325 + }, + { + "epoch": 0.7455559642025255, + "grad_norm": 2.022527798466008, + "learning_rate": 1.6038602158617211e-06, + "loss": 0.6297, + "step": 24326 + }, + { + "epoch": 0.7455866127252666, + "grad_norm": 2.0952317855688927, + "learning_rate": 1.6034959713486043e-06, + "loss": 0.661, + "step": 24327 + }, + { + "epoch": 0.7456172612480079, + "grad_norm": 1.8453775899341511, + "learning_rate": 1.6031317603021101e-06, + "loss": 0.551, + "step": 24328 + }, + { + "epoch": 0.745647909770749, + "grad_norm": 1.8033937741105688, + "learning_rate": 1.6027675827258294e-06, + "loss": 0.5626, + "step": 24329 + }, + { + "epoch": 0.7456785582934903, + "grad_norm": 2.0102034442643433, + "learning_rate": 1.6024034386233477e-06, + "loss": 0.6282, + "step": 24330 + }, + { + "epoch": 0.7457092068162314, + "grad_norm": 1.8919232174283342, + "learning_rate": 1.6020393279982539e-06, + "loss": 0.547, + "step": 24331 + }, + { + "epoch": 0.7457398553389727, + "grad_norm": 1.6905373198226097, + "learning_rate": 1.6016752508541377e-06, + "loss": 0.6058, + "step": 24332 + }, + { + 
"epoch": 0.7457705038617138, + "grad_norm": 1.6968366323115958, + "learning_rate": 1.6013112071945835e-06, + "loss": 0.609, + "step": 24333 + }, + { + "epoch": 0.7458011523844551, + "grad_norm": 2.1675851268677526, + "learning_rate": 1.6009471970231793e-06, + "loss": 0.5335, + "step": 24334 + }, + { + "epoch": 0.7458318009071963, + "grad_norm": 2.095639775929135, + "learning_rate": 1.6005832203435135e-06, + "loss": 0.6368, + "step": 24335 + }, + { + "epoch": 0.7458624494299375, + "grad_norm": 1.8633463010420261, + "learning_rate": 1.6002192771591697e-06, + "loss": 0.6264, + "step": 24336 + }, + { + "epoch": 0.7458930979526787, + "grad_norm": 0.7599336631673017, + "learning_rate": 1.5998553674737365e-06, + "loss": 0.4035, + "step": 24337 + }, + { + "epoch": 0.7459237464754199, + "grad_norm": 1.8123622360972969, + "learning_rate": 1.5994914912907973e-06, + "loss": 0.6095, + "step": 24338 + }, + { + "epoch": 0.7459543949981611, + "grad_norm": 1.7543388962766462, + "learning_rate": 1.599127648613938e-06, + "loss": 0.48, + "step": 24339 + }, + { + "epoch": 0.7459850435209023, + "grad_norm": 1.8914996201555507, + "learning_rate": 1.5987638394467454e-06, + "loss": 0.6304, + "step": 24340 + }, + { + "epoch": 0.7460156920436435, + "grad_norm": 1.5815108985108028, + "learning_rate": 1.598400063792802e-06, + "loss": 0.5141, + "step": 24341 + }, + { + "epoch": 0.7460463405663847, + "grad_norm": 1.8275269827408704, + "learning_rate": 1.5980363216556926e-06, + "loss": 0.5952, + "step": 24342 + }, + { + "epoch": 0.7460769890891259, + "grad_norm": 1.726552967760059, + "learning_rate": 1.5976726130390036e-06, + "loss": 0.5184, + "step": 24343 + }, + { + "epoch": 0.7461076376118672, + "grad_norm": 1.743367078880319, + "learning_rate": 1.5973089379463152e-06, + "loss": 0.6081, + "step": 24344 + }, + { + "epoch": 0.7461382861346083, + "grad_norm": 1.7718412751806505, + "learning_rate": 1.5969452963812126e-06, + "loss": 0.5784, + "step": 24345 + }, + { + "epoch": 0.7461689346573496, + "grad_norm": 1.7840907508599595, + "learning_rate": 1.5965816883472807e-06, + "loss": 0.5681, + "step": 24346 + }, + { + "epoch": 0.7461995831800907, + "grad_norm": 1.7593460339764606, + "learning_rate": 1.5962181138480981e-06, + "loss": 0.5757, + "step": 24347 + }, + { + "epoch": 0.7462302317028319, + "grad_norm": 1.8394624998061326, + "learning_rate": 1.595854572887251e-06, + "loss": 0.5287, + "step": 24348 + }, + { + "epoch": 0.7462608802255731, + "grad_norm": 1.8672077933352735, + "learning_rate": 1.595491065468318e-06, + "loss": 0.5678, + "step": 24349 + }, + { + "epoch": 0.7462915287483143, + "grad_norm": 1.6567575254785534, + "learning_rate": 1.5951275915948827e-06, + "loss": 0.5271, + "step": 24350 + }, + { + "epoch": 0.7463221772710555, + "grad_norm": 1.8812894976326064, + "learning_rate": 1.5947641512705282e-06, + "loss": 0.6069, + "step": 24351 + }, + { + "epoch": 0.7463528257937967, + "grad_norm": 0.7918080936275257, + "learning_rate": 1.5944007444988318e-06, + "loss": 0.4031, + "step": 24352 + }, + { + "epoch": 0.746383474316538, + "grad_norm": 1.862571335701285, + "learning_rate": 1.5940373712833768e-06, + "loss": 0.631, + "step": 24353 + }, + { + "epoch": 0.7464141228392791, + "grad_norm": 1.80166663778883, + "learning_rate": 1.5936740316277444e-06, + "loss": 0.6387, + "step": 24354 + }, + { + "epoch": 0.7464447713620204, + "grad_norm": 1.7208989185363615, + "learning_rate": 1.5933107255355113e-06, + "loss": 0.5181, + "step": 24355 + }, + { + "epoch": 0.7464754198847615, + "grad_norm": 1.8027097334650586, + 
"learning_rate": 1.5929474530102596e-06, + "loss": 0.645, + "step": 24356 + }, + { + "epoch": 0.7465060684075028, + "grad_norm": 1.6324180438212748, + "learning_rate": 1.5925842140555704e-06, + "loss": 0.5068, + "step": 24357 + }, + { + "epoch": 0.7465367169302439, + "grad_norm": 1.8600575458932187, + "learning_rate": 1.5922210086750183e-06, + "loss": 0.5678, + "step": 24358 + }, + { + "epoch": 0.7465673654529852, + "grad_norm": 1.7728321149331994, + "learning_rate": 1.5918578368721865e-06, + "loss": 0.4846, + "step": 24359 + }, + { + "epoch": 0.7465980139757263, + "grad_norm": 1.892698120407675, + "learning_rate": 1.5914946986506502e-06, + "loss": 0.6614, + "step": 24360 + }, + { + "epoch": 0.7466286624984676, + "grad_norm": 1.779638018284253, + "learning_rate": 1.5911315940139883e-06, + "loss": 0.5713, + "step": 24361 + }, + { + "epoch": 0.7466593110212087, + "grad_norm": 1.7511235806874261, + "learning_rate": 1.590768522965781e-06, + "loss": 0.5658, + "step": 24362 + }, + { + "epoch": 0.74668995954395, + "grad_norm": 1.8888192016428904, + "learning_rate": 1.5904054855096019e-06, + "loss": 0.6208, + "step": 24363 + }, + { + "epoch": 0.7467206080666912, + "grad_norm": 1.9481354881930824, + "learning_rate": 1.5900424816490295e-06, + "loss": 0.7048, + "step": 24364 + }, + { + "epoch": 0.7467512565894324, + "grad_norm": 1.644624794049074, + "learning_rate": 1.5896795113876435e-06, + "loss": 0.5084, + "step": 24365 + }, + { + "epoch": 0.7467819051121736, + "grad_norm": 1.880616072379981, + "learning_rate": 1.5893165747290156e-06, + "loss": 0.4932, + "step": 24366 + }, + { + "epoch": 0.7468125536349148, + "grad_norm": 1.8710763587496209, + "learning_rate": 1.5889536716767246e-06, + "loss": 0.6612, + "step": 24367 + }, + { + "epoch": 0.746843202157656, + "grad_norm": 1.6630290747702565, + "learning_rate": 1.5885908022343454e-06, + "loss": 0.542, + "step": 24368 + }, + { + "epoch": 0.7468738506803972, + "grad_norm": 1.8729171040152772, + "learning_rate": 1.5882279664054557e-06, + "loss": 0.5599, + "step": 24369 + }, + { + "epoch": 0.7469044992031384, + "grad_norm": 1.7318082273794044, + "learning_rate": 1.5878651641936283e-06, + "loss": 0.6363, + "step": 24370 + }, + { + "epoch": 0.7469351477258797, + "grad_norm": 1.8906851309644526, + "learning_rate": 1.5875023956024377e-06, + "loss": 0.494, + "step": 24371 + }, + { + "epoch": 0.7469657962486208, + "grad_norm": 1.8660462997785057, + "learning_rate": 1.5871396606354584e-06, + "loss": 0.5191, + "step": 24372 + }, + { + "epoch": 0.7469964447713621, + "grad_norm": 0.7911380941692518, + "learning_rate": 1.5867769592962673e-06, + "loss": 0.4059, + "step": 24373 + }, + { + "epoch": 0.7470270932941032, + "grad_norm": 0.8073431258247442, + "learning_rate": 1.586414291588434e-06, + "loss": 0.4113, + "step": 24374 + }, + { + "epoch": 0.7470577418168445, + "grad_norm": 1.7529733281481688, + "learning_rate": 1.586051657515535e-06, + "loss": 0.6123, + "step": 24375 + }, + { + "epoch": 0.7470883903395856, + "grad_norm": 1.8479411657892533, + "learning_rate": 1.5856890570811433e-06, + "loss": 0.6269, + "step": 24376 + }, + { + "epoch": 0.7471190388623269, + "grad_norm": 1.9055635895263505, + "learning_rate": 1.5853264902888294e-06, + "loss": 0.5568, + "step": 24377 + }, + { + "epoch": 0.747149687385068, + "grad_norm": 1.644671508102661, + "learning_rate": 1.5849639571421693e-06, + "loss": 0.6139, + "step": 24378 + }, + { + "epoch": 0.7471803359078092, + "grad_norm": 1.590810238766832, + "learning_rate": 1.5846014576447294e-06, + "loss": 0.5359, + "step": 24379 
+ }, + { + "epoch": 0.7472109844305505, + "grad_norm": 0.7917303375394957, + "learning_rate": 1.5842389918000888e-06, + "loss": 0.4116, + "step": 24380 + }, + { + "epoch": 0.7472416329532916, + "grad_norm": 1.9979909708581187, + "learning_rate": 1.583876559611815e-06, + "loss": 0.5593, + "step": 24381 + }, + { + "epoch": 0.7472722814760329, + "grad_norm": 0.7590969209104466, + "learning_rate": 1.583514161083478e-06, + "loss": 0.3875, + "step": 24382 + }, + { + "epoch": 0.747302929998774, + "grad_norm": 2.0052099360057793, + "learning_rate": 1.5831517962186505e-06, + "loss": 0.6054, + "step": 24383 + }, + { + "epoch": 0.7473335785215153, + "grad_norm": 1.7496503429638675, + "learning_rate": 1.582789465020904e-06, + "loss": 0.6204, + "step": 24384 + }, + { + "epoch": 0.7473642270442564, + "grad_norm": 1.759670771831446, + "learning_rate": 1.5824271674938057e-06, + "loss": 0.5294, + "step": 24385 + }, + { + "epoch": 0.7473948755669977, + "grad_norm": 2.0201455772049846, + "learning_rate": 1.5820649036409269e-06, + "loss": 0.6422, + "step": 24386 + }, + { + "epoch": 0.7474255240897388, + "grad_norm": 1.9824056847751497, + "learning_rate": 1.5817026734658369e-06, + "loss": 0.5366, + "step": 24387 + }, + { + "epoch": 0.7474561726124801, + "grad_norm": 1.6888044123894463, + "learning_rate": 1.581340476972107e-06, + "loss": 0.6069, + "step": 24388 + }, + { + "epoch": 0.7474868211352212, + "grad_norm": 1.7452459971162817, + "learning_rate": 1.580978314163304e-06, + "loss": 0.6401, + "step": 24389 + }, + { + "epoch": 0.7475174696579625, + "grad_norm": 1.9254677532102626, + "learning_rate": 1.580616185042993e-06, + "loss": 0.5816, + "step": 24390 + }, + { + "epoch": 0.7475481181807037, + "grad_norm": 1.8204310357687157, + "learning_rate": 1.580254089614749e-06, + "loss": 0.5108, + "step": 24391 + }, + { + "epoch": 0.7475787667034449, + "grad_norm": 1.6472400416626833, + "learning_rate": 1.5798920278821362e-06, + "loss": 0.5781, + "step": 24392 + }, + { + "epoch": 0.7476094152261861, + "grad_norm": 2.0012435933572026, + "learning_rate": 1.5795299998487212e-06, + "loss": 0.6362, + "step": 24393 + }, + { + "epoch": 0.7476400637489273, + "grad_norm": 0.7821015052330924, + "learning_rate": 1.579168005518072e-06, + "loss": 0.4034, + "step": 24394 + }, + { + "epoch": 0.7476707122716685, + "grad_norm": 1.9985816546839343, + "learning_rate": 1.5788060448937554e-06, + "loss": 0.6512, + "step": 24395 + }, + { + "epoch": 0.7477013607944097, + "grad_norm": 1.9169749631223112, + "learning_rate": 1.5784441179793402e-06, + "loss": 0.5794, + "step": 24396 + }, + { + "epoch": 0.7477320093171509, + "grad_norm": 1.8411281818979761, + "learning_rate": 1.5780822247783888e-06, + "loss": 0.6587, + "step": 24397 + }, + { + "epoch": 0.7477626578398922, + "grad_norm": 2.022875908478752, + "learning_rate": 1.577720365294469e-06, + "loss": 0.5973, + "step": 24398 + }, + { + "epoch": 0.7477933063626333, + "grad_norm": 1.59261131674756, + "learning_rate": 1.5773585395311474e-06, + "loss": 0.5212, + "step": 24399 + }, + { + "epoch": 0.7478239548853746, + "grad_norm": 0.7892987693682662, + "learning_rate": 1.576996747491988e-06, + "loss": 0.4099, + "step": 24400 + }, + { + "epoch": 0.7478546034081157, + "grad_norm": 1.6552061010909127, + "learning_rate": 1.5766349891805521e-06, + "loss": 0.5254, + "step": 24401 + }, + { + "epoch": 0.747885251930857, + "grad_norm": 1.7438998643546193, + "learning_rate": 1.5762732646004109e-06, + "loss": 0.5323, + "step": 24402 + }, + { + "epoch": 0.7479159004535981, + "grad_norm": 1.6953087341897746, 
+ "learning_rate": 1.575911573755125e-06, + "loss": 0.4992, + "step": 24403 + }, + { + "epoch": 0.7479465489763394, + "grad_norm": 0.7690725561863851, + "learning_rate": 1.5755499166482568e-06, + "loss": 0.3973, + "step": 24404 + }, + { + "epoch": 0.7479771974990805, + "grad_norm": 1.5399041056620424, + "learning_rate": 1.5751882932833717e-06, + "loss": 0.6282, + "step": 24405 + }, + { + "epoch": 0.7480078460218218, + "grad_norm": 1.661137947347663, + "learning_rate": 1.574826703664033e-06, + "loss": 0.5459, + "step": 24406 + }, + { + "epoch": 0.748038494544563, + "grad_norm": 1.7409596537753218, + "learning_rate": 1.574465147793804e-06, + "loss": 0.5348, + "step": 24407 + }, + { + "epoch": 0.7480691430673042, + "grad_norm": 0.8210418303245343, + "learning_rate": 1.5741036256762455e-06, + "loss": 0.4126, + "step": 24408 + }, + { + "epoch": 0.7480997915900454, + "grad_norm": 2.028082986070615, + "learning_rate": 1.5737421373149198e-06, + "loss": 0.6213, + "step": 24409 + }, + { + "epoch": 0.7481304401127865, + "grad_norm": 1.9060914295290614, + "learning_rate": 1.5733806827133913e-06, + "loss": 0.6381, + "step": 24410 + }, + { + "epoch": 0.7481610886355278, + "grad_norm": 2.0330118380323565, + "learning_rate": 1.57301926187522e-06, + "loss": 0.6145, + "step": 24411 + }, + { + "epoch": 0.7481917371582689, + "grad_norm": 0.8453235404704115, + "learning_rate": 1.572657874803965e-06, + "loss": 0.4301, + "step": 24412 + }, + { + "epoch": 0.7482223856810102, + "grad_norm": 0.7925653726519702, + "learning_rate": 1.5722965215031888e-06, + "loss": 0.4212, + "step": 24413 + }, + { + "epoch": 0.7482530342037513, + "grad_norm": 1.7828693118531094, + "learning_rate": 1.5719352019764516e-06, + "loss": 0.5649, + "step": 24414 + }, + { + "epoch": 0.7482836827264926, + "grad_norm": 1.7280035535895941, + "learning_rate": 1.5715739162273163e-06, + "loss": 0.5386, + "step": 24415 + }, + { + "epoch": 0.7483143312492337, + "grad_norm": 1.7154919699926026, + "learning_rate": 1.5712126642593385e-06, + "loss": 0.5771, + "step": 24416 + }, + { + "epoch": 0.748344979771975, + "grad_norm": 1.8823423268268953, + "learning_rate": 1.5708514460760794e-06, + "loss": 0.6267, + "step": 24417 + }, + { + "epoch": 0.7483756282947162, + "grad_norm": 1.8136087982074505, + "learning_rate": 1.5704902616811002e-06, + "loss": 0.5977, + "step": 24418 + }, + { + "epoch": 0.7484062768174574, + "grad_norm": 2.043385190146566, + "learning_rate": 1.5701291110779565e-06, + "loss": 0.6116, + "step": 24419 + }, + { + "epoch": 0.7484369253401986, + "grad_norm": 0.8109373617522732, + "learning_rate": 1.5697679942702077e-06, + "loss": 0.4151, + "step": 24420 + }, + { + "epoch": 0.7484675738629398, + "grad_norm": 1.7969953858285148, + "learning_rate": 1.5694069112614146e-06, + "loss": 0.6114, + "step": 24421 + }, + { + "epoch": 0.748498222385681, + "grad_norm": 1.8592831042635263, + "learning_rate": 1.5690458620551313e-06, + "loss": 0.5343, + "step": 24422 + }, + { + "epoch": 0.7485288709084222, + "grad_norm": 1.653705351801182, + "learning_rate": 1.5686848466549182e-06, + "loss": 0.5702, + "step": 24423 + }, + { + "epoch": 0.7485595194311634, + "grad_norm": 0.7961570336327392, + "learning_rate": 1.5683238650643302e-06, + "loss": 0.4053, + "step": 24424 + }, + { + "epoch": 0.7485901679539047, + "grad_norm": 1.8714786427176189, + "learning_rate": 1.567962917286925e-06, + "loss": 0.5434, + "step": 24425 + }, + { + "epoch": 0.7486208164766458, + "grad_norm": 1.6630761238068648, + "learning_rate": 1.5676020033262606e-06, + "loss": 0.5716, + "step": 
24426 + }, + { + "epoch": 0.7486514649993871, + "grad_norm": 1.8473531361422744, + "learning_rate": 1.5672411231858903e-06, + "loss": 0.6016, + "step": 24427 + }, + { + "epoch": 0.7486821135221282, + "grad_norm": 1.8120377622971628, + "learning_rate": 1.566880276869372e-06, + "loss": 0.6514, + "step": 24428 + }, + { + "epoch": 0.7487127620448695, + "grad_norm": 1.9498844951064023, + "learning_rate": 1.5665194643802617e-06, + "loss": 0.6441, + "step": 24429 + }, + { + "epoch": 0.7487434105676106, + "grad_norm": 1.8319188683963374, + "learning_rate": 1.5661586857221139e-06, + "loss": 0.5417, + "step": 24430 + }, + { + "epoch": 0.7487740590903519, + "grad_norm": 2.0052262828400065, + "learning_rate": 1.5657979408984803e-06, + "loss": 0.6073, + "step": 24431 + }, + { + "epoch": 0.748804707613093, + "grad_norm": 2.0018235558817414, + "learning_rate": 1.5654372299129212e-06, + "loss": 0.5748, + "step": 24432 + }, + { + "epoch": 0.7488353561358343, + "grad_norm": 1.7485608951072453, + "learning_rate": 1.565076552768986e-06, + "loss": 0.5681, + "step": 24433 + }, + { + "epoch": 0.7488660046585754, + "grad_norm": 1.8820455559907003, + "learning_rate": 1.5647159094702325e-06, + "loss": 0.6095, + "step": 24434 + }, + { + "epoch": 0.7488966531813167, + "grad_norm": 1.6957068690965946, + "learning_rate": 1.56435530002021e-06, + "loss": 0.5351, + "step": 24435 + }, + { + "epoch": 0.7489273017040579, + "grad_norm": 1.8285620263300206, + "learning_rate": 1.563994724422474e-06, + "loss": 0.5695, + "step": 24436 + }, + { + "epoch": 0.7489579502267991, + "grad_norm": 1.8329245486406684, + "learning_rate": 1.5636341826805783e-06, + "loss": 0.6374, + "step": 24437 + }, + { + "epoch": 0.7489885987495403, + "grad_norm": 1.8161722932820163, + "learning_rate": 1.563273674798073e-06, + "loss": 0.5744, + "step": 24438 + }, + { + "epoch": 0.7490192472722815, + "grad_norm": 1.7934021535151567, + "learning_rate": 1.562913200778512e-06, + "loss": 0.5277, + "step": 24439 + }, + { + "epoch": 0.7490498957950227, + "grad_norm": 1.850143524942469, + "learning_rate": 1.5625527606254477e-06, + "loss": 0.5809, + "step": 24440 + }, + { + "epoch": 0.7490805443177638, + "grad_norm": 1.7483901908703865, + "learning_rate": 1.5621923543424288e-06, + "loss": 0.5434, + "step": 24441 + }, + { + "epoch": 0.7491111928405051, + "grad_norm": 1.9069633969180189, + "learning_rate": 1.5618319819330086e-06, + "loss": 0.5745, + "step": 24442 + }, + { + "epoch": 0.7491418413632462, + "grad_norm": 2.141317733258504, + "learning_rate": 1.5614716434007393e-06, + "loss": 0.5917, + "step": 24443 + }, + { + "epoch": 0.7491724898859875, + "grad_norm": 2.1890308444503135, + "learning_rate": 1.5611113387491678e-06, + "loss": 0.6365, + "step": 24444 + }, + { + "epoch": 0.7492031384087287, + "grad_norm": 1.651274692259101, + "learning_rate": 1.5607510679818478e-06, + "loss": 0.5049, + "step": 24445 + }, + { + "epoch": 0.7492337869314699, + "grad_norm": 1.846276399278453, + "learning_rate": 1.5603908311023258e-06, + "loss": 0.5623, + "step": 24446 + }, + { + "epoch": 0.7492644354542111, + "grad_norm": 2.402557525526752, + "learning_rate": 1.560030628114153e-06, + "loss": 0.6268, + "step": 24447 + }, + { + "epoch": 0.7492950839769523, + "grad_norm": 1.7880961815162462, + "learning_rate": 1.5596704590208806e-06, + "loss": 0.5969, + "step": 24448 + }, + { + "epoch": 0.7493257324996935, + "grad_norm": 1.8328626858285644, + "learning_rate": 1.5593103238260531e-06, + "loss": 0.6556, + "step": 24449 + }, + { + "epoch": 0.7493563810224347, + "grad_norm": 
1.8307913625754573, + "learning_rate": 1.558950222533222e-06, + "loss": 0.6807, + "step": 24450 + }, + { + "epoch": 0.7493870295451759, + "grad_norm": 2.1129965245243767, + "learning_rate": 1.558590155145936e-06, + "loss": 0.5633, + "step": 24451 + }, + { + "epoch": 0.7494176780679171, + "grad_norm": 0.8158529492627402, + "learning_rate": 1.5582301216677399e-06, + "loss": 0.3978, + "step": 24452 + }, + { + "epoch": 0.7494483265906583, + "grad_norm": 1.767669226339956, + "learning_rate": 1.5578701221021835e-06, + "loss": 0.5404, + "step": 24453 + }, + { + "epoch": 0.7494789751133996, + "grad_norm": 1.7712143297409972, + "learning_rate": 1.557510156452815e-06, + "loss": 0.5125, + "step": 24454 + }, + { + "epoch": 0.7495096236361407, + "grad_norm": 1.9384708041714072, + "learning_rate": 1.5571502247231778e-06, + "loss": 0.458, + "step": 24455 + }, + { + "epoch": 0.749540272158882, + "grad_norm": 2.253021362220373, + "learning_rate": 1.5567903269168222e-06, + "loss": 0.6008, + "step": 24456 + }, + { + "epoch": 0.7495709206816231, + "grad_norm": 1.9506885681398658, + "learning_rate": 1.5564304630372912e-06, + "loss": 0.6716, + "step": 24457 + }, + { + "epoch": 0.7496015692043644, + "grad_norm": 1.827711714446942, + "learning_rate": 1.5560706330881313e-06, + "loss": 0.5669, + "step": 24458 + }, + { + "epoch": 0.7496322177271055, + "grad_norm": 1.6314627738569039, + "learning_rate": 1.5557108370728908e-06, + "loss": 0.5963, + "step": 24459 + }, + { + "epoch": 0.7496628662498468, + "grad_norm": 2.027852318020845, + "learning_rate": 1.5553510749951106e-06, + "loss": 0.5974, + "step": 24460 + }, + { + "epoch": 0.7496935147725879, + "grad_norm": 1.5714387032006805, + "learning_rate": 1.5549913468583378e-06, + "loss": 0.5783, + "step": 24461 + }, + { + "epoch": 0.7497241632953292, + "grad_norm": 2.1029279170013266, + "learning_rate": 1.554631652666118e-06, + "loss": 0.6299, + "step": 24462 + }, + { + "epoch": 0.7497548118180704, + "grad_norm": 2.0925110955244595, + "learning_rate": 1.5542719924219928e-06, + "loss": 0.6691, + "step": 24463 + }, + { + "epoch": 0.7497854603408116, + "grad_norm": 2.0238917386589743, + "learning_rate": 1.553912366129509e-06, + "loss": 0.5932, + "step": 24464 + }, + { + "epoch": 0.7498161088635528, + "grad_norm": 1.9484277610174672, + "learning_rate": 1.5535527737922068e-06, + "loss": 0.5843, + "step": 24465 + }, + { + "epoch": 0.749846757386294, + "grad_norm": 1.878488835642931, + "learning_rate": 1.5531932154136308e-06, + "loss": 0.6267, + "step": 24466 + }, + { + "epoch": 0.7498774059090352, + "grad_norm": 1.5680618718894357, + "learning_rate": 1.5528336909973258e-06, + "loss": 0.5774, + "step": 24467 + }, + { + "epoch": 0.7499080544317764, + "grad_norm": 2.1043616826762697, + "learning_rate": 1.5524742005468302e-06, + "loss": 0.7187, + "step": 24468 + }, + { + "epoch": 0.7499387029545176, + "grad_norm": 1.8139112973813076, + "learning_rate": 1.5521147440656887e-06, + "loss": 0.6011, + "step": 24469 + }, + { + "epoch": 0.7499693514772588, + "grad_norm": 1.7897334607972653, + "learning_rate": 1.5517553215574444e-06, + "loss": 0.5658, + "step": 24470 + }, + { + "epoch": 0.75, + "grad_norm": 1.6229820948025304, + "learning_rate": 1.5513959330256357e-06, + "loss": 0.5602, + "step": 24471 + }, + { + "epoch": 0.7500306485227412, + "grad_norm": 2.13113796843086, + "learning_rate": 1.5510365784738052e-06, + "loss": 0.6035, + "step": 24472 + }, + { + "epoch": 0.7500612970454824, + "grad_norm": 2.0322740895730247, + "learning_rate": 1.5506772579054952e-06, + "loss": 0.629, + 
"step": 24473 + }, + { + "epoch": 0.7500919455682236, + "grad_norm": 0.7658545653733213, + "learning_rate": 1.5503179713242432e-06, + "loss": 0.3916, + "step": 24474 + }, + { + "epoch": 0.7501225940909648, + "grad_norm": 1.9359813711674, + "learning_rate": 1.549958718733593e-06, + "loss": 0.5769, + "step": 24475 + }, + { + "epoch": 0.750153242613706, + "grad_norm": 1.8013542376412384, + "learning_rate": 1.5495995001370784e-06, + "loss": 0.5525, + "step": 24476 + }, + { + "epoch": 0.7501838911364472, + "grad_norm": 1.9813763900001828, + "learning_rate": 1.5492403155382462e-06, + "loss": 0.576, + "step": 24477 + }, + { + "epoch": 0.7502145396591884, + "grad_norm": 1.8769631268813425, + "learning_rate": 1.5488811649406322e-06, + "loss": 0.6734, + "step": 24478 + }, + { + "epoch": 0.7502451881819296, + "grad_norm": 1.7798893763906878, + "learning_rate": 1.5485220483477731e-06, + "loss": 0.6315, + "step": 24479 + }, + { + "epoch": 0.7502758367046708, + "grad_norm": 1.933286086306785, + "learning_rate": 1.5481629657632096e-06, + "loss": 0.6803, + "step": 24480 + }, + { + "epoch": 0.7503064852274121, + "grad_norm": 2.145750614647648, + "learning_rate": 1.5478039171904813e-06, + "loss": 0.5713, + "step": 24481 + }, + { + "epoch": 0.7503371337501532, + "grad_norm": 2.0341657117987486, + "learning_rate": 1.5474449026331222e-06, + "loss": 0.635, + "step": 24482 + }, + { + "epoch": 0.7503677822728945, + "grad_norm": 1.6664275488630371, + "learning_rate": 1.5470859220946722e-06, + "loss": 0.494, + "step": 24483 + }, + { + "epoch": 0.7503984307956356, + "grad_norm": 0.7701918625922152, + "learning_rate": 1.5467269755786695e-06, + "loss": 0.4002, + "step": 24484 + }, + { + "epoch": 0.7504290793183769, + "grad_norm": 0.8101886093514586, + "learning_rate": 1.5463680630886486e-06, + "loss": 0.4245, + "step": 24485 + }, + { + "epoch": 0.750459727841118, + "grad_norm": 1.7851013812456324, + "learning_rate": 1.5460091846281477e-06, + "loss": 0.6528, + "step": 24486 + }, + { + "epoch": 0.7504903763638593, + "grad_norm": 1.6606797914271947, + "learning_rate": 1.5456503402006989e-06, + "loss": 0.5773, + "step": 24487 + }, + { + "epoch": 0.7505210248866004, + "grad_norm": 1.6965378455045854, + "learning_rate": 1.545291529809844e-06, + "loss": 0.5037, + "step": 24488 + }, + { + "epoch": 0.7505516734093417, + "grad_norm": 1.9181893332121651, + "learning_rate": 1.5449327534591164e-06, + "loss": 0.4787, + "step": 24489 + }, + { + "epoch": 0.7505823219320829, + "grad_norm": 0.8116273547633057, + "learning_rate": 1.5445740111520486e-06, + "loss": 0.4065, + "step": 24490 + }, + { + "epoch": 0.7506129704548241, + "grad_norm": 1.9332917664261486, + "learning_rate": 1.5442153028921768e-06, + "loss": 0.6123, + "step": 24491 + }, + { + "epoch": 0.7506436189775653, + "grad_norm": 0.7471680024768039, + "learning_rate": 1.5438566286830376e-06, + "loss": 0.3888, + "step": 24492 + }, + { + "epoch": 0.7506742675003065, + "grad_norm": 1.7827956535369334, + "learning_rate": 1.5434979885281615e-06, + "loss": 0.6249, + "step": 24493 + }, + { + "epoch": 0.7507049160230477, + "grad_norm": 2.058083034472991, + "learning_rate": 1.5431393824310847e-06, + "loss": 0.5573, + "step": 24494 + }, + { + "epoch": 0.7507355645457889, + "grad_norm": 1.9932973315629696, + "learning_rate": 1.5427808103953396e-06, + "loss": 0.4972, + "step": 24495 + }, + { + "epoch": 0.7507662130685301, + "grad_norm": 0.7734390905277421, + "learning_rate": 1.5424222724244615e-06, + "loss": 0.4103, + "step": 24496 + }, + { + "epoch": 0.7507968615912713, + "grad_norm": 
0.7901425826093396, + "learning_rate": 1.5420637685219814e-06, + "loss": 0.4086, + "step": 24497 + }, + { + "epoch": 0.7508275101140125, + "grad_norm": 1.6804009220908531, + "learning_rate": 1.5417052986914283e-06, + "loss": 0.5735, + "step": 24498 + }, + { + "epoch": 0.7508581586367538, + "grad_norm": 1.7185055520924466, + "learning_rate": 1.541346862936341e-06, + "loss": 0.6634, + "step": 24499 + }, + { + "epoch": 0.7508888071594949, + "grad_norm": 1.8121137277871302, + "learning_rate": 1.5409884612602482e-06, + "loss": 0.6176, + "step": 24500 + }, + { + "epoch": 0.7509194556822362, + "grad_norm": 1.7665495832578506, + "learning_rate": 1.5406300936666796e-06, + "loss": 0.5403, + "step": 24501 + }, + { + "epoch": 0.7509501042049773, + "grad_norm": 1.9008058765849458, + "learning_rate": 1.5402717601591676e-06, + "loss": 0.5451, + "step": 24502 + }, + { + "epoch": 0.7509807527277185, + "grad_norm": 1.9004686152049928, + "learning_rate": 1.5399134607412447e-06, + "loss": 0.5814, + "step": 24503 + }, + { + "epoch": 0.7510114012504597, + "grad_norm": 1.9806004444324792, + "learning_rate": 1.5395551954164383e-06, + "loss": 0.6432, + "step": 24504 + }, + { + "epoch": 0.7510420497732009, + "grad_norm": 1.6910823478986405, + "learning_rate": 1.5391969641882798e-06, + "loss": 0.522, + "step": 24505 + }, + { + "epoch": 0.7510726982959421, + "grad_norm": 0.832537712591996, + "learning_rate": 1.5388387670602995e-06, + "loss": 0.4148, + "step": 24506 + }, + { + "epoch": 0.7511033468186833, + "grad_norm": 1.961484227580124, + "learning_rate": 1.5384806040360272e-06, + "loss": 0.7051, + "step": 24507 + }, + { + "epoch": 0.7511339953414246, + "grad_norm": 1.6540569175260493, + "learning_rate": 1.538122475118992e-06, + "loss": 0.5257, + "step": 24508 + }, + { + "epoch": 0.7511646438641657, + "grad_norm": 1.9420103960446717, + "learning_rate": 1.5377643803127196e-06, + "loss": 0.574, + "step": 24509 + }, + { + "epoch": 0.751195292386907, + "grad_norm": 1.8769486690322112, + "learning_rate": 1.5374063196207411e-06, + "loss": 0.5987, + "step": 24510 + }, + { + "epoch": 0.7512259409096481, + "grad_norm": 1.7831970758145943, + "learning_rate": 1.5370482930465858e-06, + "loss": 0.5948, + "step": 24511 + }, + { + "epoch": 0.7512565894323894, + "grad_norm": 1.9411836571523136, + "learning_rate": 1.5366903005937777e-06, + "loss": 0.5847, + "step": 24512 + }, + { + "epoch": 0.7512872379551305, + "grad_norm": 1.8619906843515923, + "learning_rate": 1.536332342265846e-06, + "loss": 0.5513, + "step": 24513 + }, + { + "epoch": 0.7513178864778718, + "grad_norm": 2.157706064501577, + "learning_rate": 1.5359744180663184e-06, + "loss": 0.6636, + "step": 24514 + }, + { + "epoch": 0.7513485350006129, + "grad_norm": 1.787280416933499, + "learning_rate": 1.5356165279987222e-06, + "loss": 0.5635, + "step": 24515 + }, + { + "epoch": 0.7513791835233542, + "grad_norm": 1.8339434335690443, + "learning_rate": 1.5352586720665835e-06, + "loss": 0.5426, + "step": 24516 + }, + { + "epoch": 0.7514098320460953, + "grad_norm": 1.8462749133289285, + "learning_rate": 1.5349008502734242e-06, + "loss": 0.6054, + "step": 24517 + }, + { + "epoch": 0.7514404805688366, + "grad_norm": 0.7757833678202719, + "learning_rate": 1.5345430626227763e-06, + "loss": 0.406, + "step": 24518 + }, + { + "epoch": 0.7514711290915778, + "grad_norm": 1.8096113681274562, + "learning_rate": 1.5341853091181624e-06, + "loss": 0.5309, + "step": 24519 + }, + { + "epoch": 0.751501777614319, + "grad_norm": 1.799434421611656, + "learning_rate": 1.533827589763106e-06, + "loss": 
0.6423, + "step": 24520 + }, + { + "epoch": 0.7515324261370602, + "grad_norm": 0.8273771284524293, + "learning_rate": 1.533469904561133e-06, + "loss": 0.4007, + "step": 24521 + }, + { + "epoch": 0.7515630746598014, + "grad_norm": 0.8281640230668735, + "learning_rate": 1.5331122535157677e-06, + "loss": 0.4103, + "step": 24522 + }, + { + "epoch": 0.7515937231825426, + "grad_norm": 1.823928053945998, + "learning_rate": 1.5327546366305368e-06, + "loss": 0.5652, + "step": 24523 + }, + { + "epoch": 0.7516243717052838, + "grad_norm": 1.9952518328786117, + "learning_rate": 1.5323970539089595e-06, + "loss": 0.5533, + "step": 24524 + }, + { + "epoch": 0.751655020228025, + "grad_norm": 0.7846753383149855, + "learning_rate": 1.5320395053545612e-06, + "loss": 0.4015, + "step": 24525 + }, + { + "epoch": 0.7516856687507663, + "grad_norm": 1.905806162489496, + "learning_rate": 1.5316819909708668e-06, + "loss": 0.5584, + "step": 24526 + }, + { + "epoch": 0.7517163172735074, + "grad_norm": 1.687976589939438, + "learning_rate": 1.5313245107613967e-06, + "loss": 0.6606, + "step": 24527 + }, + { + "epoch": 0.7517469657962487, + "grad_norm": 1.924508944831169, + "learning_rate": 1.530967064729671e-06, + "loss": 0.6069, + "step": 24528 + }, + { + "epoch": 0.7517776143189898, + "grad_norm": 0.7972916714606927, + "learning_rate": 1.5306096528792175e-06, + "loss": 0.4007, + "step": 24529 + }, + { + "epoch": 0.7518082628417311, + "grad_norm": 1.8711136101036363, + "learning_rate": 1.5302522752135546e-06, + "loss": 0.4907, + "step": 24530 + }, + { + "epoch": 0.7518389113644722, + "grad_norm": 0.8129300081491276, + "learning_rate": 1.5298949317362022e-06, + "loss": 0.394, + "step": 24531 + }, + { + "epoch": 0.7518695598872135, + "grad_norm": 1.9303039445022376, + "learning_rate": 1.529537622450683e-06, + "loss": 0.483, + "step": 24532 + }, + { + "epoch": 0.7519002084099546, + "grad_norm": 1.861846154133219, + "learning_rate": 1.5291803473605176e-06, + "loss": 0.6125, + "step": 24533 + }, + { + "epoch": 0.7519308569326958, + "grad_norm": 2.0501510924802107, + "learning_rate": 1.5288231064692277e-06, + "loss": 0.5711, + "step": 24534 + }, + { + "epoch": 0.751961505455437, + "grad_norm": 1.8574862167985062, + "learning_rate": 1.528465899780331e-06, + "loss": 0.57, + "step": 24535 + }, + { + "epoch": 0.7519921539781782, + "grad_norm": 2.121910298002044, + "learning_rate": 1.5281087272973471e-06, + "loss": 0.637, + "step": 24536 + }, + { + "epoch": 0.7520228025009195, + "grad_norm": 1.7860494603222457, + "learning_rate": 1.527751589023798e-06, + "loss": 0.5866, + "step": 24537 + }, + { + "epoch": 0.7520534510236606, + "grad_norm": 2.0680304733555896, + "learning_rate": 1.527394484963201e-06, + "loss": 0.5925, + "step": 24538 + }, + { + "epoch": 0.7520840995464019, + "grad_norm": 1.779877133148587, + "learning_rate": 1.527037415119072e-06, + "loss": 0.5673, + "step": 24539 + }, + { + "epoch": 0.752114748069143, + "grad_norm": 1.7782342098069286, + "learning_rate": 1.5266803794949343e-06, + "loss": 0.6399, + "step": 24540 + }, + { + "epoch": 0.7521453965918843, + "grad_norm": 1.9953131262226698, + "learning_rate": 1.5263233780943027e-06, + "loss": 0.5412, + "step": 24541 + }, + { + "epoch": 0.7521760451146254, + "grad_norm": 2.004246837523395, + "learning_rate": 1.5259664109206966e-06, + "loss": 0.6156, + "step": 24542 + }, + { + "epoch": 0.7522066936373667, + "grad_norm": 1.958977276744303, + "learning_rate": 1.5256094779776308e-06, + "loss": 0.4956, + "step": 24543 + }, + { + "epoch": 0.7522373421601078, + "grad_norm": 
1.6954657409904663, + "learning_rate": 1.525252579268624e-06, + "loss": 0.5033, + "step": 24544 + }, + { + "epoch": 0.7522679906828491, + "grad_norm": 1.7732880265681235, + "learning_rate": 1.5248957147971939e-06, + "loss": 0.6731, + "step": 24545 + }, + { + "epoch": 0.7522986392055903, + "grad_norm": 0.78448538397984, + "learning_rate": 1.5245388845668541e-06, + "loss": 0.4043, + "step": 24546 + }, + { + "epoch": 0.7523292877283315, + "grad_norm": 2.0707697869364172, + "learning_rate": 1.524182088581122e-06, + "loss": 0.614, + "step": 24547 + }, + { + "epoch": 0.7523599362510727, + "grad_norm": 2.1507607317377984, + "learning_rate": 1.5238253268435143e-06, + "loss": 0.5648, + "step": 24548 + }, + { + "epoch": 0.7523905847738139, + "grad_norm": 2.049157393297596, + "learning_rate": 1.5234685993575439e-06, + "loss": 0.6302, + "step": 24549 + }, + { + "epoch": 0.7524212332965551, + "grad_norm": 1.9608925197116385, + "learning_rate": 1.5231119061267268e-06, + "loss": 0.5852, + "step": 24550 + }, + { + "epoch": 0.7524518818192963, + "grad_norm": 1.7359251585269881, + "learning_rate": 1.5227552471545793e-06, + "loss": 0.5906, + "step": 24551 + }, + { + "epoch": 0.7524825303420375, + "grad_norm": 1.8059216136818705, + "learning_rate": 1.5223986224446124e-06, + "loss": 0.548, + "step": 24552 + }, + { + "epoch": 0.7525131788647788, + "grad_norm": 1.7355859271256955, + "learning_rate": 1.5220420320003433e-06, + "loss": 0.5448, + "step": 24553 + }, + { + "epoch": 0.7525438273875199, + "grad_norm": 2.062321529205591, + "learning_rate": 1.521685475825282e-06, + "loss": 0.6594, + "step": 24554 + }, + { + "epoch": 0.7525744759102612, + "grad_norm": 2.0444687440783844, + "learning_rate": 1.5213289539229443e-06, + "loss": 0.6066, + "step": 24555 + }, + { + "epoch": 0.7526051244330023, + "grad_norm": 1.710679173983627, + "learning_rate": 1.5209724662968433e-06, + "loss": 0.5578, + "step": 24556 + }, + { + "epoch": 0.7526357729557436, + "grad_norm": 2.0316624761259523, + "learning_rate": 1.52061601295049e-06, + "loss": 0.6569, + "step": 24557 + }, + { + "epoch": 0.7526664214784847, + "grad_norm": 1.845352935519372, + "learning_rate": 1.5202595938873965e-06, + "loss": 0.5568, + "step": 24558 + }, + { + "epoch": 0.752697070001226, + "grad_norm": 0.7637750340001729, + "learning_rate": 1.5199032091110777e-06, + "loss": 0.3901, + "step": 24559 + }, + { + "epoch": 0.7527277185239671, + "grad_norm": 0.8605433319335132, + "learning_rate": 1.5195468586250412e-06, + "loss": 0.4241, + "step": 24560 + }, + { + "epoch": 0.7527583670467084, + "grad_norm": 1.7855041978504462, + "learning_rate": 1.5191905424328019e-06, + "loss": 0.5978, + "step": 24561 + }, + { + "epoch": 0.7527890155694495, + "grad_norm": 0.7994506259643128, + "learning_rate": 1.518834260537867e-06, + "loss": 0.4101, + "step": 24562 + }, + { + "epoch": 0.7528196640921908, + "grad_norm": 1.9052479552496109, + "learning_rate": 1.5184780129437487e-06, + "loss": 0.6255, + "step": 24563 + }, + { + "epoch": 0.752850312614932, + "grad_norm": 0.7891597805602222, + "learning_rate": 1.5181217996539589e-06, + "loss": 0.39, + "step": 24564 + }, + { + "epoch": 0.7528809611376731, + "grad_norm": 1.9362609479483113, + "learning_rate": 1.5177656206720043e-06, + "loss": 0.5531, + "step": 24565 + }, + { + "epoch": 0.7529116096604144, + "grad_norm": 1.684339880703327, + "learning_rate": 1.5174094760013963e-06, + "loss": 0.5331, + "step": 24566 + }, + { + "epoch": 0.7529422581831555, + "grad_norm": 0.8115545251402779, + "learning_rate": 1.517053365645645e-06, + "loss": 
0.3925, + "step": 24567 + }, + { + "epoch": 0.7529729067058968, + "grad_norm": 1.83106865645937, + "learning_rate": 1.5166972896082565e-06, + "loss": 0.6235, + "step": 24568 + }, + { + "epoch": 0.7530035552286379, + "grad_norm": 1.701346452760213, + "learning_rate": 1.5163412478927408e-06, + "loss": 0.428, + "step": 24569 + }, + { + "epoch": 0.7530342037513792, + "grad_norm": 1.7651198147571365, + "learning_rate": 1.5159852405026082e-06, + "loss": 0.6191, + "step": 24570 + }, + { + "epoch": 0.7530648522741203, + "grad_norm": 1.7155230543633946, + "learning_rate": 1.5156292674413625e-06, + "loss": 0.5752, + "step": 24571 + }, + { + "epoch": 0.7530955007968616, + "grad_norm": 1.992003426069001, + "learning_rate": 1.515273328712515e-06, + "loss": 0.5864, + "step": 24572 + }, + { + "epoch": 0.7531261493196028, + "grad_norm": 2.04321227350391, + "learning_rate": 1.5149174243195692e-06, + "loss": 0.6136, + "step": 24573 + }, + { + "epoch": 0.753156797842344, + "grad_norm": 1.8935870710237561, + "learning_rate": 1.5145615542660335e-06, + "loss": 0.5951, + "step": 24574 + }, + { + "epoch": 0.7531874463650852, + "grad_norm": 1.9769899756180673, + "learning_rate": 1.5142057185554165e-06, + "loss": 0.6278, + "step": 24575 + }, + { + "epoch": 0.7532180948878264, + "grad_norm": 1.7846572368909155, + "learning_rate": 1.5138499171912208e-06, + "loss": 0.5351, + "step": 24576 + }, + { + "epoch": 0.7532487434105676, + "grad_norm": 1.9667204609952107, + "learning_rate": 1.5134941501769534e-06, + "loss": 0.5577, + "step": 24577 + }, + { + "epoch": 0.7532793919333088, + "grad_norm": 1.7489712392061976, + "learning_rate": 1.5131384175161223e-06, + "loss": 0.5948, + "step": 24578 + }, + { + "epoch": 0.75331004045605, + "grad_norm": 2.0023650103358963, + "learning_rate": 1.512782719212229e-06, + "loss": 0.6298, + "step": 24579 + }, + { + "epoch": 0.7533406889787913, + "grad_norm": 1.9473604477866115, + "learning_rate": 1.5124270552687798e-06, + "loss": 0.6443, + "step": 24580 + }, + { + "epoch": 0.7533713375015324, + "grad_norm": 1.6065296779120832, + "learning_rate": 1.5120714256892804e-06, + "loss": 0.539, + "step": 24581 + }, + { + "epoch": 0.7534019860242737, + "grad_norm": 1.7992376163212112, + "learning_rate": 1.5117158304772322e-06, + "loss": 0.5427, + "step": 24582 + }, + { + "epoch": 0.7534326345470148, + "grad_norm": 1.6562916735531263, + "learning_rate": 1.5113602696361419e-06, + "loss": 0.5771, + "step": 24583 + }, + { + "epoch": 0.7534632830697561, + "grad_norm": 1.7957723234460214, + "learning_rate": 1.51100474316951e-06, + "loss": 0.5544, + "step": 24584 + }, + { + "epoch": 0.7534939315924972, + "grad_norm": 1.8252938382524768, + "learning_rate": 1.5106492510808413e-06, + "loss": 0.5633, + "step": 24585 + }, + { + "epoch": 0.7535245801152385, + "grad_norm": 1.8245205393426052, + "learning_rate": 1.5102937933736394e-06, + "loss": 0.5615, + "step": 24586 + }, + { + "epoch": 0.7535552286379796, + "grad_norm": 1.8765582437048653, + "learning_rate": 1.5099383700514047e-06, + "loss": 0.5186, + "step": 24587 + }, + { + "epoch": 0.7535858771607209, + "grad_norm": 1.9361595965483611, + "learning_rate": 1.5095829811176399e-06, + "loss": 0.5735, + "step": 24588 + }, + { + "epoch": 0.753616525683462, + "grad_norm": 1.8374442946773772, + "learning_rate": 1.5092276265758483e-06, + "loss": 0.5656, + "step": 24589 + }, + { + "epoch": 0.7536471742062033, + "grad_norm": 1.6861531931629605, + "learning_rate": 1.5088723064295291e-06, + "loss": 0.5965, + "step": 24590 + }, + { + "epoch": 0.7536778227289445, + 
"grad_norm": 2.0092615168279613, + "learning_rate": 1.5085170206821836e-06, + "loss": 0.6202, + "step": 24591 + }, + { + "epoch": 0.7537084712516857, + "grad_norm": 1.7579759276146978, + "learning_rate": 1.5081617693373151e-06, + "loss": 0.4976, + "step": 24592 + }, + { + "epoch": 0.7537391197744269, + "grad_norm": 2.235599886984649, + "learning_rate": 1.5078065523984208e-06, + "loss": 0.6496, + "step": 24593 + }, + { + "epoch": 0.7537697682971681, + "grad_norm": 1.7939657950890446, + "learning_rate": 1.5074513698690036e-06, + "loss": 0.5474, + "step": 24594 + }, + { + "epoch": 0.7538004168199093, + "grad_norm": 0.8013894982193677, + "learning_rate": 1.5070962217525582e-06, + "loss": 0.4064, + "step": 24595 + }, + { + "epoch": 0.7538310653426504, + "grad_norm": 1.6412286955265247, + "learning_rate": 1.5067411080525907e-06, + "loss": 0.5494, + "step": 24596 + }, + { + "epoch": 0.7538617138653917, + "grad_norm": 0.8387074966619215, + "learning_rate": 1.5063860287725968e-06, + "loss": 0.4026, + "step": 24597 + }, + { + "epoch": 0.7538923623881328, + "grad_norm": 1.6675305686489004, + "learning_rate": 1.5060309839160737e-06, + "loss": 0.488, + "step": 24598 + }, + { + "epoch": 0.7539230109108741, + "grad_norm": 1.8883604499333047, + "learning_rate": 1.5056759734865218e-06, + "loss": 0.4514, + "step": 24599 + }, + { + "epoch": 0.7539536594336153, + "grad_norm": 1.9570228851867995, + "learning_rate": 1.5053209974874393e-06, + "loss": 0.6233, + "step": 24600 + }, + { + "epoch": 0.7539843079563565, + "grad_norm": 2.0506520912995256, + "learning_rate": 1.5049660559223223e-06, + "loss": 0.6445, + "step": 24601 + }, + { + "epoch": 0.7540149564790977, + "grad_norm": 1.882827742430076, + "learning_rate": 1.5046111487946685e-06, + "loss": 0.6602, + "step": 24602 + }, + { + "epoch": 0.7540456050018389, + "grad_norm": 0.7649830379036856, + "learning_rate": 1.5042562761079755e-06, + "loss": 0.3985, + "step": 24603 + }, + { + "epoch": 0.7540762535245801, + "grad_norm": 1.9096140306312108, + "learning_rate": 1.5039014378657413e-06, + "loss": 0.6066, + "step": 24604 + }, + { + "epoch": 0.7541069020473213, + "grad_norm": 1.8844078130184316, + "learning_rate": 1.503546634071461e-06, + "loss": 0.5748, + "step": 24605 + }, + { + "epoch": 0.7541375505700625, + "grad_norm": 1.8048841478331228, + "learning_rate": 1.5031918647286287e-06, + "loss": 0.5522, + "step": 24606 + }, + { + "epoch": 0.7541681990928037, + "grad_norm": 2.0802882449985636, + "learning_rate": 1.5028371298407418e-06, + "loss": 0.6355, + "step": 24607 + }, + { + "epoch": 0.7541988476155449, + "grad_norm": 1.8829586030638779, + "learning_rate": 1.5024824294112967e-06, + "loss": 0.5565, + "step": 24608 + }, + { + "epoch": 0.7542294961382862, + "grad_norm": 1.726905934695489, + "learning_rate": 1.5021277634437858e-06, + "loss": 0.5963, + "step": 24609 + }, + { + "epoch": 0.7542601446610273, + "grad_norm": 1.821053868040754, + "learning_rate": 1.501773131941705e-06, + "loss": 0.6274, + "step": 24610 + }, + { + "epoch": 0.7542907931837686, + "grad_norm": 1.8446959249498835, + "learning_rate": 1.5014185349085498e-06, + "loss": 0.6404, + "step": 24611 + }, + { + "epoch": 0.7543214417065097, + "grad_norm": 1.75966247588415, + "learning_rate": 1.5010639723478115e-06, + "loss": 0.5845, + "step": 24612 + }, + { + "epoch": 0.754352090229251, + "grad_norm": 1.9524306135485754, + "learning_rate": 1.500709444262987e-06, + "loss": 0.6496, + "step": 24613 + }, + { + "epoch": 0.7543827387519921, + "grad_norm": 1.783664660326747, + "learning_rate": 
1.5003549506575643e-06, + "loss": 0.5485, + "step": 24614 + }, + { + "epoch": 0.7544133872747334, + "grad_norm": 1.8558487382565578, + "learning_rate": 1.500000491535043e-06, + "loss": 0.666, + "step": 24615 + }, + { + "epoch": 0.7544440357974745, + "grad_norm": 1.6828429011145951, + "learning_rate": 1.499646066898912e-06, + "loss": 0.5071, + "step": 24616 + }, + { + "epoch": 0.7544746843202158, + "grad_norm": 1.9167852254077964, + "learning_rate": 1.4992916767526627e-06, + "loss": 0.6202, + "step": 24617 + }, + { + "epoch": 0.754505332842957, + "grad_norm": 1.7852682556523873, + "learning_rate": 1.4989373210997882e-06, + "loss": 0.5712, + "step": 24618 + }, + { + "epoch": 0.7545359813656982, + "grad_norm": 1.6846633826252777, + "learning_rate": 1.4985829999437813e-06, + "loss": 0.5566, + "step": 24619 + }, + { + "epoch": 0.7545666298884394, + "grad_norm": 1.8836462959815303, + "learning_rate": 1.4982287132881307e-06, + "loss": 0.6, + "step": 24620 + }, + { + "epoch": 0.7545972784111806, + "grad_norm": 1.8087283331064656, + "learning_rate": 1.4978744611363284e-06, + "loss": 0.5818, + "step": 24621 + }, + { + "epoch": 0.7546279269339218, + "grad_norm": 1.8993798713132366, + "learning_rate": 1.4975202434918652e-06, + "loss": 0.6676, + "step": 24622 + }, + { + "epoch": 0.754658575456663, + "grad_norm": 1.8004965870030085, + "learning_rate": 1.4971660603582328e-06, + "loss": 0.5562, + "step": 24623 + }, + { + "epoch": 0.7546892239794042, + "grad_norm": 1.6858035726104197, + "learning_rate": 1.4968119117389197e-06, + "loss": 0.5322, + "step": 24624 + }, + { + "epoch": 0.7547198725021455, + "grad_norm": 1.7599461214154362, + "learning_rate": 1.4964577976374112e-06, + "loss": 0.5252, + "step": 24625 + }, + { + "epoch": 0.7547505210248866, + "grad_norm": 1.7597030897085342, + "learning_rate": 1.496103718057204e-06, + "loss": 0.5854, + "step": 24626 + }, + { + "epoch": 0.7547811695476278, + "grad_norm": 1.9088011107841938, + "learning_rate": 1.4957496730017834e-06, + "loss": 0.615, + "step": 24627 + }, + { + "epoch": 0.754811818070369, + "grad_norm": 0.814187208503512, + "learning_rate": 1.4953956624746369e-06, + "loss": 0.3825, + "step": 24628 + }, + { + "epoch": 0.7548424665931102, + "grad_norm": 1.9241490816539855, + "learning_rate": 1.4950416864792528e-06, + "loss": 0.5627, + "step": 24629 + }, + { + "epoch": 0.7548731151158514, + "grad_norm": 1.9078801639641187, + "learning_rate": 1.49468774501912e-06, + "loss": 0.7265, + "step": 24630 + }, + { + "epoch": 0.7549037636385926, + "grad_norm": 2.039436164189281, + "learning_rate": 1.4943338380977274e-06, + "loss": 0.6087, + "step": 24631 + }, + { + "epoch": 0.7549344121613338, + "grad_norm": 1.6818688378674682, + "learning_rate": 1.4939799657185589e-06, + "loss": 0.4951, + "step": 24632 + }, + { + "epoch": 0.754965060684075, + "grad_norm": 0.8071884081236316, + "learning_rate": 1.4936261278851028e-06, + "loss": 0.3929, + "step": 24633 + }, + { + "epoch": 0.7549957092068162, + "grad_norm": 1.8218229979756644, + "learning_rate": 1.4932723246008468e-06, + "loss": 0.5823, + "step": 24634 + }, + { + "epoch": 0.7550263577295574, + "grad_norm": 0.7972968359222046, + "learning_rate": 1.4929185558692767e-06, + "loss": 0.4074, + "step": 24635 + }, + { + "epoch": 0.7550570062522987, + "grad_norm": 2.057382287057678, + "learning_rate": 1.4925648216938732e-06, + "loss": 0.5834, + "step": 24636 + }, + { + "epoch": 0.7550876547750398, + "grad_norm": 2.094321980225072, + "learning_rate": 1.4922111220781299e-06, + "loss": 0.4844, + "step": 24637 + }, + { + 
"epoch": 0.7551183032977811, + "grad_norm": 1.6821710304897564, + "learning_rate": 1.4918574570255272e-06, + "loss": 0.5638, + "step": 24638 + }, + { + "epoch": 0.7551489518205222, + "grad_norm": 1.972173049990898, + "learning_rate": 1.4915038265395498e-06, + "loss": 0.6116, + "step": 24639 + }, + { + "epoch": 0.7551796003432635, + "grad_norm": 1.8886458254555554, + "learning_rate": 1.4911502306236825e-06, + "loss": 0.5684, + "step": 24640 + }, + { + "epoch": 0.7552102488660046, + "grad_norm": 1.8480488117207803, + "learning_rate": 1.4907966692814096e-06, + "loss": 0.6763, + "step": 24641 + }, + { + "epoch": 0.7552408973887459, + "grad_norm": 2.008678460019266, + "learning_rate": 1.4904431425162164e-06, + "loss": 0.6087, + "step": 24642 + }, + { + "epoch": 0.755271545911487, + "grad_norm": 2.1865655520675347, + "learning_rate": 1.4900896503315836e-06, + "loss": 0.617, + "step": 24643 + }, + { + "epoch": 0.7553021944342283, + "grad_norm": 1.8644434835560069, + "learning_rate": 1.4897361927309955e-06, + "loss": 0.6455, + "step": 24644 + }, + { + "epoch": 0.7553328429569695, + "grad_norm": 1.8740278061939053, + "learning_rate": 1.489382769717937e-06, + "loss": 0.6427, + "step": 24645 + }, + { + "epoch": 0.7553634914797107, + "grad_norm": 1.8882230752259594, + "learning_rate": 1.4890293812958873e-06, + "loss": 0.6105, + "step": 24646 + }, + { + "epoch": 0.7553941400024519, + "grad_norm": 1.7199419335933874, + "learning_rate": 1.488676027468327e-06, + "loss": 0.5787, + "step": 24647 + }, + { + "epoch": 0.7554247885251931, + "grad_norm": 1.9464411499130705, + "learning_rate": 1.488322708238743e-06, + "loss": 0.5726, + "step": 24648 + }, + { + "epoch": 0.7554554370479343, + "grad_norm": 1.9027533893433901, + "learning_rate": 1.4879694236106124e-06, + "loss": 0.6001, + "step": 24649 + }, + { + "epoch": 0.7554860855706755, + "grad_norm": 1.8952258409116556, + "learning_rate": 1.487616173587419e-06, + "loss": 0.5243, + "step": 24650 + }, + { + "epoch": 0.7555167340934167, + "grad_norm": 0.8355216516772395, + "learning_rate": 1.4872629581726406e-06, + "loss": 0.4265, + "step": 24651 + }, + { + "epoch": 0.755547382616158, + "grad_norm": 1.7608579484380258, + "learning_rate": 1.4869097773697594e-06, + "loss": 0.5603, + "step": 24652 + }, + { + "epoch": 0.7555780311388991, + "grad_norm": 1.722713707710728, + "learning_rate": 1.4865566311822565e-06, + "loss": 0.5923, + "step": 24653 + }, + { + "epoch": 0.7556086796616404, + "grad_norm": 1.8030583082914682, + "learning_rate": 1.4862035196136083e-06, + "loss": 0.5761, + "step": 24654 + }, + { + "epoch": 0.7556393281843815, + "grad_norm": 1.8831265900227094, + "learning_rate": 1.485850442667296e-06, + "loss": 0.6644, + "step": 24655 + }, + { + "epoch": 0.7556699767071228, + "grad_norm": 1.7729355853591025, + "learning_rate": 1.4854974003467997e-06, + "loss": 0.5414, + "step": 24656 + }, + { + "epoch": 0.7557006252298639, + "grad_norm": 1.6845946914677505, + "learning_rate": 1.4851443926555953e-06, + "loss": 0.5593, + "step": 24657 + }, + { + "epoch": 0.7557312737526051, + "grad_norm": 1.5270843089942887, + "learning_rate": 1.484791419597164e-06, + "loss": 0.4976, + "step": 24658 + }, + { + "epoch": 0.7557619222753463, + "grad_norm": 1.941704419449683, + "learning_rate": 1.4844384811749812e-06, + "loss": 0.5941, + "step": 24659 + }, + { + "epoch": 0.7557925707980875, + "grad_norm": 1.6623236475302583, + "learning_rate": 1.484085577392525e-06, + "loss": 0.6312, + "step": 24660 + }, + { + "epoch": 0.7558232193208287, + "grad_norm": 1.6510136024669113, + 
"learning_rate": 1.4837327082532755e-06, + "loss": 0.543, + "step": 24661 + }, + { + "epoch": 0.7558538678435699, + "grad_norm": 1.7278670017857733, + "learning_rate": 1.4833798737607046e-06, + "loss": 0.5917, + "step": 24662 + }, + { + "epoch": 0.7558845163663112, + "grad_norm": 0.7480996805562427, + "learning_rate": 1.4830270739182922e-06, + "loss": 0.4007, + "step": 24663 + }, + { + "epoch": 0.7559151648890523, + "grad_norm": 1.5762614393889536, + "learning_rate": 1.482674308729516e-06, + "loss": 0.5283, + "step": 24664 + }, + { + "epoch": 0.7559458134117936, + "grad_norm": 1.5985406049571464, + "learning_rate": 1.482321578197849e-06, + "loss": 0.5523, + "step": 24665 + }, + { + "epoch": 0.7559764619345347, + "grad_norm": 1.8991352668612984, + "learning_rate": 1.4819688823267647e-06, + "loss": 0.5195, + "step": 24666 + }, + { + "epoch": 0.756007110457276, + "grad_norm": 1.7304596023809136, + "learning_rate": 1.4816162211197438e-06, + "loss": 0.5748, + "step": 24667 + }, + { + "epoch": 0.7560377589800171, + "grad_norm": 0.7847328273762514, + "learning_rate": 1.4812635945802574e-06, + "loss": 0.3931, + "step": 24668 + }, + { + "epoch": 0.7560684075027584, + "grad_norm": 0.7904517230552177, + "learning_rate": 1.480911002711783e-06, + "loss": 0.4096, + "step": 24669 + }, + { + "epoch": 0.7560990560254995, + "grad_norm": 2.1794738805740206, + "learning_rate": 1.4805584455177908e-06, + "loss": 0.5743, + "step": 24670 + }, + { + "epoch": 0.7561297045482408, + "grad_norm": 0.7848361595015962, + "learning_rate": 1.4802059230017568e-06, + "loss": 0.3973, + "step": 24671 + }, + { + "epoch": 0.756160353070982, + "grad_norm": 0.8425311661011755, + "learning_rate": 1.4798534351671556e-06, + "loss": 0.4158, + "step": 24672 + }, + { + "epoch": 0.7561910015937232, + "grad_norm": 1.7250533851804233, + "learning_rate": 1.4795009820174584e-06, + "loss": 0.6371, + "step": 24673 + }, + { + "epoch": 0.7562216501164644, + "grad_norm": 1.9245865574842744, + "learning_rate": 1.4791485635561382e-06, + "loss": 0.6118, + "step": 24674 + }, + { + "epoch": 0.7562522986392056, + "grad_norm": 0.7929902593928305, + "learning_rate": 1.4787961797866695e-06, + "loss": 0.3968, + "step": 24675 + }, + { + "epoch": 0.7562829471619468, + "grad_norm": 1.9012771767476844, + "learning_rate": 1.478443830712522e-06, + "loss": 0.5759, + "step": 24676 + }, + { + "epoch": 0.756313595684688, + "grad_norm": 1.7831055500284172, + "learning_rate": 1.4780915163371685e-06, + "loss": 0.5448, + "step": 24677 + }, + { + "epoch": 0.7563442442074292, + "grad_norm": 1.7239954430426363, + "learning_rate": 1.4777392366640813e-06, + "loss": 0.5104, + "step": 24678 + }, + { + "epoch": 0.7563748927301704, + "grad_norm": 1.7983766665302785, + "learning_rate": 1.4773869916967292e-06, + "loss": 0.5869, + "step": 24679 + }, + { + "epoch": 0.7564055412529116, + "grad_norm": 1.8382744057723133, + "learning_rate": 1.4770347814385865e-06, + "loss": 0.5653, + "step": 24680 + }, + { + "epoch": 0.7564361897756529, + "grad_norm": 1.6405046609067837, + "learning_rate": 1.4766826058931195e-06, + "loss": 0.6224, + "step": 24681 + }, + { + "epoch": 0.756466838298394, + "grad_norm": 1.6852507378364316, + "learning_rate": 1.4763304650638e-06, + "loss": 0.6316, + "step": 24682 + }, + { + "epoch": 0.7564974868211353, + "grad_norm": 2.117075172253822, + "learning_rate": 1.4759783589541e-06, + "loss": 0.5764, + "step": 24683 + }, + { + "epoch": 0.7565281353438764, + "grad_norm": 1.7980098201707089, + "learning_rate": 1.4756262875674855e-06, + "loss": 0.5066, + "step": 24684 
+ }, + { + "epoch": 0.7565587838666177, + "grad_norm": 1.8188649425688874, + "learning_rate": 1.4752742509074265e-06, + "loss": 0.5666, + "step": 24685 + }, + { + "epoch": 0.7565894323893588, + "grad_norm": 0.8033286801307418, + "learning_rate": 1.474922248977394e-06, + "loss": 0.4194, + "step": 24686 + }, + { + "epoch": 0.7566200809121001, + "grad_norm": 1.738430694952628, + "learning_rate": 1.4745702817808527e-06, + "loss": 0.5387, + "step": 24687 + }, + { + "epoch": 0.7566507294348412, + "grad_norm": 1.6741450785489254, + "learning_rate": 1.4742183493212726e-06, + "loss": 0.6702, + "step": 24688 + }, + { + "epoch": 0.7566813779575824, + "grad_norm": 1.9267989532397496, + "learning_rate": 1.4738664516021228e-06, + "loss": 0.6499, + "step": 24689 + }, + { + "epoch": 0.7567120264803237, + "grad_norm": 2.0183848136441425, + "learning_rate": 1.473514588626867e-06, + "loss": 0.7615, + "step": 24690 + }, + { + "epoch": 0.7567426750030648, + "grad_norm": 0.8030391933350816, + "learning_rate": 1.4731627603989762e-06, + "loss": 0.4089, + "step": 24691 + }, + { + "epoch": 0.7567733235258061, + "grad_norm": 1.9477816111756059, + "learning_rate": 1.4728109669219137e-06, + "loss": 0.6188, + "step": 24692 + }, + { + "epoch": 0.7568039720485472, + "grad_norm": 1.8437384362086209, + "learning_rate": 1.4724592081991473e-06, + "loss": 0.5897, + "step": 24693 + }, + { + "epoch": 0.7568346205712885, + "grad_norm": 0.774854524660453, + "learning_rate": 1.4721074842341444e-06, + "loss": 0.4026, + "step": 24694 + }, + { + "epoch": 0.7568652690940296, + "grad_norm": 1.703005427570352, + "learning_rate": 1.4717557950303669e-06, + "loss": 0.6248, + "step": 24695 + }, + { + "epoch": 0.7568959176167709, + "grad_norm": 1.8359933388741076, + "learning_rate": 1.4714041405912828e-06, + "loss": 0.5919, + "step": 24696 + }, + { + "epoch": 0.756926566139512, + "grad_norm": 2.0751143095412687, + "learning_rate": 1.471052520920358e-06, + "loss": 0.5808, + "step": 24697 + }, + { + "epoch": 0.7569572146622533, + "grad_norm": 1.806057008838536, + "learning_rate": 1.4707009360210544e-06, + "loss": 0.5747, + "step": 24698 + }, + { + "epoch": 0.7569878631849944, + "grad_norm": 1.8723842858505315, + "learning_rate": 1.470349385896837e-06, + "loss": 0.5969, + "step": 24699 + }, + { + "epoch": 0.7570185117077357, + "grad_norm": 1.7660501912102315, + "learning_rate": 1.469997870551172e-06, + "loss": 0.5984, + "step": 24700 + }, + { + "epoch": 0.7570491602304769, + "grad_norm": 1.8301619814637122, + "learning_rate": 1.4696463899875196e-06, + "loss": 0.6579, + "step": 24701 + }, + { + "epoch": 0.7570798087532181, + "grad_norm": 1.760177316722251, + "learning_rate": 1.4692949442093462e-06, + "loss": 0.5114, + "step": 24702 + }, + { + "epoch": 0.7571104572759593, + "grad_norm": 1.936463545654551, + "learning_rate": 1.4689435332201113e-06, + "loss": 0.5564, + "step": 24703 + }, + { + "epoch": 0.7571411057987005, + "grad_norm": 1.711193763553575, + "learning_rate": 1.4685921570232797e-06, + "loss": 0.5568, + "step": 24704 + }, + { + "epoch": 0.7571717543214417, + "grad_norm": 2.0063907975828577, + "learning_rate": 1.4682408156223144e-06, + "loss": 0.6441, + "step": 24705 + }, + { + "epoch": 0.7572024028441829, + "grad_norm": 1.8290375603813638, + "learning_rate": 1.467889509020674e-06, + "loss": 0.5596, + "step": 24706 + }, + { + "epoch": 0.7572330513669241, + "grad_norm": 2.108986627724803, + "learning_rate": 1.4675382372218227e-06, + "loss": 0.5842, + "step": 24707 + }, + { + "epoch": 0.7572636998896654, + "grad_norm": 1.71338857161079, 
+ "learning_rate": 1.4671870002292221e-06, + "loss": 0.6954, + "step": 24708 + }, + { + "epoch": 0.7572943484124065, + "grad_norm": 1.7973196237630913, + "learning_rate": 1.4668357980463305e-06, + "loss": 0.5638, + "step": 24709 + }, + { + "epoch": 0.7573249969351478, + "grad_norm": 2.1886221873410716, + "learning_rate": 1.4664846306766112e-06, + "loss": 0.6743, + "step": 24710 + }, + { + "epoch": 0.7573556454578889, + "grad_norm": 0.7472112088605755, + "learning_rate": 1.4661334981235215e-06, + "loss": 0.3879, + "step": 24711 + }, + { + "epoch": 0.7573862939806302, + "grad_norm": 1.7766886912585755, + "learning_rate": 1.4657824003905225e-06, + "loss": 0.4694, + "step": 24712 + }, + { + "epoch": 0.7574169425033713, + "grad_norm": 1.851418519151221, + "learning_rate": 1.4654313374810752e-06, + "loss": 0.6269, + "step": 24713 + }, + { + "epoch": 0.7574475910261126, + "grad_norm": 1.9277424404619412, + "learning_rate": 1.465080309398636e-06, + "loss": 0.5925, + "step": 24714 + }, + { + "epoch": 0.7574782395488537, + "grad_norm": 1.9350892763498928, + "learning_rate": 1.4647293161466652e-06, + "loss": 0.6088, + "step": 24715 + }, + { + "epoch": 0.757508888071595, + "grad_norm": 1.7171351865481506, + "learning_rate": 1.4643783577286225e-06, + "loss": 0.5771, + "step": 24716 + }, + { + "epoch": 0.7575395365943361, + "grad_norm": 1.845653795195688, + "learning_rate": 1.4640274341479621e-06, + "loss": 0.5826, + "step": 24717 + }, + { + "epoch": 0.7575701851170774, + "grad_norm": 1.9913968440621408, + "learning_rate": 1.4636765454081447e-06, + "loss": 0.6053, + "step": 24718 + }, + { + "epoch": 0.7576008336398186, + "grad_norm": 1.7960180871040636, + "learning_rate": 1.4633256915126287e-06, + "loss": 0.5633, + "step": 24719 + }, + { + "epoch": 0.7576314821625597, + "grad_norm": 1.8328809737150422, + "learning_rate": 1.462974872464868e-06, + "loss": 0.6381, + "step": 24720 + }, + { + "epoch": 0.757662130685301, + "grad_norm": 1.8393922586018288, + "learning_rate": 1.4626240882683224e-06, + "loss": 0.5282, + "step": 24721 + }, + { + "epoch": 0.7576927792080421, + "grad_norm": 1.9178077743594095, + "learning_rate": 1.4622733389264438e-06, + "loss": 0.6187, + "step": 24722 + }, + { + "epoch": 0.7577234277307834, + "grad_norm": 1.6211556606329525, + "learning_rate": 1.4619226244426938e-06, + "loss": 0.5677, + "step": 24723 + }, + { + "epoch": 0.7577540762535245, + "grad_norm": 1.8342053955457527, + "learning_rate": 1.4615719448205257e-06, + "loss": 0.5897, + "step": 24724 + }, + { + "epoch": 0.7577847247762658, + "grad_norm": 1.9020701964444686, + "learning_rate": 1.4612213000633929e-06, + "loss": 0.5903, + "step": 24725 + }, + { + "epoch": 0.757815373299007, + "grad_norm": 2.0804448492378222, + "learning_rate": 1.4608706901747527e-06, + "loss": 0.6823, + "step": 24726 + }, + { + "epoch": 0.7578460218217482, + "grad_norm": 1.5903663243662705, + "learning_rate": 1.46052011515806e-06, + "loss": 0.6373, + "step": 24727 + }, + { + "epoch": 0.7578766703444894, + "grad_norm": 0.7893823247481232, + "learning_rate": 1.4601695750167667e-06, + "loss": 0.3838, + "step": 24728 + }, + { + "epoch": 0.7579073188672306, + "grad_norm": 1.8042012649200998, + "learning_rate": 1.4598190697543286e-06, + "loss": 0.6062, + "step": 24729 + }, + { + "epoch": 0.7579379673899718, + "grad_norm": 1.8962805143439276, + "learning_rate": 1.4594685993741987e-06, + "loss": 0.6552, + "step": 24730 + }, + { + "epoch": 0.757968615912713, + "grad_norm": 1.859063883801317, + "learning_rate": 1.4591181638798318e-06, + "loss": 0.6068, + 
"step": 24731 + }, + { + "epoch": 0.7579992644354542, + "grad_norm": 1.7753169121031556, + "learning_rate": 1.4587677632746805e-06, + "loss": 0.5204, + "step": 24732 + }, + { + "epoch": 0.7580299129581954, + "grad_norm": 1.7214253701443523, + "learning_rate": 1.458417397562193e-06, + "loss": 0.712, + "step": 24733 + }, + { + "epoch": 0.7580605614809366, + "grad_norm": 1.960270196262818, + "learning_rate": 1.458067066745828e-06, + "loss": 0.5919, + "step": 24734 + }, + { + "epoch": 0.7580912100036779, + "grad_norm": 1.979938192989257, + "learning_rate": 1.457716770829034e-06, + "loss": 0.613, + "step": 24735 + }, + { + "epoch": 0.758121858526419, + "grad_norm": 2.4137558307553753, + "learning_rate": 1.4573665098152622e-06, + "loss": 0.6192, + "step": 24736 + }, + { + "epoch": 0.7581525070491603, + "grad_norm": 1.712240524353611, + "learning_rate": 1.4570162837079644e-06, + "loss": 0.6219, + "step": 24737 + }, + { + "epoch": 0.7581831555719014, + "grad_norm": 0.8222432565568124, + "learning_rate": 1.456666092510593e-06, + "loss": 0.4109, + "step": 24738 + }, + { + "epoch": 0.7582138040946427, + "grad_norm": 0.8010690553773673, + "learning_rate": 1.456315936226596e-06, + "loss": 0.3942, + "step": 24739 + }, + { + "epoch": 0.7582444526173838, + "grad_norm": 0.7932475611541061, + "learning_rate": 1.4559658148594246e-06, + "loss": 0.3917, + "step": 24740 + }, + { + "epoch": 0.7582751011401251, + "grad_norm": 1.7872839978604598, + "learning_rate": 1.4556157284125283e-06, + "loss": 0.5204, + "step": 24741 + }, + { + "epoch": 0.7583057496628662, + "grad_norm": 1.7533517601116677, + "learning_rate": 1.4552656768893593e-06, + "loss": 0.5188, + "step": 24742 + }, + { + "epoch": 0.7583363981856075, + "grad_norm": 0.8342079608546024, + "learning_rate": 1.4549156602933644e-06, + "loss": 0.4009, + "step": 24743 + }, + { + "epoch": 0.7583670467083486, + "grad_norm": 1.921266397765172, + "learning_rate": 1.4545656786279894e-06, + "loss": 0.6322, + "step": 24744 + }, + { + "epoch": 0.7583976952310899, + "grad_norm": 1.8427788100838607, + "learning_rate": 1.4542157318966894e-06, + "loss": 0.5749, + "step": 24745 + }, + { + "epoch": 0.7584283437538311, + "grad_norm": 1.7092636020844945, + "learning_rate": 1.4538658201029082e-06, + "loss": 0.6577, + "step": 24746 + }, + { + "epoch": 0.7584589922765723, + "grad_norm": 0.7901998474229884, + "learning_rate": 1.4535159432500933e-06, + "loss": 0.3881, + "step": 24747 + }, + { + "epoch": 0.7584896407993135, + "grad_norm": 1.8142924347668354, + "learning_rate": 1.4531661013416932e-06, + "loss": 0.5254, + "step": 24748 + }, + { + "epoch": 0.7585202893220547, + "grad_norm": 1.7644307108045698, + "learning_rate": 1.4528162943811552e-06, + "loss": 0.5341, + "step": 24749 + }, + { + "epoch": 0.7585509378447959, + "grad_norm": 1.6993125548785044, + "learning_rate": 1.452466522371927e-06, + "loss": 0.6167, + "step": 24750 + }, + { + "epoch": 0.758581586367537, + "grad_norm": 1.9781871558042705, + "learning_rate": 1.4521167853174523e-06, + "loss": 0.5762, + "step": 24751 + }, + { + "epoch": 0.7586122348902783, + "grad_norm": 1.9430667217809898, + "learning_rate": 1.4517670832211789e-06, + "loss": 0.539, + "step": 24752 + }, + { + "epoch": 0.7586428834130194, + "grad_norm": 1.7566465705242382, + "learning_rate": 1.4514174160865541e-06, + "loss": 0.5449, + "step": 24753 + }, + { + "epoch": 0.7586735319357607, + "grad_norm": 1.9351791427970815, + "learning_rate": 1.4510677839170213e-06, + "loss": 0.5468, + "step": 24754 + }, + { + "epoch": 0.7587041804585019, + "grad_norm": 
1.980453251820995, + "learning_rate": 1.4507181867160237e-06, + "loss": 0.6119, + "step": 24755 + }, + { + "epoch": 0.7587348289812431, + "grad_norm": 2.3391244161032994, + "learning_rate": 1.450368624487008e-06, + "loss": 0.6103, + "step": 24756 + }, + { + "epoch": 0.7587654775039843, + "grad_norm": 0.8203580643571775, + "learning_rate": 1.4500190972334188e-06, + "loss": 0.4117, + "step": 24757 + }, + { + "epoch": 0.7587961260267255, + "grad_norm": 1.8953479057452098, + "learning_rate": 1.449669604958701e-06, + "loss": 0.541, + "step": 24758 + }, + { + "epoch": 0.7588267745494667, + "grad_norm": 1.8487752483531885, + "learning_rate": 1.4493201476662956e-06, + "loss": 0.504, + "step": 24759 + }, + { + "epoch": 0.7588574230722079, + "grad_norm": 1.8384662370655451, + "learning_rate": 1.4489707253596468e-06, + "loss": 0.6134, + "step": 24760 + }, + { + "epoch": 0.7588880715949491, + "grad_norm": 1.6937491886795397, + "learning_rate": 1.4486213380422003e-06, + "loss": 0.6426, + "step": 24761 + }, + { + "epoch": 0.7589187201176903, + "grad_norm": 1.9042606326552984, + "learning_rate": 1.4482719857173961e-06, + "loss": 0.6454, + "step": 24762 + }, + { + "epoch": 0.7589493686404315, + "grad_norm": 0.8244348025938641, + "learning_rate": 1.4479226683886731e-06, + "loss": 0.3775, + "step": 24763 + }, + { + "epoch": 0.7589800171631728, + "grad_norm": 2.0917963475755244, + "learning_rate": 1.447573386059481e-06, + "loss": 0.6319, + "step": 24764 + }, + { + "epoch": 0.7590106656859139, + "grad_norm": 1.6374510639404336, + "learning_rate": 1.4472241387332565e-06, + "loss": 0.5725, + "step": 24765 + }, + { + "epoch": 0.7590413142086552, + "grad_norm": 2.038303793043331, + "learning_rate": 1.4468749264134401e-06, + "loss": 0.5514, + "step": 24766 + }, + { + "epoch": 0.7590719627313963, + "grad_norm": 1.6284843356956964, + "learning_rate": 1.4465257491034746e-06, + "loss": 0.4299, + "step": 24767 + }, + { + "epoch": 0.7591026112541376, + "grad_norm": 1.7713901250292368, + "learning_rate": 1.4461766068068e-06, + "loss": 0.5889, + "step": 24768 + }, + { + "epoch": 0.7591332597768787, + "grad_norm": 1.8633236429393083, + "learning_rate": 1.4458274995268578e-06, + "loss": 0.627, + "step": 24769 + }, + { + "epoch": 0.75916390829962, + "grad_norm": 1.7907910346405518, + "learning_rate": 1.4454784272670851e-06, + "loss": 0.6282, + "step": 24770 + }, + { + "epoch": 0.7591945568223611, + "grad_norm": 1.778010742258258, + "learning_rate": 1.4451293900309233e-06, + "loss": 0.5667, + "step": 24771 + }, + { + "epoch": 0.7592252053451024, + "grad_norm": 1.7305411993952287, + "learning_rate": 1.4447803878218126e-06, + "loss": 0.5888, + "step": 24772 + }, + { + "epoch": 0.7592558538678436, + "grad_norm": 1.9789453531059946, + "learning_rate": 1.4444314206431904e-06, + "loss": 0.5956, + "step": 24773 + }, + { + "epoch": 0.7592865023905848, + "grad_norm": 0.7852149191820815, + "learning_rate": 1.4440824884984917e-06, + "loss": 0.3978, + "step": 24774 + }, + { + "epoch": 0.759317150913326, + "grad_norm": 1.8946451484719578, + "learning_rate": 1.4437335913911615e-06, + "loss": 0.6011, + "step": 24775 + }, + { + "epoch": 0.7593477994360672, + "grad_norm": 1.9356736434699473, + "learning_rate": 1.4433847293246322e-06, + "loss": 0.5875, + "step": 24776 + }, + { + "epoch": 0.7593784479588084, + "grad_norm": 1.9811152827722696, + "learning_rate": 1.443035902302345e-06, + "loss": 0.637, + "step": 24777 + }, + { + "epoch": 0.7594090964815496, + "grad_norm": 1.8096242296074294, + "learning_rate": 1.4426871103277334e-06, + "loss": 
0.6052, + "step": 24778 + }, + { + "epoch": 0.7594397450042908, + "grad_norm": 1.7564603669580228, + "learning_rate": 1.4423383534042361e-06, + "loss": 0.551, + "step": 24779 + }, + { + "epoch": 0.759470393527032, + "grad_norm": 1.8457087650723463, + "learning_rate": 1.441989631535291e-06, + "loss": 0.566, + "step": 24780 + }, + { + "epoch": 0.7595010420497732, + "grad_norm": 0.7832115793019261, + "learning_rate": 1.4416409447243307e-06, + "loss": 0.391, + "step": 24781 + }, + { + "epoch": 0.7595316905725144, + "grad_norm": 1.8371786961142225, + "learning_rate": 1.4412922929747925e-06, + "loss": 0.5425, + "step": 24782 + }, + { + "epoch": 0.7595623390952556, + "grad_norm": 1.564121631914217, + "learning_rate": 1.4409436762901135e-06, + "loss": 0.4737, + "step": 24783 + }, + { + "epoch": 0.7595929876179968, + "grad_norm": 1.6030723367643902, + "learning_rate": 1.4405950946737258e-06, + "loss": 0.5089, + "step": 24784 + }, + { + "epoch": 0.759623636140738, + "grad_norm": 1.9539386283465006, + "learning_rate": 1.4402465481290646e-06, + "loss": 0.6093, + "step": 24785 + }, + { + "epoch": 0.7596542846634792, + "grad_norm": 2.035306341948478, + "learning_rate": 1.4398980366595672e-06, + "loss": 0.6813, + "step": 24786 + }, + { + "epoch": 0.7596849331862204, + "grad_norm": 2.020153089297788, + "learning_rate": 1.4395495602686631e-06, + "loss": 0.5841, + "step": 24787 + }, + { + "epoch": 0.7597155817089616, + "grad_norm": 1.7304206892961354, + "learning_rate": 1.4392011189597903e-06, + "loss": 0.6136, + "step": 24788 + }, + { + "epoch": 0.7597462302317028, + "grad_norm": 0.782238169184541, + "learning_rate": 1.4388527127363784e-06, + "loss": 0.3897, + "step": 24789 + }, + { + "epoch": 0.759776878754444, + "grad_norm": 1.770648198402609, + "learning_rate": 1.4385043416018618e-06, + "loss": 0.6552, + "step": 24790 + }, + { + "epoch": 0.7598075272771853, + "grad_norm": 1.6556922088465034, + "learning_rate": 1.438156005559675e-06, + "loss": 0.5668, + "step": 24791 + }, + { + "epoch": 0.7598381757999264, + "grad_norm": 0.7725598137860805, + "learning_rate": 1.437807704613246e-06, + "loss": 0.4067, + "step": 24792 + }, + { + "epoch": 0.7598688243226677, + "grad_norm": 0.8367428778761083, + "learning_rate": 1.4374594387660096e-06, + "loss": 0.3946, + "step": 24793 + }, + { + "epoch": 0.7598994728454088, + "grad_norm": 1.8077585379617718, + "learning_rate": 1.437111208021399e-06, + "loss": 0.6129, + "step": 24794 + }, + { + "epoch": 0.7599301213681501, + "grad_norm": 1.521918441181058, + "learning_rate": 1.4367630123828407e-06, + "loss": 0.5651, + "step": 24795 + }, + { + "epoch": 0.7599607698908912, + "grad_norm": 0.824561363487711, + "learning_rate": 1.4364148518537685e-06, + "loss": 0.4165, + "step": 24796 + }, + { + "epoch": 0.7599914184136325, + "grad_norm": 0.8145242630841576, + "learning_rate": 1.4360667264376144e-06, + "loss": 0.4024, + "step": 24797 + }, + { + "epoch": 0.7600220669363736, + "grad_norm": 1.7878674821972458, + "learning_rate": 1.435718636137805e-06, + "loss": 0.5923, + "step": 24798 + }, + { + "epoch": 0.7600527154591149, + "grad_norm": 0.7908658724696284, + "learning_rate": 1.4353705809577735e-06, + "loss": 0.391, + "step": 24799 + }, + { + "epoch": 0.760083363981856, + "grad_norm": 1.7497627549476622, + "learning_rate": 1.435022560900946e-06, + "loss": 0.4982, + "step": 24800 + }, + { + "epoch": 0.7601140125045973, + "grad_norm": 0.7644629105469888, + "learning_rate": 1.4346745759707531e-06, + "loss": 0.3999, + "step": 24801 + }, + { + "epoch": 0.7601446610273385, + "grad_norm": 
1.7188363113895992, + "learning_rate": 1.4343266261706257e-06, + "loss": 0.5263, + "step": 24802 + }, + { + "epoch": 0.7601753095500797, + "grad_norm": 1.9575518426543839, + "learning_rate": 1.4339787115039887e-06, + "loss": 0.5641, + "step": 24803 + }, + { + "epoch": 0.7602059580728209, + "grad_norm": 0.7663223554994002, + "learning_rate": 1.433630831974272e-06, + "loss": 0.4022, + "step": 24804 + }, + { + "epoch": 0.7602366065955621, + "grad_norm": 1.6644154594106835, + "learning_rate": 1.4332829875849047e-06, + "loss": 0.4906, + "step": 24805 + }, + { + "epoch": 0.7602672551183033, + "grad_norm": 1.853901642435731, + "learning_rate": 1.4329351783393114e-06, + "loss": 0.5756, + "step": 24806 + }, + { + "epoch": 0.7602979036410445, + "grad_norm": 1.7012388673246064, + "learning_rate": 1.4325874042409215e-06, + "loss": 0.5242, + "step": 24807 + }, + { + "epoch": 0.7603285521637857, + "grad_norm": 2.113676732905704, + "learning_rate": 1.4322396652931592e-06, + "loss": 0.6477, + "step": 24808 + }, + { + "epoch": 0.760359200686527, + "grad_norm": 0.8251993609303115, + "learning_rate": 1.4318919614994525e-06, + "loss": 0.4005, + "step": 24809 + }, + { + "epoch": 0.7603898492092681, + "grad_norm": 0.7813391839662355, + "learning_rate": 1.431544292863229e-06, + "loss": 0.3859, + "step": 24810 + }, + { + "epoch": 0.7604204977320094, + "grad_norm": 1.678479217918481, + "learning_rate": 1.4311966593879106e-06, + "loss": 0.5148, + "step": 24811 + }, + { + "epoch": 0.7604511462547505, + "grad_norm": 0.8143410180170565, + "learning_rate": 1.430849061076925e-06, + "loss": 0.3975, + "step": 24812 + }, + { + "epoch": 0.7604817947774917, + "grad_norm": 1.8140844601682906, + "learning_rate": 1.4305014979336983e-06, + "loss": 0.6019, + "step": 24813 + }, + { + "epoch": 0.7605124433002329, + "grad_norm": 0.778638496331109, + "learning_rate": 1.4301539699616523e-06, + "loss": 0.411, + "step": 24814 + }, + { + "epoch": 0.7605430918229741, + "grad_norm": 1.8397520277852732, + "learning_rate": 1.4298064771642123e-06, + "loss": 0.5794, + "step": 24815 + }, + { + "epoch": 0.7605737403457153, + "grad_norm": 2.1350998047518766, + "learning_rate": 1.4294590195448039e-06, + "loss": 0.6065, + "step": 24816 + }, + { + "epoch": 0.7606043888684565, + "grad_norm": 0.8148430868815184, + "learning_rate": 1.4291115971068482e-06, + "loss": 0.4232, + "step": 24817 + }, + { + "epoch": 0.7606350373911978, + "grad_norm": 1.7961943100148297, + "learning_rate": 1.4287642098537707e-06, + "loss": 0.6231, + "step": 24818 + }, + { + "epoch": 0.7606656859139389, + "grad_norm": 1.7310450321840625, + "learning_rate": 1.4284168577889924e-06, + "loss": 0.594, + "step": 24819 + }, + { + "epoch": 0.7606963344366802, + "grad_norm": 2.134739374410096, + "learning_rate": 1.4280695409159357e-06, + "loss": 0.6113, + "step": 24820 + }, + { + "epoch": 0.7607269829594213, + "grad_norm": 2.233427196526304, + "learning_rate": 1.4277222592380259e-06, + "loss": 0.6675, + "step": 24821 + }, + { + "epoch": 0.7607576314821626, + "grad_norm": 0.7965259999138242, + "learning_rate": 1.4273750127586811e-06, + "loss": 0.3832, + "step": 24822 + }, + { + "epoch": 0.7607882800049037, + "grad_norm": 1.9408997830210273, + "learning_rate": 1.4270278014813244e-06, + "loss": 0.5511, + "step": 24823 + }, + { + "epoch": 0.760818928527645, + "grad_norm": 1.957989102643923, + "learning_rate": 1.426680625409378e-06, + "loss": 0.595, + "step": 24824 + }, + { + "epoch": 0.7608495770503861, + "grad_norm": 1.974418949804892, + "learning_rate": 1.4263334845462606e-06, + "loss": 
0.6068, + "step": 24825 + }, + { + "epoch": 0.7608802255731274, + "grad_norm": 1.7594658962763288, + "learning_rate": 1.4259863788953938e-06, + "loss": 0.5689, + "step": 24826 + }, + { + "epoch": 0.7609108740958686, + "grad_norm": 1.8073737300056343, + "learning_rate": 1.4256393084601987e-06, + "loss": 0.5536, + "step": 24827 + }, + { + "epoch": 0.7609415226186098, + "grad_norm": 1.7493487369252028, + "learning_rate": 1.4252922732440926e-06, + "loss": 0.5955, + "step": 24828 + }, + { + "epoch": 0.760972171141351, + "grad_norm": 0.7906862577784701, + "learning_rate": 1.4249452732504987e-06, + "loss": 0.397, + "step": 24829 + }, + { + "epoch": 0.7610028196640922, + "grad_norm": 1.8354956849987498, + "learning_rate": 1.4245983084828296e-06, + "loss": 0.5829, + "step": 24830 + }, + { + "epoch": 0.7610334681868334, + "grad_norm": 0.7627335762876938, + "learning_rate": 1.4242513789445117e-06, + "loss": 0.4112, + "step": 24831 + }, + { + "epoch": 0.7610641167095746, + "grad_norm": 0.8317372577339103, + "learning_rate": 1.42390448463896e-06, + "loss": 0.386, + "step": 24832 + }, + { + "epoch": 0.7610947652323158, + "grad_norm": 1.7492095230029094, + "learning_rate": 1.423557625569591e-06, + "loss": 0.5422, + "step": 24833 + }, + { + "epoch": 0.761125413755057, + "grad_norm": 0.7514528705665021, + "learning_rate": 1.4232108017398232e-06, + "loss": 0.3749, + "step": 24834 + }, + { + "epoch": 0.7611560622777982, + "grad_norm": 1.9307100781188056, + "learning_rate": 1.4228640131530764e-06, + "loss": 0.5152, + "step": 24835 + }, + { + "epoch": 0.7611867108005395, + "grad_norm": 1.8200023435394361, + "learning_rate": 1.4225172598127645e-06, + "loss": 0.6405, + "step": 24836 + }, + { + "epoch": 0.7612173593232806, + "grad_norm": 1.8531711441765313, + "learning_rate": 1.4221705417223047e-06, + "loss": 0.6496, + "step": 24837 + }, + { + "epoch": 0.7612480078460219, + "grad_norm": 1.6195091896491216, + "learning_rate": 1.4218238588851147e-06, + "loss": 0.5242, + "step": 24838 + }, + { + "epoch": 0.761278656368763, + "grad_norm": 1.8224569150178365, + "learning_rate": 1.421477211304611e-06, + "loss": 0.5698, + "step": 24839 + }, + { + "epoch": 0.7613093048915043, + "grad_norm": 1.9490161469337277, + "learning_rate": 1.4211305989842079e-06, + "loss": 0.5948, + "step": 24840 + }, + { + "epoch": 0.7613399534142454, + "grad_norm": 1.9633288945671508, + "learning_rate": 1.4207840219273172e-06, + "loss": 0.588, + "step": 24841 + }, + { + "epoch": 0.7613706019369867, + "grad_norm": 1.837340614340622, + "learning_rate": 1.420437480137361e-06, + "loss": 0.3886, + "step": 24842 + }, + { + "epoch": 0.7614012504597278, + "grad_norm": 1.607728553270485, + "learning_rate": 1.4200909736177497e-06, + "loss": 0.5569, + "step": 24843 + }, + { + "epoch": 0.761431898982469, + "grad_norm": 2.470892865979138, + "learning_rate": 1.419744502371897e-06, + "loss": 0.6172, + "step": 24844 + }, + { + "epoch": 0.7614625475052103, + "grad_norm": 2.072498506200291, + "learning_rate": 1.4193980664032176e-06, + "loss": 0.5541, + "step": 24845 + }, + { + "epoch": 0.7614931960279514, + "grad_norm": 1.7408814855017278, + "learning_rate": 1.4190516657151266e-06, + "loss": 0.5833, + "step": 24846 + }, + { + "epoch": 0.7615238445506927, + "grad_norm": 1.8169459880995604, + "learning_rate": 1.4187053003110341e-06, + "loss": 0.5726, + "step": 24847 + }, + { + "epoch": 0.7615544930734338, + "grad_norm": 1.8851596592223785, + "learning_rate": 1.4183589701943545e-06, + "loss": 0.6063, + "step": 24848 + }, + { + "epoch": 0.7615851415961751, + 
"grad_norm": 1.7660484446943578, + "learning_rate": 1.4180126753685008e-06, + "loss": 0.578, + "step": 24849 + }, + { + "epoch": 0.7616157901189162, + "grad_norm": 1.8445433053567581, + "learning_rate": 1.4176664158368857e-06, + "loss": 0.6038, + "step": 24850 + }, + { + "epoch": 0.7616464386416575, + "grad_norm": 1.832939456536706, + "learning_rate": 1.4173201916029206e-06, + "loss": 0.5757, + "step": 24851 + }, + { + "epoch": 0.7616770871643986, + "grad_norm": 1.8365424999973972, + "learning_rate": 1.4169740026700146e-06, + "loss": 0.59, + "step": 24852 + }, + { + "epoch": 0.7617077356871399, + "grad_norm": 1.7005357371855532, + "learning_rate": 1.4166278490415802e-06, + "loss": 0.5794, + "step": 24853 + }, + { + "epoch": 0.761738384209881, + "grad_norm": 1.7602093771464948, + "learning_rate": 1.4162817307210303e-06, + "loss": 0.5338, + "step": 24854 + }, + { + "epoch": 0.7617690327326223, + "grad_norm": 2.0513266136399984, + "learning_rate": 1.4159356477117714e-06, + "loss": 0.6258, + "step": 24855 + }, + { + "epoch": 0.7617996812553635, + "grad_norm": 1.8379121534505374, + "learning_rate": 1.4155896000172164e-06, + "loss": 0.6696, + "step": 24856 + }, + { + "epoch": 0.7618303297781047, + "grad_norm": 1.7129515427300948, + "learning_rate": 1.4152435876407733e-06, + "loss": 0.5297, + "step": 24857 + }, + { + "epoch": 0.7618609783008459, + "grad_norm": 1.8524648750452837, + "learning_rate": 1.4148976105858548e-06, + "loss": 0.5596, + "step": 24858 + }, + { + "epoch": 0.7618916268235871, + "grad_norm": 1.911838077409093, + "learning_rate": 1.4145516688558669e-06, + "loss": 0.5268, + "step": 24859 + }, + { + "epoch": 0.7619222753463283, + "grad_norm": 1.7829234786685924, + "learning_rate": 1.4142057624542156e-06, + "loss": 0.6509, + "step": 24860 + }, + { + "epoch": 0.7619529238690695, + "grad_norm": 1.8111287699714333, + "learning_rate": 1.4138598913843154e-06, + "loss": 0.5419, + "step": 24861 + }, + { + "epoch": 0.7619835723918107, + "grad_norm": 1.9486554362360693, + "learning_rate": 1.413514055649572e-06, + "loss": 0.6206, + "step": 24862 + }, + { + "epoch": 0.762014220914552, + "grad_norm": 1.9116549083595824, + "learning_rate": 1.41316825525339e-06, + "loss": 0.616, + "step": 24863 + }, + { + "epoch": 0.7620448694372931, + "grad_norm": 1.8744680397346953, + "learning_rate": 1.4128224901991788e-06, + "loss": 0.688, + "step": 24864 + }, + { + "epoch": 0.7620755179600344, + "grad_norm": 1.688018071807526, + "learning_rate": 1.4124767604903472e-06, + "loss": 0.5936, + "step": 24865 + }, + { + "epoch": 0.7621061664827755, + "grad_norm": 2.0575074275335363, + "learning_rate": 1.412131066130299e-06, + "loss": 0.5957, + "step": 24866 + }, + { + "epoch": 0.7621368150055168, + "grad_norm": 1.8706998932558057, + "learning_rate": 1.4117854071224408e-06, + "loss": 0.575, + "step": 24867 + }, + { + "epoch": 0.7621674635282579, + "grad_norm": 1.8142252958157645, + "learning_rate": 1.4114397834701788e-06, + "loss": 0.5987, + "step": 24868 + }, + { + "epoch": 0.7621981120509992, + "grad_norm": 1.6803621728130749, + "learning_rate": 1.4110941951769208e-06, + "loss": 0.5675, + "step": 24869 + }, + { + "epoch": 0.7622287605737403, + "grad_norm": 1.9016181449495553, + "learning_rate": 1.4107486422460698e-06, + "loss": 0.6602, + "step": 24870 + }, + { + "epoch": 0.7622594090964816, + "grad_norm": 1.862390536293591, + "learning_rate": 1.4104031246810278e-06, + "loss": 0.6097, + "step": 24871 + }, + { + "epoch": 0.7622900576192227, + "grad_norm": 1.9153575412322905, + "learning_rate": 
1.4100576424852052e-06, + "loss": 0.5965, + "step": 24872 + }, + { + "epoch": 0.762320706141964, + "grad_norm": 2.030390655144899, + "learning_rate": 1.4097121956620036e-06, + "loss": 0.6209, + "step": 24873 + }, + { + "epoch": 0.7623513546647052, + "grad_norm": 1.6499079821264857, + "learning_rate": 1.4093667842148245e-06, + "loss": 0.5878, + "step": 24874 + }, + { + "epoch": 0.7623820031874463, + "grad_norm": 0.8382257019697181, + "learning_rate": 1.4090214081470726e-06, + "loss": 0.4023, + "step": 24875 + }, + { + "epoch": 0.7624126517101876, + "grad_norm": 1.6823903597411678, + "learning_rate": 1.4086760674621525e-06, + "loss": 0.5896, + "step": 24876 + }, + { + "epoch": 0.7624433002329287, + "grad_norm": 1.7707289787206142, + "learning_rate": 1.4083307621634673e-06, + "loss": 0.5531, + "step": 24877 + }, + { + "epoch": 0.76247394875567, + "grad_norm": 2.023966947816422, + "learning_rate": 1.407985492254416e-06, + "loss": 0.5443, + "step": 24878 + }, + { + "epoch": 0.7625045972784111, + "grad_norm": 1.9497429049593025, + "learning_rate": 1.407640257738403e-06, + "loss": 0.7019, + "step": 24879 + }, + { + "epoch": 0.7625352458011524, + "grad_norm": 2.018745094943233, + "learning_rate": 1.4072950586188316e-06, + "loss": 0.5569, + "step": 24880 + }, + { + "epoch": 0.7625658943238935, + "grad_norm": 1.9865936534014355, + "learning_rate": 1.4069498948991007e-06, + "loss": 0.5767, + "step": 24881 + }, + { + "epoch": 0.7625965428466348, + "grad_norm": 1.5147020880424993, + "learning_rate": 1.4066047665826089e-06, + "loss": 0.513, + "step": 24882 + }, + { + "epoch": 0.762627191369376, + "grad_norm": 1.7779868110485133, + "learning_rate": 1.406259673672763e-06, + "loss": 0.6013, + "step": 24883 + }, + { + "epoch": 0.7626578398921172, + "grad_norm": 1.9107065378707624, + "learning_rate": 1.405914616172958e-06, + "loss": 0.5394, + "step": 24884 + }, + { + "epoch": 0.7626884884148584, + "grad_norm": 1.8953103274434866, + "learning_rate": 1.4055695940865983e-06, + "loss": 0.6444, + "step": 24885 + }, + { + "epoch": 0.7627191369375996, + "grad_norm": 1.949382250160696, + "learning_rate": 1.405224607417079e-06, + "loss": 0.5751, + "step": 24886 + }, + { + "epoch": 0.7627497854603408, + "grad_norm": 1.8139128050511866, + "learning_rate": 1.4048796561678012e-06, + "loss": 0.6092, + "step": 24887 + }, + { + "epoch": 0.762780433983082, + "grad_norm": 2.0030579117961325, + "learning_rate": 1.4045347403421656e-06, + "loss": 0.6021, + "step": 24888 + }, + { + "epoch": 0.7628110825058232, + "grad_norm": 1.9322257169094863, + "learning_rate": 1.4041898599435678e-06, + "loss": 0.6244, + "step": 24889 + }, + { + "epoch": 0.7628417310285645, + "grad_norm": 1.9079506309442842, + "learning_rate": 1.4038450149754074e-06, + "loss": 0.6374, + "step": 24890 + }, + { + "epoch": 0.7628723795513056, + "grad_norm": 0.7751866672068466, + "learning_rate": 1.4035002054410835e-06, + "loss": 0.3769, + "step": 24891 + }, + { + "epoch": 0.7629030280740469, + "grad_norm": 1.9289707920804815, + "learning_rate": 1.4031554313439921e-06, + "loss": 0.6611, + "step": 24892 + }, + { + "epoch": 0.762933676596788, + "grad_norm": 1.7605791458647393, + "learning_rate": 1.402810692687528e-06, + "loss": 0.5367, + "step": 24893 + }, + { + "epoch": 0.7629643251195293, + "grad_norm": 1.7971450227666037, + "learning_rate": 1.402465989475093e-06, + "loss": 0.517, + "step": 24894 + }, + { + "epoch": 0.7629949736422704, + "grad_norm": 1.835132311164271, + "learning_rate": 1.4021213217100805e-06, + "loss": 0.5406, + "step": 24895 + }, + { + "epoch": 
0.7630256221650117, + "grad_norm": 1.9413492059729383, + "learning_rate": 1.4017766893958878e-06, + "loss": 0.6367, + "step": 24896 + }, + { + "epoch": 0.7630562706877528, + "grad_norm": 2.0068634059008104, + "learning_rate": 1.4014320925359086e-06, + "loss": 0.6407, + "step": 24897 + }, + { + "epoch": 0.7630869192104941, + "grad_norm": 2.009243072679063, + "learning_rate": 1.4010875311335398e-06, + "loss": 0.5591, + "step": 24898 + }, + { + "epoch": 0.7631175677332352, + "grad_norm": 0.7946735595618257, + "learning_rate": 1.4007430051921783e-06, + "loss": 0.4045, + "step": 24899 + }, + { + "epoch": 0.7631482162559765, + "grad_norm": 0.7926675857560941, + "learning_rate": 1.400398514715215e-06, + "loss": 0.4131, + "step": 24900 + }, + { + "epoch": 0.7631788647787177, + "grad_norm": 0.8254913957398085, + "learning_rate": 1.4000540597060463e-06, + "loss": 0.42, + "step": 24901 + }, + { + "epoch": 0.7632095133014589, + "grad_norm": 1.7655095874184261, + "learning_rate": 1.3997096401680672e-06, + "loss": 0.6443, + "step": 24902 + }, + { + "epoch": 0.7632401618242001, + "grad_norm": 1.811723545465187, + "learning_rate": 1.399365256104669e-06, + "loss": 0.5312, + "step": 24903 + }, + { + "epoch": 0.7632708103469413, + "grad_norm": 1.640536256960796, + "learning_rate": 1.3990209075192473e-06, + "loss": 0.4824, + "step": 24904 + }, + { + "epoch": 0.7633014588696825, + "grad_norm": 2.0396752712735777, + "learning_rate": 1.3986765944151932e-06, + "loss": 0.6559, + "step": 24905 + }, + { + "epoch": 0.7633321073924236, + "grad_norm": 1.8155598877926213, + "learning_rate": 1.3983323167958996e-06, + "loss": 0.6331, + "step": 24906 + }, + { + "epoch": 0.7633627559151649, + "grad_norm": 1.6742470289880542, + "learning_rate": 1.3979880746647607e-06, + "loss": 0.6241, + "step": 24907 + }, + { + "epoch": 0.763393404437906, + "grad_norm": 0.8016619679732142, + "learning_rate": 1.3976438680251652e-06, + "loss": 0.4129, + "step": 24908 + }, + { + "epoch": 0.7634240529606473, + "grad_norm": 1.786936972968552, + "learning_rate": 1.397299696880507e-06, + "loss": 0.4815, + "step": 24909 + }, + { + "epoch": 0.7634547014833885, + "grad_norm": 0.7904584006646125, + "learning_rate": 1.3969555612341773e-06, + "loss": 0.3966, + "step": 24910 + }, + { + "epoch": 0.7634853500061297, + "grad_norm": 0.7664196777056085, + "learning_rate": 1.3966114610895653e-06, + "loss": 0.4025, + "step": 24911 + }, + { + "epoch": 0.7635159985288709, + "grad_norm": 1.7386995880128968, + "learning_rate": 1.3962673964500629e-06, + "loss": 0.5752, + "step": 24912 + }, + { + "epoch": 0.7635466470516121, + "grad_norm": 1.9574897453463687, + "learning_rate": 1.3959233673190608e-06, + "loss": 0.6648, + "step": 24913 + }, + { + "epoch": 0.7635772955743533, + "grad_norm": 1.8033543790682454, + "learning_rate": 1.3955793736999468e-06, + "loss": 0.5454, + "step": 24914 + }, + { + "epoch": 0.7636079440970945, + "grad_norm": 1.7921021169841689, + "learning_rate": 1.3952354155961128e-06, + "loss": 0.5919, + "step": 24915 + }, + { + "epoch": 0.7636385926198357, + "grad_norm": 1.7940200854921244, + "learning_rate": 1.3948914930109453e-06, + "loss": 0.5398, + "step": 24916 + }, + { + "epoch": 0.763669241142577, + "grad_norm": 1.8419970316868077, + "learning_rate": 1.3945476059478336e-06, + "loss": 0.6725, + "step": 24917 + }, + { + "epoch": 0.7636998896653181, + "grad_norm": 1.7989633078344427, + "learning_rate": 1.394203754410169e-06, + "loss": 0.5062, + "step": 24918 + }, + { + "epoch": 0.7637305381880594, + "grad_norm": 1.689869252045394, + 
"learning_rate": 1.3938599384013357e-06, + "loss": 0.5243, + "step": 24919 + }, + { + "epoch": 0.7637611867108005, + "grad_norm": 1.7406261108926826, + "learning_rate": 1.393516157924723e-06, + "loss": 0.5876, + "step": 24920 + }, + { + "epoch": 0.7637918352335418, + "grad_norm": 1.8659111949616365, + "learning_rate": 1.3931724129837205e-06, + "loss": 0.6036, + "step": 24921 + }, + { + "epoch": 0.7638224837562829, + "grad_norm": 1.7840587962010874, + "learning_rate": 1.3928287035817105e-06, + "loss": 0.5602, + "step": 24922 + }, + { + "epoch": 0.7638531322790242, + "grad_norm": 1.6784079624662644, + "learning_rate": 1.3924850297220831e-06, + "loss": 0.5253, + "step": 24923 + }, + { + "epoch": 0.7638837808017653, + "grad_norm": 0.7978713679239939, + "learning_rate": 1.3921413914082249e-06, + "loss": 0.3901, + "step": 24924 + }, + { + "epoch": 0.7639144293245066, + "grad_norm": 1.7761228431199294, + "learning_rate": 1.391797788643519e-06, + "loss": 0.552, + "step": 24925 + }, + { + "epoch": 0.7639450778472477, + "grad_norm": 2.195269470754084, + "learning_rate": 1.3914542214313547e-06, + "loss": 0.7443, + "step": 24926 + }, + { + "epoch": 0.763975726369989, + "grad_norm": 1.8290910732605856, + "learning_rate": 1.391110689775113e-06, + "loss": 0.4554, + "step": 24927 + }, + { + "epoch": 0.7640063748927302, + "grad_norm": 1.6365743622737143, + "learning_rate": 1.3907671936781814e-06, + "loss": 0.4741, + "step": 24928 + }, + { + "epoch": 0.7640370234154714, + "grad_norm": 0.7661233031689668, + "learning_rate": 1.3904237331439457e-06, + "loss": 0.3822, + "step": 24929 + }, + { + "epoch": 0.7640676719382126, + "grad_norm": 1.7707143726336574, + "learning_rate": 1.3900803081757875e-06, + "loss": 0.5674, + "step": 24930 + }, + { + "epoch": 0.7640983204609538, + "grad_norm": 1.8854113527872722, + "learning_rate": 1.389736918777091e-06, + "loss": 0.6393, + "step": 24931 + }, + { + "epoch": 0.764128968983695, + "grad_norm": 0.7853501345542698, + "learning_rate": 1.3893935649512419e-06, + "loss": 0.382, + "step": 24932 + }, + { + "epoch": 0.7641596175064362, + "grad_norm": 1.6968349376170786, + "learning_rate": 1.3890502467016204e-06, + "loss": 0.5008, + "step": 24933 + }, + { + "epoch": 0.7641902660291774, + "grad_norm": 1.8135977750663692, + "learning_rate": 1.3887069640316104e-06, + "loss": 0.5901, + "step": 24934 + }, + { + "epoch": 0.7642209145519187, + "grad_norm": 1.9253519340496636, + "learning_rate": 1.3883637169445967e-06, + "loss": 0.5547, + "step": 24935 + }, + { + "epoch": 0.7642515630746598, + "grad_norm": 1.8723802923178783, + "learning_rate": 1.3880205054439578e-06, + "loss": 0.5727, + "step": 24936 + }, + { + "epoch": 0.764282211597401, + "grad_norm": 1.6641551069489189, + "learning_rate": 1.3876773295330782e-06, + "loss": 0.5606, + "step": 24937 + }, + { + "epoch": 0.7643128601201422, + "grad_norm": 2.186230490885531, + "learning_rate": 1.3873341892153353e-06, + "loss": 0.5894, + "step": 24938 + }, + { + "epoch": 0.7643435086428834, + "grad_norm": 1.99684740516089, + "learning_rate": 1.386991084494116e-06, + "loss": 0.6365, + "step": 24939 + }, + { + "epoch": 0.7643741571656246, + "grad_norm": 1.948341414064031, + "learning_rate": 1.3866480153727978e-06, + "loss": 0.5975, + "step": 24940 + }, + { + "epoch": 0.7644048056883658, + "grad_norm": 2.082488809506079, + "learning_rate": 1.3863049818547604e-06, + "loss": 0.6778, + "step": 24941 + }, + { + "epoch": 0.764435454211107, + "grad_norm": 1.8905383719837394, + "learning_rate": 1.3859619839433836e-06, + "loss": 0.5496, + "step": 24942 
+ }, + { + "epoch": 0.7644661027338482, + "grad_norm": 1.679522309459, + "learning_rate": 1.3856190216420501e-06, + "loss": 0.5348, + "step": 24943 + }, + { + "epoch": 0.7644967512565894, + "grad_norm": 1.9591898774045122, + "learning_rate": 1.385276094954136e-06, + "loss": 0.5959, + "step": 24944 + }, + { + "epoch": 0.7645273997793306, + "grad_norm": 2.0046289658243404, + "learning_rate": 1.384933203883021e-06, + "loss": 0.6088, + "step": 24945 + }, + { + "epoch": 0.7645580483020719, + "grad_norm": 1.6515472559803983, + "learning_rate": 1.3845903484320855e-06, + "loss": 0.595, + "step": 24946 + }, + { + "epoch": 0.764588696824813, + "grad_norm": 1.87356018261785, + "learning_rate": 1.3842475286047047e-06, + "loss": 0.5425, + "step": 24947 + }, + { + "epoch": 0.7646193453475543, + "grad_norm": 1.8574832173746858, + "learning_rate": 1.3839047444042603e-06, + "loss": 0.5503, + "step": 24948 + }, + { + "epoch": 0.7646499938702954, + "grad_norm": 1.815715113601585, + "learning_rate": 1.3835619958341257e-06, + "loss": 0.5175, + "step": 24949 + }, + { + "epoch": 0.7646806423930367, + "grad_norm": 1.7053597681107722, + "learning_rate": 1.3832192828976798e-06, + "loss": 0.647, + "step": 24950 + }, + { + "epoch": 0.7647112909157778, + "grad_norm": 1.8839369254082865, + "learning_rate": 1.3828766055983018e-06, + "loss": 0.5997, + "step": 24951 + }, + { + "epoch": 0.7647419394385191, + "grad_norm": 2.1259932020469527, + "learning_rate": 1.3825339639393642e-06, + "loss": 0.6395, + "step": 24952 + }, + { + "epoch": 0.7647725879612602, + "grad_norm": 1.9538880732306139, + "learning_rate": 1.3821913579242453e-06, + "loss": 0.62, + "step": 24953 + }, + { + "epoch": 0.7648032364840015, + "grad_norm": 1.8954641269447183, + "learning_rate": 1.3818487875563218e-06, + "loss": 0.599, + "step": 24954 + }, + { + "epoch": 0.7648338850067427, + "grad_norm": 1.8581479147567135, + "learning_rate": 1.3815062528389667e-06, + "loss": 0.6401, + "step": 24955 + }, + { + "epoch": 0.7648645335294839, + "grad_norm": 1.7585899772996905, + "learning_rate": 1.381163753775558e-06, + "loss": 0.5093, + "step": 24956 + }, + { + "epoch": 0.7648951820522251, + "grad_norm": 1.9368746571468498, + "learning_rate": 1.380821290369465e-06, + "loss": 0.6063, + "step": 24957 + }, + { + "epoch": 0.7649258305749663, + "grad_norm": 1.8212893547183602, + "learning_rate": 1.3804788626240695e-06, + "loss": 0.6481, + "step": 24958 + }, + { + "epoch": 0.7649564790977075, + "grad_norm": 1.8471943574814345, + "learning_rate": 1.3801364705427417e-06, + "loss": 0.611, + "step": 24959 + }, + { + "epoch": 0.7649871276204487, + "grad_norm": 1.6366354055195504, + "learning_rate": 1.379794114128854e-06, + "loss": 0.5077, + "step": 24960 + }, + { + "epoch": 0.7650177761431899, + "grad_norm": 1.9886167066066627, + "learning_rate": 1.379451793385781e-06, + "loss": 0.5573, + "step": 24961 + }, + { + "epoch": 0.7650484246659311, + "grad_norm": 1.7052721162589617, + "learning_rate": 1.379109508316897e-06, + "loss": 0.4718, + "step": 24962 + }, + { + "epoch": 0.7650790731886723, + "grad_norm": 1.6573305060744423, + "learning_rate": 1.3787672589255724e-06, + "loss": 0.5949, + "step": 24963 + }, + { + "epoch": 0.7651097217114136, + "grad_norm": 1.8623359259163002, + "learning_rate": 1.3784250452151804e-06, + "loss": 0.5605, + "step": 24964 + }, + { + "epoch": 0.7651403702341547, + "grad_norm": 2.138568955772286, + "learning_rate": 1.3780828671890933e-06, + "loss": 0.5955, + "step": 24965 + }, + { + "epoch": 0.765171018756896, + "grad_norm": 1.770606758917957, + 
"learning_rate": 1.3777407248506835e-06, + "loss": 0.5609, + "step": 24966 + }, + { + "epoch": 0.7652016672796371, + "grad_norm": 1.6219610658808195, + "learning_rate": 1.3773986182033216e-06, + "loss": 0.6007, + "step": 24967 + }, + { + "epoch": 0.7652323158023783, + "grad_norm": 2.062042401434007, + "learning_rate": 1.377056547250375e-06, + "loss": 0.5887, + "step": 24968 + }, + { + "epoch": 0.7652629643251195, + "grad_norm": 1.7281666571258993, + "learning_rate": 1.3767145119952196e-06, + "loss": 0.578, + "step": 24969 + }, + { + "epoch": 0.7652936128478607, + "grad_norm": 1.695549173866923, + "learning_rate": 1.3763725124412236e-06, + "loss": 0.5596, + "step": 24970 + }, + { + "epoch": 0.7653242613706019, + "grad_norm": 0.8358313109499685, + "learning_rate": 1.3760305485917553e-06, + "loss": 0.3874, + "step": 24971 + }, + { + "epoch": 0.7653549098933431, + "grad_norm": 1.9773760412023451, + "learning_rate": 1.3756886204501845e-06, + "loss": 0.6383, + "step": 24972 + }, + { + "epoch": 0.7653855584160844, + "grad_norm": 1.776180197983091, + "learning_rate": 1.375346728019883e-06, + "loss": 0.5114, + "step": 24973 + }, + { + "epoch": 0.7654162069388255, + "grad_norm": 1.7206429052393233, + "learning_rate": 1.3750048713042159e-06, + "loss": 0.5974, + "step": 24974 + }, + { + "epoch": 0.7654468554615668, + "grad_norm": 1.8109806738816918, + "learning_rate": 1.3746630503065532e-06, + "loss": 0.5745, + "step": 24975 + }, + { + "epoch": 0.7654775039843079, + "grad_norm": 1.6376184225140737, + "learning_rate": 1.3743212650302629e-06, + "loss": 0.4595, + "step": 24976 + }, + { + "epoch": 0.7655081525070492, + "grad_norm": 1.9030023885816116, + "learning_rate": 1.373979515478715e-06, + "loss": 0.5463, + "step": 24977 + }, + { + "epoch": 0.7655388010297903, + "grad_norm": 1.9851312509309034, + "learning_rate": 1.3736378016552743e-06, + "loss": 0.5474, + "step": 24978 + }, + { + "epoch": 0.7655694495525316, + "grad_norm": 2.0011422820728906, + "learning_rate": 1.3732961235633047e-06, + "loss": 0.5119, + "step": 24979 + }, + { + "epoch": 0.7656000980752727, + "grad_norm": 1.886319637574287, + "learning_rate": 1.37295448120618e-06, + "loss": 0.5238, + "step": 24980 + }, + { + "epoch": 0.765630746598014, + "grad_norm": 1.835427342535112, + "learning_rate": 1.3726128745872625e-06, + "loss": 0.5593, + "step": 24981 + }, + { + "epoch": 0.7656613951207552, + "grad_norm": 2.0234757492932007, + "learning_rate": 1.3722713037099173e-06, + "loss": 0.6276, + "step": 24982 + }, + { + "epoch": 0.7656920436434964, + "grad_norm": 1.7706466914682515, + "learning_rate": 1.3719297685775108e-06, + "loss": 0.5303, + "step": 24983 + }, + { + "epoch": 0.7657226921662376, + "grad_norm": 1.7069671747858945, + "learning_rate": 1.3715882691934086e-06, + "loss": 0.5875, + "step": 24984 + }, + { + "epoch": 0.7657533406889788, + "grad_norm": 1.7221889681825104, + "learning_rate": 1.3712468055609774e-06, + "loss": 0.6354, + "step": 24985 + }, + { + "epoch": 0.76578398921172, + "grad_norm": 1.7149581213550205, + "learning_rate": 1.3709053776835778e-06, + "loss": 0.5285, + "step": 24986 + }, + { + "epoch": 0.7658146377344612, + "grad_norm": 0.7867504339292976, + "learning_rate": 1.3705639855645764e-06, + "loss": 0.3962, + "step": 24987 + }, + { + "epoch": 0.7658452862572024, + "grad_norm": 1.9498518748650309, + "learning_rate": 1.3702226292073385e-06, + "loss": 0.6176, + "step": 24988 + }, + { + "epoch": 0.7658759347799436, + "grad_norm": 1.810154040592565, + "learning_rate": 1.3698813086152252e-06, + "loss": 0.5494, + "step": 
24989 + }, + { + "epoch": 0.7659065833026848, + "grad_norm": 2.0106717950931112, + "learning_rate": 1.3695400237915974e-06, + "loss": 0.6785, + "step": 24990 + }, + { + "epoch": 0.7659372318254261, + "grad_norm": 1.9206931515427932, + "learning_rate": 1.369198774739824e-06, + "loss": 0.6312, + "step": 24991 + }, + { + "epoch": 0.7659678803481672, + "grad_norm": 1.8213263704124822, + "learning_rate": 1.3688575614632616e-06, + "loss": 0.5899, + "step": 24992 + }, + { + "epoch": 0.7659985288709085, + "grad_norm": 1.925406979545765, + "learning_rate": 1.368516383965277e-06, + "loss": 0.594, + "step": 24993 + }, + { + "epoch": 0.7660291773936496, + "grad_norm": 1.027416616118653, + "learning_rate": 1.3681752422492279e-06, + "loss": 0.397, + "step": 24994 + }, + { + "epoch": 0.7660598259163909, + "grad_norm": 1.9375022515189784, + "learning_rate": 1.3678341363184772e-06, + "loss": 0.589, + "step": 24995 + }, + { + "epoch": 0.766090474439132, + "grad_norm": 1.6775654046842896, + "learning_rate": 1.367493066176388e-06, + "loss": 0.5655, + "step": 24996 + }, + { + "epoch": 0.7661211229618733, + "grad_norm": 2.0143985826993713, + "learning_rate": 1.367152031826317e-06, + "loss": 0.5462, + "step": 24997 + }, + { + "epoch": 0.7661517714846144, + "grad_norm": 1.7436908061512446, + "learning_rate": 1.3668110332716272e-06, + "loss": 0.535, + "step": 24998 + }, + { + "epoch": 0.7661824200073556, + "grad_norm": 1.7780124882162225, + "learning_rate": 1.3664700705156792e-06, + "loss": 0.5654, + "step": 24999 + }, + { + "epoch": 0.7662130685300969, + "grad_norm": 1.8359017665130042, + "learning_rate": 1.3661291435618307e-06, + "loss": 0.5372, + "step": 25000 + }, + { + "epoch": 0.766243717052838, + "grad_norm": 0.7599362439109247, + "learning_rate": 1.3657882524134409e-06, + "loss": 0.3919, + "step": 25001 + }, + { + "epoch": 0.7662743655755793, + "grad_norm": 1.950051300816729, + "learning_rate": 1.3654473970738684e-06, + "loss": 0.617, + "step": 25002 + }, + { + "epoch": 0.7663050140983204, + "grad_norm": 1.6064222133867156, + "learning_rate": 1.3651065775464734e-06, + "loss": 0.5919, + "step": 25003 + }, + { + "epoch": 0.7663356626210617, + "grad_norm": 0.8083131392172285, + "learning_rate": 1.3647657938346143e-06, + "loss": 0.3939, + "step": 25004 + }, + { + "epoch": 0.7663663111438028, + "grad_norm": 1.99104240718104, + "learning_rate": 1.3644250459416464e-06, + "loss": 0.5931, + "step": 25005 + }, + { + "epoch": 0.7663969596665441, + "grad_norm": 1.849676338954872, + "learning_rate": 1.3640843338709292e-06, + "loss": 0.539, + "step": 25006 + }, + { + "epoch": 0.7664276081892852, + "grad_norm": 1.9300284558530951, + "learning_rate": 1.3637436576258206e-06, + "loss": 0.6219, + "step": 25007 + }, + { + "epoch": 0.7664582567120265, + "grad_norm": 1.787486475835619, + "learning_rate": 1.3634030172096762e-06, + "loss": 0.5299, + "step": 25008 + }, + { + "epoch": 0.7664889052347676, + "grad_norm": 2.719130083603761, + "learning_rate": 1.363062412625849e-06, + "loss": 0.5836, + "step": 25009 + }, + { + "epoch": 0.7665195537575089, + "grad_norm": 1.7826575998061274, + "learning_rate": 1.3627218438777017e-06, + "loss": 0.5106, + "step": 25010 + }, + { + "epoch": 0.7665502022802501, + "grad_norm": 1.9679967817907902, + "learning_rate": 1.3623813109685852e-06, + "loss": 0.5539, + "step": 25011 + }, + { + "epoch": 0.7665808508029913, + "grad_norm": 0.7929543058464603, + "learning_rate": 1.3620408139018575e-06, + "loss": 0.4067, + "step": 25012 + }, + { + "epoch": 0.7666114993257325, + "grad_norm": 
1.7411468072653273, + "learning_rate": 1.3617003526808714e-06, + "loss": 0.572, + "step": 25013 + }, + { + "epoch": 0.7666421478484737, + "grad_norm": 1.9857293080083227, + "learning_rate": 1.3613599273089828e-06, + "loss": 0.6323, + "step": 25014 + }, + { + "epoch": 0.7666727963712149, + "grad_norm": 1.760997552350771, + "learning_rate": 1.361019537789547e-06, + "loss": 0.532, + "step": 25015 + }, + { + "epoch": 0.7667034448939561, + "grad_norm": 1.7303941977071142, + "learning_rate": 1.3606791841259153e-06, + "loss": 0.5672, + "step": 25016 + }, + { + "epoch": 0.7667340934166973, + "grad_norm": 0.8585467630504745, + "learning_rate": 1.360338866321443e-06, + "loss": 0.42, + "step": 25017 + }, + { + "epoch": 0.7667647419394386, + "grad_norm": 1.9813884293843076, + "learning_rate": 1.359998584379484e-06, + "loss": 0.7092, + "step": 25018 + }, + { + "epoch": 0.7667953904621797, + "grad_norm": 1.912273989338425, + "learning_rate": 1.3596583383033906e-06, + "loss": 0.64, + "step": 25019 + }, + { + "epoch": 0.766826038984921, + "grad_norm": 1.7765105189569934, + "learning_rate": 1.3593181280965124e-06, + "loss": 0.6446, + "step": 25020 + }, + { + "epoch": 0.7668566875076621, + "grad_norm": 1.9998521420385176, + "learning_rate": 1.3589779537622066e-06, + "loss": 0.6255, + "step": 25021 + }, + { + "epoch": 0.7668873360304034, + "grad_norm": 1.7671518244230278, + "learning_rate": 1.3586378153038215e-06, + "loss": 0.5569, + "step": 25022 + }, + { + "epoch": 0.7669179845531445, + "grad_norm": 1.8177763045662474, + "learning_rate": 1.358297712724711e-06, + "loss": 0.5463, + "step": 25023 + }, + { + "epoch": 0.7669486330758858, + "grad_norm": 0.7983133599028462, + "learning_rate": 1.3579576460282234e-06, + "loss": 0.3969, + "step": 25024 + }, + { + "epoch": 0.7669792815986269, + "grad_norm": 1.5572272332400088, + "learning_rate": 1.3576176152177112e-06, + "loss": 0.5055, + "step": 25025 + }, + { + "epoch": 0.7670099301213682, + "grad_norm": 1.6476886122946852, + "learning_rate": 1.357277620296526e-06, + "loss": 0.5769, + "step": 25026 + }, + { + "epoch": 0.7670405786441093, + "grad_norm": 1.788360369831937, + "learning_rate": 1.3569376612680146e-06, + "loss": 0.5462, + "step": 25027 + }, + { + "epoch": 0.7670712271668506, + "grad_norm": 2.0967009328534143, + "learning_rate": 1.3565977381355288e-06, + "loss": 0.6125, + "step": 25028 + }, + { + "epoch": 0.7671018756895918, + "grad_norm": 0.8086664896128498, + "learning_rate": 1.3562578509024194e-06, + "loss": 0.3955, + "step": 25029 + }, + { + "epoch": 0.7671325242123329, + "grad_norm": 2.2900778282330663, + "learning_rate": 1.3559179995720318e-06, + "loss": 0.6044, + "step": 25030 + }, + { + "epoch": 0.7671631727350742, + "grad_norm": 1.8253323207265895, + "learning_rate": 1.3555781841477167e-06, + "loss": 0.5957, + "step": 25031 + }, + { + "epoch": 0.7671938212578153, + "grad_norm": 1.990042988511186, + "learning_rate": 1.3552384046328238e-06, + "loss": 0.5792, + "step": 25032 + }, + { + "epoch": 0.7672244697805566, + "grad_norm": 1.8001337975455454, + "learning_rate": 1.354898661030698e-06, + "loss": 0.5668, + "step": 25033 + }, + { + "epoch": 0.7672551183032977, + "grad_norm": 1.8162724777850392, + "learning_rate": 1.3545589533446897e-06, + "loss": 0.6343, + "step": 25034 + }, + { + "epoch": 0.767285766826039, + "grad_norm": 1.7864769511953467, + "learning_rate": 1.354219281578143e-06, + "loss": 0.5381, + "step": 25035 + }, + { + "epoch": 0.7673164153487801, + "grad_norm": 2.0212939857628704, + "learning_rate": 1.3538796457344066e-06, + "loss": 
0.5639, + "step": 25036 + }, + { + "epoch": 0.7673470638715214, + "grad_norm": 1.8078154619502598, + "learning_rate": 1.3535400458168292e-06, + "loss": 0.6313, + "step": 25037 + }, + { + "epoch": 0.7673777123942626, + "grad_norm": 1.619304183263403, + "learning_rate": 1.3532004818287525e-06, + "loss": 0.5847, + "step": 25038 + }, + { + "epoch": 0.7674083609170038, + "grad_norm": 1.717259727056899, + "learning_rate": 1.3528609537735244e-06, + "loss": 0.592, + "step": 25039 + }, + { + "epoch": 0.767439009439745, + "grad_norm": 1.9390771563359432, + "learning_rate": 1.3525214616544924e-06, + "loss": 0.656, + "step": 25040 + }, + { + "epoch": 0.7674696579624862, + "grad_norm": 1.9650551100575124, + "learning_rate": 1.3521820054749979e-06, + "loss": 0.6183, + "step": 25041 + }, + { + "epoch": 0.7675003064852274, + "grad_norm": 0.8305294997430789, + "learning_rate": 1.3518425852383876e-06, + "loss": 0.4051, + "step": 25042 + }, + { + "epoch": 0.7675309550079686, + "grad_norm": 1.8285480704083557, + "learning_rate": 1.3515032009480072e-06, + "loss": 0.5441, + "step": 25043 + }, + { + "epoch": 0.7675616035307098, + "grad_norm": 1.8739123266019768, + "learning_rate": 1.351163852607198e-06, + "loss": 0.5983, + "step": 25044 + }, + { + "epoch": 0.767592252053451, + "grad_norm": 1.921740929509455, + "learning_rate": 1.3508245402193065e-06, + "loss": 0.6149, + "step": 25045 + }, + { + "epoch": 0.7676229005761922, + "grad_norm": 1.762178114112741, + "learning_rate": 1.3504852637876725e-06, + "loss": 0.5213, + "step": 25046 + }, + { + "epoch": 0.7676535490989335, + "grad_norm": 1.9797529596866856, + "learning_rate": 1.350146023315641e-06, + "loss": 0.5582, + "step": 25047 + }, + { + "epoch": 0.7676841976216746, + "grad_norm": 1.860893228261341, + "learning_rate": 1.3498068188065566e-06, + "loss": 0.608, + "step": 25048 + }, + { + "epoch": 0.7677148461444159, + "grad_norm": 1.8422089062812523, + "learning_rate": 1.3494676502637577e-06, + "loss": 0.6117, + "step": 25049 + }, + { + "epoch": 0.767745494667157, + "grad_norm": 1.7808569612585983, + "learning_rate": 1.3491285176905882e-06, + "loss": 0.6052, + "step": 25050 + }, + { + "epoch": 0.7677761431898983, + "grad_norm": 0.7685529704192198, + "learning_rate": 1.3487894210903907e-06, + "loss": 0.393, + "step": 25051 + }, + { + "epoch": 0.7678067917126394, + "grad_norm": 2.075117551483464, + "learning_rate": 1.3484503604665038e-06, + "loss": 0.5736, + "step": 25052 + }, + { + "epoch": 0.7678374402353807, + "grad_norm": 0.7569987195139367, + "learning_rate": 1.3481113358222718e-06, + "loss": 0.3773, + "step": 25053 + }, + { + "epoch": 0.7678680887581218, + "grad_norm": 0.7638078820690204, + "learning_rate": 1.3477723471610315e-06, + "loss": 0.4005, + "step": 25054 + }, + { + "epoch": 0.7678987372808631, + "grad_norm": 2.013138505678402, + "learning_rate": 1.3474333944861245e-06, + "loss": 0.65, + "step": 25055 + }, + { + "epoch": 0.7679293858036043, + "grad_norm": 2.008261487119537, + "learning_rate": 1.347094477800892e-06, + "loss": 0.6294, + "step": 25056 + }, + { + "epoch": 0.7679600343263455, + "grad_norm": 1.7038356978473588, + "learning_rate": 1.3467555971086715e-06, + "loss": 0.5537, + "step": 25057 + }, + { + "epoch": 0.7679906828490867, + "grad_norm": 1.8802323208424283, + "learning_rate": 1.3464167524128025e-06, + "loss": 0.5806, + "step": 25058 + }, + { + "epoch": 0.7680213313718279, + "grad_norm": 2.113386761652852, + "learning_rate": 1.3460779437166255e-06, + "loss": 0.6419, + "step": 25059 + }, + { + "epoch": 0.7680519798945691, + "grad_norm": 
1.886147002869556, + "learning_rate": 1.3457391710234757e-06, + "loss": 0.6225, + "step": 25060 + }, + { + "epoch": 0.7680826284173102, + "grad_norm": 1.8252843183226588, + "learning_rate": 1.3454004343366932e-06, + "loss": 0.5984, + "step": 25061 + }, + { + "epoch": 0.7681132769400515, + "grad_norm": 1.853061330908429, + "learning_rate": 1.3450617336596166e-06, + "loss": 0.6196, + "step": 25062 + }, + { + "epoch": 0.7681439254627926, + "grad_norm": 1.8127145024461528, + "learning_rate": 1.3447230689955803e-06, + "loss": 0.6087, + "step": 25063 + }, + { + "epoch": 0.7681745739855339, + "grad_norm": 2.2738084993950016, + "learning_rate": 1.3443844403479244e-06, + "loss": 0.6186, + "step": 25064 + }, + { + "epoch": 0.768205222508275, + "grad_norm": 1.71226716776165, + "learning_rate": 1.3440458477199813e-06, + "loss": 0.6042, + "step": 25065 + }, + { + "epoch": 0.7682358710310163, + "grad_norm": 3.195409046615833, + "learning_rate": 1.3437072911150927e-06, + "loss": 0.5162, + "step": 25066 + }, + { + "epoch": 0.7682665195537575, + "grad_norm": 1.6488650195804755, + "learning_rate": 1.3433687705365917e-06, + "loss": 0.4884, + "step": 25067 + }, + { + "epoch": 0.7682971680764987, + "grad_norm": 1.7740319310596602, + "learning_rate": 1.3430302859878119e-06, + "loss": 0.6398, + "step": 25068 + }, + { + "epoch": 0.7683278165992399, + "grad_norm": 1.9768840022584226, + "learning_rate": 1.3426918374720904e-06, + "loss": 0.5974, + "step": 25069 + }, + { + "epoch": 0.7683584651219811, + "grad_norm": 2.0338426040367183, + "learning_rate": 1.342353424992764e-06, + "loss": 0.6033, + "step": 25070 + }, + { + "epoch": 0.7683891136447223, + "grad_norm": 1.8781727426325283, + "learning_rate": 1.342015048553163e-06, + "loss": 0.5501, + "step": 25071 + }, + { + "epoch": 0.7684197621674635, + "grad_norm": 2.0023400400374993, + "learning_rate": 1.3416767081566244e-06, + "loss": 0.6245, + "step": 25072 + }, + { + "epoch": 0.7684504106902047, + "grad_norm": 1.9989378005295733, + "learning_rate": 1.3413384038064826e-06, + "loss": 0.5749, + "step": 25073 + }, + { + "epoch": 0.768481059212946, + "grad_norm": 0.7806923124017259, + "learning_rate": 1.3410001355060676e-06, + "loss": 0.3941, + "step": 25074 + }, + { + "epoch": 0.7685117077356871, + "grad_norm": 1.7957500311022772, + "learning_rate": 1.340661903258716e-06, + "loss": 0.581, + "step": 25075 + }, + { + "epoch": 0.7685423562584284, + "grad_norm": 1.6993892467560625, + "learning_rate": 1.340323707067756e-06, + "loss": 0.5803, + "step": 25076 + }, + { + "epoch": 0.7685730047811695, + "grad_norm": 1.8703205882867806, + "learning_rate": 1.3399855469365264e-06, + "loss": 0.5944, + "step": 25077 + }, + { + "epoch": 0.7686036533039108, + "grad_norm": 1.7396213374482372, + "learning_rate": 1.3396474228683554e-06, + "loss": 0.5411, + "step": 25078 + }, + { + "epoch": 0.7686343018266519, + "grad_norm": 0.8706865333779221, + "learning_rate": 1.3393093348665732e-06, + "loss": 0.3932, + "step": 25079 + }, + { + "epoch": 0.7686649503493932, + "grad_norm": 1.9011549408745643, + "learning_rate": 1.338971282934513e-06, + "loss": 0.5766, + "step": 25080 + }, + { + "epoch": 0.7686955988721343, + "grad_norm": 2.0004899350120904, + "learning_rate": 1.3386332670755065e-06, + "loss": 0.581, + "step": 25081 + }, + { + "epoch": 0.7687262473948756, + "grad_norm": 1.925578895948059, + "learning_rate": 1.3382952872928823e-06, + "loss": 0.6468, + "step": 25082 + }, + { + "epoch": 0.7687568959176168, + "grad_norm": 1.6865581073122644, + "learning_rate": 1.3379573435899713e-06, + "loss": 
0.5501, + "step": 25083 + }, + { + "epoch": 0.768787544440358, + "grad_norm": 1.7667782829708678, + "learning_rate": 1.3376194359701034e-06, + "loss": 0.5419, + "step": 25084 + }, + { + "epoch": 0.7688181929630992, + "grad_norm": 0.8015191028894406, + "learning_rate": 1.33728156443661e-06, + "loss": 0.3769, + "step": 25085 + }, + { + "epoch": 0.7688488414858404, + "grad_norm": 1.6295311830599521, + "learning_rate": 1.3369437289928184e-06, + "loss": 0.6247, + "step": 25086 + }, + { + "epoch": 0.7688794900085816, + "grad_norm": 0.8354130804991641, + "learning_rate": 1.336605929642056e-06, + "loss": 0.3883, + "step": 25087 + }, + { + "epoch": 0.7689101385313228, + "grad_norm": 1.7274948630613522, + "learning_rate": 1.3362681663876526e-06, + "loss": 0.5009, + "step": 25088 + }, + { + "epoch": 0.768940787054064, + "grad_norm": 2.028807509612038, + "learning_rate": 1.3359304392329374e-06, + "loss": 0.629, + "step": 25089 + }, + { + "epoch": 0.7689714355768053, + "grad_norm": 1.625224182401249, + "learning_rate": 1.3355927481812364e-06, + "loss": 0.5199, + "step": 25090 + }, + { + "epoch": 0.7690020840995464, + "grad_norm": 1.815427830371705, + "learning_rate": 1.335255093235877e-06, + "loss": 0.5125, + "step": 25091 + }, + { + "epoch": 0.7690327326222876, + "grad_norm": 1.919661129742888, + "learning_rate": 1.3349174744001875e-06, + "loss": 0.6936, + "step": 25092 + }, + { + "epoch": 0.7690633811450288, + "grad_norm": 0.7881825747876735, + "learning_rate": 1.334579891677495e-06, + "loss": 0.395, + "step": 25093 + }, + { + "epoch": 0.76909402966777, + "grad_norm": 1.966538782669029, + "learning_rate": 1.3342423450711246e-06, + "loss": 0.5661, + "step": 25094 + }, + { + "epoch": 0.7691246781905112, + "grad_norm": 0.8403403723897411, + "learning_rate": 1.3339048345843992e-06, + "loss": 0.4286, + "step": 25095 + }, + { + "epoch": 0.7691553267132524, + "grad_norm": 1.5687302660532834, + "learning_rate": 1.3335673602206506e-06, + "loss": 0.5659, + "step": 25096 + }, + { + "epoch": 0.7691859752359936, + "grad_norm": 1.8524029452859812, + "learning_rate": 1.3332299219832011e-06, + "loss": 0.5341, + "step": 25097 + }, + { + "epoch": 0.7692166237587348, + "grad_norm": 1.8367280180236798, + "learning_rate": 1.3328925198753739e-06, + "loss": 0.6201, + "step": 25098 + }, + { + "epoch": 0.769247272281476, + "grad_norm": 1.8121280535976945, + "learning_rate": 1.332555153900495e-06, + "loss": 0.5387, + "step": 25099 + }, + { + "epoch": 0.7692779208042172, + "grad_norm": 1.7398149466410464, + "learning_rate": 1.3322178240618893e-06, + "loss": 0.525, + "step": 25100 + }, + { + "epoch": 0.7693085693269585, + "grad_norm": 1.8021509807465161, + "learning_rate": 1.331880530362879e-06, + "loss": 0.5587, + "step": 25101 + }, + { + "epoch": 0.7693392178496996, + "grad_norm": 0.8214413065993182, + "learning_rate": 1.3315432728067884e-06, + "loss": 0.4166, + "step": 25102 + }, + { + "epoch": 0.7693698663724409, + "grad_norm": 1.746528440108579, + "learning_rate": 1.3312060513969399e-06, + "loss": 0.5537, + "step": 25103 + }, + { + "epoch": 0.769400514895182, + "grad_norm": 2.0468201883701673, + "learning_rate": 1.3308688661366592e-06, + "loss": 0.4943, + "step": 25104 + }, + { + "epoch": 0.7694311634179233, + "grad_norm": 1.940709993400627, + "learning_rate": 1.3305317170292658e-06, + "loss": 0.6103, + "step": 25105 + }, + { + "epoch": 0.7694618119406644, + "grad_norm": 1.609875934352975, + "learning_rate": 1.3301946040780794e-06, + "loss": 0.5031, + "step": 25106 + }, + { + "epoch": 0.7694924604634057, + "grad_norm": 
2.074506825863726, + "learning_rate": 1.3298575272864277e-06, + "loss": 0.6479, + "step": 25107 + }, + { + "epoch": 0.7695231089861468, + "grad_norm": 0.7651906106961331, + "learning_rate": 1.3295204866576289e-06, + "loss": 0.4197, + "step": 25108 + }, + { + "epoch": 0.7695537575088881, + "grad_norm": 1.915782533030814, + "learning_rate": 1.3291834821950017e-06, + "loss": 0.5425, + "step": 25109 + }, + { + "epoch": 0.7695844060316293, + "grad_norm": 1.762282345589327, + "learning_rate": 1.3288465139018696e-06, + "loss": 0.6507, + "step": 25110 + }, + { + "epoch": 0.7696150545543705, + "grad_norm": 0.8029195064117757, + "learning_rate": 1.3285095817815518e-06, + "loss": 0.3927, + "step": 25111 + }, + { + "epoch": 0.7696457030771117, + "grad_norm": 1.9333618870473308, + "learning_rate": 1.32817268583737e-06, + "loss": 0.5521, + "step": 25112 + }, + { + "epoch": 0.7696763515998529, + "grad_norm": 1.790951473767084, + "learning_rate": 1.3278358260726409e-06, + "loss": 0.6472, + "step": 25113 + }, + { + "epoch": 0.7697070001225941, + "grad_norm": 1.9455523266378874, + "learning_rate": 1.3274990024906847e-06, + "loss": 0.5794, + "step": 25114 + }, + { + "epoch": 0.7697376486453353, + "grad_norm": 1.875695449893981, + "learning_rate": 1.3271622150948222e-06, + "loss": 0.5504, + "step": 25115 + }, + { + "epoch": 0.7697682971680765, + "grad_norm": 0.8183999305818805, + "learning_rate": 1.32682546388837e-06, + "loss": 0.4152, + "step": 25116 + }, + { + "epoch": 0.7697989456908177, + "grad_norm": 1.7975350093648113, + "learning_rate": 1.326488748874643e-06, + "loss": 0.6007, + "step": 25117 + }, + { + "epoch": 0.7698295942135589, + "grad_norm": 0.8110545878491089, + "learning_rate": 1.3261520700569658e-06, + "loss": 0.3898, + "step": 25118 + }, + { + "epoch": 0.7698602427363002, + "grad_norm": 1.7414977385531847, + "learning_rate": 1.3258154274386502e-06, + "loss": 0.4696, + "step": 25119 + }, + { + "epoch": 0.7698908912590413, + "grad_norm": 0.7932710059629492, + "learning_rate": 1.3254788210230175e-06, + "loss": 0.4128, + "step": 25120 + }, + { + "epoch": 0.7699215397817826, + "grad_norm": 1.7691356688390245, + "learning_rate": 1.3251422508133805e-06, + "loss": 0.6126, + "step": 25121 + }, + { + "epoch": 0.7699521883045237, + "grad_norm": 1.4792288323854283, + "learning_rate": 1.3248057168130567e-06, + "loss": 0.4834, + "step": 25122 + }, + { + "epoch": 0.7699828368272649, + "grad_norm": 0.8204129790629034, + "learning_rate": 1.3244692190253644e-06, + "loss": 0.4004, + "step": 25123 + }, + { + "epoch": 0.7700134853500061, + "grad_norm": 1.784837306127818, + "learning_rate": 1.3241327574536157e-06, + "loss": 0.5451, + "step": 25124 + }, + { + "epoch": 0.7700441338727473, + "grad_norm": 2.021496175885507, + "learning_rate": 1.3237963321011272e-06, + "loss": 0.5835, + "step": 25125 + }, + { + "epoch": 0.7700747823954885, + "grad_norm": 1.8109332020422575, + "learning_rate": 1.3234599429712159e-06, + "loss": 0.5138, + "step": 25126 + }, + { + "epoch": 0.7701054309182297, + "grad_norm": 1.9754487999011936, + "learning_rate": 1.323123590067194e-06, + "loss": 0.608, + "step": 25127 + }, + { + "epoch": 0.770136079440971, + "grad_norm": 1.832800986833745, + "learning_rate": 1.3227872733923736e-06, + "loss": 0.6143, + "step": 25128 + }, + { + "epoch": 0.7701667279637121, + "grad_norm": 1.8035724319220423, + "learning_rate": 1.322450992950074e-06, + "loss": 0.5332, + "step": 25129 + }, + { + "epoch": 0.7701973764864534, + "grad_norm": 2.1915920190995686, + "learning_rate": 1.3221147487436036e-06, + "loss": 
0.6606, + "step": 25130 + }, + { + "epoch": 0.7702280250091945, + "grad_norm": 1.8691302055977495, + "learning_rate": 1.3217785407762801e-06, + "loss": 0.5488, + "step": 25131 + }, + { + "epoch": 0.7702586735319358, + "grad_norm": 1.927916313368889, + "learning_rate": 1.3214423690514117e-06, + "loss": 0.5455, + "step": 25132 + }, + { + "epoch": 0.7702893220546769, + "grad_norm": 1.5863815787030369, + "learning_rate": 1.3211062335723128e-06, + "loss": 0.561, + "step": 25133 + }, + { + "epoch": 0.7703199705774182, + "grad_norm": 1.7923042079264104, + "learning_rate": 1.3207701343422968e-06, + "loss": 0.6026, + "step": 25134 + }, + { + "epoch": 0.7703506191001593, + "grad_norm": 1.5633297656603813, + "learning_rate": 1.320434071364673e-06, + "loss": 0.5888, + "step": 25135 + }, + { + "epoch": 0.7703812676229006, + "grad_norm": 1.9189576524866003, + "learning_rate": 1.3200980446427536e-06, + "loss": 0.6122, + "step": 25136 + }, + { + "epoch": 0.7704119161456418, + "grad_norm": 1.509399055705542, + "learning_rate": 1.3197620541798512e-06, + "loss": 0.4344, + "step": 25137 + }, + { + "epoch": 0.770442564668383, + "grad_norm": 1.9333745857123787, + "learning_rate": 1.319426099979273e-06, + "loss": 0.5742, + "step": 25138 + }, + { + "epoch": 0.7704732131911242, + "grad_norm": 2.0369775517964985, + "learning_rate": 1.319090182044333e-06, + "loss": 0.5803, + "step": 25139 + }, + { + "epoch": 0.7705038617138654, + "grad_norm": 1.7864622844327716, + "learning_rate": 1.3187543003783383e-06, + "loss": 0.5486, + "step": 25140 + }, + { + "epoch": 0.7705345102366066, + "grad_norm": 2.037153688783522, + "learning_rate": 1.3184184549845985e-06, + "loss": 0.5967, + "step": 25141 + }, + { + "epoch": 0.7705651587593478, + "grad_norm": 1.89530205627128, + "learning_rate": 1.3180826458664253e-06, + "loss": 0.5977, + "step": 25142 + }, + { + "epoch": 0.770595807282089, + "grad_norm": 1.9854668209791273, + "learning_rate": 1.3177468730271247e-06, + "loss": 0.5757, + "step": 25143 + }, + { + "epoch": 0.7706264558048302, + "grad_norm": 2.0809534690920857, + "learning_rate": 1.317411136470006e-06, + "loss": 0.5248, + "step": 25144 + }, + { + "epoch": 0.7706571043275714, + "grad_norm": 1.8484533812370452, + "learning_rate": 1.317075436198379e-06, + "loss": 0.5758, + "step": 25145 + }, + { + "epoch": 0.7706877528503127, + "grad_norm": 1.7770667989146467, + "learning_rate": 1.316739772215549e-06, + "loss": 0.534, + "step": 25146 + }, + { + "epoch": 0.7707184013730538, + "grad_norm": 1.506135802539274, + "learning_rate": 1.3164041445248244e-06, + "loss": 0.4542, + "step": 25147 + }, + { + "epoch": 0.7707490498957951, + "grad_norm": 0.7690890612129693, + "learning_rate": 1.3160685531295132e-06, + "loss": 0.3907, + "step": 25148 + }, + { + "epoch": 0.7707796984185362, + "grad_norm": 2.0235112769192782, + "learning_rate": 1.3157329980329204e-06, + "loss": 0.6204, + "step": 25149 + }, + { + "epoch": 0.7708103469412775, + "grad_norm": 1.7597133967173135, + "learning_rate": 1.315397479238354e-06, + "loss": 0.5186, + "step": 25150 + }, + { + "epoch": 0.7708409954640186, + "grad_norm": 1.9863582323859232, + "learning_rate": 1.3150619967491173e-06, + "loss": 0.5904, + "step": 25151 + }, + { + "epoch": 0.7708716439867599, + "grad_norm": 0.791352680816449, + "learning_rate": 1.314726550568518e-06, + "loss": 0.4154, + "step": 25152 + }, + { + "epoch": 0.770902292509501, + "grad_norm": 0.8473609365074987, + "learning_rate": 1.3143911406998621e-06, + "loss": 0.3945, + "step": 25153 + }, + { + "epoch": 0.7709329410322422, + "grad_norm": 
0.8225721666440826, + "learning_rate": 1.3140557671464522e-06, + "loss": 0.4036, + "step": 25154 + }, + { + "epoch": 0.7709635895549835, + "grad_norm": 1.8896029275394945, + "learning_rate": 1.3137204299115935e-06, + "loss": 0.5708, + "step": 25155 + }, + { + "epoch": 0.7709942380777246, + "grad_norm": 1.8649557654145839, + "learning_rate": 1.3133851289985922e-06, + "loss": 0.5994, + "step": 25156 + }, + { + "epoch": 0.7710248866004659, + "grad_norm": 0.7971353070794513, + "learning_rate": 1.3130498644107492e-06, + "loss": 0.4011, + "step": 25157 + }, + { + "epoch": 0.771055535123207, + "grad_norm": 1.7942397781359911, + "learning_rate": 1.3127146361513687e-06, + "loss": 0.6507, + "step": 25158 + }, + { + "epoch": 0.7710861836459483, + "grad_norm": 2.2917177853131223, + "learning_rate": 1.3123794442237564e-06, + "loss": 0.5664, + "step": 25159 + }, + { + "epoch": 0.7711168321686894, + "grad_norm": 1.7080906310100712, + "learning_rate": 1.3120442886312113e-06, + "loss": 0.5749, + "step": 25160 + }, + { + "epoch": 0.7711474806914307, + "grad_norm": 2.0310268340319273, + "learning_rate": 1.311709169377039e-06, + "loss": 0.5817, + "step": 25161 + }, + { + "epoch": 0.7711781292141718, + "grad_norm": 1.6217881013690547, + "learning_rate": 1.311374086464538e-06, + "loss": 0.5429, + "step": 25162 + }, + { + "epoch": 0.7712087777369131, + "grad_norm": 0.7896038262247373, + "learning_rate": 1.3110390398970124e-06, + "loss": 0.3994, + "step": 25163 + }, + { + "epoch": 0.7712394262596542, + "grad_norm": 1.9672898781066392, + "learning_rate": 1.3107040296777645e-06, + "loss": 0.6162, + "step": 25164 + }, + { + "epoch": 0.7712700747823955, + "grad_norm": 1.7744119921002077, + "learning_rate": 1.3103690558100918e-06, + "loss": 0.6073, + "step": 25165 + }, + { + "epoch": 0.7713007233051367, + "grad_norm": 1.833354219029408, + "learning_rate": 1.310034118297297e-06, + "loss": 0.5396, + "step": 25166 + }, + { + "epoch": 0.7713313718278779, + "grad_norm": 1.6388928764327004, + "learning_rate": 1.309699217142682e-06, + "loss": 0.5945, + "step": 25167 + }, + { + "epoch": 0.7713620203506191, + "grad_norm": 1.782935290583931, + "learning_rate": 1.3093643523495432e-06, + "loss": 0.468, + "step": 25168 + }, + { + "epoch": 0.7713926688733603, + "grad_norm": 2.038814206302105, + "learning_rate": 1.3090295239211815e-06, + "loss": 0.5833, + "step": 25169 + }, + { + "epoch": 0.7714233173961015, + "grad_norm": 2.0092808887208506, + "learning_rate": 1.3086947318608977e-06, + "loss": 0.5617, + "step": 25170 + }, + { + "epoch": 0.7714539659188427, + "grad_norm": 1.7690182965060646, + "learning_rate": 1.3083599761719878e-06, + "loss": 0.6258, + "step": 25171 + }, + { + "epoch": 0.7714846144415839, + "grad_norm": 2.0182097956848852, + "learning_rate": 1.3080252568577534e-06, + "loss": 0.637, + "step": 25172 + }, + { + "epoch": 0.7715152629643252, + "grad_norm": 0.8025354782342626, + "learning_rate": 1.3076905739214873e-06, + "loss": 0.4046, + "step": 25173 + }, + { + "epoch": 0.7715459114870663, + "grad_norm": 1.8497369097816874, + "learning_rate": 1.3073559273664937e-06, + "loss": 0.63, + "step": 25174 + }, + { + "epoch": 0.7715765600098076, + "grad_norm": 2.13286544703499, + "learning_rate": 1.3070213171960672e-06, + "loss": 0.6645, + "step": 25175 + }, + { + "epoch": 0.7716072085325487, + "grad_norm": 0.8363528542835371, + "learning_rate": 1.3066867434135033e-06, + "loss": 0.3978, + "step": 25176 + }, + { + "epoch": 0.77163785705529, + "grad_norm": 0.8049792058780855, + "learning_rate": 1.3063522060220995e-06, + "loss": 
0.4244, + "step": 25177 + }, + { + "epoch": 0.7716685055780311, + "grad_norm": 1.5755743430991784, + "learning_rate": 1.3060177050251537e-06, + "loss": 0.5443, + "step": 25178 + }, + { + "epoch": 0.7716991541007724, + "grad_norm": 1.7294873312009889, + "learning_rate": 1.3056832404259596e-06, + "loss": 0.5953, + "step": 25179 + }, + { + "epoch": 0.7717298026235135, + "grad_norm": 1.6418347677673977, + "learning_rate": 1.3053488122278136e-06, + "loss": 0.5997, + "step": 25180 + }, + { + "epoch": 0.7717604511462548, + "grad_norm": 1.9289396557148661, + "learning_rate": 1.3050144204340127e-06, + "loss": 0.579, + "step": 25181 + }, + { + "epoch": 0.771791099668996, + "grad_norm": 1.9233917959961448, + "learning_rate": 1.3046800650478487e-06, + "loss": 0.5682, + "step": 25182 + }, + { + "epoch": 0.7718217481917372, + "grad_norm": 1.8942675680410215, + "learning_rate": 1.304345746072619e-06, + "loss": 0.5506, + "step": 25183 + }, + { + "epoch": 0.7718523967144784, + "grad_norm": 1.957887222545395, + "learning_rate": 1.3040114635116141e-06, + "loss": 0.6125, + "step": 25184 + }, + { + "epoch": 0.7718830452372195, + "grad_norm": 0.7642759241438386, + "learning_rate": 1.3036772173681306e-06, + "loss": 0.3944, + "step": 25185 + }, + { + "epoch": 0.7719136937599608, + "grad_norm": 1.7021915654102604, + "learning_rate": 1.3033430076454623e-06, + "loss": 0.5466, + "step": 25186 + }, + { + "epoch": 0.7719443422827019, + "grad_norm": 1.9473032555941399, + "learning_rate": 1.3030088343469e-06, + "loss": 0.6465, + "step": 25187 + }, + { + "epoch": 0.7719749908054432, + "grad_norm": 1.7494982120386964, + "learning_rate": 1.3026746974757376e-06, + "loss": 0.642, + "step": 25188 + }, + { + "epoch": 0.7720056393281843, + "grad_norm": 0.7908975972930995, + "learning_rate": 1.3023405970352688e-06, + "loss": 0.4152, + "step": 25189 + }, + { + "epoch": 0.7720362878509256, + "grad_norm": 1.8476623029710508, + "learning_rate": 1.3020065330287823e-06, + "loss": 0.6144, + "step": 25190 + }, + { + "epoch": 0.7720669363736667, + "grad_norm": 0.7830730121747156, + "learning_rate": 1.3016725054595737e-06, + "loss": 0.3953, + "step": 25191 + }, + { + "epoch": 0.772097584896408, + "grad_norm": 1.9213237452983285, + "learning_rate": 1.3013385143309287e-06, + "loss": 0.5621, + "step": 25192 + }, + { + "epoch": 0.7721282334191492, + "grad_norm": 0.7936744575656439, + "learning_rate": 1.3010045596461451e-06, + "loss": 0.408, + "step": 25193 + }, + { + "epoch": 0.7721588819418904, + "grad_norm": 0.8207347864617894, + "learning_rate": 1.3006706414085096e-06, + "loss": 0.3972, + "step": 25194 + }, + { + "epoch": 0.7721895304646316, + "grad_norm": 0.8657235775280551, + "learning_rate": 1.3003367596213113e-06, + "loss": 0.3939, + "step": 25195 + }, + { + "epoch": 0.7722201789873728, + "grad_norm": 0.792065730121645, + "learning_rate": 1.3000029142878417e-06, + "loss": 0.4046, + "step": 25196 + }, + { + "epoch": 0.772250827510114, + "grad_norm": 1.9022495239892507, + "learning_rate": 1.2996691054113913e-06, + "loss": 0.7143, + "step": 25197 + }, + { + "epoch": 0.7722814760328552, + "grad_norm": 1.9848827833093392, + "learning_rate": 1.2993353329952468e-06, + "loss": 0.5737, + "step": 25198 + }, + { + "epoch": 0.7723121245555964, + "grad_norm": 1.5425531534981816, + "learning_rate": 1.2990015970426984e-06, + "loss": 0.4954, + "step": 25199 + }, + { + "epoch": 0.7723427730783377, + "grad_norm": 1.6903082681663735, + "learning_rate": 1.298667897557035e-06, + "loss": 0.6065, + "step": 25200 + }, + { + "epoch": 0.7723734216010788, + 
"grad_norm": 2.1602511735600536, + "learning_rate": 1.298334234541543e-06, + "loss": 0.5396, + "step": 25201 + }, + { + "epoch": 0.7724040701238201, + "grad_norm": 1.9329211569858877, + "learning_rate": 1.2980006079995117e-06, + "loss": 0.7009, + "step": 25202 + }, + { + "epoch": 0.7724347186465612, + "grad_norm": 1.922467576349303, + "learning_rate": 1.2976670179342248e-06, + "loss": 0.6093, + "step": 25203 + }, + { + "epoch": 0.7724653671693025, + "grad_norm": 1.8552669638655526, + "learning_rate": 1.2973334643489754e-06, + "loss": 0.5468, + "step": 25204 + }, + { + "epoch": 0.7724960156920436, + "grad_norm": 2.1404570232685574, + "learning_rate": 1.2969999472470468e-06, + "loss": 0.6461, + "step": 25205 + }, + { + "epoch": 0.7725266642147849, + "grad_norm": 1.9919472544290657, + "learning_rate": 1.2966664666317237e-06, + "loss": 0.5756, + "step": 25206 + }, + { + "epoch": 0.772557312737526, + "grad_norm": 0.814252881528571, + "learning_rate": 1.2963330225062925e-06, + "loss": 0.4051, + "step": 25207 + }, + { + "epoch": 0.7725879612602673, + "grad_norm": 1.6621817585924008, + "learning_rate": 1.2959996148740423e-06, + "loss": 0.5053, + "step": 25208 + }, + { + "epoch": 0.7726186097830084, + "grad_norm": 1.7943528800810638, + "learning_rate": 1.2956662437382534e-06, + "loss": 0.5495, + "step": 25209 + }, + { + "epoch": 0.7726492583057497, + "grad_norm": 1.8072410650464157, + "learning_rate": 1.2953329091022131e-06, + "loss": 0.6299, + "step": 25210 + }, + { + "epoch": 0.7726799068284909, + "grad_norm": 1.787450650374374, + "learning_rate": 1.2949996109692053e-06, + "loss": 0.5822, + "step": 25211 + }, + { + "epoch": 0.7727105553512321, + "grad_norm": 0.8058785729101846, + "learning_rate": 1.294666349342516e-06, + "loss": 0.4152, + "step": 25212 + }, + { + "epoch": 0.7727412038739733, + "grad_norm": 1.816776852496896, + "learning_rate": 1.294333124225427e-06, + "loss": 0.626, + "step": 25213 + }, + { + "epoch": 0.7727718523967145, + "grad_norm": 1.8179558327302203, + "learning_rate": 1.2939999356212191e-06, + "loss": 0.6287, + "step": 25214 + }, + { + "epoch": 0.7728025009194557, + "grad_norm": 1.8180649546110772, + "learning_rate": 1.2936667835331813e-06, + "loss": 0.5782, + "step": 25215 + }, + { + "epoch": 0.7728331494421968, + "grad_norm": 1.818864871990556, + "learning_rate": 1.2933336679645925e-06, + "loss": 0.5422, + "step": 25216 + }, + { + "epoch": 0.7728637979649381, + "grad_norm": 1.8181561033079656, + "learning_rate": 1.2930005889187342e-06, + "loss": 0.4776, + "step": 25217 + }, + { + "epoch": 0.7728944464876792, + "grad_norm": 1.9485803077646444, + "learning_rate": 1.2926675463988898e-06, + "loss": 0.5706, + "step": 25218 + }, + { + "epoch": 0.7729250950104205, + "grad_norm": 0.7789209170748856, + "learning_rate": 1.2923345404083398e-06, + "loss": 0.3908, + "step": 25219 + }, + { + "epoch": 0.7729557435331617, + "grad_norm": 1.9105703394485591, + "learning_rate": 1.2920015709503687e-06, + "loss": 0.604, + "step": 25220 + }, + { + "epoch": 0.7729863920559029, + "grad_norm": 0.777427508181924, + "learning_rate": 1.2916686380282528e-06, + "loss": 0.391, + "step": 25221 + }, + { + "epoch": 0.7730170405786441, + "grad_norm": 1.6448175896252384, + "learning_rate": 1.291335741645275e-06, + "loss": 0.574, + "step": 25222 + }, + { + "epoch": 0.7730476891013853, + "grad_norm": 1.8387549576572724, + "learning_rate": 1.291002881804716e-06, + "loss": 0.5435, + "step": 25223 + }, + { + "epoch": 0.7730783376241265, + "grad_norm": 1.8875114360246985, + "learning_rate": 
1.2906700585098548e-06, + "loss": 0.5016, + "step": 25224 + }, + { + "epoch": 0.7731089861468677, + "grad_norm": 1.7058954994968027, + "learning_rate": 1.2903372717639678e-06, + "loss": 0.5734, + "step": 25225 + }, + { + "epoch": 0.7731396346696089, + "grad_norm": 1.786136175965757, + "learning_rate": 1.2900045215703394e-06, + "loss": 0.5358, + "step": 25226 + }, + { + "epoch": 0.7731702831923501, + "grad_norm": 1.9684329110091126, + "learning_rate": 1.2896718079322462e-06, + "loss": 0.6747, + "step": 25227 + }, + { + "epoch": 0.7732009317150913, + "grad_norm": 1.7206181169045496, + "learning_rate": 1.289339130852964e-06, + "loss": 0.5797, + "step": 25228 + }, + { + "epoch": 0.7732315802378326, + "grad_norm": 1.7528130864379334, + "learning_rate": 1.2890064903357729e-06, + "loss": 0.6324, + "step": 25229 + }, + { + "epoch": 0.7732622287605737, + "grad_norm": 2.0509375556338236, + "learning_rate": 1.288673886383951e-06, + "loss": 0.611, + "step": 25230 + }, + { + "epoch": 0.773292877283315, + "grad_norm": 1.8337476666732782, + "learning_rate": 1.2883413190007753e-06, + "loss": 0.6476, + "step": 25231 + }, + { + "epoch": 0.7733235258060561, + "grad_norm": 1.9951921845912537, + "learning_rate": 1.2880087881895214e-06, + "loss": 0.6809, + "step": 25232 + }, + { + "epoch": 0.7733541743287974, + "grad_norm": 1.8042477484942085, + "learning_rate": 1.2876762939534665e-06, + "loss": 0.615, + "step": 25233 + }, + { + "epoch": 0.7733848228515385, + "grad_norm": 2.0341547173097174, + "learning_rate": 1.2873438362958884e-06, + "loss": 0.5872, + "step": 25234 + }, + { + "epoch": 0.7734154713742798, + "grad_norm": 2.2777187782612116, + "learning_rate": 1.2870114152200618e-06, + "loss": 0.6361, + "step": 25235 + }, + { + "epoch": 0.773446119897021, + "grad_norm": 1.8146033558463273, + "learning_rate": 1.2866790307292599e-06, + "loss": 0.5845, + "step": 25236 + }, + { + "epoch": 0.7734767684197622, + "grad_norm": 1.8932909267567732, + "learning_rate": 1.2863466828267596e-06, + "loss": 0.5802, + "step": 25237 + }, + { + "epoch": 0.7735074169425034, + "grad_norm": 0.7817662657365145, + "learning_rate": 1.2860143715158359e-06, + "loss": 0.409, + "step": 25238 + }, + { + "epoch": 0.7735380654652446, + "grad_norm": 1.8155609237017565, + "learning_rate": 1.2856820967997642e-06, + "loss": 0.5975, + "step": 25239 + }, + { + "epoch": 0.7735687139879858, + "grad_norm": 1.876652946486727, + "learning_rate": 1.2853498586818154e-06, + "loss": 0.7456, + "step": 25240 + }, + { + "epoch": 0.773599362510727, + "grad_norm": 1.6920544955522263, + "learning_rate": 1.285017657165265e-06, + "loss": 0.5758, + "step": 25241 + }, + { + "epoch": 0.7736300110334682, + "grad_norm": 1.919777428759319, + "learning_rate": 1.2846854922533874e-06, + "loss": 0.5455, + "step": 25242 + }, + { + "epoch": 0.7736606595562094, + "grad_norm": 1.9111518964808316, + "learning_rate": 1.284353363949455e-06, + "loss": 0.5183, + "step": 25243 + }, + { + "epoch": 0.7736913080789506, + "grad_norm": 2.0135418690561044, + "learning_rate": 1.2840212722567359e-06, + "loss": 0.6276, + "step": 25244 + }, + { + "epoch": 0.7737219566016919, + "grad_norm": 1.7470352431300553, + "learning_rate": 1.2836892171785093e-06, + "loss": 0.5764, + "step": 25245 + }, + { + "epoch": 0.773752605124433, + "grad_norm": 1.8917869028761416, + "learning_rate": 1.2833571987180421e-06, + "loss": 0.5708, + "step": 25246 + }, + { + "epoch": 0.7737832536471742, + "grad_norm": 2.076981307285056, + "learning_rate": 1.2830252168786089e-06, + "loss": 0.6708, + "step": 25247 + }, + { + 
"epoch": 0.7738139021699154, + "grad_norm": 1.7170823716808667, + "learning_rate": 1.2826932716634776e-06, + "loss": 0.537, + "step": 25248 + }, + { + "epoch": 0.7738445506926566, + "grad_norm": 1.7740738075697295, + "learning_rate": 1.2823613630759208e-06, + "loss": 0.5929, + "step": 25249 + }, + { + "epoch": 0.7738751992153978, + "grad_norm": 1.7927700762344878, + "learning_rate": 1.2820294911192098e-06, + "loss": 0.5623, + "step": 25250 + }, + { + "epoch": 0.773905847738139, + "grad_norm": 1.9892673815479138, + "learning_rate": 1.2816976557966127e-06, + "loss": 0.5299, + "step": 25251 + }, + { + "epoch": 0.7739364962608802, + "grad_norm": 1.7470520616335359, + "learning_rate": 1.2813658571113997e-06, + "loss": 0.5345, + "step": 25252 + }, + { + "epoch": 0.7739671447836214, + "grad_norm": 0.8225756679398746, + "learning_rate": 1.2810340950668415e-06, + "loss": 0.4172, + "step": 25253 + }, + { + "epoch": 0.7739977933063626, + "grad_norm": 1.6894990542942971, + "learning_rate": 1.2807023696662063e-06, + "loss": 0.5354, + "step": 25254 + }, + { + "epoch": 0.7740284418291038, + "grad_norm": 1.895398619092306, + "learning_rate": 1.280370680912759e-06, + "loss": 0.6181, + "step": 25255 + }, + { + "epoch": 0.7740590903518451, + "grad_norm": 1.718641634572524, + "learning_rate": 1.2800390288097742e-06, + "loss": 0.5901, + "step": 25256 + }, + { + "epoch": 0.7740897388745862, + "grad_norm": 1.130993285187929, + "learning_rate": 1.2797074133605153e-06, + "loss": 0.3686, + "step": 25257 + }, + { + "epoch": 0.7741203873973275, + "grad_norm": 1.9686195385264997, + "learning_rate": 1.2793758345682522e-06, + "loss": 0.5534, + "step": 25258 + }, + { + "epoch": 0.7741510359200686, + "grad_norm": 1.9613323250470733, + "learning_rate": 1.27904429243625e-06, + "loss": 0.5095, + "step": 25259 + }, + { + "epoch": 0.7741816844428099, + "grad_norm": 1.8704695127895958, + "learning_rate": 1.2787127869677762e-06, + "loss": 0.611, + "step": 25260 + }, + { + "epoch": 0.774212332965551, + "grad_norm": 1.7949462487341816, + "learning_rate": 1.2783813181660986e-06, + "loss": 0.4778, + "step": 25261 + }, + { + "epoch": 0.7742429814882923, + "grad_norm": 1.9985538212616563, + "learning_rate": 1.2780498860344814e-06, + "loss": 0.5741, + "step": 25262 + }, + { + "epoch": 0.7742736300110334, + "grad_norm": 1.6040422998571389, + "learning_rate": 1.2777184905761901e-06, + "loss": 0.5466, + "step": 25263 + }, + { + "epoch": 0.7743042785337747, + "grad_norm": 1.854474507376789, + "learning_rate": 1.277387131794493e-06, + "loss": 0.5805, + "step": 25264 + }, + { + "epoch": 0.7743349270565159, + "grad_norm": 2.020062704296817, + "learning_rate": 1.2770558096926512e-06, + "loss": 0.5338, + "step": 25265 + }, + { + "epoch": 0.7743655755792571, + "grad_norm": 1.8341802432223435, + "learning_rate": 1.2767245242739313e-06, + "loss": 0.549, + "step": 25266 + }, + { + "epoch": 0.7743962241019983, + "grad_norm": 1.7959106539442113, + "learning_rate": 1.2763932755415986e-06, + "loss": 0.5137, + "step": 25267 + }, + { + "epoch": 0.7744268726247395, + "grad_norm": 1.6909191994609682, + "learning_rate": 1.2760620634989141e-06, + "loss": 0.6269, + "step": 25268 + }, + { + "epoch": 0.7744575211474807, + "grad_norm": 1.8925000893930675, + "learning_rate": 1.2757308881491449e-06, + "loss": 0.6067, + "step": 25269 + }, + { + "epoch": 0.7744881696702219, + "grad_norm": 1.7101274725105085, + "learning_rate": 1.27539974949555e-06, + "loss": 0.5376, + "step": 25270 + }, + { + "epoch": 0.7745188181929631, + "grad_norm": 1.78923587671887, + 
"learning_rate": 1.2750686475413948e-06, + "loss": 0.6256, + "step": 25271 + }, + { + "epoch": 0.7745494667157043, + "grad_norm": 2.0210020846419097, + "learning_rate": 1.2747375822899421e-06, + "loss": 0.5761, + "step": 25272 + }, + { + "epoch": 0.7745801152384455, + "grad_norm": 1.7171821013686475, + "learning_rate": 1.2744065537444522e-06, + "loss": 0.524, + "step": 25273 + }, + { + "epoch": 0.7746107637611868, + "grad_norm": 1.992358608396205, + "learning_rate": 1.2740755619081879e-06, + "loss": 0.6392, + "step": 25274 + }, + { + "epoch": 0.7746414122839279, + "grad_norm": 2.0015306688239694, + "learning_rate": 1.2737446067844116e-06, + "loss": 0.6211, + "step": 25275 + }, + { + "epoch": 0.7746720608066692, + "grad_norm": 1.6324967087995519, + "learning_rate": 1.2734136883763821e-06, + "loss": 0.5093, + "step": 25276 + }, + { + "epoch": 0.7747027093294103, + "grad_norm": 1.7848078347902825, + "learning_rate": 1.2730828066873603e-06, + "loss": 0.6145, + "step": 25277 + }, + { + "epoch": 0.7747333578521515, + "grad_norm": 0.7776938714782584, + "learning_rate": 1.272751961720609e-06, + "loss": 0.3777, + "step": 25278 + }, + { + "epoch": 0.7747640063748927, + "grad_norm": 2.03274346446264, + "learning_rate": 1.2724211534793851e-06, + "loss": 0.6023, + "step": 25279 + }, + { + "epoch": 0.7747946548976339, + "grad_norm": 1.9286150186933662, + "learning_rate": 1.2720903819669506e-06, + "loss": 0.6144, + "step": 25280 + }, + { + "epoch": 0.7748253034203751, + "grad_norm": 1.8336707729715807, + "learning_rate": 1.2717596471865619e-06, + "loss": 0.5631, + "step": 25281 + }, + { + "epoch": 0.7748559519431163, + "grad_norm": 1.9972918892116343, + "learning_rate": 1.271428949141479e-06, + "loss": 0.6162, + "step": 25282 + }, + { + "epoch": 0.7748866004658576, + "grad_norm": 2.107573825334399, + "learning_rate": 1.2710982878349621e-06, + "loss": 0.5954, + "step": 25283 + }, + { + "epoch": 0.7749172489885987, + "grad_norm": 1.7993660879749453, + "learning_rate": 1.2707676632702665e-06, + "loss": 0.529, + "step": 25284 + }, + { + "epoch": 0.77494789751134, + "grad_norm": 2.085631879949078, + "learning_rate": 1.2704370754506517e-06, + "loss": 0.5799, + "step": 25285 + }, + { + "epoch": 0.7749785460340811, + "grad_norm": 1.8120555771246496, + "learning_rate": 1.270106524379376e-06, + "loss": 0.5906, + "step": 25286 + }, + { + "epoch": 0.7750091945568224, + "grad_norm": 1.8432488203949169, + "learning_rate": 1.2697760100596929e-06, + "loss": 0.5222, + "step": 25287 + }, + { + "epoch": 0.7750398430795635, + "grad_norm": 1.7051093136846425, + "learning_rate": 1.2694455324948634e-06, + "loss": 0.5972, + "step": 25288 + }, + { + "epoch": 0.7750704916023048, + "grad_norm": 1.831272144929833, + "learning_rate": 1.26911509168814e-06, + "loss": 0.6101, + "step": 25289 + }, + { + "epoch": 0.7751011401250459, + "grad_norm": 1.7097674654098982, + "learning_rate": 1.2687846876427801e-06, + "loss": 0.5059, + "step": 25290 + }, + { + "epoch": 0.7751317886477872, + "grad_norm": 1.5382498649742855, + "learning_rate": 1.2684543203620402e-06, + "loss": 0.4345, + "step": 25291 + }, + { + "epoch": 0.7751624371705284, + "grad_norm": 1.9998179109044913, + "learning_rate": 1.2681239898491743e-06, + "loss": 0.6977, + "step": 25292 + }, + { + "epoch": 0.7751930856932696, + "grad_norm": 1.9585158173651553, + "learning_rate": 1.2677936961074366e-06, + "loss": 0.643, + "step": 25293 + }, + { + "epoch": 0.7752237342160108, + "grad_norm": 1.7921862824349994, + "learning_rate": 1.2674634391400848e-06, + "loss": 0.5479, + "step": 25294 
+ }, + { + "epoch": 0.775254382738752, + "grad_norm": 1.9226192298101776, + "learning_rate": 1.2671332189503688e-06, + "loss": 0.5289, + "step": 25295 + }, + { + "epoch": 0.7752850312614932, + "grad_norm": 1.8201316096085443, + "learning_rate": 1.2668030355415446e-06, + "loss": 0.6129, + "step": 25296 + }, + { + "epoch": 0.7753156797842344, + "grad_norm": 1.6731136902335593, + "learning_rate": 1.266472888916866e-06, + "loss": 0.5316, + "step": 25297 + }, + { + "epoch": 0.7753463283069756, + "grad_norm": 0.7969630070158952, + "learning_rate": 1.2661427790795844e-06, + "loss": 0.4118, + "step": 25298 + }, + { + "epoch": 0.7753769768297168, + "grad_norm": 1.8064197021558224, + "learning_rate": 1.265812706032955e-06, + "loss": 0.5049, + "step": 25299 + }, + { + "epoch": 0.775407625352458, + "grad_norm": 0.7905502788868985, + "learning_rate": 1.2654826697802253e-06, + "loss": 0.3927, + "step": 25300 + }, + { + "epoch": 0.7754382738751993, + "grad_norm": 1.9192786448076788, + "learning_rate": 1.2651526703246531e-06, + "loss": 0.5357, + "step": 25301 + }, + { + "epoch": 0.7754689223979404, + "grad_norm": 1.8596200178246636, + "learning_rate": 1.2648227076694875e-06, + "loss": 0.4399, + "step": 25302 + }, + { + "epoch": 0.7754995709206817, + "grad_norm": 1.8283189969820335, + "learning_rate": 1.2644927818179775e-06, + "loss": 0.5518, + "step": 25303 + }, + { + "epoch": 0.7755302194434228, + "grad_norm": 0.802368631759564, + "learning_rate": 1.2641628927733768e-06, + "loss": 0.3991, + "step": 25304 + }, + { + "epoch": 0.7755608679661641, + "grad_norm": 1.724023963756258, + "learning_rate": 1.2638330405389354e-06, + "loss": 0.5379, + "step": 25305 + }, + { + "epoch": 0.7755915164889052, + "grad_norm": 0.7547238587436349, + "learning_rate": 1.2635032251179025e-06, + "loss": 0.3876, + "step": 25306 + }, + { + "epoch": 0.7756221650116465, + "grad_norm": 0.7608818872276577, + "learning_rate": 1.2631734465135275e-06, + "loss": 0.3712, + "step": 25307 + }, + { + "epoch": 0.7756528135343876, + "grad_norm": 1.8510300967514126, + "learning_rate": 1.2628437047290626e-06, + "loss": 0.5421, + "step": 25308 + }, + { + "epoch": 0.7756834620571288, + "grad_norm": 1.7719947329206853, + "learning_rate": 1.2625139997677533e-06, + "loss": 0.5129, + "step": 25309 + }, + { + "epoch": 0.77571411057987, + "grad_norm": 2.0072088889745636, + "learning_rate": 1.2621843316328513e-06, + "loss": 0.5871, + "step": 25310 + }, + { + "epoch": 0.7757447591026112, + "grad_norm": 1.9025011906800222, + "learning_rate": 1.2618547003276005e-06, + "loss": 0.4625, + "step": 25311 + }, + { + "epoch": 0.7757754076253525, + "grad_norm": 0.8202184619863335, + "learning_rate": 1.2615251058552547e-06, + "loss": 0.4087, + "step": 25312 + }, + { + "epoch": 0.7758060561480936, + "grad_norm": 1.817311890100346, + "learning_rate": 1.2611955482190586e-06, + "loss": 0.5715, + "step": 25313 + }, + { + "epoch": 0.7758367046708349, + "grad_norm": 1.9169985771460851, + "learning_rate": 1.2608660274222578e-06, + "loss": 0.6151, + "step": 25314 + }, + { + "epoch": 0.775867353193576, + "grad_norm": 1.776116332281693, + "learning_rate": 1.260536543468101e-06, + "loss": 0.6563, + "step": 25315 + }, + { + "epoch": 0.7758980017163173, + "grad_norm": 1.9487064398797667, + "learning_rate": 1.2602070963598356e-06, + "loss": 0.5801, + "step": 25316 + }, + { + "epoch": 0.7759286502390584, + "grad_norm": 0.7931286484098226, + "learning_rate": 1.259877686100705e-06, + "loss": 0.3984, + "step": 25317 + }, + { + "epoch": 0.7759592987617997, + "grad_norm": 
0.8041105904820227, + "learning_rate": 1.2595483126939572e-06, + "loss": 0.4185, + "step": 25318 + }, + { + "epoch": 0.7759899472845408, + "grad_norm": 1.7087688356522794, + "learning_rate": 1.2592189761428364e-06, + "loss": 0.4905, + "step": 25319 + }, + { + "epoch": 0.7760205958072821, + "grad_norm": 1.7022540180484564, + "learning_rate": 1.2588896764505893e-06, + "loss": 0.6791, + "step": 25320 + }, + { + "epoch": 0.7760512443300233, + "grad_norm": 0.8053104042592474, + "learning_rate": 1.2585604136204599e-06, + "loss": 0.4167, + "step": 25321 + }, + { + "epoch": 0.7760818928527645, + "grad_norm": 1.891763931893259, + "learning_rate": 1.258231187655689e-06, + "loss": 0.5552, + "step": 25322 + }, + { + "epoch": 0.7761125413755057, + "grad_norm": 1.9623142754801122, + "learning_rate": 1.2579019985595264e-06, + "loss": 0.5589, + "step": 25323 + }, + { + "epoch": 0.7761431898982469, + "grad_norm": 1.937262640992157, + "learning_rate": 1.2575728463352127e-06, + "loss": 0.5932, + "step": 25324 + }, + { + "epoch": 0.7761738384209881, + "grad_norm": 1.8977387165786994, + "learning_rate": 1.2572437309859902e-06, + "loss": 0.5329, + "step": 25325 + }, + { + "epoch": 0.7762044869437293, + "grad_norm": 1.7454570981341135, + "learning_rate": 1.2569146525151027e-06, + "loss": 0.5152, + "step": 25326 + }, + { + "epoch": 0.7762351354664705, + "grad_norm": 1.9495109415222576, + "learning_rate": 1.2565856109257929e-06, + "loss": 0.5369, + "step": 25327 + }, + { + "epoch": 0.7762657839892118, + "grad_norm": 1.8161792407848905, + "learning_rate": 1.2562566062213044e-06, + "loss": 0.5627, + "step": 25328 + }, + { + "epoch": 0.7762964325119529, + "grad_norm": 1.9805110181574592, + "learning_rate": 1.2559276384048758e-06, + "loss": 0.6716, + "step": 25329 + }, + { + "epoch": 0.7763270810346942, + "grad_norm": 1.738269012873017, + "learning_rate": 1.2555987074797499e-06, + "loss": 0.6027, + "step": 25330 + }, + { + "epoch": 0.7763577295574353, + "grad_norm": 1.934544469707654, + "learning_rate": 1.2552698134491697e-06, + "loss": 0.6784, + "step": 25331 + }, + { + "epoch": 0.7763883780801766, + "grad_norm": 1.8157098971330752, + "learning_rate": 1.2549409563163744e-06, + "loss": 0.6376, + "step": 25332 + }, + { + "epoch": 0.7764190266029177, + "grad_norm": 2.0484374172200663, + "learning_rate": 1.2546121360846025e-06, + "loss": 0.6873, + "step": 25333 + }, + { + "epoch": 0.776449675125659, + "grad_norm": 1.7105750837038973, + "learning_rate": 1.2542833527570952e-06, + "loss": 0.5697, + "step": 25334 + }, + { + "epoch": 0.7764803236484001, + "grad_norm": 2.3111662345460253, + "learning_rate": 1.2539546063370944e-06, + "loss": 0.6097, + "step": 25335 + }, + { + "epoch": 0.7765109721711414, + "grad_norm": 1.6480416369592399, + "learning_rate": 1.2536258968278352e-06, + "loss": 0.4708, + "step": 25336 + }, + { + "epoch": 0.7765416206938826, + "grad_norm": 1.7985314634343588, + "learning_rate": 1.2532972242325593e-06, + "loss": 0.5332, + "step": 25337 + }, + { + "epoch": 0.7765722692166238, + "grad_norm": 0.8083928812172182, + "learning_rate": 1.252968588554504e-06, + "loss": 0.4005, + "step": 25338 + }, + { + "epoch": 0.776602917739365, + "grad_norm": 1.822583118570753, + "learning_rate": 1.2526399897969093e-06, + "loss": 0.5069, + "step": 25339 + }, + { + "epoch": 0.7766335662621061, + "grad_norm": 1.853646030615274, + "learning_rate": 1.2523114279630122e-06, + "loss": 0.5654, + "step": 25340 + }, + { + "epoch": 0.7766642147848474, + "grad_norm": 1.9409014406724165, + "learning_rate": 1.251982903056046e-06, + 
"loss": 0.6031, + "step": 25341 + }, + { + "epoch": 0.7766948633075885, + "grad_norm": 1.8686295733322724, + "learning_rate": 1.2516544150792543e-06, + "loss": 0.6297, + "step": 25342 + }, + { + "epoch": 0.7767255118303298, + "grad_norm": 1.7487449305567455, + "learning_rate": 1.2513259640358705e-06, + "loss": 0.5612, + "step": 25343 + }, + { + "epoch": 0.7767561603530709, + "grad_norm": 0.812825450614452, + "learning_rate": 1.25099754992913e-06, + "loss": 0.3802, + "step": 25344 + }, + { + "epoch": 0.7767868088758122, + "grad_norm": 2.1044893611569946, + "learning_rate": 1.2506691727622699e-06, + "loss": 0.6122, + "step": 25345 + }, + { + "epoch": 0.7768174573985533, + "grad_norm": 1.8850833552421216, + "learning_rate": 1.2503408325385251e-06, + "loss": 0.5294, + "step": 25346 + }, + { + "epoch": 0.7768481059212946, + "grad_norm": 0.7760911259876242, + "learning_rate": 1.2500125292611336e-06, + "loss": 0.3944, + "step": 25347 + }, + { + "epoch": 0.7768787544440358, + "grad_norm": 1.725636729161641, + "learning_rate": 1.2496842629333267e-06, + "loss": 0.5345, + "step": 25348 + }, + { + "epoch": 0.776909402966777, + "grad_norm": 1.7458941271342976, + "learning_rate": 1.2493560335583399e-06, + "loss": 0.5209, + "step": 25349 + }, + { + "epoch": 0.7769400514895182, + "grad_norm": 1.8769287658014004, + "learning_rate": 1.2490278411394097e-06, + "loss": 0.6155, + "step": 25350 + }, + { + "epoch": 0.7769707000122594, + "grad_norm": 1.8074976340874356, + "learning_rate": 1.2486996856797673e-06, + "loss": 0.5401, + "step": 25351 + }, + { + "epoch": 0.7770013485350006, + "grad_norm": 1.9767158747257745, + "learning_rate": 1.248371567182644e-06, + "loss": 0.6054, + "step": 25352 + }, + { + "epoch": 0.7770319970577418, + "grad_norm": 1.8325752084471494, + "learning_rate": 1.2480434856512786e-06, + "loss": 0.5026, + "step": 25353 + }, + { + "epoch": 0.777062645580483, + "grad_norm": 2.019743986234257, + "learning_rate": 1.2477154410888992e-06, + "loss": 0.582, + "step": 25354 + }, + { + "epoch": 0.7770932941032243, + "grad_norm": 1.8460944439941191, + "learning_rate": 1.2473874334987412e-06, + "loss": 0.6565, + "step": 25355 + }, + { + "epoch": 0.7771239426259654, + "grad_norm": 1.9111494657449934, + "learning_rate": 1.2470594628840333e-06, + "loss": 0.5718, + "step": 25356 + }, + { + "epoch": 0.7771545911487067, + "grad_norm": 1.9510687255927208, + "learning_rate": 1.2467315292480093e-06, + "loss": 0.619, + "step": 25357 + }, + { + "epoch": 0.7771852396714478, + "grad_norm": 1.8597262661461846, + "learning_rate": 1.2464036325939004e-06, + "loss": 0.5522, + "step": 25358 + }, + { + "epoch": 0.7772158881941891, + "grad_norm": 1.8436897624777175, + "learning_rate": 1.2460757729249363e-06, + "loss": 0.5583, + "step": 25359 + }, + { + "epoch": 0.7772465367169302, + "grad_norm": 1.851483874675521, + "learning_rate": 1.2457479502443475e-06, + "loss": 0.5337, + "step": 25360 + }, + { + "epoch": 0.7772771852396715, + "grad_norm": 0.760840968491998, + "learning_rate": 1.2454201645553665e-06, + "loss": 0.4112, + "step": 25361 + }, + { + "epoch": 0.7773078337624126, + "grad_norm": 1.9941469045519022, + "learning_rate": 1.245092415861221e-06, + "loss": 0.6048, + "step": 25362 + }, + { + "epoch": 0.7773384822851539, + "grad_norm": 2.1904106120785247, + "learning_rate": 1.2447647041651378e-06, + "loss": 0.6736, + "step": 25363 + }, + { + "epoch": 0.777369130807895, + "grad_norm": 2.1484654862127144, + "learning_rate": 1.2444370294703517e-06, + "loss": 0.6415, + "step": 25364 + }, + { + "epoch": 0.7773997793306363, 
+ "grad_norm": 0.812470811178368, + "learning_rate": 1.2441093917800872e-06, + "loss": 0.4039, + "step": 25365 + }, + { + "epoch": 0.7774304278533775, + "grad_norm": 1.9318706019906557, + "learning_rate": 1.2437817910975752e-06, + "loss": 0.518, + "step": 25366 + }, + { + "epoch": 0.7774610763761187, + "grad_norm": 1.922408244397969, + "learning_rate": 1.2434542274260408e-06, + "loss": 0.6069, + "step": 25367 + }, + { + "epoch": 0.7774917248988599, + "grad_norm": 2.0606088035647825, + "learning_rate": 1.2431267007687132e-06, + "loss": 0.6023, + "step": 25368 + }, + { + "epoch": 0.7775223734216011, + "grad_norm": 1.7233354575354172, + "learning_rate": 1.2427992111288206e-06, + "loss": 0.6311, + "step": 25369 + }, + { + "epoch": 0.7775530219443423, + "grad_norm": 1.6981404967861848, + "learning_rate": 1.2424717585095875e-06, + "loss": 0.5294, + "step": 25370 + }, + { + "epoch": 0.7775836704670834, + "grad_norm": 1.8556908595806878, + "learning_rate": 1.2421443429142415e-06, + "loss": 0.5337, + "step": 25371 + }, + { + "epoch": 0.7776143189898247, + "grad_norm": 1.7331465275776896, + "learning_rate": 1.2418169643460098e-06, + "loss": 0.5086, + "step": 25372 + }, + { + "epoch": 0.7776449675125658, + "grad_norm": 1.9210509800205942, + "learning_rate": 1.2414896228081164e-06, + "loss": 0.6452, + "step": 25373 + }, + { + "epoch": 0.7776756160353071, + "grad_norm": 1.7695798393783349, + "learning_rate": 1.2411623183037869e-06, + "loss": 0.6436, + "step": 25374 + }, + { + "epoch": 0.7777062645580483, + "grad_norm": 1.702054265987618, + "learning_rate": 1.2408350508362489e-06, + "loss": 0.5533, + "step": 25375 + }, + { + "epoch": 0.7777369130807895, + "grad_norm": 1.7192983636714543, + "learning_rate": 1.2405078204087228e-06, + "loss": 0.6091, + "step": 25376 + }, + { + "epoch": 0.7777675616035307, + "grad_norm": 1.789733867106938, + "learning_rate": 1.2401806270244366e-06, + "loss": 0.5728, + "step": 25377 + }, + { + "epoch": 0.7777982101262719, + "grad_norm": 1.701162609974035, + "learning_rate": 1.2398534706866116e-06, + "loss": 0.505, + "step": 25378 + }, + { + "epoch": 0.7778288586490131, + "grad_norm": 1.7414557273770013, + "learning_rate": 1.2395263513984724e-06, + "loss": 0.5366, + "step": 25379 + }, + { + "epoch": 0.7778595071717543, + "grad_norm": 1.9343268877848896, + "learning_rate": 1.239199269163243e-06, + "loss": 0.6392, + "step": 25380 + }, + { + "epoch": 0.7778901556944955, + "grad_norm": 1.885311505465103, + "learning_rate": 1.238872223984145e-06, + "loss": 0.546, + "step": 25381 + }, + { + "epoch": 0.7779208042172367, + "grad_norm": 1.6377387267159267, + "learning_rate": 1.2385452158644006e-06, + "loss": 0.5743, + "step": 25382 + }, + { + "epoch": 0.7779514527399779, + "grad_norm": 1.9368952544905405, + "learning_rate": 1.2382182448072344e-06, + "loss": 0.6174, + "step": 25383 + }, + { + "epoch": 0.7779821012627192, + "grad_norm": 2.1000674673295543, + "learning_rate": 1.2378913108158647e-06, + "loss": 0.6276, + "step": 25384 + }, + { + "epoch": 0.7780127497854603, + "grad_norm": 1.7817332464466766, + "learning_rate": 1.2375644138935156e-06, + "loss": 0.6589, + "step": 25385 + }, + { + "epoch": 0.7780433983082016, + "grad_norm": 1.8527029187346424, + "learning_rate": 1.2372375540434063e-06, + "loss": 0.5409, + "step": 25386 + }, + { + "epoch": 0.7780740468309427, + "grad_norm": 1.7414443977052712, + "learning_rate": 1.2369107312687572e-06, + "loss": 0.5048, + "step": 25387 + }, + { + "epoch": 0.778104695353684, + "grad_norm": 1.872538123951027, + "learning_rate": 
1.2365839455727919e-06, + "loss": 0.5558, + "step": 25388 + }, + { + "epoch": 0.7781353438764251, + "grad_norm": 0.791247579822726, + "learning_rate": 1.2362571969587255e-06, + "loss": 0.4069, + "step": 25389 + }, + { + "epoch": 0.7781659923991664, + "grad_norm": 1.8938733214705368, + "learning_rate": 1.235930485429781e-06, + "loss": 0.5476, + "step": 25390 + }, + { + "epoch": 0.7781966409219075, + "grad_norm": 0.8409964509056679, + "learning_rate": 1.235603810989177e-06, + "loss": 0.4002, + "step": 25391 + }, + { + "epoch": 0.7782272894446488, + "grad_norm": 1.9894083715284836, + "learning_rate": 1.235277173640131e-06, + "loss": 0.7125, + "step": 25392 + }, + { + "epoch": 0.77825793796739, + "grad_norm": 1.9262133860926256, + "learning_rate": 1.2349505733858618e-06, + "loss": 0.5257, + "step": 25393 + }, + { + "epoch": 0.7782885864901312, + "grad_norm": 1.822744010385604, + "learning_rate": 1.2346240102295898e-06, + "loss": 0.5117, + "step": 25394 + }, + { + "epoch": 0.7783192350128724, + "grad_norm": 1.8320206872986333, + "learning_rate": 1.2342974841745292e-06, + "loss": 0.5884, + "step": 25395 + }, + { + "epoch": 0.7783498835356136, + "grad_norm": 1.8194392834305875, + "learning_rate": 1.2339709952239003e-06, + "loss": 0.5785, + "step": 25396 + }, + { + "epoch": 0.7783805320583548, + "grad_norm": 1.8957438037947927, + "learning_rate": 1.2336445433809175e-06, + "loss": 0.5237, + "step": 25397 + }, + { + "epoch": 0.778411180581096, + "grad_norm": 0.7875151742625159, + "learning_rate": 1.2333181286487982e-06, + "loss": 0.3944, + "step": 25398 + }, + { + "epoch": 0.7784418291038372, + "grad_norm": 1.9587257928770543, + "learning_rate": 1.2329917510307616e-06, + "loss": 0.6039, + "step": 25399 + }, + { + "epoch": 0.7784724776265785, + "grad_norm": 0.7635292185652287, + "learning_rate": 1.232665410530019e-06, + "loss": 0.393, + "step": 25400 + }, + { + "epoch": 0.7785031261493196, + "grad_norm": 1.9615660915633764, + "learning_rate": 1.2323391071497882e-06, + "loss": 0.5713, + "step": 25401 + }, + { + "epoch": 0.7785337746720608, + "grad_norm": 1.7197916162379667, + "learning_rate": 1.2320128408932852e-06, + "loss": 0.4431, + "step": 25402 + }, + { + "epoch": 0.778564423194802, + "grad_norm": 1.9897406636950623, + "learning_rate": 1.2316866117637226e-06, + "loss": 0.5964, + "step": 25403 + }, + { + "epoch": 0.7785950717175432, + "grad_norm": 1.6846051172857406, + "learning_rate": 1.2313604197643158e-06, + "loss": 0.5678, + "step": 25404 + }, + { + "epoch": 0.7786257202402844, + "grad_norm": 1.9583763377811043, + "learning_rate": 1.231034264898281e-06, + "loss": 0.6224, + "step": 25405 + }, + { + "epoch": 0.7786563687630256, + "grad_norm": 1.8777913215341846, + "learning_rate": 1.2307081471688282e-06, + "loss": 0.5747, + "step": 25406 + }, + { + "epoch": 0.7786870172857668, + "grad_norm": 0.7501813696350947, + "learning_rate": 1.2303820665791739e-06, + "loss": 0.407, + "step": 25407 + }, + { + "epoch": 0.778717665808508, + "grad_norm": 1.818189393531593, + "learning_rate": 1.2300560231325275e-06, + "loss": 0.566, + "step": 25408 + }, + { + "epoch": 0.7787483143312492, + "grad_norm": 1.8255245295258535, + "learning_rate": 1.2297300168321047e-06, + "loss": 0.5625, + "step": 25409 + }, + { + "epoch": 0.7787789628539904, + "grad_norm": 0.8323670239602011, + "learning_rate": 1.2294040476811176e-06, + "loss": 0.4097, + "step": 25410 + }, + { + "epoch": 0.7788096113767317, + "grad_norm": 2.0342163635273085, + "learning_rate": 1.2290781156827758e-06, + "loss": 0.5887, + "step": 25411 + }, + { + 
"epoch": 0.7788402598994728, + "grad_norm": 1.8894564132650773, + "learning_rate": 1.228752220840292e-06, + "loss": 0.6458, + "step": 25412 + }, + { + "epoch": 0.7788709084222141, + "grad_norm": 1.747882029277539, + "learning_rate": 1.2284263631568794e-06, + "loss": 0.437, + "step": 25413 + }, + { + "epoch": 0.7789015569449552, + "grad_norm": 1.9258543228996736, + "learning_rate": 1.228100542635745e-06, + "loss": 0.5402, + "step": 25414 + }, + { + "epoch": 0.7789322054676965, + "grad_norm": 2.0530471263487415, + "learning_rate": 1.227774759280101e-06, + "loss": 0.6782, + "step": 25415 + }, + { + "epoch": 0.7789628539904376, + "grad_norm": 1.9519002588382304, + "learning_rate": 1.2274490130931593e-06, + "loss": 0.6459, + "step": 25416 + }, + { + "epoch": 0.7789935025131789, + "grad_norm": 1.8990302856724728, + "learning_rate": 1.227123304078126e-06, + "loss": 0.4983, + "step": 25417 + }, + { + "epoch": 0.77902415103592, + "grad_norm": 1.758998878819253, + "learning_rate": 1.2267976322382136e-06, + "loss": 0.5425, + "step": 25418 + }, + { + "epoch": 0.7790547995586613, + "grad_norm": 1.8518793109081388, + "learning_rate": 1.2264719975766266e-06, + "loss": 0.6056, + "step": 25419 + }, + { + "epoch": 0.7790854480814025, + "grad_norm": 2.0381785939431274, + "learning_rate": 1.2261464000965795e-06, + "loss": 0.6036, + "step": 25420 + }, + { + "epoch": 0.7791160966041437, + "grad_norm": 1.9189421665156938, + "learning_rate": 1.2258208398012772e-06, + "loss": 0.6112, + "step": 25421 + }, + { + "epoch": 0.7791467451268849, + "grad_norm": 1.941675103282926, + "learning_rate": 1.2254953166939266e-06, + "loss": 0.5643, + "step": 25422 + }, + { + "epoch": 0.7791773936496261, + "grad_norm": 1.737690235458732, + "learning_rate": 1.2251698307777365e-06, + "loss": 0.6594, + "step": 25423 + }, + { + "epoch": 0.7792080421723673, + "grad_norm": 1.7306574579581848, + "learning_rate": 1.2248443820559154e-06, + "loss": 0.5897, + "step": 25424 + }, + { + "epoch": 0.7792386906951085, + "grad_norm": 2.0174886937805163, + "learning_rate": 1.2245189705316668e-06, + "loss": 0.5501, + "step": 25425 + }, + { + "epoch": 0.7792693392178497, + "grad_norm": 0.8356482676973969, + "learning_rate": 1.2241935962081991e-06, + "loss": 0.4135, + "step": 25426 + }, + { + "epoch": 0.779299987740591, + "grad_norm": 1.9315939449686104, + "learning_rate": 1.2238682590887174e-06, + "loss": 0.6204, + "step": 25427 + }, + { + "epoch": 0.7793306362633321, + "grad_norm": 1.6130614677028885, + "learning_rate": 1.2235429591764303e-06, + "loss": 0.5186, + "step": 25428 + }, + { + "epoch": 0.7793612847860734, + "grad_norm": 1.9429803606484508, + "learning_rate": 1.22321769647454e-06, + "loss": 0.5706, + "step": 25429 + }, + { + "epoch": 0.7793919333088145, + "grad_norm": 2.024996929745456, + "learning_rate": 1.2228924709862506e-06, + "loss": 0.5662, + "step": 25430 + }, + { + "epoch": 0.7794225818315558, + "grad_norm": 1.8044253737404448, + "learning_rate": 1.2225672827147684e-06, + "loss": 0.5964, + "step": 25431 + }, + { + "epoch": 0.7794532303542969, + "grad_norm": 1.732217927777637, + "learning_rate": 1.2222421316632981e-06, + "loss": 0.5955, + "step": 25432 + }, + { + "epoch": 0.7794838788770381, + "grad_norm": 1.8845158384912615, + "learning_rate": 1.221917017835042e-06, + "loss": 0.6026, + "step": 25433 + }, + { + "epoch": 0.7795145273997793, + "grad_norm": 1.9784903413055368, + "learning_rate": 1.2215919412332038e-06, + "loss": 0.5471, + "step": 25434 + }, + { + "epoch": 0.7795451759225205, + "grad_norm": 1.82179905895946, + 
"learning_rate": 1.2212669018609884e-06, + "loss": 0.6175, + "step": 25435 + }, + { + "epoch": 0.7795758244452617, + "grad_norm": 1.933068690110983, + "learning_rate": 1.2209418997215955e-06, + "loss": 0.5662, + "step": 25436 + }, + { + "epoch": 0.7796064729680029, + "grad_norm": 1.8525790206068695, + "learning_rate": 1.2206169348182307e-06, + "loss": 0.5337, + "step": 25437 + }, + { + "epoch": 0.7796371214907442, + "grad_norm": 0.7853302917462658, + "learning_rate": 1.2202920071540913e-06, + "loss": 0.4123, + "step": 25438 + }, + { + "epoch": 0.7796677700134853, + "grad_norm": 1.846643205111925, + "learning_rate": 1.2199671167323846e-06, + "loss": 0.5902, + "step": 25439 + }, + { + "epoch": 0.7796984185362266, + "grad_norm": 1.7339089366496419, + "learning_rate": 1.2196422635563093e-06, + "loss": 0.521, + "step": 25440 + }, + { + "epoch": 0.7797290670589677, + "grad_norm": 2.1780951106946658, + "learning_rate": 1.2193174476290643e-06, + "loss": 0.6461, + "step": 25441 + }, + { + "epoch": 0.779759715581709, + "grad_norm": 1.7457641721396748, + "learning_rate": 1.2189926689538516e-06, + "loss": 0.5026, + "step": 25442 + }, + { + "epoch": 0.7797903641044501, + "grad_norm": 1.989509152831434, + "learning_rate": 1.2186679275338737e-06, + "loss": 0.6035, + "step": 25443 + }, + { + "epoch": 0.7798210126271914, + "grad_norm": 1.8246065542092784, + "learning_rate": 1.2183432233723263e-06, + "loss": 0.5751, + "step": 25444 + }, + { + "epoch": 0.7798516611499325, + "grad_norm": 1.8372226162859056, + "learning_rate": 1.2180185564724106e-06, + "loss": 0.6589, + "step": 25445 + }, + { + "epoch": 0.7798823096726738, + "grad_norm": 1.7280877437785749, + "learning_rate": 1.2176939268373255e-06, + "loss": 0.5783, + "step": 25446 + }, + { + "epoch": 0.779912958195415, + "grad_norm": 1.9280244575706036, + "learning_rate": 1.217369334470272e-06, + "loss": 0.5806, + "step": 25447 + }, + { + "epoch": 0.7799436067181562, + "grad_norm": 0.7765963781512538, + "learning_rate": 1.217044779374446e-06, + "loss": 0.4151, + "step": 25448 + }, + { + "epoch": 0.7799742552408974, + "grad_norm": 1.7149005189530309, + "learning_rate": 1.2167202615530427e-06, + "loss": 0.5377, + "step": 25449 + }, + { + "epoch": 0.7800049037636386, + "grad_norm": 1.7731851006927295, + "learning_rate": 1.2163957810092659e-06, + "loss": 0.5076, + "step": 25450 + }, + { + "epoch": 0.7800355522863798, + "grad_norm": 1.8818508354390218, + "learning_rate": 1.216071337746309e-06, + "loss": 0.4762, + "step": 25451 + }, + { + "epoch": 0.780066200809121, + "grad_norm": 1.9867193716166152, + "learning_rate": 1.2157469317673682e-06, + "loss": 0.6121, + "step": 25452 + }, + { + "epoch": 0.7800968493318622, + "grad_norm": 1.812192112751662, + "learning_rate": 1.2154225630756411e-06, + "loss": 0.5053, + "step": 25453 + }, + { + "epoch": 0.7801274978546034, + "grad_norm": 1.8091194556448476, + "learning_rate": 1.2150982316743236e-06, + "loss": 0.5879, + "step": 25454 + }, + { + "epoch": 0.7801581463773446, + "grad_norm": 1.8824417612061444, + "learning_rate": 1.2147739375666134e-06, + "loss": 0.601, + "step": 25455 + }, + { + "epoch": 0.7801887949000859, + "grad_norm": 1.6892237373430392, + "learning_rate": 1.2144496807557027e-06, + "loss": 0.5358, + "step": 25456 + }, + { + "epoch": 0.780219443422827, + "grad_norm": 1.6274763685982774, + "learning_rate": 1.2141254612447877e-06, + "loss": 0.4946, + "step": 25457 + }, + { + "epoch": 0.7802500919455683, + "grad_norm": 1.7994442011329137, + "learning_rate": 1.2138012790370645e-06, + "loss": 0.5721, + "step": 
25458 + }, + { + "epoch": 0.7802807404683094, + "grad_norm": 0.7977928735146252, + "learning_rate": 1.2134771341357266e-06, + "loss": 0.3942, + "step": 25459 + }, + { + "epoch": 0.7803113889910507, + "grad_norm": 1.933528026001969, + "learning_rate": 1.2131530265439639e-06, + "loss": 0.4928, + "step": 25460 + }, + { + "epoch": 0.7803420375137918, + "grad_norm": 1.8225866325846856, + "learning_rate": 1.2128289562649765e-06, + "loss": 0.6441, + "step": 25461 + }, + { + "epoch": 0.7803726860365331, + "grad_norm": 1.8746863168092436, + "learning_rate": 1.2125049233019543e-06, + "loss": 0.5018, + "step": 25462 + }, + { + "epoch": 0.7804033345592742, + "grad_norm": 1.7554330864764605, + "learning_rate": 1.2121809276580887e-06, + "loss": 0.6212, + "step": 25463 + }, + { + "epoch": 0.7804339830820154, + "grad_norm": 2.1339794320039287, + "learning_rate": 1.2118569693365733e-06, + "loss": 0.5719, + "step": 25464 + }, + { + "epoch": 0.7804646316047567, + "grad_norm": 1.5776675550990054, + "learning_rate": 1.2115330483406006e-06, + "loss": 0.4695, + "step": 25465 + }, + { + "epoch": 0.7804952801274978, + "grad_norm": 1.87853211243902, + "learning_rate": 1.2112091646733636e-06, + "loss": 0.581, + "step": 25466 + }, + { + "epoch": 0.7805259286502391, + "grad_norm": 0.7895801879639803, + "learning_rate": 1.2108853183380509e-06, + "loss": 0.4116, + "step": 25467 + }, + { + "epoch": 0.7805565771729802, + "grad_norm": 1.7407730981760288, + "learning_rate": 1.2105615093378543e-06, + "loss": 0.5082, + "step": 25468 + }, + { + "epoch": 0.7805872256957215, + "grad_norm": 1.9344075471974018, + "learning_rate": 1.210237737675966e-06, + "loss": 0.5645, + "step": 25469 + }, + { + "epoch": 0.7806178742184626, + "grad_norm": 2.1448290439075137, + "learning_rate": 1.209914003355575e-06, + "loss": 0.5927, + "step": 25470 + }, + { + "epoch": 0.7806485227412039, + "grad_norm": 1.837392835047438, + "learning_rate": 1.2095903063798687e-06, + "loss": 0.5248, + "step": 25471 + }, + { + "epoch": 0.780679171263945, + "grad_norm": 2.237309111093012, + "learning_rate": 1.2092666467520415e-06, + "loss": 0.6072, + "step": 25472 + }, + { + "epoch": 0.7807098197866863, + "grad_norm": 1.8209682588190519, + "learning_rate": 1.2089430244752782e-06, + "loss": 0.5111, + "step": 25473 + }, + { + "epoch": 0.7807404683094274, + "grad_norm": 1.659558957838385, + "learning_rate": 1.2086194395527712e-06, + "loss": 0.5619, + "step": 25474 + }, + { + "epoch": 0.7807711168321687, + "grad_norm": 0.7909896316008342, + "learning_rate": 1.2082958919877052e-06, + "loss": 0.3752, + "step": 25475 + }, + { + "epoch": 0.7808017653549099, + "grad_norm": 1.7648403608842196, + "learning_rate": 1.20797238178327e-06, + "loss": 0.5555, + "step": 25476 + }, + { + "epoch": 0.7808324138776511, + "grad_norm": 1.972471201922596, + "learning_rate": 1.2076489089426545e-06, + "loss": 0.5657, + "step": 25477 + }, + { + "epoch": 0.7808630624003923, + "grad_norm": 1.8800960173443666, + "learning_rate": 1.2073254734690433e-06, + "loss": 0.6147, + "step": 25478 + }, + { + "epoch": 0.7808937109231335, + "grad_norm": 1.7525478315352658, + "learning_rate": 1.2070020753656248e-06, + "loss": 0.5577, + "step": 25479 + }, + { + "epoch": 0.7809243594458747, + "grad_norm": 1.9821668662475895, + "learning_rate": 1.2066787146355863e-06, + "loss": 0.63, + "step": 25480 + }, + { + "epoch": 0.7809550079686159, + "grad_norm": 2.0202215932811356, + "learning_rate": 1.2063553912821118e-06, + "loss": 0.5609, + "step": 25481 + }, + { + "epoch": 0.7809856564913571, + "grad_norm": 
1.858146002127331, + "learning_rate": 1.2060321053083895e-06, + "loss": 0.5536, + "step": 25482 + }, + { + "epoch": 0.7810163050140984, + "grad_norm": 1.7848485781591836, + "learning_rate": 1.2057088567176024e-06, + "loss": 0.5039, + "step": 25483 + }, + { + "epoch": 0.7810469535368395, + "grad_norm": 1.8761269346742995, + "learning_rate": 1.2053856455129365e-06, + "loss": 0.6767, + "step": 25484 + }, + { + "epoch": 0.7810776020595808, + "grad_norm": 0.8257464960588935, + "learning_rate": 1.2050624716975785e-06, + "loss": 0.3992, + "step": 25485 + }, + { + "epoch": 0.7811082505823219, + "grad_norm": 1.9256334965623039, + "learning_rate": 1.2047393352747095e-06, + "loss": 0.6018, + "step": 25486 + }, + { + "epoch": 0.7811388991050632, + "grad_norm": 1.9334689059523567, + "learning_rate": 1.2044162362475148e-06, + "loss": 0.5282, + "step": 25487 + }, + { + "epoch": 0.7811695476278043, + "grad_norm": 1.8267974315617903, + "learning_rate": 1.2040931746191792e-06, + "loss": 0.5294, + "step": 25488 + }, + { + "epoch": 0.7812001961505456, + "grad_norm": 1.7339696218781457, + "learning_rate": 1.203770150392885e-06, + "loss": 0.5793, + "step": 25489 + }, + { + "epoch": 0.7812308446732867, + "grad_norm": 1.7303123767524313, + "learning_rate": 1.2034471635718121e-06, + "loss": 0.5344, + "step": 25490 + }, + { + "epoch": 0.781261493196028, + "grad_norm": 1.6443644008979679, + "learning_rate": 1.203124214159148e-06, + "loss": 0.482, + "step": 25491 + }, + { + "epoch": 0.7812921417187692, + "grad_norm": 1.8374435251226766, + "learning_rate": 1.202801302158072e-06, + "loss": 0.6, + "step": 25492 + }, + { + "epoch": 0.7813227902415104, + "grad_norm": 1.7034430114329309, + "learning_rate": 1.202478427571767e-06, + "loss": 0.5184, + "step": 25493 + }, + { + "epoch": 0.7813534387642516, + "grad_norm": 1.7842832281681396, + "learning_rate": 1.2021555904034127e-06, + "loss": 0.6012, + "step": 25494 + }, + { + "epoch": 0.7813840872869927, + "grad_norm": 0.7936134037844215, + "learning_rate": 1.2018327906561911e-06, + "loss": 0.4076, + "step": 25495 + }, + { + "epoch": 0.781414735809734, + "grad_norm": 1.932145823769056, + "learning_rate": 1.2015100283332838e-06, + "loss": 0.6626, + "step": 25496 + }, + { + "epoch": 0.7814453843324751, + "grad_norm": 1.8451534591215044, + "learning_rate": 1.201187303437869e-06, + "loss": 0.5927, + "step": 25497 + }, + { + "epoch": 0.7814760328552164, + "grad_norm": 1.995984506727971, + "learning_rate": 1.2008646159731274e-06, + "loss": 0.5546, + "step": 25498 + }, + { + "epoch": 0.7815066813779575, + "grad_norm": 1.9678483874722457, + "learning_rate": 1.2005419659422401e-06, + "loss": 0.6436, + "step": 25499 + }, + { + "epoch": 0.7815373299006988, + "grad_norm": 1.7880660055591715, + "learning_rate": 1.2002193533483842e-06, + "loss": 0.5253, + "step": 25500 + }, + { + "epoch": 0.78156797842344, + "grad_norm": 1.8864221811059234, + "learning_rate": 1.1998967781947385e-06, + "loss": 0.6138, + "step": 25501 + }, + { + "epoch": 0.7815986269461812, + "grad_norm": 1.9273178655052015, + "learning_rate": 1.199574240484484e-06, + "loss": 0.5433, + "step": 25502 + }, + { + "epoch": 0.7816292754689224, + "grad_norm": 1.8869990693445389, + "learning_rate": 1.1992517402207954e-06, + "loss": 0.628, + "step": 25503 + }, + { + "epoch": 0.7816599239916636, + "grad_norm": 0.8080072928369175, + "learning_rate": 1.1989292774068533e-06, + "loss": 0.3932, + "step": 25504 + }, + { + "epoch": 0.7816905725144048, + "grad_norm": 1.614899788860525, + "learning_rate": 1.1986068520458322e-06, + "loss": 
0.5923, + "step": 25505 + }, + { + "epoch": 0.781721221037146, + "grad_norm": 1.8068276178559246, + "learning_rate": 1.1982844641409103e-06, + "loss": 0.5722, + "step": 25506 + }, + { + "epoch": 0.7817518695598872, + "grad_norm": 1.7473165258909782, + "learning_rate": 1.1979621136952657e-06, + "loss": 0.53, + "step": 25507 + }, + { + "epoch": 0.7817825180826284, + "grad_norm": 2.006655676254433, + "learning_rate": 1.1976398007120715e-06, + "loss": 0.5616, + "step": 25508 + }, + { + "epoch": 0.7818131666053696, + "grad_norm": 0.8347117606606713, + "learning_rate": 1.1973175251945058e-06, + "loss": 0.4109, + "step": 25509 + }, + { + "epoch": 0.7818438151281109, + "grad_norm": 1.8382675845348582, + "learning_rate": 1.1969952871457442e-06, + "loss": 0.545, + "step": 25510 + }, + { + "epoch": 0.781874463650852, + "grad_norm": 2.0196631876433613, + "learning_rate": 1.1966730865689602e-06, + "loss": 0.5981, + "step": 25511 + }, + { + "epoch": 0.7819051121735933, + "grad_norm": 1.9371876156549523, + "learning_rate": 1.1963509234673293e-06, + "loss": 0.6722, + "step": 25512 + }, + { + "epoch": 0.7819357606963344, + "grad_norm": 1.5558954022796738, + "learning_rate": 1.196028797844027e-06, + "loss": 0.5948, + "step": 25513 + }, + { + "epoch": 0.7819664092190757, + "grad_norm": 1.8560942891451793, + "learning_rate": 1.1957067097022252e-06, + "loss": 0.622, + "step": 25514 + }, + { + "epoch": 0.7819970577418168, + "grad_norm": 1.870811770979526, + "learning_rate": 1.1953846590451002e-06, + "loss": 0.5651, + "step": 25515 + }, + { + "epoch": 0.7820277062645581, + "grad_norm": 1.9604185193880619, + "learning_rate": 1.1950626458758218e-06, + "loss": 0.6349, + "step": 25516 + }, + { + "epoch": 0.7820583547872992, + "grad_norm": 1.893724071205314, + "learning_rate": 1.194740670197565e-06, + "loss": 0.6211, + "step": 25517 + }, + { + "epoch": 0.7820890033100405, + "grad_norm": 1.9354084483623535, + "learning_rate": 1.1944187320135031e-06, + "loss": 0.5596, + "step": 25518 + }, + { + "epoch": 0.7821196518327816, + "grad_norm": 1.9922043492384196, + "learning_rate": 1.1940968313268058e-06, + "loss": 0.5911, + "step": 25519 + }, + { + "epoch": 0.7821503003555229, + "grad_norm": 1.9532987588015658, + "learning_rate": 1.1937749681406464e-06, + "loss": 0.634, + "step": 25520 + }, + { + "epoch": 0.7821809488782641, + "grad_norm": 1.8634415273675662, + "learning_rate": 1.1934531424581973e-06, + "loss": 0.5593, + "step": 25521 + }, + { + "epoch": 0.7822115974010053, + "grad_norm": 1.671472742444399, + "learning_rate": 1.1931313542826268e-06, + "loss": 0.5899, + "step": 25522 + }, + { + "epoch": 0.7822422459237465, + "grad_norm": 1.8880155959396399, + "learning_rate": 1.1928096036171072e-06, + "loss": 0.5392, + "step": 25523 + }, + { + "epoch": 0.7822728944464877, + "grad_norm": 1.9614411281607347, + "learning_rate": 1.19248789046481e-06, + "loss": 0.6063, + "step": 25524 + }, + { + "epoch": 0.7823035429692289, + "grad_norm": 1.740684483794386, + "learning_rate": 1.1921662148289027e-06, + "loss": 0.5174, + "step": 25525 + }, + { + "epoch": 0.78233419149197, + "grad_norm": 1.9716302209940941, + "learning_rate": 1.1918445767125575e-06, + "loss": 0.5812, + "step": 25526 + }, + { + "epoch": 0.7823648400147113, + "grad_norm": 1.876050699096946, + "learning_rate": 1.19152297611894e-06, + "loss": 0.6121, + "step": 25527 + }, + { + "epoch": 0.7823954885374524, + "grad_norm": 0.7682922449127667, + "learning_rate": 1.1912014130512216e-06, + "loss": 0.3974, + "step": 25528 + }, + { + "epoch": 0.7824261370601937, + "grad_norm": 
1.8794290590526108, + "learning_rate": 1.1908798875125715e-06, + "loss": 0.5157, + "step": 25529 + }, + { + "epoch": 0.7824567855829349, + "grad_norm": 2.038987256035558, + "learning_rate": 1.1905583995061548e-06, + "loss": 0.6629, + "step": 25530 + }, + { + "epoch": 0.7824874341056761, + "grad_norm": 2.113537004067147, + "learning_rate": 1.1902369490351412e-06, + "loss": 0.6091, + "step": 25531 + }, + { + "epoch": 0.7825180826284173, + "grad_norm": 1.980263786878026, + "learning_rate": 1.1899155361026992e-06, + "loss": 0.5527, + "step": 25532 + }, + { + "epoch": 0.7825487311511585, + "grad_norm": 2.236010638807427, + "learning_rate": 1.1895941607119926e-06, + "loss": 0.6702, + "step": 25533 + }, + { + "epoch": 0.7825793796738997, + "grad_norm": 2.136739899966405, + "learning_rate": 1.189272822866191e-06, + "loss": 0.5792, + "step": 25534 + }, + { + "epoch": 0.7826100281966409, + "grad_norm": 1.5794125407944957, + "learning_rate": 1.1889515225684583e-06, + "loss": 0.6001, + "step": 25535 + }, + { + "epoch": 0.7826406767193821, + "grad_norm": 1.999162170812214, + "learning_rate": 1.1886302598219607e-06, + "loss": 0.5843, + "step": 25536 + }, + { + "epoch": 0.7826713252421234, + "grad_norm": 1.8239291708473673, + "learning_rate": 1.1883090346298665e-06, + "loss": 0.6468, + "step": 25537 + }, + { + "epoch": 0.7827019737648645, + "grad_norm": 2.0081445296308416, + "learning_rate": 1.1879878469953366e-06, + "loss": 0.6397, + "step": 25538 + }, + { + "epoch": 0.7827326222876058, + "grad_norm": 1.7677449537829244, + "learning_rate": 1.1876666969215384e-06, + "loss": 0.5529, + "step": 25539 + }, + { + "epoch": 0.7827632708103469, + "grad_norm": 1.921878653011173, + "learning_rate": 1.1873455844116366e-06, + "loss": 0.539, + "step": 25540 + }, + { + "epoch": 0.7827939193330882, + "grad_norm": 1.8805462215730675, + "learning_rate": 1.1870245094687926e-06, + "loss": 0.5599, + "step": 25541 + }, + { + "epoch": 0.7828245678558293, + "grad_norm": 1.8426211609772019, + "learning_rate": 1.1867034720961722e-06, + "loss": 0.6152, + "step": 25542 + }, + { + "epoch": 0.7828552163785706, + "grad_norm": 1.8864192086484388, + "learning_rate": 1.1863824722969396e-06, + "loss": 0.5954, + "step": 25543 + }, + { + "epoch": 0.7828858649013117, + "grad_norm": 1.929996317013276, + "learning_rate": 1.1860615100742546e-06, + "loss": 0.5664, + "step": 25544 + }, + { + "epoch": 0.782916513424053, + "grad_norm": 0.8091621079768189, + "learning_rate": 1.1857405854312832e-06, + "loss": 0.3935, + "step": 25545 + }, + { + "epoch": 0.7829471619467941, + "grad_norm": 2.036799757478781, + "learning_rate": 1.1854196983711823e-06, + "loss": 0.5287, + "step": 25546 + }, + { + "epoch": 0.7829778104695354, + "grad_norm": 1.7352745772650258, + "learning_rate": 1.1850988488971205e-06, + "loss": 0.5589, + "step": 25547 + }, + { + "epoch": 0.7830084589922766, + "grad_norm": 1.8517170527735176, + "learning_rate": 1.1847780370122552e-06, + "loss": 0.5149, + "step": 25548 + }, + { + "epoch": 0.7830391075150178, + "grad_norm": 2.016789227163648, + "learning_rate": 1.184457262719747e-06, + "loss": 0.6227, + "step": 25549 + }, + { + "epoch": 0.783069756037759, + "grad_norm": 1.884364346409054, + "learning_rate": 1.1841365260227578e-06, + "loss": 0.5971, + "step": 25550 + }, + { + "epoch": 0.7831004045605002, + "grad_norm": 2.0733367121535577, + "learning_rate": 1.1838158269244488e-06, + "loss": 0.566, + "step": 25551 + }, + { + "epoch": 0.7831310530832414, + "grad_norm": 1.7607742147154741, + "learning_rate": 1.1834951654279775e-06, + "loss": 
0.6521, + "step": 25552 + }, + { + "epoch": 0.7831617016059826, + "grad_norm": 1.7936697605314413, + "learning_rate": 1.1831745415365054e-06, + "loss": 0.4597, + "step": 25553 + }, + { + "epoch": 0.7831923501287238, + "grad_norm": 1.8015753744187202, + "learning_rate": 1.1828539552531903e-06, + "loss": 0.6069, + "step": 25554 + }, + { + "epoch": 0.783222998651465, + "grad_norm": 1.8851216349676905, + "learning_rate": 1.182533406581194e-06, + "loss": 0.5534, + "step": 25555 + }, + { + "epoch": 0.7832536471742062, + "grad_norm": 0.8124082533278855, + "learning_rate": 1.1822128955236722e-06, + "loss": 0.3968, + "step": 25556 + }, + { + "epoch": 0.7832842956969474, + "grad_norm": 1.8099481058511488, + "learning_rate": 1.1818924220837812e-06, + "loss": 0.6114, + "step": 25557 + }, + { + "epoch": 0.7833149442196886, + "grad_norm": 1.8329388460698022, + "learning_rate": 1.1815719862646835e-06, + "loss": 0.567, + "step": 25558 + }, + { + "epoch": 0.7833455927424298, + "grad_norm": 2.036050647272853, + "learning_rate": 1.1812515880695342e-06, + "loss": 0.6352, + "step": 25559 + }, + { + "epoch": 0.783376241265171, + "grad_norm": 2.0918945066827925, + "learning_rate": 1.180931227501489e-06, + "loss": 0.6527, + "step": 25560 + }, + { + "epoch": 0.7834068897879122, + "grad_norm": 2.054677305368272, + "learning_rate": 1.1806109045637048e-06, + "loss": 0.5543, + "step": 25561 + }, + { + "epoch": 0.7834375383106534, + "grad_norm": 2.0919218887449875, + "learning_rate": 1.1802906192593404e-06, + "loss": 0.5872, + "step": 25562 + }, + { + "epoch": 0.7834681868333946, + "grad_norm": 1.6119745156130327, + "learning_rate": 1.1799703715915485e-06, + "loss": 0.5887, + "step": 25563 + }, + { + "epoch": 0.7834988353561358, + "grad_norm": 1.8524861435443771, + "learning_rate": 1.179650161563486e-06, + "loss": 0.6166, + "step": 25564 + }, + { + "epoch": 0.783529483878877, + "grad_norm": 1.7106221456885078, + "learning_rate": 1.1793299891783078e-06, + "loss": 0.557, + "step": 25565 + }, + { + "epoch": 0.7835601324016183, + "grad_norm": 1.796679861139894, + "learning_rate": 1.1790098544391699e-06, + "loss": 0.5403, + "step": 25566 + }, + { + "epoch": 0.7835907809243594, + "grad_norm": 2.02425143792035, + "learning_rate": 1.1786897573492262e-06, + "loss": 0.6641, + "step": 25567 + }, + { + "epoch": 0.7836214294471007, + "grad_norm": 1.9950176920864904, + "learning_rate": 1.1783696979116265e-06, + "loss": 0.5543, + "step": 25568 + }, + { + "epoch": 0.7836520779698418, + "grad_norm": 1.7686315843244333, + "learning_rate": 1.1780496761295312e-06, + "loss": 0.5972, + "step": 25569 + }, + { + "epoch": 0.7836827264925831, + "grad_norm": 2.298778537065116, + "learning_rate": 1.1777296920060905e-06, + "loss": 0.6085, + "step": 25570 + }, + { + "epoch": 0.7837133750153242, + "grad_norm": 2.6051507364299757, + "learning_rate": 1.1774097455444554e-06, + "loss": 0.6383, + "step": 25571 + }, + { + "epoch": 0.7837440235380655, + "grad_norm": 0.7874421508022017, + "learning_rate": 1.17708983674778e-06, + "loss": 0.3852, + "step": 25572 + }, + { + "epoch": 0.7837746720608066, + "grad_norm": 1.9644427767609698, + "learning_rate": 1.1767699656192172e-06, + "loss": 0.6202, + "step": 25573 + }, + { + "epoch": 0.7838053205835479, + "grad_norm": 1.903226753773476, + "learning_rate": 1.1764501321619186e-06, + "loss": 0.5639, + "step": 25574 + }, + { + "epoch": 0.783835969106289, + "grad_norm": 2.105942463013197, + "learning_rate": 1.1761303363790343e-06, + "loss": 0.6155, + "step": 25575 + }, + { + "epoch": 0.7838666176290303, + 
"grad_norm": 2.280672470701753, + "learning_rate": 1.1758105782737167e-06, + "loss": 0.5418, + "step": 25576 + }, + { + "epoch": 0.7838972661517715, + "grad_norm": 0.789093146168813, + "learning_rate": 1.1754908578491164e-06, + "loss": 0.3977, + "step": 25577 + }, + { + "epoch": 0.7839279146745127, + "grad_norm": 1.7344315528393568, + "learning_rate": 1.175171175108384e-06, + "loss": 0.5232, + "step": 25578 + }, + { + "epoch": 0.7839585631972539, + "grad_norm": 1.7309331097547225, + "learning_rate": 1.1748515300546666e-06, + "loss": 0.5679, + "step": 25579 + }, + { + "epoch": 0.7839892117199951, + "grad_norm": 1.7707555286689194, + "learning_rate": 1.174531922691116e-06, + "loss": 0.5838, + "step": 25580 + }, + { + "epoch": 0.7840198602427363, + "grad_norm": 1.7464958530403942, + "learning_rate": 1.174212353020881e-06, + "loss": 0.7272, + "step": 25581 + }, + { + "epoch": 0.7840505087654775, + "grad_norm": 1.6774384943974456, + "learning_rate": 1.1738928210471124e-06, + "loss": 0.5187, + "step": 25582 + }, + { + "epoch": 0.7840811572882187, + "grad_norm": 0.7947921045594606, + "learning_rate": 1.173573326772955e-06, + "loss": 0.38, + "step": 25583 + }, + { + "epoch": 0.78411180581096, + "grad_norm": 1.9163016123761394, + "learning_rate": 1.173253870201559e-06, + "loss": 0.6579, + "step": 25584 + }, + { + "epoch": 0.7841424543337011, + "grad_norm": 1.743981943428278, + "learning_rate": 1.172934451336073e-06, + "loss": 0.5601, + "step": 25585 + }, + { + "epoch": 0.7841731028564424, + "grad_norm": 1.8519655907563166, + "learning_rate": 1.172615070179643e-06, + "loss": 0.5645, + "step": 25586 + }, + { + "epoch": 0.7842037513791835, + "grad_norm": 1.864466337722945, + "learning_rate": 1.172295726735413e-06, + "loss": 0.6248, + "step": 25587 + }, + { + "epoch": 0.7842343999019247, + "grad_norm": 0.8801204673574002, + "learning_rate": 1.1719764210065354e-06, + "loss": 0.4203, + "step": 25588 + }, + { + "epoch": 0.7842650484246659, + "grad_norm": 1.4942644745866673, + "learning_rate": 1.1716571529961535e-06, + "loss": 0.5876, + "step": 25589 + }, + { + "epoch": 0.7842956969474071, + "grad_norm": 1.7428638811368102, + "learning_rate": 1.1713379227074123e-06, + "loss": 0.5534, + "step": 25590 + }, + { + "epoch": 0.7843263454701483, + "grad_norm": 1.9780117399772432, + "learning_rate": 1.1710187301434578e-06, + "loss": 0.617, + "step": 25591 + }, + { + "epoch": 0.7843569939928895, + "grad_norm": 2.096028775460724, + "learning_rate": 1.1706995753074352e-06, + "loss": 0.6393, + "step": 25592 + }, + { + "epoch": 0.7843876425156308, + "grad_norm": 2.0320856457821783, + "learning_rate": 1.1703804582024914e-06, + "loss": 0.6143, + "step": 25593 + }, + { + "epoch": 0.7844182910383719, + "grad_norm": 1.8374403695072348, + "learning_rate": 1.1700613788317666e-06, + "loss": 0.561, + "step": 25594 + }, + { + "epoch": 0.7844489395611132, + "grad_norm": 0.7838493448541929, + "learning_rate": 1.1697423371984079e-06, + "loss": 0.4164, + "step": 25595 + }, + { + "epoch": 0.7844795880838543, + "grad_norm": 1.8064642389454857, + "learning_rate": 1.169423333305559e-06, + "loss": 0.5279, + "step": 25596 + }, + { + "epoch": 0.7845102366065956, + "grad_norm": 1.8978374979007542, + "learning_rate": 1.1691043671563619e-06, + "loss": 0.5784, + "step": 25597 + }, + { + "epoch": 0.7845408851293367, + "grad_norm": 1.854611296622981, + "learning_rate": 1.1687854387539566e-06, + "loss": 0.585, + "step": 25598 + }, + { + "epoch": 0.784571533652078, + "grad_norm": 1.6479425642055983, + "learning_rate": 1.1684665481014922e-06, + 
"loss": 0.627, + "step": 25599 + }, + { + "epoch": 0.7846021821748191, + "grad_norm": 1.784048312730774, + "learning_rate": 1.1681476952021054e-06, + "loss": 0.5573, + "step": 25600 + }, + { + "epoch": 0.7846328306975604, + "grad_norm": 1.9047477551453642, + "learning_rate": 1.167828880058941e-06, + "loss": 0.5734, + "step": 25601 + }, + { + "epoch": 0.7846634792203016, + "grad_norm": 1.8507187216918959, + "learning_rate": 1.1675101026751378e-06, + "loss": 0.5987, + "step": 25602 + }, + { + "epoch": 0.7846941277430428, + "grad_norm": 1.7482305462331633, + "learning_rate": 1.1671913630538384e-06, + "loss": 0.5552, + "step": 25603 + }, + { + "epoch": 0.784724776265784, + "grad_norm": 1.8396251199802252, + "learning_rate": 1.1668726611981846e-06, + "loss": 0.6695, + "step": 25604 + }, + { + "epoch": 0.7847554247885252, + "grad_norm": 1.8594267289883233, + "learning_rate": 1.1665539971113138e-06, + "loss": 0.5321, + "step": 25605 + }, + { + "epoch": 0.7847860733112664, + "grad_norm": 1.846242104861063, + "learning_rate": 1.166235370796367e-06, + "loss": 0.5763, + "step": 25606 + }, + { + "epoch": 0.7848167218340076, + "grad_norm": 2.035663560784449, + "learning_rate": 1.165916782256486e-06, + "loss": 0.6692, + "step": 25607 + }, + { + "epoch": 0.7848473703567488, + "grad_norm": 1.5814053939183326, + "learning_rate": 1.165598231494806e-06, + "loss": 0.4151, + "step": 25608 + }, + { + "epoch": 0.78487801887949, + "grad_norm": 0.7733966431608071, + "learning_rate": 1.1652797185144677e-06, + "loss": 0.3884, + "step": 25609 + }, + { + "epoch": 0.7849086674022312, + "grad_norm": 1.9579217296039682, + "learning_rate": 1.1649612433186108e-06, + "loss": 0.5532, + "step": 25610 + }, + { + "epoch": 0.7849393159249725, + "grad_norm": 1.9216209946107115, + "learning_rate": 1.1646428059103709e-06, + "loss": 0.6234, + "step": 25611 + }, + { + "epoch": 0.7849699644477136, + "grad_norm": 1.8703794677942303, + "learning_rate": 1.1643244062928881e-06, + "loss": 0.6068, + "step": 25612 + }, + { + "epoch": 0.7850006129704549, + "grad_norm": 1.7620639674454057, + "learning_rate": 1.1640060444692968e-06, + "loss": 0.5024, + "step": 25613 + }, + { + "epoch": 0.785031261493196, + "grad_norm": 1.736769132345883, + "learning_rate": 1.163687720442736e-06, + "loss": 0.5395, + "step": 25614 + }, + { + "epoch": 0.7850619100159373, + "grad_norm": 1.9396663405503274, + "learning_rate": 1.1633694342163426e-06, + "loss": 0.5702, + "step": 25615 + }, + { + "epoch": 0.7850925585386784, + "grad_norm": 0.810204543308388, + "learning_rate": 1.1630511857932504e-06, + "loss": 0.382, + "step": 25616 + }, + { + "epoch": 0.7851232070614197, + "grad_norm": 0.7920873845855663, + "learning_rate": 1.1627329751765964e-06, + "loss": 0.3925, + "step": 25617 + }, + { + "epoch": 0.7851538555841608, + "grad_norm": 1.7810224970426516, + "learning_rate": 1.1624148023695175e-06, + "loss": 0.5086, + "step": 25618 + }, + { + "epoch": 0.785184504106902, + "grad_norm": 1.7210807027014947, + "learning_rate": 1.1620966673751466e-06, + "loss": 0.5752, + "step": 25619 + }, + { + "epoch": 0.7852151526296433, + "grad_norm": 1.9310199608444223, + "learning_rate": 1.1617785701966188e-06, + "loss": 0.6486, + "step": 25620 + }, + { + "epoch": 0.7852458011523844, + "grad_norm": 1.8172119448385147, + "learning_rate": 1.1614605108370703e-06, + "loss": 0.5279, + "step": 25621 + }, + { + "epoch": 0.7852764496751257, + "grad_norm": 1.8890912312441974, + "learning_rate": 1.1611424892996327e-06, + "loss": 0.5947, + "step": 25622 + }, + { + "epoch": 0.7853070981978668, + 
"grad_norm": 1.7160535239908234, + "learning_rate": 1.1608245055874407e-06, + "loss": 0.5251, + "step": 25623 + }, + { + "epoch": 0.7853377467206081, + "grad_norm": 1.901824710017792, + "learning_rate": 1.1605065597036264e-06, + "loss": 0.6681, + "step": 25624 + }, + { + "epoch": 0.7853683952433492, + "grad_norm": 0.8192769736573641, + "learning_rate": 1.1601886516513234e-06, + "loss": 0.399, + "step": 25625 + }, + { + "epoch": 0.7853990437660905, + "grad_norm": 1.7409619243828682, + "learning_rate": 1.159870781433665e-06, + "loss": 0.4774, + "step": 25626 + }, + { + "epoch": 0.7854296922888316, + "grad_norm": 1.7972995488960952, + "learning_rate": 1.1595529490537815e-06, + "loss": 0.5403, + "step": 25627 + }, + { + "epoch": 0.7854603408115729, + "grad_norm": 1.8038974189988566, + "learning_rate": 1.1592351545148051e-06, + "loss": 0.5704, + "step": 25628 + }, + { + "epoch": 0.785490989334314, + "grad_norm": 2.2232571704118715, + "learning_rate": 1.1589173978198687e-06, + "loss": 0.6018, + "step": 25629 + }, + { + "epoch": 0.7855216378570553, + "grad_norm": 1.8566855808104021, + "learning_rate": 1.1585996789721004e-06, + "loss": 0.6753, + "step": 25630 + }, + { + "epoch": 0.7855522863797965, + "grad_norm": 1.8481001183865218, + "learning_rate": 1.1582819979746347e-06, + "loss": 0.5467, + "step": 25631 + }, + { + "epoch": 0.7855829349025377, + "grad_norm": 2.044876785417199, + "learning_rate": 1.157964354830597e-06, + "loss": 0.5894, + "step": 25632 + }, + { + "epoch": 0.7856135834252789, + "grad_norm": 1.9169288522844508, + "learning_rate": 1.1576467495431199e-06, + "loss": 0.6327, + "step": 25633 + }, + { + "epoch": 0.7856442319480201, + "grad_norm": 0.8233247976709562, + "learning_rate": 1.1573291821153338e-06, + "loss": 0.4077, + "step": 25634 + }, + { + "epoch": 0.7856748804707613, + "grad_norm": 1.9168976009429755, + "learning_rate": 1.157011652550365e-06, + "loss": 0.5323, + "step": 25635 + }, + { + "epoch": 0.7857055289935025, + "grad_norm": 1.909237428416549, + "learning_rate": 1.1566941608513438e-06, + "loss": 0.5957, + "step": 25636 + }, + { + "epoch": 0.7857361775162437, + "grad_norm": 1.7452430288705314, + "learning_rate": 1.1563767070214e-06, + "loss": 0.5879, + "step": 25637 + }, + { + "epoch": 0.785766826038985, + "grad_norm": 1.9610915986712318, + "learning_rate": 1.1560592910636582e-06, + "loss": 0.5363, + "step": 25638 + }, + { + "epoch": 0.7857974745617261, + "grad_norm": 1.8776103481367277, + "learning_rate": 1.155741912981248e-06, + "loss": 0.6578, + "step": 25639 + }, + { + "epoch": 0.7858281230844674, + "grad_norm": 1.9726636985994037, + "learning_rate": 1.1554245727772978e-06, + "loss": 0.5366, + "step": 25640 + }, + { + "epoch": 0.7858587716072085, + "grad_norm": 1.7854971899740806, + "learning_rate": 1.1551072704549309e-06, + "loss": 0.5414, + "step": 25641 + }, + { + "epoch": 0.7858894201299498, + "grad_norm": 1.808059718543966, + "learning_rate": 1.1547900060172779e-06, + "loss": 0.5848, + "step": 25642 + }, + { + "epoch": 0.7859200686526909, + "grad_norm": 1.892252903165144, + "learning_rate": 1.154472779467461e-06, + "loss": 0.5797, + "step": 25643 + }, + { + "epoch": 0.7859507171754322, + "grad_norm": 1.8710268183950054, + "learning_rate": 1.1541555908086077e-06, + "loss": 0.5317, + "step": 25644 + }, + { + "epoch": 0.7859813656981733, + "grad_norm": 1.825259335821043, + "learning_rate": 1.1538384400438451e-06, + "loss": 0.6045, + "step": 25645 + }, + { + "epoch": 0.7860120142209146, + "grad_norm": 2.207541899416707, + "learning_rate": 1.153521327176295e-06, + 
"loss": 0.5714, + "step": 25646 + }, + { + "epoch": 0.7860426627436558, + "grad_norm": 1.9086537868483093, + "learning_rate": 1.153204252209083e-06, + "loss": 0.5953, + "step": 25647 + }, + { + "epoch": 0.786073311266397, + "grad_norm": 1.9165482012498356, + "learning_rate": 1.1528872151453357e-06, + "loss": 0.6281, + "step": 25648 + }, + { + "epoch": 0.7861039597891382, + "grad_norm": 1.8888487125199735, + "learning_rate": 1.1525702159881735e-06, + "loss": 0.6435, + "step": 25649 + }, + { + "epoch": 0.7861346083118793, + "grad_norm": 2.2097493200317313, + "learning_rate": 1.1522532547407212e-06, + "loss": 0.6534, + "step": 25650 + }, + { + "epoch": 0.7861652568346206, + "grad_norm": 1.870642914067741, + "learning_rate": 1.1519363314061033e-06, + "loss": 0.6118, + "step": 25651 + }, + { + "epoch": 0.7861959053573617, + "grad_norm": 2.1239714317486573, + "learning_rate": 1.1516194459874403e-06, + "loss": 0.6082, + "step": 25652 + }, + { + "epoch": 0.786226553880103, + "grad_norm": 1.7120261010487192, + "learning_rate": 1.1513025984878567e-06, + "loss": 0.513, + "step": 25653 + }, + { + "epoch": 0.7862572024028441, + "grad_norm": 1.9982386603093907, + "learning_rate": 1.1509857889104704e-06, + "loss": 0.688, + "step": 25654 + }, + { + "epoch": 0.7862878509255854, + "grad_norm": 1.895794079719645, + "learning_rate": 1.150669017258409e-06, + "loss": 0.5812, + "step": 25655 + }, + { + "epoch": 0.7863184994483265, + "grad_norm": 1.9360601672573337, + "learning_rate": 1.1503522835347908e-06, + "loss": 0.5117, + "step": 25656 + }, + { + "epoch": 0.7863491479710678, + "grad_norm": 1.8521998634602193, + "learning_rate": 1.1500355877427348e-06, + "loss": 0.5741, + "step": 25657 + }, + { + "epoch": 0.786379796493809, + "grad_norm": 1.8928892426322224, + "learning_rate": 1.1497189298853634e-06, + "loss": 0.6195, + "step": 25658 + }, + { + "epoch": 0.7864104450165502, + "grad_norm": 1.9231033131899737, + "learning_rate": 1.1494023099657975e-06, + "loss": 0.6278, + "step": 25659 + }, + { + "epoch": 0.7864410935392914, + "grad_norm": 1.9099769669206585, + "learning_rate": 1.1490857279871548e-06, + "loss": 0.5317, + "step": 25660 + }, + { + "epoch": 0.7864717420620326, + "grad_norm": 0.7691002765438859, + "learning_rate": 1.1487691839525561e-06, + "loss": 0.3908, + "step": 25661 + }, + { + "epoch": 0.7865023905847738, + "grad_norm": 1.8384381731152966, + "learning_rate": 1.1484526778651195e-06, + "loss": 0.5875, + "step": 25662 + }, + { + "epoch": 0.786533039107515, + "grad_norm": 1.8255319955939604, + "learning_rate": 1.1481362097279653e-06, + "loss": 0.5592, + "step": 25663 + }, + { + "epoch": 0.7865636876302562, + "grad_norm": 1.613420647317439, + "learning_rate": 1.147819779544211e-06, + "loss": 0.5552, + "step": 25664 + }, + { + "epoch": 0.7865943361529975, + "grad_norm": 1.5722181669108917, + "learning_rate": 1.1475033873169728e-06, + "loss": 0.483, + "step": 25665 + }, + { + "epoch": 0.7866249846757386, + "grad_norm": 2.0358690913923856, + "learning_rate": 1.147187033049369e-06, + "loss": 0.6762, + "step": 25666 + }, + { + "epoch": 0.7866556331984799, + "grad_norm": 1.9592261409302976, + "learning_rate": 1.1468707167445187e-06, + "loss": 0.6596, + "step": 25667 + }, + { + "epoch": 0.786686281721221, + "grad_norm": 0.7772816235770293, + "learning_rate": 1.1465544384055355e-06, + "loss": 0.3903, + "step": 25668 + }, + { + "epoch": 0.7867169302439623, + "grad_norm": 1.9236085203289683, + "learning_rate": 1.1462381980355381e-06, + "loss": 0.5277, + "step": 25669 + }, + { + "epoch": 0.7867475787667034, + 
"grad_norm": 2.0494583849894346, + "learning_rate": 1.1459219956376421e-06, + "loss": 0.6267, + "step": 25670 + }, + { + "epoch": 0.7867782272894447, + "grad_norm": 1.8277934090628583, + "learning_rate": 1.145605831214962e-06, + "loss": 0.5744, + "step": 25671 + }, + { + "epoch": 0.7868088758121858, + "grad_norm": 1.7411982741222904, + "learning_rate": 1.145289704770614e-06, + "loss": 0.5325, + "step": 25672 + }, + { + "epoch": 0.7868395243349271, + "grad_norm": 2.5590662801631696, + "learning_rate": 1.1449736163077125e-06, + "loss": 0.6953, + "step": 25673 + }, + { + "epoch": 0.7868701728576682, + "grad_norm": 1.8170873177262017, + "learning_rate": 1.144657565829374e-06, + "loss": 0.5843, + "step": 25674 + }, + { + "epoch": 0.7869008213804095, + "grad_norm": 2.027625266066971, + "learning_rate": 1.1443415533387103e-06, + "loss": 0.6305, + "step": 25675 + }, + { + "epoch": 0.7869314699031507, + "grad_norm": 2.083860343337145, + "learning_rate": 1.144025578838835e-06, + "loss": 0.6226, + "step": 25676 + }, + { + "epoch": 0.7869621184258919, + "grad_norm": 1.6389142243359496, + "learning_rate": 1.143709642332862e-06, + "loss": 0.5542, + "step": 25677 + }, + { + "epoch": 0.7869927669486331, + "grad_norm": 1.590653846139505, + "learning_rate": 1.1433937438239062e-06, + "loss": 0.5283, + "step": 25678 + }, + { + "epoch": 0.7870234154713743, + "grad_norm": 1.88684274259642, + "learning_rate": 1.1430778833150768e-06, + "loss": 0.5857, + "step": 25679 + }, + { + "epoch": 0.7870540639941155, + "grad_norm": 2.014558046828614, + "learning_rate": 1.1427620608094881e-06, + "loss": 0.6237, + "step": 25680 + }, + { + "epoch": 0.7870847125168566, + "grad_norm": 1.6692505618068267, + "learning_rate": 1.1424462763102517e-06, + "loss": 0.6487, + "step": 25681 + }, + { + "epoch": 0.7871153610395979, + "grad_norm": 1.7579977676517642, + "learning_rate": 1.1421305298204805e-06, + "loss": 0.5309, + "step": 25682 + }, + { + "epoch": 0.787146009562339, + "grad_norm": 2.0632654077345647, + "learning_rate": 1.1418148213432846e-06, + "loss": 0.5395, + "step": 25683 + }, + { + "epoch": 0.7871766580850803, + "grad_norm": 1.7621415318802536, + "learning_rate": 1.1414991508817713e-06, + "loss": 0.6224, + "step": 25684 + }, + { + "epoch": 0.7872073066078215, + "grad_norm": 1.879171496025268, + "learning_rate": 1.1411835184390569e-06, + "loss": 0.5838, + "step": 25685 + }, + { + "epoch": 0.7872379551305627, + "grad_norm": 1.8561768619943328, + "learning_rate": 1.1408679240182485e-06, + "loss": 0.5106, + "step": 25686 + }, + { + "epoch": 0.7872686036533039, + "grad_norm": 1.7868907852963172, + "learning_rate": 1.1405523676224551e-06, + "loss": 0.5079, + "step": 25687 + }, + { + "epoch": 0.7872992521760451, + "grad_norm": 1.969347006674886, + "learning_rate": 1.140236849254786e-06, + "loss": 0.6648, + "step": 25688 + }, + { + "epoch": 0.7873299006987863, + "grad_norm": 1.8199204756381435, + "learning_rate": 1.1399213689183509e-06, + "loss": 0.52, + "step": 25689 + }, + { + "epoch": 0.7873605492215275, + "grad_norm": 1.9717954536909663, + "learning_rate": 1.1396059266162596e-06, + "loss": 0.7031, + "step": 25690 + }, + { + "epoch": 0.7873911977442687, + "grad_norm": 0.8004217296433636, + "learning_rate": 1.1392905223516175e-06, + "loss": 0.4143, + "step": 25691 + }, + { + "epoch": 0.78742184626701, + "grad_norm": 1.724391063391672, + "learning_rate": 1.1389751561275336e-06, + "loss": 0.5379, + "step": 25692 + }, + { + "epoch": 0.7874524947897511, + "grad_norm": 2.1216363250935917, + "learning_rate": 1.1386598279471174e-06, + 
"loss": 0.6701, + "step": 25693 + }, + { + "epoch": 0.7874831433124924, + "grad_norm": 2.0158827376263218, + "learning_rate": 1.1383445378134734e-06, + "loss": 0.5946, + "step": 25694 + }, + { + "epoch": 0.7875137918352335, + "grad_norm": 2.3827908408844944, + "learning_rate": 1.1380292857297053e-06, + "loss": 0.6098, + "step": 25695 + }, + { + "epoch": 0.7875444403579748, + "grad_norm": 0.783131588281687, + "learning_rate": 1.1377140716989265e-06, + "loss": 0.4058, + "step": 25696 + }, + { + "epoch": 0.7875750888807159, + "grad_norm": 1.8794484468869455, + "learning_rate": 1.1373988957242388e-06, + "loss": 0.6268, + "step": 25697 + }, + { + "epoch": 0.7876057374034572, + "grad_norm": 1.769229335724374, + "learning_rate": 1.1370837578087468e-06, + "loss": 0.4851, + "step": 25698 + }, + { + "epoch": 0.7876363859261983, + "grad_norm": 1.9029736601124614, + "learning_rate": 1.1367686579555565e-06, + "loss": 0.5931, + "step": 25699 + }, + { + "epoch": 0.7876670344489396, + "grad_norm": 1.8929493620774591, + "learning_rate": 1.1364535961677736e-06, + "loss": 0.4703, + "step": 25700 + }, + { + "epoch": 0.7876976829716807, + "grad_norm": 1.8945231235020152, + "learning_rate": 1.136138572448503e-06, + "loss": 0.6285, + "step": 25701 + }, + { + "epoch": 0.787728331494422, + "grad_norm": 1.8734663908225966, + "learning_rate": 1.1358235868008466e-06, + "loss": 0.5932, + "step": 25702 + }, + { + "epoch": 0.7877589800171632, + "grad_norm": 1.9468322588618536, + "learning_rate": 1.1355086392279085e-06, + "loss": 0.5681, + "step": 25703 + }, + { + "epoch": 0.7877896285399044, + "grad_norm": 1.8929176246178465, + "learning_rate": 1.1351937297327942e-06, + "loss": 0.5468, + "step": 25704 + }, + { + "epoch": 0.7878202770626456, + "grad_norm": 2.0484879189298324, + "learning_rate": 1.1348788583186054e-06, + "loss": 0.6176, + "step": 25705 + }, + { + "epoch": 0.7878509255853868, + "grad_norm": 1.9039432736286452, + "learning_rate": 1.134564024988441e-06, + "loss": 0.5631, + "step": 25706 + }, + { + "epoch": 0.787881574108128, + "grad_norm": 1.8904853051051402, + "learning_rate": 1.134249229745409e-06, + "loss": 0.6112, + "step": 25707 + }, + { + "epoch": 0.7879122226308692, + "grad_norm": 2.0356131291070927, + "learning_rate": 1.133934472592607e-06, + "loss": 0.592, + "step": 25708 + }, + { + "epoch": 0.7879428711536104, + "grad_norm": 2.0630740374864556, + "learning_rate": 1.1336197535331395e-06, + "loss": 0.6012, + "step": 25709 + }, + { + "epoch": 0.7879735196763517, + "grad_norm": 1.5946304613919993, + "learning_rate": 1.133305072570104e-06, + "loss": 0.6115, + "step": 25710 + }, + { + "epoch": 0.7880041681990928, + "grad_norm": 1.9195050315291362, + "learning_rate": 1.132990429706603e-06, + "loss": 0.5041, + "step": 25711 + }, + { + "epoch": 0.788034816721834, + "grad_norm": 1.772918169131188, + "learning_rate": 1.1326758249457387e-06, + "loss": 0.6134, + "step": 25712 + }, + { + "epoch": 0.7880654652445752, + "grad_norm": 1.9733430501304678, + "learning_rate": 1.1323612582906069e-06, + "loss": 0.5447, + "step": 25713 + }, + { + "epoch": 0.7880961137673164, + "grad_norm": 1.6339319566928525, + "learning_rate": 1.1320467297443094e-06, + "loss": 0.4575, + "step": 25714 + }, + { + "epoch": 0.7881267622900576, + "grad_norm": 2.034848614908111, + "learning_rate": 1.1317322393099468e-06, + "loss": 0.5606, + "step": 25715 + }, + { + "epoch": 0.7881574108127988, + "grad_norm": 1.7022424047544413, + "learning_rate": 1.1314177869906163e-06, + "loss": 0.6203, + "step": 25716 + }, + { + "epoch": 0.78818805933554, + 
"grad_norm": 1.8693575653713563, + "learning_rate": 1.1311033727894144e-06, + "loss": 0.5776, + "step": 25717 + }, + { + "epoch": 0.7882187078582812, + "grad_norm": 1.8895149063166807, + "learning_rate": 1.130788996709441e-06, + "loss": 0.5613, + "step": 25718 + }, + { + "epoch": 0.7882493563810224, + "grad_norm": 1.7998053082651104, + "learning_rate": 1.1304746587537935e-06, + "loss": 0.5621, + "step": 25719 + }, + { + "epoch": 0.7882800049037636, + "grad_norm": 1.7630972169982786, + "learning_rate": 1.1301603589255705e-06, + "loss": 0.5259, + "step": 25720 + }, + { + "epoch": 0.7883106534265049, + "grad_norm": 1.6532125854427955, + "learning_rate": 1.1298460972278663e-06, + "loss": 0.5105, + "step": 25721 + }, + { + "epoch": 0.788341301949246, + "grad_norm": 1.780206920737253, + "learning_rate": 1.129531873663779e-06, + "loss": 0.5464, + "step": 25722 + }, + { + "epoch": 0.7883719504719873, + "grad_norm": 0.8190016879983659, + "learning_rate": 1.129217688236406e-06, + "loss": 0.4034, + "step": 25723 + }, + { + "epoch": 0.7884025989947284, + "grad_norm": 1.8865081209012018, + "learning_rate": 1.1289035409488391e-06, + "loss": 0.4262, + "step": 25724 + }, + { + "epoch": 0.7884332475174697, + "grad_norm": 0.7798930817471839, + "learning_rate": 1.1285894318041769e-06, + "loss": 0.4213, + "step": 25725 + }, + { + "epoch": 0.7884638960402108, + "grad_norm": 1.6608144921281376, + "learning_rate": 1.1282753608055152e-06, + "loss": 0.5891, + "step": 25726 + }, + { + "epoch": 0.7884945445629521, + "grad_norm": 1.7505975668992235, + "learning_rate": 1.127961327955945e-06, + "loss": 0.5386, + "step": 25727 + }, + { + "epoch": 0.7885251930856932, + "grad_norm": 1.7812592246947043, + "learning_rate": 1.127647333258564e-06, + "loss": 0.4731, + "step": 25728 + }, + { + "epoch": 0.7885558416084345, + "grad_norm": 1.9482627547634486, + "learning_rate": 1.1273333767164634e-06, + "loss": 0.5401, + "step": 25729 + }, + { + "epoch": 0.7885864901311757, + "grad_norm": 2.0121553373332683, + "learning_rate": 1.127019458332738e-06, + "loss": 0.5778, + "step": 25730 + }, + { + "epoch": 0.7886171386539169, + "grad_norm": 1.8268850698348729, + "learning_rate": 1.126705578110482e-06, + "loss": 0.6526, + "step": 25731 + }, + { + "epoch": 0.7886477871766581, + "grad_norm": 2.0793170214438033, + "learning_rate": 1.126391736052786e-06, + "loss": 0.5874, + "step": 25732 + }, + { + "epoch": 0.7886784356993993, + "grad_norm": 2.020186068571469, + "learning_rate": 1.1260779321627429e-06, + "loss": 0.6141, + "step": 25733 + }, + { + "epoch": 0.7887090842221405, + "grad_norm": 1.9098481423915588, + "learning_rate": 1.1257641664434466e-06, + "loss": 0.6906, + "step": 25734 + }, + { + "epoch": 0.7887397327448817, + "grad_norm": 1.95257480091386, + "learning_rate": 1.1254504388979859e-06, + "loss": 0.6436, + "step": 25735 + }, + { + "epoch": 0.7887703812676229, + "grad_norm": 2.0329813520974724, + "learning_rate": 1.125136749529453e-06, + "loss": 0.598, + "step": 25736 + }, + { + "epoch": 0.7888010297903641, + "grad_norm": 1.7883237279501585, + "learning_rate": 1.1248230983409409e-06, + "loss": 0.5157, + "step": 25737 + }, + { + "epoch": 0.7888316783131053, + "grad_norm": 1.8996986247474337, + "learning_rate": 1.124509485335537e-06, + "loss": 0.5697, + "step": 25738 + }, + { + "epoch": 0.7888623268358466, + "grad_norm": 2.019509430173143, + "learning_rate": 1.124195910516334e-06, + "loss": 0.5473, + "step": 25739 + }, + { + "epoch": 0.7888929753585877, + "grad_norm": 1.696329292018439, + "learning_rate": 1.123882373886419e-06, + 
"loss": 0.5886, + "step": 25740 + }, + { + "epoch": 0.788923623881329, + "grad_norm": 0.8229543816059511, + "learning_rate": 1.1235688754488828e-06, + "loss": 0.4023, + "step": 25741 + }, + { + "epoch": 0.7889542724040701, + "grad_norm": 2.104088856655647, + "learning_rate": 1.1232554152068154e-06, + "loss": 0.5416, + "step": 25742 + }, + { + "epoch": 0.7889849209268113, + "grad_norm": 0.7841715084486532, + "learning_rate": 1.122941993163303e-06, + "loss": 0.388, + "step": 25743 + }, + { + "epoch": 0.7890155694495525, + "grad_norm": 1.7177865928951856, + "learning_rate": 1.122628609321435e-06, + "loss": 0.6317, + "step": 25744 + }, + { + "epoch": 0.7890462179722937, + "grad_norm": 0.8256783996254407, + "learning_rate": 1.1223152636843016e-06, + "loss": 0.4238, + "step": 25745 + }, + { + "epoch": 0.789076866495035, + "grad_norm": 1.9202985172585352, + "learning_rate": 1.1220019562549856e-06, + "loss": 0.5675, + "step": 25746 + }, + { + "epoch": 0.7891075150177761, + "grad_norm": 2.1099399686904308, + "learning_rate": 1.1216886870365774e-06, + "loss": 0.6294, + "step": 25747 + }, + { + "epoch": 0.7891381635405174, + "grad_norm": 1.697674146117691, + "learning_rate": 1.1213754560321638e-06, + "loss": 0.5844, + "step": 25748 + }, + { + "epoch": 0.7891688120632585, + "grad_norm": 1.7384654081829347, + "learning_rate": 1.1210622632448287e-06, + "loss": 0.4928, + "step": 25749 + }, + { + "epoch": 0.7891994605859998, + "grad_norm": 1.9512946671436373, + "learning_rate": 1.1207491086776613e-06, + "loss": 0.4977, + "step": 25750 + }, + { + "epoch": 0.7892301091087409, + "grad_norm": 1.8921650776999708, + "learning_rate": 1.1204359923337437e-06, + "loss": 0.5606, + "step": 25751 + }, + { + "epoch": 0.7892607576314822, + "grad_norm": 0.7975247440459553, + "learning_rate": 1.1201229142161634e-06, + "loss": 0.4042, + "step": 25752 + }, + { + "epoch": 0.7892914061542233, + "grad_norm": 1.88367691200478, + "learning_rate": 1.1198098743280056e-06, + "loss": 0.5642, + "step": 25753 + }, + { + "epoch": 0.7893220546769646, + "grad_norm": 1.9449373997845831, + "learning_rate": 1.1194968726723533e-06, + "loss": 0.5972, + "step": 25754 + }, + { + "epoch": 0.7893527031997057, + "grad_norm": 2.056291962809844, + "learning_rate": 1.1191839092522904e-06, + "loss": 0.6399, + "step": 25755 + }, + { + "epoch": 0.789383351722447, + "grad_norm": 1.8060432114924956, + "learning_rate": 1.1188709840709028e-06, + "loss": 0.4402, + "step": 25756 + }, + { + "epoch": 0.7894140002451882, + "grad_norm": 1.888446910168868, + "learning_rate": 1.1185580971312716e-06, + "loss": 0.5668, + "step": 25757 + }, + { + "epoch": 0.7894446487679294, + "grad_norm": 1.7997957559713742, + "learning_rate": 1.1182452484364803e-06, + "loss": 0.6154, + "step": 25758 + }, + { + "epoch": 0.7894752972906706, + "grad_norm": 1.743806328802246, + "learning_rate": 1.117932437989613e-06, + "loss": 0.601, + "step": 25759 + }, + { + "epoch": 0.7895059458134118, + "grad_norm": 1.8709187502374023, + "learning_rate": 1.1176196657937495e-06, + "loss": 0.6054, + "step": 25760 + }, + { + "epoch": 0.789536594336153, + "grad_norm": 2.1180724632424717, + "learning_rate": 1.117306931851974e-06, + "loss": 0.5711, + "step": 25761 + }, + { + "epoch": 0.7895672428588942, + "grad_norm": 1.92079012190885, + "learning_rate": 1.1169942361673651e-06, + "loss": 0.5694, + "step": 25762 + }, + { + "epoch": 0.7895978913816354, + "grad_norm": 1.889918489608592, + "learning_rate": 1.1166815787430062e-06, + "loss": 0.5559, + "step": 25763 + }, + { + "epoch": 0.7896285399043766, + 
"grad_norm": 1.5785438586385963, + "learning_rate": 1.1163689595819783e-06, + "loss": 0.5499, + "step": 25764 + }, + { + "epoch": 0.7896591884271178, + "grad_norm": 0.8384167901571851, + "learning_rate": 1.1160563786873591e-06, + "loss": 0.4115, + "step": 25765 + }, + { + "epoch": 0.7896898369498591, + "grad_norm": 1.7460366510302678, + "learning_rate": 1.1157438360622302e-06, + "loss": 0.4978, + "step": 25766 + }, + { + "epoch": 0.7897204854726002, + "grad_norm": 1.8602432058312177, + "learning_rate": 1.1154313317096726e-06, + "loss": 0.5462, + "step": 25767 + }, + { + "epoch": 0.7897511339953415, + "grad_norm": 1.997887482501697, + "learning_rate": 1.1151188656327627e-06, + "loss": 0.6422, + "step": 25768 + }, + { + "epoch": 0.7897817825180826, + "grad_norm": 1.9982100291073461, + "learning_rate": 1.1148064378345825e-06, + "loss": 0.6328, + "step": 25769 + }, + { + "epoch": 0.7898124310408239, + "grad_norm": 0.7720116881616541, + "learning_rate": 1.1144940483182064e-06, + "loss": 0.3988, + "step": 25770 + }, + { + "epoch": 0.789843079563565, + "grad_norm": 1.8369675688925569, + "learning_rate": 1.1141816970867148e-06, + "loss": 0.5795, + "step": 25771 + }, + { + "epoch": 0.7898737280863063, + "grad_norm": 2.1515489961881493, + "learning_rate": 1.1138693841431864e-06, + "loss": 0.6281, + "step": 25772 + }, + { + "epoch": 0.7899043766090474, + "grad_norm": 1.907252552102599, + "learning_rate": 1.1135571094906961e-06, + "loss": 0.6189, + "step": 25773 + }, + { + "epoch": 0.7899350251317886, + "grad_norm": 1.7167953944747814, + "learning_rate": 1.113244873132322e-06, + "loss": 0.5744, + "step": 25774 + }, + { + "epoch": 0.7899656736545299, + "grad_norm": 2.224734786882194, + "learning_rate": 1.1129326750711417e-06, + "loss": 0.6224, + "step": 25775 + }, + { + "epoch": 0.789996322177271, + "grad_norm": 1.8149214392905249, + "learning_rate": 1.1126205153102293e-06, + "loss": 0.5398, + "step": 25776 + }, + { + "epoch": 0.7900269707000123, + "grad_norm": 2.0422294710112063, + "learning_rate": 1.1123083938526612e-06, + "loss": 0.5958, + "step": 25777 + }, + { + "epoch": 0.7900576192227534, + "grad_norm": 2.1876939167671505, + "learning_rate": 1.1119963107015153e-06, + "loss": 0.6027, + "step": 25778 + }, + { + "epoch": 0.7900882677454947, + "grad_norm": 0.7706934551011383, + "learning_rate": 1.1116842658598626e-06, + "loss": 0.3712, + "step": 25779 + }, + { + "epoch": 0.7901189162682358, + "grad_norm": 1.734122933614931, + "learning_rate": 1.1113722593307813e-06, + "loss": 0.5696, + "step": 25780 + }, + { + "epoch": 0.7901495647909771, + "grad_norm": 0.8018352011407547, + "learning_rate": 1.1110602911173413e-06, + "loss": 0.3821, + "step": 25781 + }, + { + "epoch": 0.7901802133137182, + "grad_norm": 1.9077320969606766, + "learning_rate": 1.1107483612226216e-06, + "loss": 0.5725, + "step": 25782 + }, + { + "epoch": 0.7902108618364595, + "grad_norm": 1.7679086568229316, + "learning_rate": 1.1104364696496938e-06, + "loss": 0.522, + "step": 25783 + }, + { + "epoch": 0.7902415103592006, + "grad_norm": 1.9172514240917384, + "learning_rate": 1.1101246164016295e-06, + "loss": 0.6317, + "step": 25784 + }, + { + "epoch": 0.7902721588819419, + "grad_norm": 1.6991185258611734, + "learning_rate": 1.109812801481502e-06, + "loss": 0.4657, + "step": 25785 + }, + { + "epoch": 0.7903028074046831, + "grad_norm": 1.8027631799983699, + "learning_rate": 1.1095010248923859e-06, + "loss": 0.5856, + "step": 25786 + }, + { + "epoch": 0.7903334559274243, + "grad_norm": 2.030345779974763, + "learning_rate": 
1.1091892866373506e-06, + "loss": 0.5743, + "step": 25787 + }, + { + "epoch": 0.7903641044501655, + "grad_norm": 1.8135802231994402, + "learning_rate": 1.1088775867194684e-06, + "loss": 0.5607, + "step": 25788 + }, + { + "epoch": 0.7903947529729067, + "grad_norm": 1.9344182617543486, + "learning_rate": 1.1085659251418113e-06, + "loss": 0.6434, + "step": 25789 + }, + { + "epoch": 0.7904254014956479, + "grad_norm": 2.0327770669780976, + "learning_rate": 1.108254301907451e-06, + "loss": 0.6951, + "step": 25790 + }, + { + "epoch": 0.7904560500183891, + "grad_norm": 1.9880141083828557, + "learning_rate": 1.1079427170194568e-06, + "loss": 0.5271, + "step": 25791 + }, + { + "epoch": 0.7904866985411303, + "grad_norm": 2.16869261457797, + "learning_rate": 1.1076311704808957e-06, + "loss": 0.636, + "step": 25792 + }, + { + "epoch": 0.7905173470638716, + "grad_norm": 1.7968405316406504, + "learning_rate": 1.107319662294844e-06, + "loss": 0.5726, + "step": 25793 + }, + { + "epoch": 0.7905479955866127, + "grad_norm": 1.776102426565221, + "learning_rate": 1.1070081924643672e-06, + "loss": 0.5315, + "step": 25794 + }, + { + "epoch": 0.790578644109354, + "grad_norm": 1.7969389217529164, + "learning_rate": 1.1066967609925333e-06, + "loss": 0.6192, + "step": 25795 + }, + { + "epoch": 0.7906092926320951, + "grad_norm": 1.7646871326932818, + "learning_rate": 1.1063853678824127e-06, + "loss": 0.5417, + "step": 25796 + }, + { + "epoch": 0.7906399411548364, + "grad_norm": 1.838908124490944, + "learning_rate": 1.1060740131370744e-06, + "loss": 0.6514, + "step": 25797 + }, + { + "epoch": 0.7906705896775775, + "grad_norm": 2.034824490542588, + "learning_rate": 1.105762696759584e-06, + "loss": 0.6176, + "step": 25798 + }, + { + "epoch": 0.7907012382003188, + "grad_norm": 1.8398918213418438, + "learning_rate": 1.1054514187530102e-06, + "loss": 0.6038, + "step": 25799 + }, + { + "epoch": 0.7907318867230599, + "grad_norm": 2.0355405428750233, + "learning_rate": 1.1051401791204197e-06, + "loss": 0.6327, + "step": 25800 + }, + { + "epoch": 0.7907625352458012, + "grad_norm": 1.9855550022465438, + "learning_rate": 1.1048289778648814e-06, + "loss": 0.5569, + "step": 25801 + }, + { + "epoch": 0.7907931837685424, + "grad_norm": 1.8886383385497958, + "learning_rate": 1.1045178149894592e-06, + "loss": 0.6492, + "step": 25802 + }, + { + "epoch": 0.7908238322912836, + "grad_norm": 1.8140558314968835, + "learning_rate": 1.104206690497217e-06, + "loss": 0.5273, + "step": 25803 + }, + { + "epoch": 0.7908544808140248, + "grad_norm": 1.9148488040239386, + "learning_rate": 1.1038956043912264e-06, + "loss": 0.5449, + "step": 25804 + }, + { + "epoch": 0.7908851293367659, + "grad_norm": 0.7870000838118864, + "learning_rate": 1.103584556674549e-06, + "loss": 0.402, + "step": 25805 + }, + { + "epoch": 0.7909157778595072, + "grad_norm": 1.9256904893713043, + "learning_rate": 1.103273547350248e-06, + "loss": 0.5324, + "step": 25806 + }, + { + "epoch": 0.7909464263822483, + "grad_norm": 2.0622070044386183, + "learning_rate": 1.1029625764213903e-06, + "loss": 0.5827, + "step": 25807 + }, + { + "epoch": 0.7909770749049896, + "grad_norm": 0.8181791845763883, + "learning_rate": 1.1026516438910396e-06, + "loss": 0.4053, + "step": 25808 + }, + { + "epoch": 0.7910077234277307, + "grad_norm": 1.6598400980886387, + "learning_rate": 1.1023407497622601e-06, + "loss": 0.5503, + "step": 25809 + }, + { + "epoch": 0.791038371950472, + "grad_norm": 1.8924688453168308, + "learning_rate": 1.1020298940381135e-06, + "loss": 0.5337, + "step": 25810 + }, + { + 
"epoch": 0.7910690204732131, + "grad_norm": 2.134440408882015, + "learning_rate": 1.101719076721664e-06, + "loss": 0.5863, + "step": 25811 + }, + { + "epoch": 0.7910996689959544, + "grad_norm": 1.8537086827096976, + "learning_rate": 1.101408297815975e-06, + "loss": 0.5447, + "step": 25812 + }, + { + "epoch": 0.7911303175186956, + "grad_norm": 0.9841333921619142, + "learning_rate": 1.1010975573241073e-06, + "loss": 0.4059, + "step": 25813 + }, + { + "epoch": 0.7911609660414368, + "grad_norm": 1.5938240125924656, + "learning_rate": 1.1007868552491219e-06, + "loss": 0.5001, + "step": 25814 + }, + { + "epoch": 0.791191614564178, + "grad_norm": 1.7035010606255123, + "learning_rate": 1.100476191594081e-06, + "loss": 0.491, + "step": 25815 + }, + { + "epoch": 0.7912222630869192, + "grad_norm": 1.6710275977491071, + "learning_rate": 1.1001655663620463e-06, + "loss": 0.5939, + "step": 25816 + }, + { + "epoch": 0.7912529116096604, + "grad_norm": 1.8476087089363569, + "learning_rate": 1.0998549795560791e-06, + "loss": 0.6268, + "step": 25817 + }, + { + "epoch": 0.7912835601324016, + "grad_norm": 1.6785639972174, + "learning_rate": 1.0995444311792374e-06, + "loss": 0.582, + "step": 25818 + }, + { + "epoch": 0.7913142086551428, + "grad_norm": 2.0809384478052375, + "learning_rate": 1.0992339212345827e-06, + "loss": 0.5313, + "step": 25819 + }, + { + "epoch": 0.791344857177884, + "grad_norm": 1.891315227032959, + "learning_rate": 1.098923449725175e-06, + "loss": 0.5818, + "step": 25820 + }, + { + "epoch": 0.7913755057006252, + "grad_norm": 0.8083399499010081, + "learning_rate": 1.098613016654073e-06, + "loss": 0.3941, + "step": 25821 + }, + { + "epoch": 0.7914061542233665, + "grad_norm": 1.818311901723467, + "learning_rate": 1.0983026220243326e-06, + "loss": 0.5485, + "step": 25822 + }, + { + "epoch": 0.7914368027461076, + "grad_norm": 1.8952718833394377, + "learning_rate": 1.097992265839017e-06, + "loss": 0.5722, + "step": 25823 + }, + { + "epoch": 0.7914674512688489, + "grad_norm": 1.9967638672897579, + "learning_rate": 1.0976819481011824e-06, + "loss": 0.6122, + "step": 25824 + }, + { + "epoch": 0.79149809979159, + "grad_norm": 0.8178962455093635, + "learning_rate": 1.0973716688138847e-06, + "loss": 0.3944, + "step": 25825 + }, + { + "epoch": 0.7915287483143313, + "grad_norm": 2.2092735604270834, + "learning_rate": 1.0970614279801823e-06, + "loss": 0.5654, + "step": 25826 + }, + { + "epoch": 0.7915593968370724, + "grad_norm": 1.7646726086309157, + "learning_rate": 1.0967512256031322e-06, + "loss": 0.5912, + "step": 25827 + }, + { + "epoch": 0.7915900453598137, + "grad_norm": 1.9348892433228961, + "learning_rate": 1.0964410616857924e-06, + "loss": 0.5803, + "step": 25828 + }, + { + "epoch": 0.7916206938825548, + "grad_norm": 1.8660775211065936, + "learning_rate": 1.0961309362312162e-06, + "loss": 0.6262, + "step": 25829 + }, + { + "epoch": 0.7916513424052961, + "grad_norm": 1.630218610115692, + "learning_rate": 1.0958208492424605e-06, + "loss": 0.5096, + "step": 25830 + }, + { + "epoch": 0.7916819909280373, + "grad_norm": 1.8414183617182622, + "learning_rate": 1.0955108007225828e-06, + "loss": 0.5709, + "step": 25831 + }, + { + "epoch": 0.7917126394507785, + "grad_norm": 1.7837005391562606, + "learning_rate": 1.095200790674636e-06, + "loss": 0.6266, + "step": 25832 + }, + { + "epoch": 0.7917432879735197, + "grad_norm": 1.9003660153701558, + "learning_rate": 1.0948908191016716e-06, + "loss": 0.5887, + "step": 25833 + }, + { + "epoch": 0.7917739364962609, + "grad_norm": 1.9358025143380722, + 
"learning_rate": 1.0945808860067502e-06, + "loss": 0.5811, + "step": 25834 + }, + { + "epoch": 0.7918045850190021, + "grad_norm": 1.895704627261191, + "learning_rate": 1.0942709913929216e-06, + "loss": 0.5546, + "step": 25835 + }, + { + "epoch": 0.7918352335417432, + "grad_norm": 1.9139216543701274, + "learning_rate": 1.0939611352632413e-06, + "loss": 0.6032, + "step": 25836 + }, + { + "epoch": 0.7918658820644845, + "grad_norm": 2.0242225649232606, + "learning_rate": 1.0936513176207603e-06, + "loss": 0.5708, + "step": 25837 + }, + { + "epoch": 0.7918965305872256, + "grad_norm": 2.0656175197337925, + "learning_rate": 1.093341538468532e-06, + "loss": 0.6766, + "step": 25838 + }, + { + "epoch": 0.7919271791099669, + "grad_norm": 1.74844369930058, + "learning_rate": 1.0930317978096106e-06, + "loss": 0.6053, + "step": 25839 + }, + { + "epoch": 0.7919578276327081, + "grad_norm": 1.619258970720239, + "learning_rate": 1.0927220956470446e-06, + "loss": 0.5499, + "step": 25840 + }, + { + "epoch": 0.7919884761554493, + "grad_norm": 0.7946987675412542, + "learning_rate": 1.092412431983888e-06, + "loss": 0.3984, + "step": 25841 + }, + { + "epoch": 0.7920191246781905, + "grad_norm": 1.9417019566703928, + "learning_rate": 1.092102806823193e-06, + "loss": 0.5596, + "step": 25842 + }, + { + "epoch": 0.7920497732009317, + "grad_norm": 1.7448725160492466, + "learning_rate": 1.0917932201680075e-06, + "loss": 0.6284, + "step": 25843 + }, + { + "epoch": 0.7920804217236729, + "grad_norm": 2.206720235875401, + "learning_rate": 1.0914836720213835e-06, + "loss": 0.6402, + "step": 25844 + }, + { + "epoch": 0.7921110702464141, + "grad_norm": 1.8272605384777896, + "learning_rate": 1.091174162386372e-06, + "loss": 0.5525, + "step": 25845 + }, + { + "epoch": 0.7921417187691553, + "grad_norm": 1.921453328499941, + "learning_rate": 1.0908646912660204e-06, + "loss": 0.6145, + "step": 25846 + }, + { + "epoch": 0.7921723672918966, + "grad_norm": 2.7925708414827097, + "learning_rate": 1.0905552586633804e-06, + "loss": 0.5334, + "step": 25847 + }, + { + "epoch": 0.7922030158146377, + "grad_norm": 1.9339812611417075, + "learning_rate": 1.0902458645814984e-06, + "loss": 0.6422, + "step": 25848 + }, + { + "epoch": 0.792233664337379, + "grad_norm": 1.649497130898711, + "learning_rate": 1.0899365090234244e-06, + "loss": 0.5901, + "step": 25849 + }, + { + "epoch": 0.7922643128601201, + "grad_norm": 1.921526298784752, + "learning_rate": 1.0896271919922074e-06, + "loss": 0.5331, + "step": 25850 + }, + { + "epoch": 0.7922949613828614, + "grad_norm": 1.8103876554331297, + "learning_rate": 1.0893179134908932e-06, + "loss": 0.5722, + "step": 25851 + }, + { + "epoch": 0.7923256099056025, + "grad_norm": 1.7039781787743051, + "learning_rate": 1.0890086735225296e-06, + "loss": 0.5742, + "step": 25852 + }, + { + "epoch": 0.7923562584283438, + "grad_norm": 2.0016189488584746, + "learning_rate": 1.088699472090166e-06, + "loss": 0.5104, + "step": 25853 + }, + { + "epoch": 0.7923869069510849, + "grad_norm": 1.9681233981690147, + "learning_rate": 1.0883903091968461e-06, + "loss": 0.6633, + "step": 25854 + }, + { + "epoch": 0.7924175554738262, + "grad_norm": 1.9904552244551041, + "learning_rate": 1.0880811848456169e-06, + "loss": 0.5422, + "step": 25855 + }, + { + "epoch": 0.7924482039965673, + "grad_norm": 2.201774504906676, + "learning_rate": 1.0877720990395263e-06, + "loss": 0.6652, + "step": 25856 + }, + { + "epoch": 0.7924788525193086, + "grad_norm": 1.8861051906655923, + "learning_rate": 1.087463051781617e-06, + "loss": 0.6124, + "step": 25857 
+ }, + { + "epoch": 0.7925095010420498, + "grad_norm": 1.9408065102790715, + "learning_rate": 1.0871540430749366e-06, + "loss": 0.5135, + "step": 25858 + }, + { + "epoch": 0.792540149564791, + "grad_norm": 2.1217754598717278, + "learning_rate": 1.0868450729225272e-06, + "loss": 0.6575, + "step": 25859 + }, + { + "epoch": 0.7925707980875322, + "grad_norm": 1.875832284526621, + "learning_rate": 1.086536141327434e-06, + "loss": 0.5733, + "step": 25860 + }, + { + "epoch": 0.7926014466102734, + "grad_norm": 2.147801848771749, + "learning_rate": 1.0862272482927033e-06, + "loss": 0.6392, + "step": 25861 + }, + { + "epoch": 0.7926320951330146, + "grad_norm": 1.7081125461156976, + "learning_rate": 1.0859183938213759e-06, + "loss": 0.5026, + "step": 25862 + }, + { + "epoch": 0.7926627436557558, + "grad_norm": 1.8814889444927552, + "learning_rate": 1.0856095779164955e-06, + "loss": 0.4914, + "step": 25863 + }, + { + "epoch": 0.792693392178497, + "grad_norm": 1.9526520449331002, + "learning_rate": 1.0853008005811067e-06, + "loss": 0.5424, + "step": 25864 + }, + { + "epoch": 0.7927240407012383, + "grad_norm": 1.8840800400307685, + "learning_rate": 1.0849920618182496e-06, + "loss": 0.5463, + "step": 25865 + }, + { + "epoch": 0.7927546892239794, + "grad_norm": 1.5776287334246981, + "learning_rate": 1.0846833616309687e-06, + "loss": 0.491, + "step": 25866 + }, + { + "epoch": 0.7927853377467206, + "grad_norm": 1.9402568797200148, + "learning_rate": 1.084374700022303e-06, + "loss": 0.6231, + "step": 25867 + }, + { + "epoch": 0.7928159862694618, + "grad_norm": 1.7690488734115346, + "learning_rate": 1.084066076995296e-06, + "loss": 0.5779, + "step": 25868 + }, + { + "epoch": 0.792846634792203, + "grad_norm": 1.8216260712534555, + "learning_rate": 1.0837574925529887e-06, + "loss": 0.5218, + "step": 25869 + }, + { + "epoch": 0.7928772833149442, + "grad_norm": 2.007774289005818, + "learning_rate": 1.0834489466984193e-06, + "loss": 0.5782, + "step": 25870 + }, + { + "epoch": 0.7929079318376854, + "grad_norm": 1.8735357925918872, + "learning_rate": 1.0831404394346294e-06, + "loss": 0.605, + "step": 25871 + }, + { + "epoch": 0.7929385803604266, + "grad_norm": 2.071166039841302, + "learning_rate": 1.0828319707646606e-06, + "loss": 0.5779, + "step": 25872 + }, + { + "epoch": 0.7929692288831678, + "grad_norm": 2.127612430682575, + "learning_rate": 1.082523540691549e-06, + "loss": 0.7186, + "step": 25873 + }, + { + "epoch": 0.792999877405909, + "grad_norm": 0.8152545277844255, + "learning_rate": 1.0822151492183359e-06, + "loss": 0.4042, + "step": 25874 + }, + { + "epoch": 0.7930305259286502, + "grad_norm": 1.808931852428712, + "learning_rate": 1.08190679634806e-06, + "loss": 0.6175, + "step": 25875 + }, + { + "epoch": 0.7930611744513915, + "grad_norm": 0.9036355033712701, + "learning_rate": 1.0815984820837577e-06, + "loss": 0.4079, + "step": 25876 + }, + { + "epoch": 0.7930918229741326, + "grad_norm": 1.6542373500862808, + "learning_rate": 1.0812902064284697e-06, + "loss": 0.4807, + "step": 25877 + }, + { + "epoch": 0.7931224714968739, + "grad_norm": 1.8104677646841503, + "learning_rate": 1.0809819693852308e-06, + "loss": 0.5932, + "step": 25878 + }, + { + "epoch": 0.793153120019615, + "grad_norm": 0.8146735844086849, + "learning_rate": 1.0806737709570786e-06, + "loss": 0.3923, + "step": 25879 + }, + { + "epoch": 0.7931837685423563, + "grad_norm": 2.075829639056029, + "learning_rate": 1.0803656111470523e-06, + "loss": 0.6813, + "step": 25880 + }, + { + "epoch": 0.7932144170650974, + "grad_norm": 0.8076094335479616, + 
"learning_rate": 1.080057489958185e-06, + "loss": 0.3997, + "step": 25881 + }, + { + "epoch": 0.7932450655878387, + "grad_norm": 1.8273266964088521, + "learning_rate": 1.0797494073935143e-06, + "loss": 0.5045, + "step": 25882 + }, + { + "epoch": 0.7932757141105798, + "grad_norm": 2.0424645290178858, + "learning_rate": 1.0794413634560775e-06, + "loss": 0.5825, + "step": 25883 + }, + { + "epoch": 0.7933063626333211, + "grad_norm": 1.9595095404646925, + "learning_rate": 1.0791333581489061e-06, + "loss": 0.566, + "step": 25884 + }, + { + "epoch": 0.7933370111560623, + "grad_norm": 1.9007469101032297, + "learning_rate": 1.0788253914750373e-06, + "loss": 0.5363, + "step": 25885 + }, + { + "epoch": 0.7933676596788035, + "grad_norm": 1.5917228879619256, + "learning_rate": 1.0785174634375068e-06, + "loss": 0.5035, + "step": 25886 + }, + { + "epoch": 0.7933983082015447, + "grad_norm": 1.9991443927939958, + "learning_rate": 1.078209574039345e-06, + "loss": 0.5862, + "step": 25887 + }, + { + "epoch": 0.7934289567242859, + "grad_norm": 2.118536077422673, + "learning_rate": 1.0779017232835893e-06, + "loss": 0.5313, + "step": 25888 + }, + { + "epoch": 0.7934596052470271, + "grad_norm": 1.9366073382139253, + "learning_rate": 1.0775939111732692e-06, + "loss": 0.6205, + "step": 25889 + }, + { + "epoch": 0.7934902537697683, + "grad_norm": 1.8479617722140982, + "learning_rate": 1.077286137711422e-06, + "loss": 0.5795, + "step": 25890 + }, + { + "epoch": 0.7935209022925095, + "grad_norm": 2.030219969178157, + "learning_rate": 1.0769784029010783e-06, + "loss": 0.5962, + "step": 25891 + }, + { + "epoch": 0.7935515508152508, + "grad_norm": 1.6101033114275618, + "learning_rate": 1.0766707067452692e-06, + "loss": 0.4461, + "step": 25892 + }, + { + "epoch": 0.7935821993379919, + "grad_norm": 1.9523532713297347, + "learning_rate": 1.0763630492470267e-06, + "loss": 0.543, + "step": 25893 + }, + { + "epoch": 0.7936128478607332, + "grad_norm": 1.9947389779204554, + "learning_rate": 1.076055430409385e-06, + "loss": 0.5836, + "step": 25894 + }, + { + "epoch": 0.7936434963834743, + "grad_norm": 1.6024874331635282, + "learning_rate": 1.0757478502353713e-06, + "loss": 0.4679, + "step": 25895 + }, + { + "epoch": 0.7936741449062156, + "grad_norm": 1.7492948404265276, + "learning_rate": 1.0754403087280185e-06, + "loss": 0.5645, + "step": 25896 + }, + { + "epoch": 0.7937047934289567, + "grad_norm": 1.917619573529259, + "learning_rate": 1.0751328058903576e-06, + "loss": 0.5513, + "step": 25897 + }, + { + "epoch": 0.7937354419516979, + "grad_norm": 1.8626110806310638, + "learning_rate": 1.0748253417254162e-06, + "loss": 0.554, + "step": 25898 + }, + { + "epoch": 0.7937660904744391, + "grad_norm": 1.910147154114865, + "learning_rate": 1.0745179162362263e-06, + "loss": 0.5422, + "step": 25899 + }, + { + "epoch": 0.7937967389971803, + "grad_norm": 1.764864558900632, + "learning_rate": 1.0742105294258126e-06, + "loss": 0.5494, + "step": 25900 + }, + { + "epoch": 0.7938273875199215, + "grad_norm": 2.047504217528488, + "learning_rate": 1.07390318129721e-06, + "loss": 0.6341, + "step": 25901 + }, + { + "epoch": 0.7938580360426627, + "grad_norm": 1.8037700561950087, + "learning_rate": 1.0735958718534445e-06, + "loss": 0.5257, + "step": 25902 + }, + { + "epoch": 0.793888684565404, + "grad_norm": 1.9262975995276992, + "learning_rate": 1.0732886010975414e-06, + "loss": 0.5904, + "step": 25903 + }, + { + "epoch": 0.7939193330881451, + "grad_norm": 1.9018028746415085, + "learning_rate": 1.0729813690325307e-06, + "loss": 0.6341, + "step": 25904 
+ }, + { + "epoch": 0.7939499816108864, + "grad_norm": 2.1114348777166616, + "learning_rate": 1.0726741756614406e-06, + "loss": 0.7043, + "step": 25905 + }, + { + "epoch": 0.7939806301336275, + "grad_norm": 1.9596036817627798, + "learning_rate": 1.0723670209872956e-06, + "loss": 0.5435, + "step": 25906 + }, + { + "epoch": 0.7940112786563688, + "grad_norm": 0.7823141929826044, + "learning_rate": 1.0720599050131225e-06, + "loss": 0.3933, + "step": 25907 + }, + { + "epoch": 0.7940419271791099, + "grad_norm": 1.8584348729521527, + "learning_rate": 1.0717528277419491e-06, + "loss": 0.6454, + "step": 25908 + }, + { + "epoch": 0.7940725757018512, + "grad_norm": 1.6457504977077222, + "learning_rate": 1.071445789176801e-06, + "loss": 0.5523, + "step": 25909 + }, + { + "epoch": 0.7941032242245923, + "grad_norm": 1.5730878410506381, + "learning_rate": 1.0711387893207026e-06, + "loss": 0.5269, + "step": 25910 + }, + { + "epoch": 0.7941338727473336, + "grad_norm": 2.082524824800221, + "learning_rate": 1.0708318281766784e-06, + "loss": 0.6448, + "step": 25911 + }, + { + "epoch": 0.7941645212700748, + "grad_norm": 0.8136065375205893, + "learning_rate": 1.0705249057477524e-06, + "loss": 0.4006, + "step": 25912 + }, + { + "epoch": 0.794195169792816, + "grad_norm": 1.8529854253804243, + "learning_rate": 1.070218022036952e-06, + "loss": 0.627, + "step": 25913 + }, + { + "epoch": 0.7942258183155572, + "grad_norm": 2.1475883114926404, + "learning_rate": 1.0699111770472976e-06, + "loss": 0.7147, + "step": 25914 + }, + { + "epoch": 0.7942564668382984, + "grad_norm": 1.8630111557446898, + "learning_rate": 1.0696043707818132e-06, + "loss": 0.6563, + "step": 25915 + }, + { + "epoch": 0.7942871153610396, + "grad_norm": 1.8422842697171837, + "learning_rate": 1.0692976032435232e-06, + "loss": 0.5795, + "step": 25916 + }, + { + "epoch": 0.7943177638837808, + "grad_norm": 0.7827271017555331, + "learning_rate": 1.068990874435451e-06, + "loss": 0.4022, + "step": 25917 + }, + { + "epoch": 0.794348412406522, + "grad_norm": 0.7882953081892908, + "learning_rate": 1.0686841843606171e-06, + "loss": 0.4042, + "step": 25918 + }, + { + "epoch": 0.7943790609292632, + "grad_norm": 2.0062812127930907, + "learning_rate": 1.0683775330220414e-06, + "loss": 0.5839, + "step": 25919 + }, + { + "epoch": 0.7944097094520044, + "grad_norm": 1.769268883656798, + "learning_rate": 1.0680709204227507e-06, + "loss": 0.6036, + "step": 25920 + }, + { + "epoch": 0.7944403579747457, + "grad_norm": 1.8588561208423622, + "learning_rate": 1.0677643465657628e-06, + "loss": 0.5704, + "step": 25921 + }, + { + "epoch": 0.7944710064974868, + "grad_norm": 0.8550623641047733, + "learning_rate": 1.0674578114540979e-06, + "loss": 0.3958, + "step": 25922 + }, + { + "epoch": 0.7945016550202281, + "grad_norm": 1.7873603695493288, + "learning_rate": 1.0671513150907776e-06, + "loss": 0.5436, + "step": 25923 + }, + { + "epoch": 0.7945323035429692, + "grad_norm": 1.7459529594972651, + "learning_rate": 1.066844857478822e-06, + "loss": 0.5486, + "step": 25924 + }, + { + "epoch": 0.7945629520657105, + "grad_norm": 1.9438268961390945, + "learning_rate": 1.0665384386212501e-06, + "loss": 0.5956, + "step": 25925 + }, + { + "epoch": 0.7945936005884516, + "grad_norm": 1.8991529616361236, + "learning_rate": 1.0662320585210806e-06, + "loss": 0.5912, + "step": 25926 + }, + { + "epoch": 0.7946242491111929, + "grad_norm": 2.2453047838628106, + "learning_rate": 1.0659257171813337e-06, + "loss": 0.6397, + "step": 25927 + }, + { + "epoch": 0.794654897633934, + "grad_norm": 
1.797332577133649, + "learning_rate": 1.0656194146050281e-06, + "loss": 0.4891, + "step": 25928 + }, + { + "epoch": 0.7946855461566752, + "grad_norm": 1.7102190629705996, + "learning_rate": 1.0653131507951814e-06, + "loss": 0.4871, + "step": 25929 + }, + { + "epoch": 0.7947161946794165, + "grad_norm": 1.8707337659523076, + "learning_rate": 1.0650069257548084e-06, + "loss": 0.6478, + "step": 25930 + }, + { + "epoch": 0.7947468432021576, + "grad_norm": 1.6005410993167772, + "learning_rate": 1.0647007394869313e-06, + "loss": 0.5104, + "step": 25931 + }, + { + "epoch": 0.7947774917248989, + "grad_norm": 1.6831803572750714, + "learning_rate": 1.0643945919945652e-06, + "loss": 0.4944, + "step": 25932 + }, + { + "epoch": 0.79480814024764, + "grad_norm": 1.8002530795961846, + "learning_rate": 1.064088483280724e-06, + "loss": 0.5627, + "step": 25933 + }, + { + "epoch": 0.7948387887703813, + "grad_norm": 1.9208468676742594, + "learning_rate": 1.063782413348427e-06, + "loss": 0.5312, + "step": 25934 + }, + { + "epoch": 0.7948694372931224, + "grad_norm": 0.7874364193818946, + "learning_rate": 1.0634763822006883e-06, + "loss": 0.4098, + "step": 25935 + }, + { + "epoch": 0.7949000858158637, + "grad_norm": 1.8755754927208699, + "learning_rate": 1.0631703898405255e-06, + "loss": 0.5891, + "step": 25936 + }, + { + "epoch": 0.7949307343386048, + "grad_norm": 1.829917132975799, + "learning_rate": 1.0628644362709512e-06, + "loss": 0.5203, + "step": 25937 + }, + { + "epoch": 0.7949613828613461, + "grad_norm": 1.8616354740385925, + "learning_rate": 1.0625585214949802e-06, + "loss": 0.6192, + "step": 25938 + }, + { + "epoch": 0.7949920313840872, + "grad_norm": 2.2072050692676153, + "learning_rate": 1.0622526455156297e-06, + "loss": 0.59, + "step": 25939 + }, + { + "epoch": 0.7950226799068285, + "grad_norm": 1.7973763097692645, + "learning_rate": 1.0619468083359107e-06, + "loss": 0.563, + "step": 25940 + }, + { + "epoch": 0.7950533284295697, + "grad_norm": 0.8024034477102129, + "learning_rate": 1.0616410099588349e-06, + "loss": 0.4001, + "step": 25941 + }, + { + "epoch": 0.7950839769523109, + "grad_norm": 0.7639607522656413, + "learning_rate": 1.0613352503874209e-06, + "loss": 0.3976, + "step": 25942 + }, + { + "epoch": 0.7951146254750521, + "grad_norm": 0.8012403082875711, + "learning_rate": 1.061029529624677e-06, + "loss": 0.3954, + "step": 25943 + }, + { + "epoch": 0.7951452739977933, + "grad_norm": 1.564893110865973, + "learning_rate": 1.060723847673618e-06, + "loss": 0.5734, + "step": 25944 + }, + { + "epoch": 0.7951759225205345, + "grad_norm": 1.7146803972843896, + "learning_rate": 1.0604182045372535e-06, + "loss": 0.4893, + "step": 25945 + }, + { + "epoch": 0.7952065710432757, + "grad_norm": 1.8204465891539194, + "learning_rate": 1.060112600218597e-06, + "loss": 0.597, + "step": 25946 + }, + { + "epoch": 0.7952372195660169, + "grad_norm": 1.8680081009826284, + "learning_rate": 1.0598070347206595e-06, + "loss": 0.5425, + "step": 25947 + }, + { + "epoch": 0.7952678680887582, + "grad_norm": 0.7891064522925632, + "learning_rate": 1.059501508046451e-06, + "loss": 0.4126, + "step": 25948 + }, + { + "epoch": 0.7952985166114993, + "grad_norm": 0.7996226174796336, + "learning_rate": 1.0591960201989821e-06, + "loss": 0.3886, + "step": 25949 + }, + { + "epoch": 0.7953291651342406, + "grad_norm": 1.9106810791541327, + "learning_rate": 1.0588905711812641e-06, + "loss": 0.5954, + "step": 25950 + }, + { + "epoch": 0.7953598136569817, + "grad_norm": 0.8396902705572098, + "learning_rate": 1.058585160996306e-06, + "loss": 
0.4168, + "step": 25951 + }, + { + "epoch": 0.795390462179723, + "grad_norm": 1.8258253352905986, + "learning_rate": 1.0582797896471143e-06, + "loss": 0.6103, + "step": 25952 + }, + { + "epoch": 0.7954211107024641, + "grad_norm": 1.6454569546712199, + "learning_rate": 1.0579744571367023e-06, + "loss": 0.5508, + "step": 25953 + }, + { + "epoch": 0.7954517592252054, + "grad_norm": 1.9512526739112341, + "learning_rate": 1.0576691634680758e-06, + "loss": 0.6436, + "step": 25954 + }, + { + "epoch": 0.7954824077479465, + "grad_norm": 2.0657018839694667, + "learning_rate": 1.057363908644245e-06, + "loss": 0.6254, + "step": 25955 + }, + { + "epoch": 0.7955130562706878, + "grad_norm": 1.7809464273906204, + "learning_rate": 1.0570586926682153e-06, + "loss": 0.5392, + "step": 25956 + }, + { + "epoch": 0.795543704793429, + "grad_norm": 0.7830664200829626, + "learning_rate": 1.0567535155429947e-06, + "loss": 0.3914, + "step": 25957 + }, + { + "epoch": 0.7955743533161702, + "grad_norm": 1.7102686677617935, + "learning_rate": 1.0564483772715922e-06, + "loss": 0.5046, + "step": 25958 + }, + { + "epoch": 0.7956050018389114, + "grad_norm": 1.874538231779064, + "learning_rate": 1.0561432778570113e-06, + "loss": 0.5861, + "step": 25959 + }, + { + "epoch": 0.7956356503616525, + "grad_norm": 1.738255880265191, + "learning_rate": 1.0558382173022596e-06, + "loss": 0.5951, + "step": 25960 + }, + { + "epoch": 0.7956662988843938, + "grad_norm": 1.756065316178672, + "learning_rate": 1.0555331956103448e-06, + "loss": 0.6938, + "step": 25961 + }, + { + "epoch": 0.7956969474071349, + "grad_norm": 1.8707062815465851, + "learning_rate": 1.0552282127842694e-06, + "loss": 0.6379, + "step": 25962 + }, + { + "epoch": 0.7957275959298762, + "grad_norm": 1.819761009070845, + "learning_rate": 1.054923268827041e-06, + "loss": 0.4404, + "step": 25963 + }, + { + "epoch": 0.7957582444526173, + "grad_norm": 1.6588154829921478, + "learning_rate": 1.0546183637416612e-06, + "loss": 0.5583, + "step": 25964 + }, + { + "epoch": 0.7957888929753586, + "grad_norm": 0.7870858308767446, + "learning_rate": 1.054313497531137e-06, + "loss": 0.3931, + "step": 25965 + }, + { + "epoch": 0.7958195414980997, + "grad_norm": 1.8365058036012263, + "learning_rate": 1.054008670198472e-06, + "loss": 0.555, + "step": 25966 + }, + { + "epoch": 0.795850190020841, + "grad_norm": 1.7792989964976111, + "learning_rate": 1.0537038817466682e-06, + "loss": 0.5613, + "step": 25967 + }, + { + "epoch": 0.7958808385435822, + "grad_norm": 1.8121226165326696, + "learning_rate": 1.05339913217873e-06, + "loss": 0.6186, + "step": 25968 + }, + { + "epoch": 0.7959114870663234, + "grad_norm": 0.8055070106780433, + "learning_rate": 1.0530944214976608e-06, + "loss": 0.4061, + "step": 25969 + }, + { + "epoch": 0.7959421355890646, + "grad_norm": 1.7886380610534551, + "learning_rate": 1.052789749706461e-06, + "loss": 0.6141, + "step": 25970 + }, + { + "epoch": 0.7959727841118058, + "grad_norm": 1.6933957902906949, + "learning_rate": 1.0524851168081341e-06, + "loss": 0.5464, + "step": 25971 + }, + { + "epoch": 0.796003432634547, + "grad_norm": 1.8199869398999662, + "learning_rate": 1.052180522805682e-06, + "loss": 0.6278, + "step": 25972 + }, + { + "epoch": 0.7960340811572882, + "grad_norm": 1.782937362215698, + "learning_rate": 1.0518759677021046e-06, + "loss": 0.6037, + "step": 25973 + }, + { + "epoch": 0.7960647296800294, + "grad_norm": 2.1217435576565085, + "learning_rate": 1.0515714515004043e-06, + "loss": 0.6837, + "step": 25974 + }, + { + "epoch": 0.7960953782027707, + 
"grad_norm": 1.6873008535746354, + "learning_rate": 1.0512669742035798e-06, + "loss": 0.5034, + "step": 25975 + }, + { + "epoch": 0.7961260267255118, + "grad_norm": 1.9407082097226136, + "learning_rate": 1.050962535814632e-06, + "loss": 0.6545, + "step": 25976 + }, + { + "epoch": 0.7961566752482531, + "grad_norm": 1.7854605637542695, + "learning_rate": 1.0506581363365615e-06, + "loss": 0.6531, + "step": 25977 + }, + { + "epoch": 0.7961873237709942, + "grad_norm": 1.9558923377623683, + "learning_rate": 1.0503537757723664e-06, + "loss": 0.636, + "step": 25978 + }, + { + "epoch": 0.7962179722937355, + "grad_norm": 1.6828694814928027, + "learning_rate": 1.0500494541250455e-06, + "loss": 0.5239, + "step": 25979 + }, + { + "epoch": 0.7962486208164766, + "grad_norm": 2.432279834713902, + "learning_rate": 1.0497451713975997e-06, + "loss": 0.538, + "step": 25980 + }, + { + "epoch": 0.7962792693392179, + "grad_norm": 1.983266601030714, + "learning_rate": 1.049440927593024e-06, + "loss": 0.525, + "step": 25981 + }, + { + "epoch": 0.796309917861959, + "grad_norm": 1.8692831588331311, + "learning_rate": 1.0491367227143173e-06, + "loss": 0.6633, + "step": 25982 + }, + { + "epoch": 0.7963405663847003, + "grad_norm": 0.8131243113643114, + "learning_rate": 1.0488325567644792e-06, + "loss": 0.4288, + "step": 25983 + }, + { + "epoch": 0.7963712149074414, + "grad_norm": 1.889756181477782, + "learning_rate": 1.0485284297465032e-06, + "loss": 0.6101, + "step": 25984 + }, + { + "epoch": 0.7964018634301827, + "grad_norm": 2.0107740039701825, + "learning_rate": 1.048224341663389e-06, + "loss": 0.5348, + "step": 25985 + }, + { + "epoch": 0.7964325119529239, + "grad_norm": 1.7783909031528875, + "learning_rate": 1.0479202925181303e-06, + "loss": 0.5977, + "step": 25986 + }, + { + "epoch": 0.7964631604756651, + "grad_norm": 1.6219140866544677, + "learning_rate": 1.0476162823137238e-06, + "loss": 0.5178, + "step": 25987 + }, + { + "epoch": 0.7964938089984063, + "grad_norm": 1.5270029057843097, + "learning_rate": 1.047312311053167e-06, + "loss": 0.5411, + "step": 25988 + }, + { + "epoch": 0.7965244575211475, + "grad_norm": 2.1121674661491077, + "learning_rate": 1.047008378739452e-06, + "loss": 0.6727, + "step": 25989 + }, + { + "epoch": 0.7965551060438887, + "grad_norm": 0.752912156539349, + "learning_rate": 1.0467044853755741e-06, + "loss": 0.4054, + "step": 25990 + }, + { + "epoch": 0.7965857545666298, + "grad_norm": 0.75714799726157, + "learning_rate": 1.0464006309645308e-06, + "loss": 0.3974, + "step": 25991 + }, + { + "epoch": 0.7966164030893711, + "grad_norm": 1.706465949970194, + "learning_rate": 1.046096815509312e-06, + "loss": 0.6009, + "step": 25992 + }, + { + "epoch": 0.7966470516121122, + "grad_norm": 2.1530107191219643, + "learning_rate": 1.0457930390129129e-06, + "loss": 0.6938, + "step": 25993 + }, + { + "epoch": 0.7966777001348535, + "grad_norm": 0.8367988423434436, + "learning_rate": 1.045489301478328e-06, + "loss": 0.4237, + "step": 25994 + }, + { + "epoch": 0.7967083486575947, + "grad_norm": 0.7849988294240116, + "learning_rate": 1.0451856029085473e-06, + "loss": 0.3989, + "step": 25995 + }, + { + "epoch": 0.7967389971803359, + "grad_norm": 1.8013982609075272, + "learning_rate": 1.044881943306566e-06, + "loss": 0.5722, + "step": 25996 + }, + { + "epoch": 0.7967696457030771, + "grad_norm": 1.7966897930805648, + "learning_rate": 1.0445783226753725e-06, + "loss": 0.5285, + "step": 25997 + }, + { + "epoch": 0.7968002942258183, + "grad_norm": 1.5150830207109018, + "learning_rate": 1.0442747410179633e-06, + 
"loss": 0.4986, + "step": 25998 + }, + { + "epoch": 0.7968309427485595, + "grad_norm": 1.8717052356590318, + "learning_rate": 1.0439711983373275e-06, + "loss": 0.6492, + "step": 25999 + }, + { + "epoch": 0.7968615912713007, + "grad_norm": 1.8637418433901038, + "learning_rate": 1.0436676946364544e-06, + "loss": 0.5879, + "step": 26000 + }, + { + "epoch": 0.7968922397940419, + "grad_norm": 2.132964941314673, + "learning_rate": 1.0433642299183355e-06, + "loss": 0.633, + "step": 26001 + }, + { + "epoch": 0.7969228883167832, + "grad_norm": 1.9595822998414234, + "learning_rate": 1.0430608041859624e-06, + "loss": 0.5633, + "step": 26002 + }, + { + "epoch": 0.7969535368395243, + "grad_norm": 1.7866456147669543, + "learning_rate": 1.042757417442322e-06, + "loss": 0.5602, + "step": 26003 + }, + { + "epoch": 0.7969841853622656, + "grad_norm": 1.6535474444051013, + "learning_rate": 1.042454069690406e-06, + "loss": 0.616, + "step": 26004 + }, + { + "epoch": 0.7970148338850067, + "grad_norm": 1.8155733645744634, + "learning_rate": 1.0421507609332038e-06, + "loss": 0.5372, + "step": 26005 + }, + { + "epoch": 0.797045482407748, + "grad_norm": 1.8195430130496222, + "learning_rate": 1.0418474911737014e-06, + "loss": 0.5767, + "step": 26006 + }, + { + "epoch": 0.7970761309304891, + "grad_norm": 1.8102206894781827, + "learning_rate": 1.0415442604148896e-06, + "loss": 0.5081, + "step": 26007 + }, + { + "epoch": 0.7971067794532304, + "grad_norm": 1.8149017082953716, + "learning_rate": 1.0412410686597542e-06, + "loss": 0.5848, + "step": 26008 + }, + { + "epoch": 0.7971374279759715, + "grad_norm": 1.6901568113245087, + "learning_rate": 1.0409379159112826e-06, + "loss": 0.6525, + "step": 26009 + }, + { + "epoch": 0.7971680764987128, + "grad_norm": 1.6780743543714027, + "learning_rate": 1.0406348021724645e-06, + "loss": 0.5794, + "step": 26010 + }, + { + "epoch": 0.797198725021454, + "grad_norm": 1.773130312409526, + "learning_rate": 1.0403317274462833e-06, + "loss": 0.5299, + "step": 26011 + }, + { + "epoch": 0.7972293735441952, + "grad_norm": 1.8797275004123073, + "learning_rate": 1.0400286917357267e-06, + "loss": 0.6071, + "step": 26012 + }, + { + "epoch": 0.7972600220669364, + "grad_norm": 1.824750540110969, + "learning_rate": 1.0397256950437822e-06, + "loss": 0.5209, + "step": 26013 + }, + { + "epoch": 0.7972906705896776, + "grad_norm": 2.036468693138532, + "learning_rate": 1.0394227373734322e-06, + "loss": 0.5847, + "step": 26014 + }, + { + "epoch": 0.7973213191124188, + "grad_norm": 1.6678492048650346, + "learning_rate": 1.0391198187276646e-06, + "loss": 0.5037, + "step": 26015 + }, + { + "epoch": 0.79735196763516, + "grad_norm": 1.8854588812028343, + "learning_rate": 1.03881693910946e-06, + "loss": 0.5808, + "step": 26016 + }, + { + "epoch": 0.7973826161579012, + "grad_norm": 1.9099180595621135, + "learning_rate": 1.0385140985218085e-06, + "loss": 0.6297, + "step": 26017 + }, + { + "epoch": 0.7974132646806424, + "grad_norm": 2.2827380935475654, + "learning_rate": 1.038211296967691e-06, + "loss": 0.6009, + "step": 26018 + }, + { + "epoch": 0.7974439132033836, + "grad_norm": 0.8530809395939623, + "learning_rate": 1.0379085344500905e-06, + "loss": 0.4061, + "step": 26019 + }, + { + "epoch": 0.7974745617261249, + "grad_norm": 1.654384323252351, + "learning_rate": 1.0376058109719906e-06, + "loss": 0.6581, + "step": 26020 + }, + { + "epoch": 0.797505210248866, + "grad_norm": 1.6219164714084497, + "learning_rate": 1.037303126536376e-06, + "loss": 0.5467, + "step": 26021 + }, + { + "epoch": 0.7975358587716072, + 
"grad_norm": 1.91810911603109, + "learning_rate": 1.0370004811462258e-06, + "loss": 0.5702, + "step": 26022 + }, + { + "epoch": 0.7975665072943484, + "grad_norm": 1.8542668241097078, + "learning_rate": 1.0366978748045236e-06, + "loss": 0.5821, + "step": 26023 + }, + { + "epoch": 0.7975971558170896, + "grad_norm": 2.110365008351913, + "learning_rate": 1.0363953075142519e-06, + "loss": 0.5558, + "step": 26024 + }, + { + "epoch": 0.7976278043398308, + "grad_norm": 1.9874631805795586, + "learning_rate": 1.0360927792783925e-06, + "loss": 0.6541, + "step": 26025 + }, + { + "epoch": 0.797658452862572, + "grad_norm": 1.9072067418808345, + "learning_rate": 1.0357902900999256e-06, + "loss": 0.6326, + "step": 26026 + }, + { + "epoch": 0.7976891013853132, + "grad_norm": 1.9040083867626232, + "learning_rate": 1.035487839981828e-06, + "loss": 0.5997, + "step": 26027 + }, + { + "epoch": 0.7977197499080544, + "grad_norm": 1.7374403788860255, + "learning_rate": 1.0351854289270857e-06, + "loss": 0.5532, + "step": 26028 + }, + { + "epoch": 0.7977503984307956, + "grad_norm": 1.6236163370658117, + "learning_rate": 1.0348830569386764e-06, + "loss": 0.5148, + "step": 26029 + }, + { + "epoch": 0.7977810469535368, + "grad_norm": 2.078446786262601, + "learning_rate": 1.034580724019577e-06, + "loss": 0.6514, + "step": 26030 + }, + { + "epoch": 0.7978116954762781, + "grad_norm": 1.7420933983474365, + "learning_rate": 1.0342784301727688e-06, + "loss": 0.517, + "step": 26031 + }, + { + "epoch": 0.7978423439990192, + "grad_norm": 0.7866787880647522, + "learning_rate": 1.0339761754012307e-06, + "loss": 0.3882, + "step": 26032 + }, + { + "epoch": 0.7978729925217605, + "grad_norm": 1.8603604881460427, + "learning_rate": 1.0336739597079387e-06, + "loss": 0.6794, + "step": 26033 + }, + { + "epoch": 0.7979036410445016, + "grad_norm": 1.8553193984197682, + "learning_rate": 1.0333717830958729e-06, + "loss": 0.5091, + "step": 26034 + }, + { + "epoch": 0.7979342895672429, + "grad_norm": 2.12302684811726, + "learning_rate": 1.0330696455680089e-06, + "loss": 0.6314, + "step": 26035 + }, + { + "epoch": 0.797964938089984, + "grad_norm": 1.7492633847903012, + "learning_rate": 1.0327675471273262e-06, + "loss": 0.5294, + "step": 26036 + }, + { + "epoch": 0.7979955866127253, + "grad_norm": 1.8410278773437136, + "learning_rate": 1.0324654877768003e-06, + "loss": 0.5036, + "step": 26037 + }, + { + "epoch": 0.7980262351354664, + "grad_norm": 1.8316322847685231, + "learning_rate": 1.0321634675194042e-06, + "loss": 0.5629, + "step": 26038 + }, + { + "epoch": 0.7980568836582077, + "grad_norm": 1.8919986670734477, + "learning_rate": 1.0318614863581188e-06, + "loss": 0.6126, + "step": 26039 + }, + { + "epoch": 0.7980875321809489, + "grad_norm": 1.7700748628592318, + "learning_rate": 1.0315595442959182e-06, + "loss": 0.4748, + "step": 26040 + }, + { + "epoch": 0.7981181807036901, + "grad_norm": 2.281831099801764, + "learning_rate": 1.0312576413357755e-06, + "loss": 0.5557, + "step": 26041 + }, + { + "epoch": 0.7981488292264313, + "grad_norm": 1.5189443555213993, + "learning_rate": 1.0309557774806662e-06, + "loss": 0.5558, + "step": 26042 + }, + { + "epoch": 0.7981794777491725, + "grad_norm": 1.8263371691211765, + "learning_rate": 1.0306539527335652e-06, + "loss": 0.6064, + "step": 26043 + }, + { + "epoch": 0.7982101262719137, + "grad_norm": 1.8715885364833527, + "learning_rate": 1.0303521670974481e-06, + "loss": 0.5092, + "step": 26044 + }, + { + "epoch": 0.7982407747946549, + "grad_norm": 0.8044169165864871, + "learning_rate": 
1.0300504205752853e-06, + "loss": 0.419, + "step": 26045 + }, + { + "epoch": 0.7982714233173961, + "grad_norm": 1.918465961024517, + "learning_rate": 1.0297487131700512e-06, + "loss": 0.6405, + "step": 26046 + }, + { + "epoch": 0.7983020718401374, + "grad_norm": 0.7784603079180578, + "learning_rate": 1.0294470448847204e-06, + "loss": 0.4021, + "step": 26047 + }, + { + "epoch": 0.7983327203628785, + "grad_norm": 1.783006551125467, + "learning_rate": 1.029145415722264e-06, + "loss": 0.5502, + "step": 26048 + }, + { + "epoch": 0.7983633688856198, + "grad_norm": 1.9650486533100116, + "learning_rate": 1.028843825685651e-06, + "loss": 0.6102, + "step": 26049 + }, + { + "epoch": 0.7983940174083609, + "grad_norm": 2.016234607805312, + "learning_rate": 1.0285422747778583e-06, + "loss": 0.587, + "step": 26050 + }, + { + "epoch": 0.7984246659311022, + "grad_norm": 1.679658872130664, + "learning_rate": 1.028240763001855e-06, + "loss": 0.5592, + "step": 26051 + }, + { + "epoch": 0.7984553144538433, + "grad_norm": 1.804055179795286, + "learning_rate": 1.02793929036061e-06, + "loss": 0.607, + "step": 26052 + }, + { + "epoch": 0.7984859629765845, + "grad_norm": 1.9255819415070292, + "learning_rate": 1.0276378568570955e-06, + "loss": 0.6644, + "step": 26053 + }, + { + "epoch": 0.7985166114993257, + "grad_norm": 1.8931568539559667, + "learning_rate": 1.0273364624942816e-06, + "loss": 0.5662, + "step": 26054 + }, + { + "epoch": 0.7985472600220669, + "grad_norm": 2.102157569515186, + "learning_rate": 1.0270351072751394e-06, + "loss": 0.5857, + "step": 26055 + }, + { + "epoch": 0.7985779085448081, + "grad_norm": 1.9998062503935907, + "learning_rate": 1.026733791202636e-06, + "loss": 0.5143, + "step": 26056 + }, + { + "epoch": 0.7986085570675493, + "grad_norm": 2.06049708678213, + "learning_rate": 1.0264325142797405e-06, + "loss": 0.6227, + "step": 26057 + }, + { + "epoch": 0.7986392055902906, + "grad_norm": 1.9150531802775768, + "learning_rate": 1.026131276509424e-06, + "loss": 0.5179, + "step": 26058 + }, + { + "epoch": 0.7986698541130317, + "grad_norm": 1.759222650809232, + "learning_rate": 1.0258300778946522e-06, + "loss": 0.6154, + "step": 26059 + }, + { + "epoch": 0.798700502635773, + "grad_norm": 1.9456466003343982, + "learning_rate": 1.0255289184383921e-06, + "loss": 0.5278, + "step": 26060 + }, + { + "epoch": 0.7987311511585141, + "grad_norm": 1.8218682073431542, + "learning_rate": 1.0252277981436131e-06, + "loss": 0.5936, + "step": 26061 + }, + { + "epoch": 0.7987617996812554, + "grad_norm": 1.7611774840661472, + "learning_rate": 1.024926717013281e-06, + "loss": 0.5973, + "step": 26062 + }, + { + "epoch": 0.7987924482039965, + "grad_norm": 1.9448502693565306, + "learning_rate": 1.0246256750503648e-06, + "loss": 0.5539, + "step": 26063 + }, + { + "epoch": 0.7988230967267378, + "grad_norm": 0.7658759223642343, + "learning_rate": 1.024324672257827e-06, + "loss": 0.3881, + "step": 26064 + }, + { + "epoch": 0.7988537452494789, + "grad_norm": 1.9280811091014367, + "learning_rate": 1.0240237086386363e-06, + "loss": 0.5887, + "step": 26065 + }, + { + "epoch": 0.7988843937722202, + "grad_norm": 1.9741591480031835, + "learning_rate": 1.023722784195758e-06, + "loss": 0.5559, + "step": 26066 + }, + { + "epoch": 0.7989150422949614, + "grad_norm": 1.9890358395991263, + "learning_rate": 1.0234218989321564e-06, + "loss": 0.5456, + "step": 26067 + }, + { + "epoch": 0.7989456908177026, + "grad_norm": 0.7768655811073157, + "learning_rate": 1.023121052850794e-06, + "loss": 0.3968, + "step": 26068 + }, + { + "epoch": 
0.7989763393404438, + "grad_norm": 1.6861323997356252, + "learning_rate": 1.0228202459546398e-06, + "loss": 0.5446, + "step": 26069 + }, + { + "epoch": 0.799006987863185, + "grad_norm": 1.9160051815893373, + "learning_rate": 1.0225194782466546e-06, + "loss": 0.6236, + "step": 26070 + }, + { + "epoch": 0.7990376363859262, + "grad_norm": 1.8873551316504142, + "learning_rate": 1.0222187497298037e-06, + "loss": 0.5898, + "step": 26071 + }, + { + "epoch": 0.7990682849086674, + "grad_norm": 2.0782895289569483, + "learning_rate": 1.0219180604070472e-06, + "loss": 0.6236, + "step": 26072 + }, + { + "epoch": 0.7990989334314086, + "grad_norm": 1.9204161823535848, + "learning_rate": 1.0216174102813504e-06, + "loss": 0.4801, + "step": 26073 + }, + { + "epoch": 0.7991295819541498, + "grad_norm": 1.9293885904553625, + "learning_rate": 1.0213167993556767e-06, + "loss": 0.6284, + "step": 26074 + }, + { + "epoch": 0.799160230476891, + "grad_norm": 1.876909454839717, + "learning_rate": 1.021016227632985e-06, + "loss": 0.5713, + "step": 26075 + }, + { + "epoch": 0.7991908789996323, + "grad_norm": 1.9142746320543116, + "learning_rate": 1.0207156951162384e-06, + "loss": 0.5941, + "step": 26076 + }, + { + "epoch": 0.7992215275223734, + "grad_norm": 1.8112365728283166, + "learning_rate": 1.0204152018083995e-06, + "loss": 0.6095, + "step": 26077 + }, + { + "epoch": 0.7992521760451147, + "grad_norm": 1.9547686905664496, + "learning_rate": 1.0201147477124284e-06, + "loss": 0.5493, + "step": 26078 + }, + { + "epoch": 0.7992828245678558, + "grad_norm": 1.9697808137387518, + "learning_rate": 1.0198143328312816e-06, + "loss": 0.5075, + "step": 26079 + }, + { + "epoch": 0.7993134730905971, + "grad_norm": 0.8185680474137734, + "learning_rate": 1.0195139571679258e-06, + "loss": 0.396, + "step": 26080 + }, + { + "epoch": 0.7993441216133382, + "grad_norm": 1.8025465054399923, + "learning_rate": 1.0192136207253156e-06, + "loss": 0.6123, + "step": 26081 + }, + { + "epoch": 0.7993747701360795, + "grad_norm": 1.8648609581089357, + "learning_rate": 1.0189133235064135e-06, + "loss": 0.5348, + "step": 26082 + }, + { + "epoch": 0.7994054186588206, + "grad_norm": 1.9879418694023487, + "learning_rate": 1.0186130655141763e-06, + "loss": 0.5867, + "step": 26083 + }, + { + "epoch": 0.7994360671815618, + "grad_norm": 1.920920448155268, + "learning_rate": 1.0183128467515625e-06, + "loss": 0.5911, + "step": 26084 + }, + { + "epoch": 0.799466715704303, + "grad_norm": 1.8374864884109359, + "learning_rate": 1.018012667221533e-06, + "loss": 0.5829, + "step": 26085 + }, + { + "epoch": 0.7994973642270442, + "grad_norm": 1.9668754752359796, + "learning_rate": 1.0177125269270415e-06, + "loss": 0.5284, + "step": 26086 + }, + { + "epoch": 0.7995280127497855, + "grad_norm": 1.8452806084392948, + "learning_rate": 1.0174124258710477e-06, + "loss": 0.5635, + "step": 26087 + }, + { + "epoch": 0.7995586612725266, + "grad_norm": 1.6035415231382866, + "learning_rate": 1.0171123640565095e-06, + "loss": 0.5452, + "step": 26088 + }, + { + "epoch": 0.7995893097952679, + "grad_norm": 1.8687356320623076, + "learning_rate": 1.0168123414863813e-06, + "loss": 0.6277, + "step": 26089 + }, + { + "epoch": 0.799619958318009, + "grad_norm": 1.686271094312086, + "learning_rate": 1.01651235816362e-06, + "loss": 0.6015, + "step": 26090 + }, + { + "epoch": 0.7996506068407503, + "grad_norm": 1.8479716792946173, + "learning_rate": 1.0162124140911827e-06, + "loss": 0.5894, + "step": 26091 + }, + { + "epoch": 0.7996812553634914, + "grad_norm": 0.8213219912088767, + 
"learning_rate": 1.015912509272023e-06, + "loss": 0.3983, + "step": 26092 + }, + { + "epoch": 0.7997119038862327, + "grad_norm": 1.7379752679168352, + "learning_rate": 1.0156126437090973e-06, + "loss": 0.6808, + "step": 26093 + }, + { + "epoch": 0.7997425524089739, + "grad_norm": 2.0494180685390524, + "learning_rate": 1.015312817405359e-06, + "loss": 0.5828, + "step": 26094 + }, + { + "epoch": 0.7997732009317151, + "grad_norm": 1.7592458207140953, + "learning_rate": 1.0150130303637628e-06, + "loss": 0.5785, + "step": 26095 + }, + { + "epoch": 0.7998038494544563, + "grad_norm": 1.8580981049667475, + "learning_rate": 1.0147132825872641e-06, + "loss": 0.6111, + "step": 26096 + }, + { + "epoch": 0.7998344979771975, + "grad_norm": 1.8989358380582781, + "learning_rate": 1.0144135740788142e-06, + "loss": 0.552, + "step": 26097 + }, + { + "epoch": 0.7998651464999387, + "grad_norm": 2.014848883280252, + "learning_rate": 1.014113904841366e-06, + "loss": 0.5785, + "step": 26098 + }, + { + "epoch": 0.7998957950226799, + "grad_norm": 2.016258257636917, + "learning_rate": 1.0138142748778756e-06, + "loss": 0.6527, + "step": 26099 + }, + { + "epoch": 0.7999264435454211, + "grad_norm": 2.0763789601660823, + "learning_rate": 1.0135146841912918e-06, + "loss": 0.5493, + "step": 26100 + }, + { + "epoch": 0.7999570920681623, + "grad_norm": 1.884396345123129, + "learning_rate": 1.0132151327845674e-06, + "loss": 0.5802, + "step": 26101 + }, + { + "epoch": 0.7999877405909035, + "grad_norm": 1.9710871627195854, + "learning_rate": 1.012915620660656e-06, + "loss": 0.5227, + "step": 26102 + }, + { + "epoch": 0.8000183891136448, + "grad_norm": 1.6737243812277465, + "learning_rate": 1.0126161478225055e-06, + "loss": 0.4852, + "step": 26103 + }, + { + "epoch": 0.8000490376363859, + "grad_norm": 0.8122184856665697, + "learning_rate": 1.01231671427307e-06, + "loss": 0.4058, + "step": 26104 + }, + { + "epoch": 0.8000796861591272, + "grad_norm": 2.153752984553308, + "learning_rate": 1.0120173200152967e-06, + "loss": 0.5546, + "step": 26105 + }, + { + "epoch": 0.8001103346818683, + "grad_norm": 1.9300925166206475, + "learning_rate": 1.011717965052137e-06, + "loss": 0.5859, + "step": 26106 + }, + { + "epoch": 0.8001409832046096, + "grad_norm": 1.7353092336238305, + "learning_rate": 1.011418649386542e-06, + "loss": 0.5766, + "step": 26107 + }, + { + "epoch": 0.8001716317273507, + "grad_norm": 0.8738421806085811, + "learning_rate": 1.0111193730214581e-06, + "loss": 0.4086, + "step": 26108 + }, + { + "epoch": 0.800202280250092, + "grad_norm": 1.7625668441217093, + "learning_rate": 1.0108201359598357e-06, + "loss": 0.5108, + "step": 26109 + }, + { + "epoch": 0.8002329287728331, + "grad_norm": 1.9705520365666656, + "learning_rate": 1.0105209382046244e-06, + "loss": 0.5557, + "step": 26110 + }, + { + "epoch": 0.8002635772955744, + "grad_norm": 1.8989137892119694, + "learning_rate": 1.01022177975877e-06, + "loss": 0.5349, + "step": 26111 + }, + { + "epoch": 0.8002942258183156, + "grad_norm": 1.6156468800394446, + "learning_rate": 1.0099226606252226e-06, + "loss": 0.4823, + "step": 26112 + }, + { + "epoch": 0.8003248743410568, + "grad_norm": 1.7503513424842825, + "learning_rate": 1.0096235808069266e-06, + "loss": 0.5967, + "step": 26113 + }, + { + "epoch": 0.800355522863798, + "grad_norm": 1.787610838067192, + "learning_rate": 1.0093245403068308e-06, + "loss": 0.6103, + "step": 26114 + }, + { + "epoch": 0.8003861713865391, + "grad_norm": 1.6854541778601533, + "learning_rate": 1.0090255391278819e-06, + "loss": 0.5759, + "step": 26115 + 
}, + { + "epoch": 0.8004168199092804, + "grad_norm": 1.726295719646213, + "learning_rate": 1.008726577273025e-06, + "loss": 0.486, + "step": 26116 + }, + { + "epoch": 0.8004474684320215, + "grad_norm": 1.8101769031890214, + "learning_rate": 1.008427654745206e-06, + "loss": 0.5445, + "step": 26117 + }, + { + "epoch": 0.8004781169547628, + "grad_norm": 1.8573565168849522, + "learning_rate": 1.008128771547372e-06, + "loss": 0.6069, + "step": 26118 + }, + { + "epoch": 0.8005087654775039, + "grad_norm": 2.1360355279108454, + "learning_rate": 1.007829927682465e-06, + "loss": 0.6015, + "step": 26119 + }, + { + "epoch": 0.8005394140002452, + "grad_norm": 2.086306428578305, + "learning_rate": 1.0075311231534314e-06, + "loss": 0.5229, + "step": 26120 + }, + { + "epoch": 0.8005700625229863, + "grad_norm": 1.7633849845792613, + "learning_rate": 1.0072323579632165e-06, + "loss": 0.6318, + "step": 26121 + }, + { + "epoch": 0.8006007110457276, + "grad_norm": 1.856986754421337, + "learning_rate": 1.0069336321147616e-06, + "loss": 0.5835, + "step": 26122 + }, + { + "epoch": 0.8006313595684688, + "grad_norm": 1.9233028407954684, + "learning_rate": 1.006634945611012e-06, + "loss": 0.5832, + "step": 26123 + }, + { + "epoch": 0.80066200809121, + "grad_norm": 1.8789933686314473, + "learning_rate": 1.006336298454908e-06, + "loss": 0.5727, + "step": 26124 + }, + { + "epoch": 0.8006926566139512, + "grad_norm": 0.7878722981572625, + "learning_rate": 1.0060376906493968e-06, + "loss": 0.3889, + "step": 26125 + }, + { + "epoch": 0.8007233051366924, + "grad_norm": 1.928541056829663, + "learning_rate": 1.0057391221974178e-06, + "loss": 0.5339, + "step": 26126 + }, + { + "epoch": 0.8007539536594336, + "grad_norm": 1.9009837087860721, + "learning_rate": 1.0054405931019124e-06, + "loss": 0.5132, + "step": 26127 + }, + { + "epoch": 0.8007846021821748, + "grad_norm": 0.7884926151227881, + "learning_rate": 1.0051421033658226e-06, + "loss": 0.3849, + "step": 26128 + }, + { + "epoch": 0.800815250704916, + "grad_norm": 2.027840013978195, + "learning_rate": 1.0048436529920908e-06, + "loss": 0.5818, + "step": 26129 + }, + { + "epoch": 0.8008458992276573, + "grad_norm": 1.6466222263003327, + "learning_rate": 1.004545241983656e-06, + "loss": 0.6384, + "step": 26130 + }, + { + "epoch": 0.8008765477503984, + "grad_norm": 1.8764523237956545, + "learning_rate": 1.0042468703434588e-06, + "loss": 0.5933, + "step": 26131 + }, + { + "epoch": 0.8009071962731397, + "grad_norm": 1.8563893057283993, + "learning_rate": 1.0039485380744408e-06, + "loss": 0.6296, + "step": 26132 + }, + { + "epoch": 0.8009378447958808, + "grad_norm": 2.0077577626253924, + "learning_rate": 1.0036502451795393e-06, + "loss": 0.5694, + "step": 26133 + }, + { + "epoch": 0.8009684933186221, + "grad_norm": 1.8625991833908435, + "learning_rate": 1.0033519916616958e-06, + "loss": 0.5874, + "step": 26134 + }, + { + "epoch": 0.8009991418413632, + "grad_norm": 1.918837678011883, + "learning_rate": 1.0030537775238447e-06, + "loss": 0.5885, + "step": 26135 + }, + { + "epoch": 0.8010297903641045, + "grad_norm": 1.774614278415829, + "learning_rate": 1.00275560276893e-06, + "loss": 0.581, + "step": 26136 + }, + { + "epoch": 0.8010604388868456, + "grad_norm": 1.7867448814839093, + "learning_rate": 1.0024574673998871e-06, + "loss": 0.5668, + "step": 26137 + }, + { + "epoch": 0.8010910874095869, + "grad_norm": 1.8981238082515532, + "learning_rate": 1.0021593714196525e-06, + "loss": 0.6153, + "step": 26138 + }, + { + "epoch": 0.801121735932328, + "grad_norm": 1.70014390109954, + 
"learning_rate": 1.0018613148311646e-06, + "loss": 0.4931, + "step": 26139 + }, + { + "epoch": 0.8011523844550693, + "grad_norm": 2.0073713129705677, + "learning_rate": 1.0015632976373612e-06, + "loss": 0.6461, + "step": 26140 + }, + { + "epoch": 0.8011830329778105, + "grad_norm": 0.8298734246945647, + "learning_rate": 1.0012653198411765e-06, + "loss": 0.3924, + "step": 26141 + }, + { + "epoch": 0.8012136815005517, + "grad_norm": 1.874746741224655, + "learning_rate": 1.0009673814455478e-06, + "loss": 0.513, + "step": 26142 + }, + { + "epoch": 0.8012443300232929, + "grad_norm": 1.9373197021592123, + "learning_rate": 1.0006694824534109e-06, + "loss": 0.6044, + "step": 26143 + }, + { + "epoch": 0.8012749785460341, + "grad_norm": 0.7680307389356031, + "learning_rate": 1.0003716228677018e-06, + "loss": 0.3952, + "step": 26144 + }, + { + "epoch": 0.8013056270687753, + "grad_norm": 1.723050579758409, + "learning_rate": 1.0000738026913542e-06, + "loss": 0.5878, + "step": 26145 + }, + { + "epoch": 0.8013362755915164, + "grad_norm": 1.8191972215395507, + "learning_rate": 9.997760219273006e-07, + "loss": 0.5716, + "step": 26146 + }, + { + "epoch": 0.8013669241142577, + "grad_norm": 1.898059632192553, + "learning_rate": 9.994782805784798e-07, + "loss": 0.5534, + "step": 26147 + }, + { + "epoch": 0.8013975726369988, + "grad_norm": 0.8331844465084138, + "learning_rate": 9.99180578647823e-07, + "loss": 0.4274, + "step": 26148 + }, + { + "epoch": 0.8014282211597401, + "grad_norm": 1.7391552526974166, + "learning_rate": 9.98882916138263e-07, + "loss": 0.6173, + "step": 26149 + }, + { + "epoch": 0.8014588696824813, + "grad_norm": 1.8556868302716203, + "learning_rate": 9.985852930527329e-07, + "loss": 0.6098, + "step": 26150 + }, + { + "epoch": 0.8014895182052225, + "grad_norm": 1.7606834523200425, + "learning_rate": 9.982877093941655e-07, + "loss": 0.5563, + "step": 26151 + }, + { + "epoch": 0.8015201667279637, + "grad_norm": 0.7925331232453956, + "learning_rate": 9.979901651654944e-07, + "loss": 0.3966, + "step": 26152 + }, + { + "epoch": 0.8015508152507049, + "grad_norm": 1.8881959315365682, + "learning_rate": 9.97692660369649e-07, + "loss": 0.6156, + "step": 26153 + }, + { + "epoch": 0.8015814637734461, + "grad_norm": 1.8502194597257016, + "learning_rate": 9.973951950095624e-07, + "loss": 0.6049, + "step": 26154 + }, + { + "epoch": 0.8016121122961873, + "grad_norm": 0.7962988045953375, + "learning_rate": 9.970977690881656e-07, + "loss": 0.4005, + "step": 26155 + }, + { + "epoch": 0.8016427608189285, + "grad_norm": 1.9782990692260882, + "learning_rate": 9.968003826083889e-07, + "loss": 0.4479, + "step": 26156 + }, + { + "epoch": 0.8016734093416698, + "grad_norm": 1.728587039673, + "learning_rate": 9.965030355731614e-07, + "loss": 0.5398, + "step": 26157 + }, + { + "epoch": 0.8017040578644109, + "grad_norm": 1.8885025777162254, + "learning_rate": 9.962057279854132e-07, + "loss": 0.5979, + "step": 26158 + }, + { + "epoch": 0.8017347063871522, + "grad_norm": 2.2026228345520886, + "learning_rate": 9.959084598480762e-07, + "loss": 0.6565, + "step": 26159 + }, + { + "epoch": 0.8017653549098933, + "grad_norm": 1.8345478848191348, + "learning_rate": 9.956112311640758e-07, + "loss": 0.5704, + "step": 26160 + }, + { + "epoch": 0.8017960034326346, + "grad_norm": 2.1220426648335384, + "learning_rate": 9.953140419363433e-07, + "loss": 0.616, + "step": 26161 + }, + { + "epoch": 0.8018266519553757, + "grad_norm": 1.7128513377643748, + "learning_rate": 9.950168921678056e-07, + "loss": 0.5542, + "step": 26162 + }, + { + 
"epoch": 0.801857300478117, + "grad_norm": 0.7597129868241397, + "learning_rate": 9.947197818613923e-07, + "loss": 0.4058, + "step": 26163 + }, + { + "epoch": 0.8018879490008581, + "grad_norm": 1.7152632707324793, + "learning_rate": 9.944227110200305e-07, + "loss": 0.5329, + "step": 26164 + }, + { + "epoch": 0.8019185975235994, + "grad_norm": 1.8118879722032344, + "learning_rate": 9.941256796466432e-07, + "loss": 0.5937, + "step": 26165 + }, + { + "epoch": 0.8019492460463405, + "grad_norm": 2.0322270182760653, + "learning_rate": 9.938286877441639e-07, + "loss": 0.5735, + "step": 26166 + }, + { + "epoch": 0.8019798945690818, + "grad_norm": 1.6733979523549105, + "learning_rate": 9.93531735315515e-07, + "loss": 0.5088, + "step": 26167 + }, + { + "epoch": 0.802010543091823, + "grad_norm": 0.7799536862483254, + "learning_rate": 9.932348223636217e-07, + "loss": 0.4106, + "step": 26168 + }, + { + "epoch": 0.8020411916145642, + "grad_norm": 0.8012456504520188, + "learning_rate": 9.92937948891411e-07, + "loss": 0.3979, + "step": 26169 + }, + { + "epoch": 0.8020718401373054, + "grad_norm": 2.1176645205591997, + "learning_rate": 9.92641114901808e-07, + "loss": 0.6682, + "step": 26170 + }, + { + "epoch": 0.8021024886600466, + "grad_norm": 1.833546434773196, + "learning_rate": 9.92344320397739e-07, + "loss": 0.5713, + "step": 26171 + }, + { + "epoch": 0.8021331371827878, + "grad_norm": 1.8922760634190028, + "learning_rate": 9.920475653821248e-07, + "loss": 0.5642, + "step": 26172 + }, + { + "epoch": 0.802163785705529, + "grad_norm": 1.882000557135685, + "learning_rate": 9.91750849857892e-07, + "loss": 0.5962, + "step": 26173 + }, + { + "epoch": 0.8021944342282702, + "grad_norm": 2.003508979572098, + "learning_rate": 9.914541738279648e-07, + "loss": 0.5075, + "step": 26174 + }, + { + "epoch": 0.8022250827510115, + "grad_norm": 1.891745633152644, + "learning_rate": 9.911575372952653e-07, + "loss": 0.6289, + "step": 26175 + }, + { + "epoch": 0.8022557312737526, + "grad_norm": 1.823837881711338, + "learning_rate": 9.908609402627135e-07, + "loss": 0.5596, + "step": 26176 + }, + { + "epoch": 0.8022863797964938, + "grad_norm": 1.9172256993348644, + "learning_rate": 9.905643827332373e-07, + "loss": 0.5306, + "step": 26177 + }, + { + "epoch": 0.802317028319235, + "grad_norm": 1.7924971589513377, + "learning_rate": 9.902678647097547e-07, + "loss": 0.6609, + "step": 26178 + }, + { + "epoch": 0.8023476768419762, + "grad_norm": 1.8732564103747447, + "learning_rate": 9.899713861951905e-07, + "loss": 0.5888, + "step": 26179 + }, + { + "epoch": 0.8023783253647174, + "grad_norm": 1.7263223086229078, + "learning_rate": 9.896749471924627e-07, + "loss": 0.5299, + "step": 26180 + }, + { + "epoch": 0.8024089738874586, + "grad_norm": 1.7731963419750416, + "learning_rate": 9.893785477044936e-07, + "loss": 0.5347, + "step": 26181 + }, + { + "epoch": 0.8024396224101998, + "grad_norm": 1.9617102021623796, + "learning_rate": 9.89082187734205e-07, + "loss": 0.6023, + "step": 26182 + }, + { + "epoch": 0.802470270932941, + "grad_norm": 1.948681447724386, + "learning_rate": 9.887858672845146e-07, + "loss": 0.5438, + "step": 26183 + }, + { + "epoch": 0.8025009194556822, + "grad_norm": 1.774014650634734, + "learning_rate": 9.884895863583437e-07, + "loss": 0.4343, + "step": 26184 + }, + { + "epoch": 0.8025315679784234, + "grad_norm": 2.033381487822126, + "learning_rate": 9.881933449586123e-07, + "loss": 0.5873, + "step": 26185 + }, + { + "epoch": 0.8025622165011647, + "grad_norm": 1.7532509973431196, + "learning_rate": 
9.878971430882388e-07, + "loss": 0.5169, + "step": 26186 + }, + { + "epoch": 0.8025928650239058, + "grad_norm": 1.7942593081941955, + "learning_rate": 9.87600980750138e-07, + "loss": 0.628, + "step": 26187 + }, + { + "epoch": 0.8026235135466471, + "grad_norm": 1.7282220687965761, + "learning_rate": 9.873048579472344e-07, + "loss": 0.5492, + "step": 26188 + }, + { + "epoch": 0.8026541620693882, + "grad_norm": 1.6256882030775344, + "learning_rate": 9.870087746824414e-07, + "loss": 0.5115, + "step": 26189 + }, + { + "epoch": 0.8026848105921295, + "grad_norm": 1.9521805443056934, + "learning_rate": 9.86712730958679e-07, + "loss": 0.5954, + "step": 26190 + }, + { + "epoch": 0.8027154591148706, + "grad_norm": 1.8432392394557024, + "learning_rate": 9.864167267788615e-07, + "loss": 0.5374, + "step": 26191 + }, + { + "epoch": 0.8027461076376119, + "grad_norm": 1.7965245853361593, + "learning_rate": 9.861207621459068e-07, + "loss": 0.5466, + "step": 26192 + }, + { + "epoch": 0.802776756160353, + "grad_norm": 1.9699125447670376, + "learning_rate": 9.858248370627327e-07, + "loss": 0.6078, + "step": 26193 + }, + { + "epoch": 0.8028074046830943, + "grad_norm": 2.0454755936965543, + "learning_rate": 9.855289515322524e-07, + "loss": 0.6278, + "step": 26194 + }, + { + "epoch": 0.8028380532058355, + "grad_norm": 1.9380326999281836, + "learning_rate": 9.852331055573822e-07, + "loss": 0.5767, + "step": 26195 + }, + { + "epoch": 0.8028687017285767, + "grad_norm": 1.870714868748047, + "learning_rate": 9.849372991410388e-07, + "loss": 0.5705, + "step": 26196 + }, + { + "epoch": 0.8028993502513179, + "grad_norm": 1.7785790037146638, + "learning_rate": 9.84641532286134e-07, + "loss": 0.5181, + "step": 26197 + }, + { + "epoch": 0.8029299987740591, + "grad_norm": 1.957437394997724, + "learning_rate": 9.843458049955839e-07, + "loss": 0.6728, + "step": 26198 + }, + { + "epoch": 0.8029606472968003, + "grad_norm": 2.1847302182840025, + "learning_rate": 9.840501172723033e-07, + "loss": 0.6107, + "step": 26199 + }, + { + "epoch": 0.8029912958195415, + "grad_norm": 1.812210836445899, + "learning_rate": 9.83754469119203e-07, + "loss": 0.5841, + "step": 26200 + }, + { + "epoch": 0.8030219443422827, + "grad_norm": 1.824109626985059, + "learning_rate": 9.834588605391988e-07, + "loss": 0.5698, + "step": 26201 + }, + { + "epoch": 0.803052592865024, + "grad_norm": 1.8737032397963795, + "learning_rate": 9.831632915352013e-07, + "loss": 0.5654, + "step": 26202 + }, + { + "epoch": 0.8030832413877651, + "grad_norm": 1.8998754070060928, + "learning_rate": 9.828677621101229e-07, + "loss": 0.5878, + "step": 26203 + }, + { + "epoch": 0.8031138899105064, + "grad_norm": 1.9042096224704, + "learning_rate": 9.82572272266878e-07, + "loss": 0.5906, + "step": 26204 + }, + { + "epoch": 0.8031445384332475, + "grad_norm": 1.9604465407271352, + "learning_rate": 9.822768220083751e-07, + "loss": 0.6018, + "step": 26205 + }, + { + "epoch": 0.8031751869559888, + "grad_norm": 1.9625821446734626, + "learning_rate": 9.819814113375264e-07, + "loss": 0.5305, + "step": 26206 + }, + { + "epoch": 0.8032058354787299, + "grad_norm": 1.8950482547931602, + "learning_rate": 9.816860402572442e-07, + "loss": 0.5624, + "step": 26207 + }, + { + "epoch": 0.8032364840014711, + "grad_norm": 1.7290411591442736, + "learning_rate": 9.813907087704366e-07, + "loss": 0.5442, + "step": 26208 + }, + { + "epoch": 0.8032671325242123, + "grad_norm": 1.702032191988799, + "learning_rate": 9.810954168800157e-07, + "loss": 0.4893, + "step": 26209 + }, + { + "epoch": 0.8032977810469535, 
+ "grad_norm": 1.8350904990417867, + "learning_rate": 9.808001645888888e-07, + "loss": 0.6298, + "step": 26210 + }, + { + "epoch": 0.8033284295696947, + "grad_norm": 1.772604286327237, + "learning_rate": 9.80504951899966e-07, + "loss": 0.4855, + "step": 26211 + }, + { + "epoch": 0.8033590780924359, + "grad_norm": 1.9244431988099782, + "learning_rate": 9.802097788161574e-07, + "loss": 0.6504, + "step": 26212 + }, + { + "epoch": 0.8033897266151772, + "grad_norm": 1.8724956407471003, + "learning_rate": 9.799146453403696e-07, + "loss": 0.4837, + "step": 26213 + }, + { + "epoch": 0.8034203751379183, + "grad_norm": 1.6741921575849945, + "learning_rate": 9.796195514755107e-07, + "loss": 0.5836, + "step": 26214 + }, + { + "epoch": 0.8034510236606596, + "grad_norm": 1.761782172275186, + "learning_rate": 9.79324497224491e-07, + "loss": 0.539, + "step": 26215 + }, + { + "epoch": 0.8034816721834007, + "grad_norm": 1.5932967327757592, + "learning_rate": 9.790294825902141e-07, + "loss": 0.5722, + "step": 26216 + }, + { + "epoch": 0.803512320706142, + "grad_norm": 1.936130383038261, + "learning_rate": 9.78734507575589e-07, + "loss": 0.6017, + "step": 26217 + }, + { + "epoch": 0.8035429692288831, + "grad_norm": 1.873026075605795, + "learning_rate": 9.784395721835222e-07, + "loss": 0.6104, + "step": 26218 + }, + { + "epoch": 0.8035736177516244, + "grad_norm": 1.762881754968436, + "learning_rate": 9.781446764169184e-07, + "loss": 0.5999, + "step": 26219 + }, + { + "epoch": 0.8036042662743655, + "grad_norm": 1.913632225597898, + "learning_rate": 9.778498202786858e-07, + "loss": 0.5713, + "step": 26220 + }, + { + "epoch": 0.8036349147971068, + "grad_norm": 0.8219142036181154, + "learning_rate": 9.775550037717263e-07, + "loss": 0.3935, + "step": 26221 + }, + { + "epoch": 0.803665563319848, + "grad_norm": 2.0026608643448225, + "learning_rate": 9.772602268989462e-07, + "loss": 0.5857, + "step": 26222 + }, + { + "epoch": 0.8036962118425892, + "grad_norm": 1.4777919048282195, + "learning_rate": 9.769654896632524e-07, + "loss": 0.4253, + "step": 26223 + }, + { + "epoch": 0.8037268603653304, + "grad_norm": 1.8371669026781667, + "learning_rate": 9.76670792067545e-07, + "loss": 0.5762, + "step": 26224 + }, + { + "epoch": 0.8037575088880716, + "grad_norm": 2.023513656475765, + "learning_rate": 9.763761341147299e-07, + "loss": 0.5707, + "step": 26225 + }, + { + "epoch": 0.8037881574108128, + "grad_norm": 1.5928168825081994, + "learning_rate": 9.76081515807712e-07, + "loss": 0.6121, + "step": 26226 + }, + { + "epoch": 0.803818805933554, + "grad_norm": 2.4658270592255733, + "learning_rate": 9.757869371493906e-07, + "loss": 0.5905, + "step": 26227 + }, + { + "epoch": 0.8038494544562952, + "grad_norm": 1.998195029205417, + "learning_rate": 9.754923981426706e-07, + "loss": 0.5523, + "step": 26228 + }, + { + "epoch": 0.8038801029790364, + "grad_norm": 1.896078880724743, + "learning_rate": 9.751978987904547e-07, + "loss": 0.5119, + "step": 26229 + }, + { + "epoch": 0.8039107515017776, + "grad_norm": 1.741814861416227, + "learning_rate": 9.749034390956424e-07, + "loss": 0.5444, + "step": 26230 + }, + { + "epoch": 0.8039414000245189, + "grad_norm": 1.866981501015517, + "learning_rate": 9.74609019061138e-07, + "loss": 0.6335, + "step": 26231 + }, + { + "epoch": 0.80397204854726, + "grad_norm": 1.9415553270997574, + "learning_rate": 9.74314638689839e-07, + "loss": 0.59, + "step": 26232 + }, + { + "epoch": 0.8040026970700013, + "grad_norm": 2.0092616324969246, + "learning_rate": 9.74020297984648e-07, + "loss": 0.6024, + "step": 
26233 + }, + { + "epoch": 0.8040333455927424, + "grad_norm": 1.7905019084473464, + "learning_rate": 9.73725996948467e-07, + "loss": 0.6474, + "step": 26234 + }, + { + "epoch": 0.8040639941154837, + "grad_norm": 2.052272109527024, + "learning_rate": 9.734317355841922e-07, + "loss": 0.6526, + "step": 26235 + }, + { + "epoch": 0.8040946426382248, + "grad_norm": 2.0168103327617644, + "learning_rate": 9.731375138947246e-07, + "loss": 0.6404, + "step": 26236 + }, + { + "epoch": 0.8041252911609661, + "grad_norm": 1.782115093888291, + "learning_rate": 9.72843331882965e-07, + "loss": 0.508, + "step": 26237 + }, + { + "epoch": 0.8041559396837072, + "grad_norm": 1.8302360381456209, + "learning_rate": 9.725491895518092e-07, + "loss": 0.5685, + "step": 26238 + }, + { + "epoch": 0.8041865882064484, + "grad_norm": 1.9008263916115187, + "learning_rate": 9.722550869041563e-07, + "loss": 0.5649, + "step": 26239 + }, + { + "epoch": 0.8042172367291897, + "grad_norm": 1.9739616912490416, + "learning_rate": 9.719610239429062e-07, + "loss": 0.7112, + "step": 26240 + }, + { + "epoch": 0.8042478852519308, + "grad_norm": 2.0295059722682947, + "learning_rate": 9.716670006709533e-07, + "loss": 0.5436, + "step": 26241 + }, + { + "epoch": 0.8042785337746721, + "grad_norm": 1.7472044059198801, + "learning_rate": 9.713730170911973e-07, + "loss": 0.4824, + "step": 26242 + }, + { + "epoch": 0.8043091822974132, + "grad_norm": 1.8625606856347658, + "learning_rate": 9.71079073206531e-07, + "loss": 0.5441, + "step": 26243 + }, + { + "epoch": 0.8043398308201545, + "grad_norm": 1.724699010871267, + "learning_rate": 9.707851690198565e-07, + "loss": 0.5396, + "step": 26244 + }, + { + "epoch": 0.8043704793428956, + "grad_norm": 1.8011271159355218, + "learning_rate": 9.704913045340664e-07, + "loss": 0.5005, + "step": 26245 + }, + { + "epoch": 0.8044011278656369, + "grad_norm": 0.8432712198232949, + "learning_rate": 9.701974797520553e-07, + "loss": 0.4073, + "step": 26246 + }, + { + "epoch": 0.804431776388378, + "grad_norm": 1.9436238702327526, + "learning_rate": 9.6990369467672e-07, + "loss": 0.5883, + "step": 26247 + }, + { + "epoch": 0.8044624249111193, + "grad_norm": 0.9286851455587088, + "learning_rate": 9.69609949310955e-07, + "loss": 0.4122, + "step": 26248 + }, + { + "epoch": 0.8044930734338605, + "grad_norm": 2.1578477812081402, + "learning_rate": 9.693162436576537e-07, + "loss": 0.6495, + "step": 26249 + }, + { + "epoch": 0.8045237219566017, + "grad_norm": 1.8772487226038959, + "learning_rate": 9.690225777197104e-07, + "loss": 0.6311, + "step": 26250 + }, + { + "epoch": 0.8045543704793429, + "grad_norm": 1.7928331039755163, + "learning_rate": 9.687289515000192e-07, + "loss": 0.6016, + "step": 26251 + }, + { + "epoch": 0.8045850190020841, + "grad_norm": 1.8025644020653475, + "learning_rate": 9.684353650014749e-07, + "loss": 0.5709, + "step": 26252 + }, + { + "epoch": 0.8046156675248253, + "grad_norm": 1.9625767332241548, + "learning_rate": 9.681418182269682e-07, + "loss": 0.5641, + "step": 26253 + }, + { + "epoch": 0.8046463160475665, + "grad_norm": 1.7833714723608445, + "learning_rate": 9.678483111793896e-07, + "loss": 0.513, + "step": 26254 + }, + { + "epoch": 0.8046769645703077, + "grad_norm": 1.9061165942589224, + "learning_rate": 9.67554843861634e-07, + "loss": 0.5651, + "step": 26255 + }, + { + "epoch": 0.804707613093049, + "grad_norm": 1.7577822752764571, + "learning_rate": 9.672614162765936e-07, + "loss": 0.5453, + "step": 26256 + }, + { + "epoch": 0.8047382616157901, + "grad_norm": 1.8823108243714546, + 
"learning_rate": 9.66968028427157e-07, + "loss": 0.5788, + "step": 26257 + }, + { + "epoch": 0.8047689101385314, + "grad_norm": 1.7800551274617664, + "learning_rate": 9.666746803162163e-07, + "loss": 0.5573, + "step": 26258 + }, + { + "epoch": 0.8047995586612725, + "grad_norm": 0.8281288007835865, + "learning_rate": 9.663813719466631e-07, + "loss": 0.4194, + "step": 26259 + }, + { + "epoch": 0.8048302071840138, + "grad_norm": 0.7855870355386062, + "learning_rate": 9.660881033213847e-07, + "loss": 0.394, + "step": 26260 + }, + { + "epoch": 0.8048608557067549, + "grad_norm": 1.6461751672476506, + "learning_rate": 9.657948744432743e-07, + "loss": 0.5001, + "step": 26261 + }, + { + "epoch": 0.8048915042294962, + "grad_norm": 1.7330623440745248, + "learning_rate": 9.65501685315216e-07, + "loss": 0.5287, + "step": 26262 + }, + { + "epoch": 0.8049221527522373, + "grad_norm": 1.7144524325201715, + "learning_rate": 9.652085359401047e-07, + "loss": 0.4938, + "step": 26263 + }, + { + "epoch": 0.8049528012749786, + "grad_norm": 0.7839410377798828, + "learning_rate": 9.64915426320826e-07, + "loss": 0.3988, + "step": 26264 + }, + { + "epoch": 0.8049834497977197, + "grad_norm": 2.0415632558354257, + "learning_rate": 9.64622356460267e-07, + "loss": 0.58, + "step": 26265 + }, + { + "epoch": 0.805014098320461, + "grad_norm": 0.8455080108304754, + "learning_rate": 9.643293263613162e-07, + "loss": 0.3856, + "step": 26266 + }, + { + "epoch": 0.8050447468432022, + "grad_norm": 2.1527136846432913, + "learning_rate": 9.640363360268623e-07, + "loss": 0.6343, + "step": 26267 + }, + { + "epoch": 0.8050753953659434, + "grad_norm": 1.924386175000749, + "learning_rate": 9.6374338545979e-07, + "loss": 0.5601, + "step": 26268 + }, + { + "epoch": 0.8051060438886846, + "grad_norm": 1.7869571195497487, + "learning_rate": 9.634504746629863e-07, + "loss": 0.5707, + "step": 26269 + }, + { + "epoch": 0.8051366924114257, + "grad_norm": 1.7234956257541394, + "learning_rate": 9.631576036393386e-07, + "loss": 0.4538, + "step": 26270 + }, + { + "epoch": 0.805167340934167, + "grad_norm": 1.81999470294471, + "learning_rate": 9.628647723917329e-07, + "loss": 0.5097, + "step": 26271 + }, + { + "epoch": 0.8051979894569081, + "grad_norm": 1.6597095646547755, + "learning_rate": 9.625719809230532e-07, + "loss": 0.5331, + "step": 26272 + }, + { + "epoch": 0.8052286379796494, + "grad_norm": 1.8466803005193464, + "learning_rate": 9.622792292361827e-07, + "loss": 0.5072, + "step": 26273 + }, + { + "epoch": 0.8052592865023905, + "grad_norm": 1.8804555342670435, + "learning_rate": 9.619865173340105e-07, + "loss": 0.6231, + "step": 26274 + }, + { + "epoch": 0.8052899350251318, + "grad_norm": 1.6560936410867864, + "learning_rate": 9.61693845219418e-07, + "loss": 0.622, + "step": 26275 + }, + { + "epoch": 0.805320583547873, + "grad_norm": 1.4911852674553374, + "learning_rate": 9.614012128952888e-07, + "loss": 0.478, + "step": 26276 + }, + { + "epoch": 0.8053512320706142, + "grad_norm": 1.7372019802317107, + "learning_rate": 9.61108620364506e-07, + "loss": 0.5712, + "step": 26277 + }, + { + "epoch": 0.8053818805933554, + "grad_norm": 1.7807459510926913, + "learning_rate": 9.608160676299534e-07, + "loss": 0.5301, + "step": 26278 + }, + { + "epoch": 0.8054125291160966, + "grad_norm": 1.8247704277900039, + "learning_rate": 9.605235546945152e-07, + "loss": 0.5181, + "step": 26279 + }, + { + "epoch": 0.8054431776388378, + "grad_norm": 1.7222265388790243, + "learning_rate": 9.602310815610705e-07, + "loss": 0.4751, + "step": 26280 + }, + { + "epoch": 
0.805473826161579, + "grad_norm": 2.1025485460105564, + "learning_rate": 9.599386482325024e-07, + "loss": 0.5176, + "step": 26281 + }, + { + "epoch": 0.8055044746843202, + "grad_norm": 1.7041471793900134, + "learning_rate": 9.596462547116942e-07, + "loss": 0.5321, + "step": 26282 + }, + { + "epoch": 0.8055351232070614, + "grad_norm": 1.985194756622553, + "learning_rate": 9.593539010015245e-07, + "loss": 0.5932, + "step": 26283 + }, + { + "epoch": 0.8055657717298026, + "grad_norm": 1.9225659402347681, + "learning_rate": 9.59061587104873e-07, + "loss": 0.5213, + "step": 26284 + }, + { + "epoch": 0.8055964202525439, + "grad_norm": 1.717187126036837, + "learning_rate": 9.587693130246235e-07, + "loss": 0.5913, + "step": 26285 + }, + { + "epoch": 0.805627068775285, + "grad_norm": 1.8359607097881212, + "learning_rate": 9.584770787636543e-07, + "loss": 0.5912, + "step": 26286 + }, + { + "epoch": 0.8056577172980263, + "grad_norm": 1.7550955108337, + "learning_rate": 9.58184884324843e-07, + "loss": 0.5037, + "step": 26287 + }, + { + "epoch": 0.8056883658207674, + "grad_norm": 1.9779285663426873, + "learning_rate": 9.578927297110701e-07, + "loss": 0.6559, + "step": 26288 + }, + { + "epoch": 0.8057190143435087, + "grad_norm": 1.9013527174378035, + "learning_rate": 9.576006149252148e-07, + "loss": 0.5013, + "step": 26289 + }, + { + "epoch": 0.8057496628662498, + "grad_norm": 0.840385278175887, + "learning_rate": 9.573085399701558e-07, + "loss": 0.3957, + "step": 26290 + }, + { + "epoch": 0.8057803113889911, + "grad_norm": 1.9617551651764797, + "learning_rate": 9.57016504848769e-07, + "loss": 0.4532, + "step": 26291 + }, + { + "epoch": 0.8058109599117322, + "grad_norm": 1.884749962492417, + "learning_rate": 9.567245095639333e-07, + "loss": 0.5629, + "step": 26292 + }, + { + "epoch": 0.8058416084344735, + "grad_norm": 1.8054500748529694, + "learning_rate": 9.564325541185266e-07, + "loss": 0.6619, + "step": 26293 + }, + { + "epoch": 0.8058722569572146, + "grad_norm": 1.8838234249244548, + "learning_rate": 9.561406385154243e-07, + "loss": 0.5734, + "step": 26294 + }, + { + "epoch": 0.8059029054799559, + "grad_norm": 1.7542934997389954, + "learning_rate": 9.558487627575002e-07, + "loss": 0.5862, + "step": 26295 + }, + { + "epoch": 0.8059335540026971, + "grad_norm": 1.8135774377359308, + "learning_rate": 9.55556926847636e-07, + "loss": 0.569, + "step": 26296 + }, + { + "epoch": 0.8059642025254383, + "grad_norm": 0.7652951485104869, + "learning_rate": 9.552651307887028e-07, + "loss": 0.3884, + "step": 26297 + }, + { + "epoch": 0.8059948510481795, + "grad_norm": 1.8215311586135017, + "learning_rate": 9.549733745835787e-07, + "loss": 0.5447, + "step": 26298 + }, + { + "epoch": 0.8060254995709207, + "grad_norm": 2.0440272760440457, + "learning_rate": 9.546816582351354e-07, + "loss": 0.5715, + "step": 26299 + }, + { + "epoch": 0.8060561480936619, + "grad_norm": 1.7846096321237972, + "learning_rate": 9.543899817462488e-07, + "loss": 0.5577, + "step": 26300 + }, + { + "epoch": 0.806086796616403, + "grad_norm": 2.0954333878326334, + "learning_rate": 9.540983451197939e-07, + "loss": 0.5909, + "step": 26301 + }, + { + "epoch": 0.8061174451391443, + "grad_norm": 2.1029546954457476, + "learning_rate": 9.538067483586422e-07, + "loss": 0.5789, + "step": 26302 + }, + { + "epoch": 0.8061480936618854, + "grad_norm": 0.8046464688774853, + "learning_rate": 9.535151914656676e-07, + "loss": 0.4084, + "step": 26303 + }, + { + "epoch": 0.8061787421846267, + "grad_norm": 2.0581127377905664, + "learning_rate": 9.532236744437445e-07, 
+ "loss": 0.6385, + "step": 26304 + }, + { + "epoch": 0.8062093907073679, + "grad_norm": 2.0599512073308825, + "learning_rate": 9.529321972957428e-07, + "loss": 0.5972, + "step": 26305 + }, + { + "epoch": 0.8062400392301091, + "grad_norm": 1.7524532228537022, + "learning_rate": 9.526407600245369e-07, + "loss": 0.5789, + "step": 26306 + }, + { + "epoch": 0.8062706877528503, + "grad_norm": 1.9688377083593853, + "learning_rate": 9.523493626329961e-07, + "loss": 0.5656, + "step": 26307 + }, + { + "epoch": 0.8063013362755915, + "grad_norm": 2.125030041056799, + "learning_rate": 9.520580051239925e-07, + "loss": 0.6175, + "step": 26308 + }, + { + "epoch": 0.8063319847983327, + "grad_norm": 1.6833146497650555, + "learning_rate": 9.517666875003978e-07, + "loss": 0.5201, + "step": 26309 + }, + { + "epoch": 0.8063626333210739, + "grad_norm": 1.8816176996023481, + "learning_rate": 9.514754097650813e-07, + "loss": 0.623, + "step": 26310 + }, + { + "epoch": 0.8063932818438151, + "grad_norm": 2.06620570357826, + "learning_rate": 9.51184171920913e-07, + "loss": 0.5355, + "step": 26311 + }, + { + "epoch": 0.8064239303665564, + "grad_norm": 0.7900476026228758, + "learning_rate": 9.508929739707639e-07, + "loss": 0.4014, + "step": 26312 + }, + { + "epoch": 0.8064545788892975, + "grad_norm": 2.0595555671351136, + "learning_rate": 9.506018159175029e-07, + "loss": 0.547, + "step": 26313 + }, + { + "epoch": 0.8064852274120388, + "grad_norm": 1.8847705673228858, + "learning_rate": 9.503106977639959e-07, + "loss": 0.5472, + "step": 26314 + }, + { + "epoch": 0.8065158759347799, + "grad_norm": 1.7335330025587286, + "learning_rate": 9.500196195131156e-07, + "loss": 0.5475, + "step": 26315 + }, + { + "epoch": 0.8065465244575212, + "grad_norm": 1.974993912703293, + "learning_rate": 9.497285811677276e-07, + "loss": 0.5732, + "step": 26316 + }, + { + "epoch": 0.8065771729802623, + "grad_norm": 1.9659255578705228, + "learning_rate": 9.494375827307012e-07, + "loss": 0.6067, + "step": 26317 + }, + { + "epoch": 0.8066078215030036, + "grad_norm": 1.945740007827795, + "learning_rate": 9.491466242049014e-07, + "loss": 0.537, + "step": 26318 + }, + { + "epoch": 0.8066384700257447, + "grad_norm": 1.8164286200457658, + "learning_rate": 9.488557055931963e-07, + "loss": 0.6193, + "step": 26319 + }, + { + "epoch": 0.806669118548486, + "grad_norm": 1.9996814211963863, + "learning_rate": 9.485648268984538e-07, + "loss": 0.5613, + "step": 26320 + }, + { + "epoch": 0.8066997670712271, + "grad_norm": 1.7263898385301102, + "learning_rate": 9.482739881235375e-07, + "loss": 0.5216, + "step": 26321 + }, + { + "epoch": 0.8067304155939684, + "grad_norm": 1.9711259113089057, + "learning_rate": 9.479831892713143e-07, + "loss": 0.5021, + "step": 26322 + }, + { + "epoch": 0.8067610641167096, + "grad_norm": 1.800129882579945, + "learning_rate": 9.476924303446505e-07, + "loss": 0.5662, + "step": 26323 + }, + { + "epoch": 0.8067917126394508, + "grad_norm": 1.759640779933341, + "learning_rate": 9.47401711346409e-07, + "loss": 0.5155, + "step": 26324 + }, + { + "epoch": 0.806822361162192, + "grad_norm": 2.0309990883640334, + "learning_rate": 9.471110322794552e-07, + "loss": 0.524, + "step": 26325 + }, + { + "epoch": 0.8068530096849332, + "grad_norm": 1.9755056795982435, + "learning_rate": 9.468203931466546e-07, + "loss": 0.6387, + "step": 26326 + }, + { + "epoch": 0.8068836582076744, + "grad_norm": 1.8720369853973924, + "learning_rate": 9.465297939508688e-07, + "loss": 0.5592, + "step": 26327 + }, + { + "epoch": 0.8069143067304156, + "grad_norm": 
1.8279501507106737, + "learning_rate": 9.462392346949629e-07, + "loss": 0.6006, + "step": 26328 + }, + { + "epoch": 0.8069449552531568, + "grad_norm": 2.068855138317394, + "learning_rate": 9.459487153817981e-07, + "loss": 0.4927, + "step": 26329 + }, + { + "epoch": 0.806975603775898, + "grad_norm": 1.9939692325819405, + "learning_rate": 9.456582360142375e-07, + "loss": 0.5543, + "step": 26330 + }, + { + "epoch": 0.8070062522986392, + "grad_norm": 2.0068042283590466, + "learning_rate": 9.453677965951452e-07, + "loss": 0.4733, + "step": 26331 + }, + { + "epoch": 0.8070369008213804, + "grad_norm": 1.8779786372396379, + "learning_rate": 9.450773971273797e-07, + "loss": 0.531, + "step": 26332 + }, + { + "epoch": 0.8070675493441216, + "grad_norm": 0.8394409800139712, + "learning_rate": 9.447870376138047e-07, + "loss": 0.3876, + "step": 26333 + }, + { + "epoch": 0.8070981978668628, + "grad_norm": 1.9959265934745012, + "learning_rate": 9.444967180572817e-07, + "loss": 0.5955, + "step": 26334 + }, + { + "epoch": 0.807128846389604, + "grad_norm": 1.9424118302882558, + "learning_rate": 9.442064384606687e-07, + "loss": 0.5332, + "step": 26335 + }, + { + "epoch": 0.8071594949123452, + "grad_norm": 1.7550557708305683, + "learning_rate": 9.439161988268275e-07, + "loss": 0.5221, + "step": 26336 + }, + { + "epoch": 0.8071901434350864, + "grad_norm": 0.8029449512480945, + "learning_rate": 9.436259991586188e-07, + "loss": 0.3856, + "step": 26337 + }, + { + "epoch": 0.8072207919578276, + "grad_norm": 1.8736763512116281, + "learning_rate": 9.433358394589003e-07, + "loss": 0.6575, + "step": 26338 + }, + { + "epoch": 0.8072514404805688, + "grad_norm": 1.8374312253823764, + "learning_rate": 9.430457197305326e-07, + "loss": 0.6318, + "step": 26339 + }, + { + "epoch": 0.80728208900331, + "grad_norm": 1.88788148418415, + "learning_rate": 9.427556399763721e-07, + "loss": 0.5803, + "step": 26340 + }, + { + "epoch": 0.8073127375260513, + "grad_norm": 1.7744558106832462, + "learning_rate": 9.424656001992788e-07, + "loss": 0.563, + "step": 26341 + }, + { + "epoch": 0.8073433860487924, + "grad_norm": 1.753291461108071, + "learning_rate": 9.42175600402111e-07, + "loss": 0.6326, + "step": 26342 + }, + { + "epoch": 0.8073740345715337, + "grad_norm": 1.8005207184953473, + "learning_rate": 9.418856405877241e-07, + "loss": 0.5891, + "step": 26343 + }, + { + "epoch": 0.8074046830942748, + "grad_norm": 1.7326144694204832, + "learning_rate": 9.415957207589765e-07, + "loss": 0.4994, + "step": 26344 + }, + { + "epoch": 0.8074353316170161, + "grad_norm": 1.9849169788212653, + "learning_rate": 9.413058409187254e-07, + "loss": 0.4772, + "step": 26345 + }, + { + "epoch": 0.8074659801397572, + "grad_norm": 0.7863856362872214, + "learning_rate": 9.410160010698255e-07, + "loss": 0.3972, + "step": 26346 + }, + { + "epoch": 0.8074966286624985, + "grad_norm": 1.8456406073617395, + "learning_rate": 9.40726201215133e-07, + "loss": 0.6639, + "step": 26347 + }, + { + "epoch": 0.8075272771852396, + "grad_norm": 0.8068290155866257, + "learning_rate": 9.404364413575057e-07, + "loss": 0.4018, + "step": 26348 + }, + { + "epoch": 0.8075579257079809, + "grad_norm": 1.763876438048496, + "learning_rate": 9.401467214997956e-07, + "loss": 0.5527, + "step": 26349 + }, + { + "epoch": 0.8075885742307221, + "grad_norm": 1.9551632529338463, + "learning_rate": 9.398570416448593e-07, + "loss": 0.5212, + "step": 26350 + }, + { + "epoch": 0.8076192227534633, + "grad_norm": 1.7560686883511174, + "learning_rate": 9.395674017955492e-07, + "loss": 0.4723, + "step": 
26351 + }, + { + "epoch": 0.8076498712762045, + "grad_norm": 1.9705612890638555, + "learning_rate": 9.392778019547205e-07, + "loss": 0.6223, + "step": 26352 + }, + { + "epoch": 0.8076805197989457, + "grad_norm": 2.147624280080635, + "learning_rate": 9.389882421252284e-07, + "loss": 0.6441, + "step": 26353 + }, + { + "epoch": 0.8077111683216869, + "grad_norm": 1.726760630708477, + "learning_rate": 9.386987223099225e-07, + "loss": 0.5557, + "step": 26354 + }, + { + "epoch": 0.8077418168444281, + "grad_norm": 0.7913451335845388, + "learning_rate": 9.38409242511657e-07, + "loss": 0.3789, + "step": 26355 + }, + { + "epoch": 0.8077724653671693, + "grad_norm": 1.973165834106145, + "learning_rate": 9.381198027332861e-07, + "loss": 0.5849, + "step": 26356 + }, + { + "epoch": 0.8078031138899106, + "grad_norm": 1.9075005453550173, + "learning_rate": 9.378304029776586e-07, + "loss": 0.6703, + "step": 26357 + }, + { + "epoch": 0.8078337624126517, + "grad_norm": 2.0434296859880066, + "learning_rate": 9.37541043247629e-07, + "loss": 0.5666, + "step": 26358 + }, + { + "epoch": 0.807864410935393, + "grad_norm": 1.9750672887672518, + "learning_rate": 9.372517235460437e-07, + "loss": 0.6726, + "step": 26359 + }, + { + "epoch": 0.8078950594581341, + "grad_norm": 0.7558115809218715, + "learning_rate": 9.369624438757597e-07, + "loss": 0.4081, + "step": 26360 + }, + { + "epoch": 0.8079257079808754, + "grad_norm": 1.8187641767655014, + "learning_rate": 9.366732042396243e-07, + "loss": 0.4905, + "step": 26361 + }, + { + "epoch": 0.8079563565036165, + "grad_norm": 1.9327128447613875, + "learning_rate": 9.363840046404865e-07, + "loss": 0.6092, + "step": 26362 + }, + { + "epoch": 0.8079870050263577, + "grad_norm": 2.1076040808743284, + "learning_rate": 9.360948450811963e-07, + "loss": 0.5612, + "step": 26363 + }, + { + "epoch": 0.8080176535490989, + "grad_norm": 1.8561764607093938, + "learning_rate": 9.358057255646047e-07, + "loss": 0.5133, + "step": 26364 + }, + { + "epoch": 0.8080483020718401, + "grad_norm": 1.9897836625336922, + "learning_rate": 9.355166460935583e-07, + "loss": 0.6312, + "step": 26365 + }, + { + "epoch": 0.8080789505945813, + "grad_norm": 1.7734903955870736, + "learning_rate": 9.352276066709059e-07, + "loss": 0.508, + "step": 26366 + }, + { + "epoch": 0.8081095991173225, + "grad_norm": 1.8953738490140228, + "learning_rate": 9.349386072994976e-07, + "loss": 0.665, + "step": 26367 + }, + { + "epoch": 0.8081402476400638, + "grad_norm": 1.8508859393690822, + "learning_rate": 9.346496479821776e-07, + "loss": 0.5083, + "step": 26368 + }, + { + "epoch": 0.8081708961628049, + "grad_norm": 1.7947429720352746, + "learning_rate": 9.343607287217959e-07, + "loss": 0.5159, + "step": 26369 + }, + { + "epoch": 0.8082015446855462, + "grad_norm": 1.6317696853512484, + "learning_rate": 9.340718495211965e-07, + "loss": 0.4873, + "step": 26370 + }, + { + "epoch": 0.8082321932082873, + "grad_norm": 1.7692432849150495, + "learning_rate": 9.337830103832291e-07, + "loss": 0.5779, + "step": 26371 + }, + { + "epoch": 0.8082628417310286, + "grad_norm": 1.9927879275733564, + "learning_rate": 9.334942113107387e-07, + "loss": 0.5368, + "step": 26372 + }, + { + "epoch": 0.8082934902537697, + "grad_norm": 2.1923664437376273, + "learning_rate": 9.332054523065686e-07, + "loss": 0.5944, + "step": 26373 + }, + { + "epoch": 0.808324138776511, + "grad_norm": 1.8949056817585108, + "learning_rate": 9.329167333735661e-07, + "loss": 0.5996, + "step": 26374 + }, + { + "epoch": 0.8083547872992521, + "grad_norm": 1.8089036952465434, + 
"learning_rate": 9.326280545145766e-07, + "loss": 0.5385, + "step": 26375 + }, + { + "epoch": 0.8083854358219934, + "grad_norm": 2.0428068053915536, + "learning_rate": 9.323394157324422e-07, + "loss": 0.6103, + "step": 26376 + }, + { + "epoch": 0.8084160843447346, + "grad_norm": 1.767100984245416, + "learning_rate": 9.320508170300085e-07, + "loss": 0.5319, + "step": 26377 + }, + { + "epoch": 0.8084467328674758, + "grad_norm": 1.981208083069247, + "learning_rate": 9.317622584101194e-07, + "loss": 0.5878, + "step": 26378 + }, + { + "epoch": 0.808477381390217, + "grad_norm": 1.7493992267944227, + "learning_rate": 9.31473739875618e-07, + "loss": 0.578, + "step": 26379 + }, + { + "epoch": 0.8085080299129582, + "grad_norm": 1.9993581818098458, + "learning_rate": 9.311852614293476e-07, + "loss": 0.6454, + "step": 26380 + }, + { + "epoch": 0.8085386784356994, + "grad_norm": 1.8182869332195684, + "learning_rate": 9.308968230741467e-07, + "loss": 0.4986, + "step": 26381 + }, + { + "epoch": 0.8085693269584406, + "grad_norm": 1.813603413234137, + "learning_rate": 9.306084248128638e-07, + "loss": 0.5524, + "step": 26382 + }, + { + "epoch": 0.8085999754811818, + "grad_norm": 1.754964382635899, + "learning_rate": 9.303200666483364e-07, + "loss": 0.6506, + "step": 26383 + }, + { + "epoch": 0.808630624003923, + "grad_norm": 0.811038844389649, + "learning_rate": 9.30031748583406e-07, + "loss": 0.3935, + "step": 26384 + }, + { + "epoch": 0.8086612725266642, + "grad_norm": 2.204214320629235, + "learning_rate": 9.297434706209141e-07, + "loss": 0.6102, + "step": 26385 + }, + { + "epoch": 0.8086919210494055, + "grad_norm": 1.9716153691684584, + "learning_rate": 9.294552327637025e-07, + "loss": 0.5555, + "step": 26386 + }, + { + "epoch": 0.8087225695721466, + "grad_norm": 0.8007162992089707, + "learning_rate": 9.291670350146087e-07, + "loss": 0.428, + "step": 26387 + }, + { + "epoch": 0.8087532180948879, + "grad_norm": 1.7411227643226441, + "learning_rate": 9.288788773764734e-07, + "loss": 0.4808, + "step": 26388 + }, + { + "epoch": 0.808783866617629, + "grad_norm": 1.7931411862505335, + "learning_rate": 9.285907598521359e-07, + "loss": 0.5463, + "step": 26389 + }, + { + "epoch": 0.8088145151403703, + "grad_norm": 2.010556078852886, + "learning_rate": 9.283026824444374e-07, + "loss": 0.5926, + "step": 26390 + }, + { + "epoch": 0.8088451636631114, + "grad_norm": 1.8677679207681461, + "learning_rate": 9.280146451562139e-07, + "loss": 0.5071, + "step": 26391 + }, + { + "epoch": 0.8088758121858527, + "grad_norm": 1.7116468597394994, + "learning_rate": 9.27726647990303e-07, + "loss": 0.618, + "step": 26392 + }, + { + "epoch": 0.8089064607085938, + "grad_norm": 0.7553239703095436, + "learning_rate": 9.274386909495431e-07, + "loss": 0.3985, + "step": 26393 + }, + { + "epoch": 0.808937109231335, + "grad_norm": 1.8507710391001866, + "learning_rate": 9.27150774036773e-07, + "loss": 0.4873, + "step": 26394 + }, + { + "epoch": 0.8089677577540763, + "grad_norm": 0.8353300319945084, + "learning_rate": 9.268628972548272e-07, + "loss": 0.4113, + "step": 26395 + }, + { + "epoch": 0.8089984062768174, + "grad_norm": 1.8416658279657776, + "learning_rate": 9.265750606065438e-07, + "loss": 0.6802, + "step": 26396 + }, + { + "epoch": 0.8090290547995587, + "grad_norm": 1.9062137718618664, + "learning_rate": 9.262872640947579e-07, + "loss": 0.5909, + "step": 26397 + }, + { + "epoch": 0.8090597033222998, + "grad_norm": 1.8789253788007927, + "learning_rate": 9.259995077223077e-07, + "loss": 0.6413, + "step": 26398 + }, + { + "epoch": 
0.8090903518450411, + "grad_norm": 2.109039490794495, + "learning_rate": 9.257117914920249e-07, + "loss": 0.6449, + "step": 26399 + }, + { + "epoch": 0.8091210003677822, + "grad_norm": 1.7891726739373328, + "learning_rate": 9.254241154067467e-07, + "loss": 0.5111, + "step": 26400 + }, + { + "epoch": 0.8091516488905235, + "grad_norm": 1.8162977342649314, + "learning_rate": 9.251364794693085e-07, + "loss": 0.4934, + "step": 26401 + }, + { + "epoch": 0.8091822974132646, + "grad_norm": 1.8937482840417892, + "learning_rate": 9.248488836825431e-07, + "loss": 0.5539, + "step": 26402 + }, + { + "epoch": 0.8092129459360059, + "grad_norm": 2.4319153376809406, + "learning_rate": 9.245613280492833e-07, + "loss": 0.6298, + "step": 26403 + }, + { + "epoch": 0.809243594458747, + "grad_norm": 1.6930940294838606, + "learning_rate": 9.242738125723633e-07, + "loss": 0.5368, + "step": 26404 + }, + { + "epoch": 0.8092742429814883, + "grad_norm": 1.7804313828722087, + "learning_rate": 9.239863372546159e-07, + "loss": 0.5789, + "step": 26405 + }, + { + "epoch": 0.8093048915042295, + "grad_norm": 1.8409592184058172, + "learning_rate": 9.236989020988757e-07, + "loss": 0.5342, + "step": 26406 + }, + { + "epoch": 0.8093355400269707, + "grad_norm": 2.1933448554938435, + "learning_rate": 9.234115071079713e-07, + "loss": 0.6751, + "step": 26407 + }, + { + "epoch": 0.8093661885497119, + "grad_norm": 1.904211877848435, + "learning_rate": 9.231241522847373e-07, + "loss": 0.5891, + "step": 26408 + }, + { + "epoch": 0.8093968370724531, + "grad_norm": 2.1294492799025693, + "learning_rate": 9.228368376320046e-07, + "loss": 0.5927, + "step": 26409 + }, + { + "epoch": 0.8094274855951943, + "grad_norm": 2.113748991876853, + "learning_rate": 9.225495631526044e-07, + "loss": 0.5615, + "step": 26410 + }, + { + "epoch": 0.8094581341179355, + "grad_norm": 1.5217046380710724, + "learning_rate": 9.222623288493637e-07, + "loss": 0.4973, + "step": 26411 + }, + { + "epoch": 0.8094887826406767, + "grad_norm": 1.9863043028669853, + "learning_rate": 9.219751347251183e-07, + "loss": 0.632, + "step": 26412 + }, + { + "epoch": 0.809519431163418, + "grad_norm": 2.090630062825337, + "learning_rate": 9.216879807826951e-07, + "loss": 0.5863, + "step": 26413 + }, + { + "epoch": 0.8095500796861591, + "grad_norm": 1.9621711054033064, + "learning_rate": 9.214008670249225e-07, + "loss": 0.6475, + "step": 26414 + }, + { + "epoch": 0.8095807282089004, + "grad_norm": 1.8723600706391115, + "learning_rate": 9.211137934546304e-07, + "loss": 0.5797, + "step": 26415 + }, + { + "epoch": 0.8096113767316415, + "grad_norm": 1.7206111564549267, + "learning_rate": 9.208267600746479e-07, + "loss": 0.6091, + "step": 26416 + }, + { + "epoch": 0.8096420252543828, + "grad_norm": 1.8089467358861415, + "learning_rate": 9.205397668878046e-07, + "loss": 0.5889, + "step": 26417 + }, + { + "epoch": 0.8096726737771239, + "grad_norm": 0.7796867707382262, + "learning_rate": 9.202528138969252e-07, + "loss": 0.4098, + "step": 26418 + }, + { + "epoch": 0.8097033222998652, + "grad_norm": 1.6043990376504147, + "learning_rate": 9.199659011048389e-07, + "loss": 0.4616, + "step": 26419 + }, + { + "epoch": 0.8097339708226063, + "grad_norm": 0.8012528557636576, + "learning_rate": 9.196790285143736e-07, + "loss": 0.4072, + "step": 26420 + }, + { + "epoch": 0.8097646193453476, + "grad_norm": 1.8694004837526812, + "learning_rate": 9.193921961283552e-07, + "loss": 0.5353, + "step": 26421 + }, + { + "epoch": 0.8097952678680888, + "grad_norm": 2.086002710790227, + "learning_rate": 
9.191054039496067e-07, + "loss": 0.6237, + "step": 26422 + }, + { + "epoch": 0.80982591639083, + "grad_norm": 1.8634571952388037, + "learning_rate": 9.188186519809594e-07, + "loss": 0.615, + "step": 26423 + }, + { + "epoch": 0.8098565649135712, + "grad_norm": 1.7419994862968435, + "learning_rate": 9.185319402252346e-07, + "loss": 0.4913, + "step": 26424 + }, + { + "epoch": 0.8098872134363123, + "grad_norm": 2.1897435601221678, + "learning_rate": 9.182452686852605e-07, + "loss": 0.6218, + "step": 26425 + }, + { + "epoch": 0.8099178619590536, + "grad_norm": 2.0896677924714826, + "learning_rate": 9.179586373638588e-07, + "loss": 0.6083, + "step": 26426 + }, + { + "epoch": 0.8099485104817947, + "grad_norm": 1.7572719178860123, + "learning_rate": 9.176720462638549e-07, + "loss": 0.55, + "step": 26427 + }, + { + "epoch": 0.809979159004536, + "grad_norm": 1.5692708717071824, + "learning_rate": 9.173854953880745e-07, + "loss": 0.5454, + "step": 26428 + }, + { + "epoch": 0.8100098075272771, + "grad_norm": 1.9759988627911556, + "learning_rate": 9.170989847393375e-07, + "loss": 0.6053, + "step": 26429 + }, + { + "epoch": 0.8100404560500184, + "grad_norm": 1.8475503634894075, + "learning_rate": 9.168125143204692e-07, + "loss": 0.5622, + "step": 26430 + }, + { + "epoch": 0.8100711045727595, + "grad_norm": 2.0311331291711907, + "learning_rate": 9.165260841342933e-07, + "loss": 0.5798, + "step": 26431 + }, + { + "epoch": 0.8101017530955008, + "grad_norm": 0.7902616485108538, + "learning_rate": 9.162396941836293e-07, + "loss": 0.4179, + "step": 26432 + }, + { + "epoch": 0.810132401618242, + "grad_norm": 2.010939927342932, + "learning_rate": 9.159533444713003e-07, + "loss": 0.6334, + "step": 26433 + }, + { + "epoch": 0.8101630501409832, + "grad_norm": 2.1398079166925212, + "learning_rate": 9.156670350001295e-07, + "loss": 0.6215, + "step": 26434 + }, + { + "epoch": 0.8101936986637244, + "grad_norm": 1.9082780002239472, + "learning_rate": 9.153807657729352e-07, + "loss": 0.634, + "step": 26435 + }, + { + "epoch": 0.8102243471864656, + "grad_norm": 1.84686522898894, + "learning_rate": 9.150945367925407e-07, + "loss": 0.5818, + "step": 26436 + }, + { + "epoch": 0.8102549957092068, + "grad_norm": 1.7612001686145295, + "learning_rate": 9.148083480617631e-07, + "loss": 0.5604, + "step": 26437 + }, + { + "epoch": 0.810285644231948, + "grad_norm": 0.7956326627861707, + "learning_rate": 9.145221995834247e-07, + "loss": 0.3956, + "step": 26438 + }, + { + "epoch": 0.8103162927546892, + "grad_norm": 1.8637785424077487, + "learning_rate": 9.142360913603449e-07, + "loss": 0.5283, + "step": 26439 + }, + { + "epoch": 0.8103469412774305, + "grad_norm": 2.066202300687306, + "learning_rate": 9.139500233953419e-07, + "loss": 0.6449, + "step": 26440 + }, + { + "epoch": 0.8103775898001716, + "grad_norm": 1.7664424703587438, + "learning_rate": 9.136639956912341e-07, + "loss": 0.56, + "step": 26441 + }, + { + "epoch": 0.8104082383229129, + "grad_norm": 2.0610359428624467, + "learning_rate": 9.13378008250842e-07, + "loss": 0.5916, + "step": 26442 + }, + { + "epoch": 0.810438886845654, + "grad_norm": 1.817246564664792, + "learning_rate": 9.130920610769806e-07, + "loss": 0.602, + "step": 26443 + }, + { + "epoch": 0.8104695353683953, + "grad_norm": 1.9166427761196645, + "learning_rate": 9.128061541724704e-07, + "loss": 0.5678, + "step": 26444 + }, + { + "epoch": 0.8105001838911364, + "grad_norm": 0.7630192916981438, + "learning_rate": 9.125202875401251e-07, + "loss": 0.3994, + "step": 26445 + }, + { + "epoch": 0.8105308324138777, + 
"grad_norm": 1.9684935423847185, + "learning_rate": 9.122344611827639e-07, + "loss": 0.6287, + "step": 26446 + }, + { + "epoch": 0.8105614809366188, + "grad_norm": 0.7926983234763548, + "learning_rate": 9.119486751032031e-07, + "loss": 0.3939, + "step": 26447 + }, + { + "epoch": 0.8105921294593601, + "grad_norm": 0.7656997748710983, + "learning_rate": 9.116629293042567e-07, + "loss": 0.3942, + "step": 26448 + }, + { + "epoch": 0.8106227779821013, + "grad_norm": 0.7775152114579147, + "learning_rate": 9.11377223788742e-07, + "loss": 0.4036, + "step": 26449 + }, + { + "epoch": 0.8106534265048425, + "grad_norm": 1.8246651260297069, + "learning_rate": 9.110915585594748e-07, + "loss": 0.6035, + "step": 26450 + }, + { + "epoch": 0.8106840750275837, + "grad_norm": 2.027045943294949, + "learning_rate": 9.10805933619267e-07, + "loss": 0.5507, + "step": 26451 + }, + { + "epoch": 0.8107147235503249, + "grad_norm": 0.7638878266693246, + "learning_rate": 9.105203489709353e-07, + "loss": 0.3766, + "step": 26452 + }, + { + "epoch": 0.8107453720730661, + "grad_norm": 2.067012536783393, + "learning_rate": 9.102348046172937e-07, + "loss": 0.5242, + "step": 26453 + }, + { + "epoch": 0.8107760205958073, + "grad_norm": 1.8476097629884858, + "learning_rate": 9.099493005611537e-07, + "loss": 0.5584, + "step": 26454 + }, + { + "epoch": 0.8108066691185485, + "grad_norm": 1.6612161777799652, + "learning_rate": 9.096638368053312e-07, + "loss": 0.6059, + "step": 26455 + }, + { + "epoch": 0.8108373176412896, + "grad_norm": 1.9549241267556, + "learning_rate": 9.093784133526357e-07, + "loss": 0.5102, + "step": 26456 + }, + { + "epoch": 0.8108679661640309, + "grad_norm": 1.9081998626958692, + "learning_rate": 9.09093030205882e-07, + "loss": 0.6002, + "step": 26457 + }, + { + "epoch": 0.810898614686772, + "grad_norm": 1.8214767784001098, + "learning_rate": 9.088076873678825e-07, + "loss": 0.5714, + "step": 26458 + }, + { + "epoch": 0.8109292632095133, + "grad_norm": 1.9417695591361783, + "learning_rate": 9.08522384841446e-07, + "loss": 0.5382, + "step": 26459 + }, + { + "epoch": 0.8109599117322545, + "grad_norm": 1.9609287273350555, + "learning_rate": 9.082371226293856e-07, + "loss": 0.6044, + "step": 26460 + }, + { + "epoch": 0.8109905602549957, + "grad_norm": 1.899300351480911, + "learning_rate": 9.079519007345128e-07, + "loss": 0.5667, + "step": 26461 + }, + { + "epoch": 0.8110212087777369, + "grad_norm": 1.5703835451836219, + "learning_rate": 9.076667191596355e-07, + "loss": 0.5849, + "step": 26462 + }, + { + "epoch": 0.8110518573004781, + "grad_norm": 2.1578915588780014, + "learning_rate": 9.073815779075657e-07, + "loss": 0.612, + "step": 26463 + }, + { + "epoch": 0.8110825058232193, + "grad_norm": 1.6296550630209607, + "learning_rate": 9.070964769811131e-07, + "loss": 0.5528, + "step": 26464 + }, + { + "epoch": 0.8111131543459605, + "grad_norm": 1.9299833346384947, + "learning_rate": 9.068114163830854e-07, + "loss": 0.6569, + "step": 26465 + }, + { + "epoch": 0.8111438028687017, + "grad_norm": 1.794850647566138, + "learning_rate": 9.065263961162929e-07, + "loss": 0.5095, + "step": 26466 + }, + { + "epoch": 0.811174451391443, + "grad_norm": 1.8023504039065226, + "learning_rate": 9.06241416183542e-07, + "loss": 0.6079, + "step": 26467 + }, + { + "epoch": 0.8112050999141841, + "grad_norm": 1.933160771754221, + "learning_rate": 9.059564765876417e-07, + "loss": 0.5966, + "step": 26468 + }, + { + "epoch": 0.8112357484369254, + "grad_norm": 1.858042190424, + "learning_rate": 9.056715773314012e-07, + "loss": 0.5637, + 
"step": 26469 + }, + { + "epoch": 0.8112663969596665, + "grad_norm": 1.9860868196663208, + "learning_rate": 9.053867184176252e-07, + "loss": 0.5494, + "step": 26470 + }, + { + "epoch": 0.8112970454824078, + "grad_norm": 0.7738587279189221, + "learning_rate": 9.051018998491212e-07, + "loss": 0.4105, + "step": 26471 + }, + { + "epoch": 0.8113276940051489, + "grad_norm": 1.7939136306553893, + "learning_rate": 9.048171216286971e-07, + "loss": 0.5763, + "step": 26472 + }, + { + "epoch": 0.8113583425278902, + "grad_norm": 1.8366770864550543, + "learning_rate": 9.045323837591569e-07, + "loss": 0.603, + "step": 26473 + }, + { + "epoch": 0.8113889910506313, + "grad_norm": 1.8415084730056048, + "learning_rate": 9.042476862433064e-07, + "loss": 0.6278, + "step": 26474 + }, + { + "epoch": 0.8114196395733726, + "grad_norm": 2.0692826207512405, + "learning_rate": 9.039630290839529e-07, + "loss": 0.6314, + "step": 26475 + }, + { + "epoch": 0.8114502880961137, + "grad_norm": 1.7749378505394342, + "learning_rate": 9.036784122838987e-07, + "loss": 0.5923, + "step": 26476 + }, + { + "epoch": 0.811480936618855, + "grad_norm": 2.0542851663268475, + "learning_rate": 9.033938358459504e-07, + "loss": 0.6141, + "step": 26477 + }, + { + "epoch": 0.8115115851415962, + "grad_norm": 1.6875747867519975, + "learning_rate": 9.03109299772908e-07, + "loss": 0.5081, + "step": 26478 + }, + { + "epoch": 0.8115422336643374, + "grad_norm": 2.0102589646674955, + "learning_rate": 9.028248040675802e-07, + "loss": 0.5371, + "step": 26479 + }, + { + "epoch": 0.8115728821870786, + "grad_norm": 1.8765793586540982, + "learning_rate": 9.025403487327683e-07, + "loss": 0.5341, + "step": 26480 + }, + { + "epoch": 0.8116035307098198, + "grad_norm": 1.9866288962365055, + "learning_rate": 9.022559337712733e-07, + "loss": 0.6444, + "step": 26481 + }, + { + "epoch": 0.811634179232561, + "grad_norm": 2.0746399743581376, + "learning_rate": 9.01971559185899e-07, + "loss": 0.5979, + "step": 26482 + }, + { + "epoch": 0.8116648277553022, + "grad_norm": 1.8268201255686478, + "learning_rate": 9.01687224979449e-07, + "loss": 0.5903, + "step": 26483 + }, + { + "epoch": 0.8116954762780434, + "grad_norm": 1.8921982098829726, + "learning_rate": 9.014029311547223e-07, + "loss": 0.5555, + "step": 26484 + }, + { + "epoch": 0.8117261248007847, + "grad_norm": 1.9884520914684813, + "learning_rate": 9.011186777145209e-07, + "loss": 0.5509, + "step": 26485 + }, + { + "epoch": 0.8117567733235258, + "grad_norm": 2.0529192718572973, + "learning_rate": 9.008344646616457e-07, + "loss": 0.6788, + "step": 26486 + }, + { + "epoch": 0.811787421846267, + "grad_norm": 1.8675721358347317, + "learning_rate": 9.005502919988984e-07, + "loss": 0.6472, + "step": 26487 + }, + { + "epoch": 0.8118180703690082, + "grad_norm": 1.8610013309506732, + "learning_rate": 9.002661597290785e-07, + "loss": 0.4743, + "step": 26488 + }, + { + "epoch": 0.8118487188917494, + "grad_norm": 0.8157836992984859, + "learning_rate": 8.999820678549836e-07, + "loss": 0.4144, + "step": 26489 + }, + { + "epoch": 0.8118793674144906, + "grad_norm": 1.4968906919266287, + "learning_rate": 8.996980163794145e-07, + "loss": 0.4564, + "step": 26490 + }, + { + "epoch": 0.8119100159372318, + "grad_norm": 1.8445084977194364, + "learning_rate": 8.994140053051715e-07, + "loss": 0.5615, + "step": 26491 + }, + { + "epoch": 0.811940664459973, + "grad_norm": 0.8027761209309102, + "learning_rate": 8.991300346350495e-07, + "loss": 0.405, + "step": 26492 + }, + { + "epoch": 0.8119713129827142, + "grad_norm": 1.948310385062276, + 
"learning_rate": 8.988461043718489e-07, + "loss": 0.5939, + "step": 26493 + }, + { + "epoch": 0.8120019615054554, + "grad_norm": 1.8789034442442618, + "learning_rate": 8.985622145183687e-07, + "loss": 0.5484, + "step": 26494 + }, + { + "epoch": 0.8120326100281966, + "grad_norm": 2.128569501793506, + "learning_rate": 8.982783650774024e-07, + "loss": 0.6018, + "step": 26495 + }, + { + "epoch": 0.8120632585509379, + "grad_norm": 1.8218008999769815, + "learning_rate": 8.979945560517506e-07, + "loss": 0.5862, + "step": 26496 + }, + { + "epoch": 0.812093907073679, + "grad_norm": 1.9846836724150319, + "learning_rate": 8.977107874442048e-07, + "loss": 0.519, + "step": 26497 + }, + { + "epoch": 0.8121245555964203, + "grad_norm": 0.7691028310399245, + "learning_rate": 8.974270592575673e-07, + "loss": 0.3892, + "step": 26498 + }, + { + "epoch": 0.8121552041191614, + "grad_norm": 1.6958140360841079, + "learning_rate": 8.97143371494631e-07, + "loss": 0.4981, + "step": 26499 + }, + { + "epoch": 0.8121858526419027, + "grad_norm": 1.6632786389318053, + "learning_rate": 8.968597241581889e-07, + "loss": 0.5559, + "step": 26500 + }, + { + "epoch": 0.8122165011646438, + "grad_norm": 1.9271231202274393, + "learning_rate": 8.965761172510379e-07, + "loss": 0.6052, + "step": 26501 + }, + { + "epoch": 0.8122471496873851, + "grad_norm": 1.9922678899881174, + "learning_rate": 8.96292550775974e-07, + "loss": 0.5098, + "step": 26502 + }, + { + "epoch": 0.8122777982101262, + "grad_norm": 0.8245725601654554, + "learning_rate": 8.960090247357878e-07, + "loss": 0.3911, + "step": 26503 + }, + { + "epoch": 0.8123084467328675, + "grad_norm": 1.901999246957141, + "learning_rate": 8.957255391332748e-07, + "loss": 0.656, + "step": 26504 + }, + { + "epoch": 0.8123390952556087, + "grad_norm": 1.9309444927593877, + "learning_rate": 8.954420939712283e-07, + "loss": 0.6139, + "step": 26505 + }, + { + "epoch": 0.8123697437783499, + "grad_norm": 1.6198376897535887, + "learning_rate": 8.951586892524422e-07, + "loss": 0.4969, + "step": 26506 + }, + { + "epoch": 0.8124003923010911, + "grad_norm": 1.9236675444755629, + "learning_rate": 8.948753249797082e-07, + "loss": 0.6765, + "step": 26507 + }, + { + "epoch": 0.8124310408238323, + "grad_norm": 1.720132855557755, + "learning_rate": 8.945920011558152e-07, + "loss": 0.5609, + "step": 26508 + }, + { + "epoch": 0.8124616893465735, + "grad_norm": 1.7585022721620394, + "learning_rate": 8.943087177835602e-07, + "loss": 0.51, + "step": 26509 + }, + { + "epoch": 0.8124923378693147, + "grad_norm": 2.186849940184145, + "learning_rate": 8.940254748657317e-07, + "loss": 0.597, + "step": 26510 + }, + { + "epoch": 0.8125229863920559, + "grad_norm": 0.7830577455961969, + "learning_rate": 8.937422724051193e-07, + "loss": 0.3838, + "step": 26511 + }, + { + "epoch": 0.8125536349147972, + "grad_norm": 1.9560521111232492, + "learning_rate": 8.934591104045154e-07, + "loss": 0.6525, + "step": 26512 + }, + { + "epoch": 0.8125842834375383, + "grad_norm": 2.158465658480585, + "learning_rate": 8.931759888667096e-07, + "loss": 0.5755, + "step": 26513 + }, + { + "epoch": 0.8126149319602796, + "grad_norm": 0.7745351283846718, + "learning_rate": 8.928929077944925e-07, + "loss": 0.3996, + "step": 26514 + }, + { + "epoch": 0.8126455804830207, + "grad_norm": 1.7209981737246653, + "learning_rate": 8.926098671906514e-07, + "loss": 0.4976, + "step": 26515 + }, + { + "epoch": 0.812676229005762, + "grad_norm": 2.015154919196892, + "learning_rate": 8.92326867057976e-07, + "loss": 0.6463, + "step": 26516 + }, + { + "epoch": 
0.8127068775285031, + "grad_norm": 1.8408217602694985, + "learning_rate": 8.920439073992565e-07, + "loss": 0.5687, + "step": 26517 + }, + { + "epoch": 0.8127375260512444, + "grad_norm": 0.762163865264814, + "learning_rate": 8.91760988217279e-07, + "loss": 0.4068, + "step": 26518 + }, + { + "epoch": 0.8127681745739855, + "grad_norm": 1.7535516093093568, + "learning_rate": 8.914781095148294e-07, + "loss": 0.5687, + "step": 26519 + }, + { + "epoch": 0.8127988230967267, + "grad_norm": 1.7206615461680987, + "learning_rate": 8.911952712946997e-07, + "loss": 0.5542, + "step": 26520 + }, + { + "epoch": 0.812829471619468, + "grad_norm": 1.757189611438309, + "learning_rate": 8.909124735596741e-07, + "loss": 0.5385, + "step": 26521 + }, + { + "epoch": 0.8128601201422091, + "grad_norm": 1.7209854344296622, + "learning_rate": 8.906297163125382e-07, + "loss": 0.5331, + "step": 26522 + }, + { + "epoch": 0.8128907686649504, + "grad_norm": 1.726609174801975, + "learning_rate": 8.903469995560792e-07, + "loss": 0.5444, + "step": 26523 + }, + { + "epoch": 0.8129214171876915, + "grad_norm": 1.919921547787409, + "learning_rate": 8.900643232930827e-07, + "loss": 0.6438, + "step": 26524 + }, + { + "epoch": 0.8129520657104328, + "grad_norm": 1.9412833737899144, + "learning_rate": 8.897816875263348e-07, + "loss": 0.5456, + "step": 26525 + }, + { + "epoch": 0.8129827142331739, + "grad_norm": 1.7393923286511976, + "learning_rate": 8.894990922586189e-07, + "loss": 0.6221, + "step": 26526 + }, + { + "epoch": 0.8130133627559152, + "grad_norm": 1.6130794788375875, + "learning_rate": 8.892165374927198e-07, + "loss": 0.5457, + "step": 26527 + }, + { + "epoch": 0.8130440112786563, + "grad_norm": 1.840113943335541, + "learning_rate": 8.889340232314236e-07, + "loss": 0.5288, + "step": 26528 + }, + { + "epoch": 0.8130746598013976, + "grad_norm": 0.809292794811565, + "learning_rate": 8.886515494775122e-07, + "loss": 0.3978, + "step": 26529 + }, + { + "epoch": 0.8131053083241387, + "grad_norm": 2.0369448289890983, + "learning_rate": 8.88369116233766e-07, + "loss": 0.5705, + "step": 26530 + }, + { + "epoch": 0.81313595684688, + "grad_norm": 1.948787834333352, + "learning_rate": 8.880867235029739e-07, + "loss": 0.5549, + "step": 26531 + }, + { + "epoch": 0.8131666053696212, + "grad_norm": 0.8259099999843044, + "learning_rate": 8.878043712879142e-07, + "loss": 0.4055, + "step": 26532 + }, + { + "epoch": 0.8131972538923624, + "grad_norm": 1.6638296386242668, + "learning_rate": 8.875220595913714e-07, + "loss": 0.6059, + "step": 26533 + }, + { + "epoch": 0.8132279024151036, + "grad_norm": 1.7650501794130204, + "learning_rate": 8.872397884161244e-07, + "loss": 0.5498, + "step": 26534 + }, + { + "epoch": 0.8132585509378448, + "grad_norm": 1.7527572619584435, + "learning_rate": 8.869575577649564e-07, + "loss": 0.5784, + "step": 26535 + }, + { + "epoch": 0.813289199460586, + "grad_norm": 1.763989697597091, + "learning_rate": 8.866753676406486e-07, + "loss": 0.4626, + "step": 26536 + }, + { + "epoch": 0.8133198479833272, + "grad_norm": 1.8221231263962463, + "learning_rate": 8.8639321804598e-07, + "loss": 0.6267, + "step": 26537 + }, + { + "epoch": 0.8133504965060684, + "grad_norm": 1.7151716622375597, + "learning_rate": 8.861111089837315e-07, + "loss": 0.4799, + "step": 26538 + }, + { + "epoch": 0.8133811450288096, + "grad_norm": 1.7693551585102656, + "learning_rate": 8.858290404566844e-07, + "loss": 0.5041, + "step": 26539 + }, + { + "epoch": 0.8134117935515508, + "grad_norm": 1.8098390072148707, + "learning_rate": 8.855470124676152e-07, 
+ "loss": 0.4956, + "step": 26540 + }, + { + "epoch": 0.8134424420742921, + "grad_norm": 1.7067273384987163, + "learning_rate": 8.852650250193045e-07, + "loss": 0.4943, + "step": 26541 + }, + { + "epoch": 0.8134730905970332, + "grad_norm": 1.8816504525156963, + "learning_rate": 8.849830781145297e-07, + "loss": 0.5526, + "step": 26542 + }, + { + "epoch": 0.8135037391197745, + "grad_norm": 1.9479439966905445, + "learning_rate": 8.847011717560694e-07, + "loss": 0.615, + "step": 26543 + }, + { + "epoch": 0.8135343876425156, + "grad_norm": 1.9367369856809802, + "learning_rate": 8.844193059467027e-07, + "loss": 0.4829, + "step": 26544 + }, + { + "epoch": 0.8135650361652569, + "grad_norm": 2.056680081051303, + "learning_rate": 8.841374806892039e-07, + "loss": 0.613, + "step": 26545 + }, + { + "epoch": 0.813595684687998, + "grad_norm": 0.8067597937664143, + "learning_rate": 8.83855695986352e-07, + "loss": 0.423, + "step": 26546 + }, + { + "epoch": 0.8136263332107393, + "grad_norm": 1.9351365477163203, + "learning_rate": 8.835739518409242e-07, + "loss": 0.6029, + "step": 26547 + }, + { + "epoch": 0.8136569817334804, + "grad_norm": 0.780605084802672, + "learning_rate": 8.832922482556961e-07, + "loss": 0.3915, + "step": 26548 + }, + { + "epoch": 0.8136876302562217, + "grad_norm": 0.8034712788005591, + "learning_rate": 8.830105852334392e-07, + "loss": 0.4147, + "step": 26549 + }, + { + "epoch": 0.8137182787789629, + "grad_norm": 1.9314347717270925, + "learning_rate": 8.827289627769358e-07, + "loss": 0.6053, + "step": 26550 + }, + { + "epoch": 0.813748927301704, + "grad_norm": 1.8910468315139852, + "learning_rate": 8.824473808889555e-07, + "loss": 0.6205, + "step": 26551 + }, + { + "epoch": 0.8137795758244453, + "grad_norm": 1.9674391510743425, + "learning_rate": 8.82165839572276e-07, + "loss": 0.6078, + "step": 26552 + }, + { + "epoch": 0.8138102243471864, + "grad_norm": 1.8478834227252021, + "learning_rate": 8.818843388296694e-07, + "loss": 0.5997, + "step": 26553 + }, + { + "epoch": 0.8138408728699277, + "grad_norm": 1.8477656630136587, + "learning_rate": 8.816028786639097e-07, + "loss": 0.6106, + "step": 26554 + }, + { + "epoch": 0.8138715213926688, + "grad_norm": 1.8059865651350702, + "learning_rate": 8.813214590777713e-07, + "loss": 0.6584, + "step": 26555 + }, + { + "epoch": 0.8139021699154101, + "grad_norm": 2.030772742406324, + "learning_rate": 8.81040080074026e-07, + "loss": 0.6348, + "step": 26556 + }, + { + "epoch": 0.8139328184381512, + "grad_norm": 1.7825765271637897, + "learning_rate": 8.807587416554464e-07, + "loss": 0.5615, + "step": 26557 + }, + { + "epoch": 0.8139634669608925, + "grad_norm": 2.243687882365001, + "learning_rate": 8.80477443824806e-07, + "loss": 0.6416, + "step": 26558 + }, + { + "epoch": 0.8139941154836337, + "grad_norm": 2.371235578015242, + "learning_rate": 8.80196186584874e-07, + "loss": 0.6051, + "step": 26559 + }, + { + "epoch": 0.8140247640063749, + "grad_norm": 1.808606079031358, + "learning_rate": 8.79914969938423e-07, + "loss": 0.6168, + "step": 26560 + }, + { + "epoch": 0.8140554125291161, + "grad_norm": 1.9184416062811394, + "learning_rate": 8.796337938882254e-07, + "loss": 0.5496, + "step": 26561 + }, + { + "epoch": 0.8140860610518573, + "grad_norm": 1.7748993378075912, + "learning_rate": 8.793526584370493e-07, + "loss": 0.5636, + "step": 26562 + }, + { + "epoch": 0.8141167095745985, + "grad_norm": 1.9455530880047245, + "learning_rate": 8.790715635876667e-07, + "loss": 0.6285, + "step": 26563 + }, + { + "epoch": 0.8141473580973397, + "grad_norm": 
0.788117360021848, + "learning_rate": 8.78790509342845e-07, + "loss": 0.3894, + "step": 26564 + }, + { + "epoch": 0.8141780066200809, + "grad_norm": 1.8511633392698963, + "learning_rate": 8.785094957053552e-07, + "loss": 0.5267, + "step": 26565 + }, + { + "epoch": 0.8142086551428221, + "grad_norm": 2.2100913535197653, + "learning_rate": 8.782285226779669e-07, + "loss": 0.6407, + "step": 26566 + }, + { + "epoch": 0.8142393036655633, + "grad_norm": 1.8728733784292284, + "learning_rate": 8.779475902634466e-07, + "loss": 0.6313, + "step": 26567 + }, + { + "epoch": 0.8142699521883046, + "grad_norm": 0.8047512057743897, + "learning_rate": 8.776666984645632e-07, + "loss": 0.396, + "step": 26568 + }, + { + "epoch": 0.8143006007110457, + "grad_norm": 1.8901396309044531, + "learning_rate": 8.773858472840857e-07, + "loss": 0.5749, + "step": 26569 + }, + { + "epoch": 0.814331249233787, + "grad_norm": 1.8810082183900751, + "learning_rate": 8.771050367247791e-07, + "loss": 0.5847, + "step": 26570 + }, + { + "epoch": 0.8143618977565281, + "grad_norm": 2.014701011992346, + "learning_rate": 8.768242667894112e-07, + "loss": 0.5286, + "step": 26571 + }, + { + "epoch": 0.8143925462792694, + "grad_norm": 2.0900621020310117, + "learning_rate": 8.765435374807501e-07, + "loss": 0.6418, + "step": 26572 + }, + { + "epoch": 0.8144231948020105, + "grad_norm": 0.7922627472413012, + "learning_rate": 8.762628488015596e-07, + "loss": 0.4351, + "step": 26573 + }, + { + "epoch": 0.8144538433247518, + "grad_norm": 1.9042522838293217, + "learning_rate": 8.759822007546076e-07, + "loss": 0.5351, + "step": 26574 + }, + { + "epoch": 0.8144844918474929, + "grad_norm": 1.876603024364398, + "learning_rate": 8.757015933426566e-07, + "loss": 0.5719, + "step": 26575 + }, + { + "epoch": 0.8145151403702342, + "grad_norm": 0.7918835757661257, + "learning_rate": 8.754210265684732e-07, + "loss": 0.3851, + "step": 26576 + }, + { + "epoch": 0.8145457888929754, + "grad_norm": 2.013969349363403, + "learning_rate": 8.751405004348229e-07, + "loss": 0.6073, + "step": 26577 + }, + { + "epoch": 0.8145764374157166, + "grad_norm": 1.9846279755358707, + "learning_rate": 8.748600149444674e-07, + "loss": 0.4878, + "step": 26578 + }, + { + "epoch": 0.8146070859384578, + "grad_norm": 1.8747234728764777, + "learning_rate": 8.745795701001719e-07, + "loss": 0.5693, + "step": 26579 + }, + { + "epoch": 0.814637734461199, + "grad_norm": 2.0091566519914896, + "learning_rate": 8.742991659047006e-07, + "loss": 0.5035, + "step": 26580 + }, + { + "epoch": 0.8146683829839402, + "grad_norm": 1.822786573103614, + "learning_rate": 8.740188023608137e-07, + "loss": 0.5775, + "step": 26581 + }, + { + "epoch": 0.8146990315066813, + "grad_norm": 0.8121495350380058, + "learning_rate": 8.737384794712755e-07, + "loss": 0.3871, + "step": 26582 + }, + { + "epoch": 0.8147296800294226, + "grad_norm": 0.8359572586960695, + "learning_rate": 8.73458197238849e-07, + "loss": 0.3952, + "step": 26583 + }, + { + "epoch": 0.8147603285521637, + "grad_norm": 1.7222873980946396, + "learning_rate": 8.731779556662934e-07, + "loss": 0.6577, + "step": 26584 + }, + { + "epoch": 0.814790977074905, + "grad_norm": 1.7074648502885919, + "learning_rate": 8.728977547563727e-07, + "loss": 0.5061, + "step": 26585 + }, + { + "epoch": 0.8148216255976461, + "grad_norm": 0.7971964649849852, + "learning_rate": 8.726175945118449e-07, + "loss": 0.3974, + "step": 26586 + }, + { + "epoch": 0.8148522741203874, + "grad_norm": 1.7822876561790397, + "learning_rate": 8.723374749354719e-07, + "loss": 0.5238, + "step": 
26587 + }, + { + "epoch": 0.8148829226431286, + "grad_norm": 1.7064816305601227, + "learning_rate": 8.720573960300155e-07, + "loss": 0.4786, + "step": 26588 + }, + { + "epoch": 0.8149135711658698, + "grad_norm": 1.941494606852312, + "learning_rate": 8.717773577982325e-07, + "loss": 0.5956, + "step": 26589 + }, + { + "epoch": 0.814944219688611, + "grad_norm": 1.8312682416315127, + "learning_rate": 8.714973602428828e-07, + "loss": 0.5821, + "step": 26590 + }, + { + "epoch": 0.8149748682113522, + "grad_norm": 1.8412463913811572, + "learning_rate": 8.712174033667281e-07, + "loss": 0.5816, + "step": 26591 + }, + { + "epoch": 0.8150055167340934, + "grad_norm": 0.7922266264285495, + "learning_rate": 8.70937487172523e-07, + "loss": 0.3775, + "step": 26592 + }, + { + "epoch": 0.8150361652568346, + "grad_norm": 1.5745773413136328, + "learning_rate": 8.706576116630283e-07, + "loss": 0.5056, + "step": 26593 + }, + { + "epoch": 0.8150668137795758, + "grad_norm": 1.8304334047044508, + "learning_rate": 8.703777768409999e-07, + "loss": 0.5167, + "step": 26594 + }, + { + "epoch": 0.815097462302317, + "grad_norm": 1.8277038579114735, + "learning_rate": 8.700979827091954e-07, + "loss": 0.6135, + "step": 26595 + }, + { + "epoch": 0.8151281108250582, + "grad_norm": 1.7364009354437542, + "learning_rate": 8.698182292703738e-07, + "loss": 0.4876, + "step": 26596 + }, + { + "epoch": 0.8151587593477995, + "grad_norm": 1.67257208046431, + "learning_rate": 8.695385165272884e-07, + "loss": 0.5798, + "step": 26597 + }, + { + "epoch": 0.8151894078705406, + "grad_norm": 2.050487891744971, + "learning_rate": 8.692588444826972e-07, + "loss": 0.6631, + "step": 26598 + }, + { + "epoch": 0.8152200563932819, + "grad_norm": 0.7984244247163753, + "learning_rate": 8.689792131393566e-07, + "loss": 0.4008, + "step": 26599 + }, + { + "epoch": 0.815250704916023, + "grad_norm": 1.9275070814213564, + "learning_rate": 8.686996225000194e-07, + "loss": 0.5586, + "step": 26600 + }, + { + "epoch": 0.8152813534387643, + "grad_norm": 1.8869470919841422, + "learning_rate": 8.684200725674419e-07, + "loss": 0.633, + "step": 26601 + }, + { + "epoch": 0.8153120019615054, + "grad_norm": 1.6788378059254434, + "learning_rate": 8.681405633443795e-07, + "loss": 0.4888, + "step": 26602 + }, + { + "epoch": 0.8153426504842467, + "grad_norm": 1.9630216509906946, + "learning_rate": 8.678610948335847e-07, + "loss": 0.6361, + "step": 26603 + }, + { + "epoch": 0.8153732990069879, + "grad_norm": 1.8686909361042208, + "learning_rate": 8.675816670378123e-07, + "loss": 0.6086, + "step": 26604 + }, + { + "epoch": 0.8154039475297291, + "grad_norm": 2.0199289426587868, + "learning_rate": 8.67302279959813e-07, + "loss": 0.5722, + "step": 26605 + }, + { + "epoch": 0.8154345960524703, + "grad_norm": 1.9451738034735884, + "learning_rate": 8.670229336023445e-07, + "loss": 0.5669, + "step": 26606 + }, + { + "epoch": 0.8154652445752115, + "grad_norm": 1.6622381675321631, + "learning_rate": 8.667436279681563e-07, + "loss": 0.5968, + "step": 26607 + }, + { + "epoch": 0.8154958930979527, + "grad_norm": 2.3529656718603102, + "learning_rate": 8.664643630599989e-07, + "loss": 0.7075, + "step": 26608 + }, + { + "epoch": 0.8155265416206939, + "grad_norm": 0.7641437074932086, + "learning_rate": 8.661851388806264e-07, + "loss": 0.38, + "step": 26609 + }, + { + "epoch": 0.8155571901434351, + "grad_norm": 1.7458652455094983, + "learning_rate": 8.659059554327904e-07, + "loss": 0.5819, + "step": 26610 + }, + { + "epoch": 0.8155878386661763, + "grad_norm": 1.9821641375884884, + 
"learning_rate": 8.656268127192397e-07, + "loss": 0.6608, + "step": 26611 + }, + { + "epoch": 0.8156184871889175, + "grad_norm": 1.944149039888491, + "learning_rate": 8.653477107427255e-07, + "loss": 0.5729, + "step": 26612 + }, + { + "epoch": 0.8156491357116586, + "grad_norm": 2.0973152581086447, + "learning_rate": 8.650686495059984e-07, + "loss": 0.687, + "step": 26613 + }, + { + "epoch": 0.8156797842343999, + "grad_norm": 1.8928654912453768, + "learning_rate": 8.64789629011809e-07, + "loss": 0.5201, + "step": 26614 + }, + { + "epoch": 0.8157104327571411, + "grad_norm": 1.7302370346571514, + "learning_rate": 8.645106492629057e-07, + "loss": 0.4959, + "step": 26615 + }, + { + "epoch": 0.8157410812798823, + "grad_norm": 1.8568172102823648, + "learning_rate": 8.642317102620346e-07, + "loss": 0.566, + "step": 26616 + }, + { + "epoch": 0.8157717298026235, + "grad_norm": 1.7197484682364903, + "learning_rate": 8.639528120119489e-07, + "loss": 0.5164, + "step": 26617 + }, + { + "epoch": 0.8158023783253647, + "grad_norm": 1.923656609040762, + "learning_rate": 8.636739545153944e-07, + "loss": 0.5534, + "step": 26618 + }, + { + "epoch": 0.8158330268481059, + "grad_norm": 0.8046668587131148, + "learning_rate": 8.633951377751176e-07, + "loss": 0.4142, + "step": 26619 + }, + { + "epoch": 0.8158636753708471, + "grad_norm": 1.8072702172204407, + "learning_rate": 8.631163617938665e-07, + "loss": 0.5176, + "step": 26620 + }, + { + "epoch": 0.8158943238935883, + "grad_norm": 1.8511846321994476, + "learning_rate": 8.628376265743898e-07, + "loss": 0.5316, + "step": 26621 + }, + { + "epoch": 0.8159249724163296, + "grad_norm": 1.9324340531624886, + "learning_rate": 8.625589321194317e-07, + "loss": 0.5578, + "step": 26622 + }, + { + "epoch": 0.8159556209390707, + "grad_norm": 1.863264324521101, + "learning_rate": 8.622802784317385e-07, + "loss": 0.5682, + "step": 26623 + }, + { + "epoch": 0.815986269461812, + "grad_norm": 2.090704490250569, + "learning_rate": 8.620016655140567e-07, + "loss": 0.6042, + "step": 26624 + }, + { + "epoch": 0.8160169179845531, + "grad_norm": 0.7990383836490678, + "learning_rate": 8.617230933691329e-07, + "loss": 0.3814, + "step": 26625 + }, + { + "epoch": 0.8160475665072944, + "grad_norm": 1.8344705481155064, + "learning_rate": 8.614445619997097e-07, + "loss": 0.6456, + "step": 26626 + }, + { + "epoch": 0.8160782150300355, + "grad_norm": 2.1696660357916357, + "learning_rate": 8.611660714085296e-07, + "loss": 0.5484, + "step": 26627 + }, + { + "epoch": 0.8161088635527768, + "grad_norm": 1.9361690967142764, + "learning_rate": 8.608876215983419e-07, + "loss": 0.6616, + "step": 26628 + }, + { + "epoch": 0.8161395120755179, + "grad_norm": 1.9203518351528803, + "learning_rate": 8.606092125718873e-07, + "loss": 0.5492, + "step": 26629 + }, + { + "epoch": 0.8161701605982592, + "grad_norm": 1.7897678742118088, + "learning_rate": 8.603308443319081e-07, + "loss": 0.6627, + "step": 26630 + }, + { + "epoch": 0.8162008091210003, + "grad_norm": 1.9088966220239427, + "learning_rate": 8.600525168811485e-07, + "loss": 0.5384, + "step": 26631 + }, + { + "epoch": 0.8162314576437416, + "grad_norm": 0.7577930408475277, + "learning_rate": 8.597742302223505e-07, + "loss": 0.373, + "step": 26632 + }, + { + "epoch": 0.8162621061664828, + "grad_norm": 1.920432153207324, + "learning_rate": 8.594959843582573e-07, + "loss": 0.5383, + "step": 26633 + }, + { + "epoch": 0.816292754689224, + "grad_norm": 2.10629663419144, + "learning_rate": 8.592177792916084e-07, + "loss": 0.5705, + "step": 26634 + }, + { + "epoch": 
0.8163234032119652, + "grad_norm": 1.7481022037815672, + "learning_rate": 8.589396150251467e-07, + "loss": 0.5141, + "step": 26635 + }, + { + "epoch": 0.8163540517347064, + "grad_norm": 1.8495641409703938, + "learning_rate": 8.586614915616131e-07, + "loss": 0.6509, + "step": 26636 + }, + { + "epoch": 0.8163847002574476, + "grad_norm": 1.6991360488608092, + "learning_rate": 8.583834089037479e-07, + "loss": 0.5851, + "step": 26637 + }, + { + "epoch": 0.8164153487801888, + "grad_norm": 1.7540403766409096, + "learning_rate": 8.581053670542894e-07, + "loss": 0.466, + "step": 26638 + }, + { + "epoch": 0.81644599730293, + "grad_norm": 1.9453368918735787, + "learning_rate": 8.57827366015978e-07, + "loss": 0.6104, + "step": 26639 + }, + { + "epoch": 0.8164766458256713, + "grad_norm": 1.8651487391322596, + "learning_rate": 8.57549405791554e-07, + "loss": 0.5383, + "step": 26640 + }, + { + "epoch": 0.8165072943484124, + "grad_norm": 2.026769289316973, + "learning_rate": 8.572714863837567e-07, + "loss": 0.5228, + "step": 26641 + }, + { + "epoch": 0.8165379428711537, + "grad_norm": 1.9965606082924567, + "learning_rate": 8.569936077953217e-07, + "loss": 0.5443, + "step": 26642 + }, + { + "epoch": 0.8165685913938948, + "grad_norm": 2.024475068501668, + "learning_rate": 8.567157700289891e-07, + "loss": 0.6675, + "step": 26643 + }, + { + "epoch": 0.816599239916636, + "grad_norm": 1.7799071731769691, + "learning_rate": 8.564379730874972e-07, + "loss": 0.6198, + "step": 26644 + }, + { + "epoch": 0.8166298884393772, + "grad_norm": 1.7646405660472675, + "learning_rate": 8.561602169735822e-07, + "loss": 0.6084, + "step": 26645 + }, + { + "epoch": 0.8166605369621184, + "grad_norm": 0.7864286408960919, + "learning_rate": 8.558825016899785e-07, + "loss": 0.3979, + "step": 26646 + }, + { + "epoch": 0.8166911854848596, + "grad_norm": 1.7797476236433116, + "learning_rate": 8.556048272394274e-07, + "loss": 0.5778, + "step": 26647 + }, + { + "epoch": 0.8167218340076008, + "grad_norm": 1.7682671563652046, + "learning_rate": 8.553271936246621e-07, + "loss": 0.5863, + "step": 26648 + }, + { + "epoch": 0.816752482530342, + "grad_norm": 1.732314913716593, + "learning_rate": 8.550496008484171e-07, + "loss": 0.5017, + "step": 26649 + }, + { + "epoch": 0.8167831310530832, + "grad_norm": 1.594650111103803, + "learning_rate": 8.547720489134287e-07, + "loss": 0.4448, + "step": 26650 + }, + { + "epoch": 0.8168137795758245, + "grad_norm": 1.9963256523019524, + "learning_rate": 8.544945378224323e-07, + "loss": 0.6291, + "step": 26651 + }, + { + "epoch": 0.8168444280985656, + "grad_norm": 1.9695106106411933, + "learning_rate": 8.542170675781631e-07, + "loss": 0.5284, + "step": 26652 + }, + { + "epoch": 0.8168750766213069, + "grad_norm": 0.7698638940215241, + "learning_rate": 8.539396381833526e-07, + "loss": 0.3982, + "step": 26653 + }, + { + "epoch": 0.816905725144048, + "grad_norm": 0.8059463933739092, + "learning_rate": 8.536622496407354e-07, + "loss": 0.4137, + "step": 26654 + }, + { + "epoch": 0.8169363736667893, + "grad_norm": 0.8000233727304206, + "learning_rate": 8.533849019530466e-07, + "loss": 0.4081, + "step": 26655 + }, + { + "epoch": 0.8169670221895304, + "grad_norm": 1.803550195644055, + "learning_rate": 8.531075951230172e-07, + "loss": 0.6483, + "step": 26656 + }, + { + "epoch": 0.8169976707122717, + "grad_norm": 1.8154228065192723, + "learning_rate": 8.528303291533774e-07, + "loss": 0.5851, + "step": 26657 + }, + { + "epoch": 0.8170283192350128, + "grad_norm": 0.7866460465819936, + "learning_rate": 
8.525531040468632e-07, + "loss": 0.3729, + "step": 26658 + }, + { + "epoch": 0.8170589677577541, + "grad_norm": 1.8631094387285008, + "learning_rate": 8.522759198062036e-07, + "loss": 0.6368, + "step": 26659 + }, + { + "epoch": 0.8170896162804953, + "grad_norm": 1.7095657945432623, + "learning_rate": 8.51998776434132e-07, + "loss": 0.5808, + "step": 26660 + }, + { + "epoch": 0.8171202648032365, + "grad_norm": 1.9008967990660826, + "learning_rate": 8.517216739333767e-07, + "loss": 0.5446, + "step": 26661 + }, + { + "epoch": 0.8171509133259777, + "grad_norm": 0.8239723719002295, + "learning_rate": 8.514446123066689e-07, + "loss": 0.4096, + "step": 26662 + }, + { + "epoch": 0.8171815618487189, + "grad_norm": 1.8474122806122573, + "learning_rate": 8.511675915567402e-07, + "loss": 0.5821, + "step": 26663 + }, + { + "epoch": 0.8172122103714601, + "grad_norm": 1.729175689928521, + "learning_rate": 8.508906116863169e-07, + "loss": 0.5096, + "step": 26664 + }, + { + "epoch": 0.8172428588942013, + "grad_norm": 1.9755580991163706, + "learning_rate": 8.506136726981307e-07, + "loss": 0.5556, + "step": 26665 + }, + { + "epoch": 0.8172735074169425, + "grad_norm": 2.48976422910093, + "learning_rate": 8.503367745949103e-07, + "loss": 0.5032, + "step": 26666 + }, + { + "epoch": 0.8173041559396838, + "grad_norm": 1.6897206910657316, + "learning_rate": 8.500599173793828e-07, + "loss": 0.5079, + "step": 26667 + }, + { + "epoch": 0.8173348044624249, + "grad_norm": 1.8354920579038578, + "learning_rate": 8.497831010542762e-07, + "loss": 0.6001, + "step": 26668 + }, + { + "epoch": 0.8173654529851662, + "grad_norm": 1.7468793942818994, + "learning_rate": 8.495063256223201e-07, + "loss": 0.5606, + "step": 26669 + }, + { + "epoch": 0.8173961015079073, + "grad_norm": 1.7814642704669408, + "learning_rate": 8.492295910862386e-07, + "loss": 0.5737, + "step": 26670 + }, + { + "epoch": 0.8174267500306486, + "grad_norm": 1.7974264481948452, + "learning_rate": 8.489528974487615e-07, + "loss": 0.5146, + "step": 26671 + }, + { + "epoch": 0.8174573985533897, + "grad_norm": 1.8999838357943553, + "learning_rate": 8.486762447126123e-07, + "loss": 0.6354, + "step": 26672 + }, + { + "epoch": 0.817488047076131, + "grad_norm": 1.7452930221426, + "learning_rate": 8.483996328805183e-07, + "loss": 0.5527, + "step": 26673 + }, + { + "epoch": 0.8175186955988721, + "grad_norm": 0.7810201966017759, + "learning_rate": 8.481230619552061e-07, + "loss": 0.4192, + "step": 26674 + }, + { + "epoch": 0.8175493441216133, + "grad_norm": 1.9670617986199006, + "learning_rate": 8.478465319393986e-07, + "loss": 0.5054, + "step": 26675 + }, + { + "epoch": 0.8175799926443545, + "grad_norm": 1.7662240655636576, + "learning_rate": 8.475700428358213e-07, + "loss": 0.5342, + "step": 26676 + }, + { + "epoch": 0.8176106411670957, + "grad_norm": 1.938701917891031, + "learning_rate": 8.472935946472e-07, + "loss": 0.6362, + "step": 26677 + }, + { + "epoch": 0.817641289689837, + "grad_norm": 1.9789453464166797, + "learning_rate": 8.470171873762561e-07, + "loss": 0.6451, + "step": 26678 + }, + { + "epoch": 0.8176719382125781, + "grad_norm": 1.7160970031792764, + "learning_rate": 8.46740821025715e-07, + "loss": 0.4999, + "step": 26679 + }, + { + "epoch": 0.8177025867353194, + "grad_norm": 1.9131317058155275, + "learning_rate": 8.464644955983004e-07, + "loss": 0.627, + "step": 26680 + }, + { + "epoch": 0.8177332352580605, + "grad_norm": 1.952675342058686, + "learning_rate": 8.461882110967323e-07, + "loss": 0.5014, + "step": 26681 + }, + { + "epoch": 0.8177638837808018, 
+ "grad_norm": 1.8251944206296682, + "learning_rate": 8.459119675237354e-07, + "loss": 0.5122, + "step": 26682 + }, + { + "epoch": 0.8177945323035429, + "grad_norm": 1.7683258581692498, + "learning_rate": 8.456357648820302e-07, + "loss": 0.551, + "step": 26683 + }, + { + "epoch": 0.8178251808262842, + "grad_norm": 2.4453897368572504, + "learning_rate": 8.453596031743388e-07, + "loss": 0.5576, + "step": 26684 + }, + { + "epoch": 0.8178558293490253, + "grad_norm": 2.029143652898224, + "learning_rate": 8.450834824033832e-07, + "loss": 0.6573, + "step": 26685 + }, + { + "epoch": 0.8178864778717666, + "grad_norm": 1.7798335497523834, + "learning_rate": 8.448074025718816e-07, + "loss": 0.6214, + "step": 26686 + }, + { + "epoch": 0.8179171263945078, + "grad_norm": 0.8075122601553669, + "learning_rate": 8.445313636825564e-07, + "loss": 0.3895, + "step": 26687 + }, + { + "epoch": 0.817947774917249, + "grad_norm": 0.8325934420446706, + "learning_rate": 8.442553657381275e-07, + "loss": 0.4056, + "step": 26688 + }, + { + "epoch": 0.8179784234399902, + "grad_norm": 1.8422369982212616, + "learning_rate": 8.439794087413133e-07, + "loss": 0.5249, + "step": 26689 + }, + { + "epoch": 0.8180090719627314, + "grad_norm": 1.9145669920941515, + "learning_rate": 8.43703492694834e-07, + "loss": 0.5214, + "step": 26690 + }, + { + "epoch": 0.8180397204854726, + "grad_norm": 2.062975903693594, + "learning_rate": 8.434276176014067e-07, + "loss": 0.6706, + "step": 26691 + }, + { + "epoch": 0.8180703690082138, + "grad_norm": 1.9098267341110131, + "learning_rate": 8.431517834637504e-07, + "loss": 0.583, + "step": 26692 + }, + { + "epoch": 0.818101017530955, + "grad_norm": 2.1124820860539124, + "learning_rate": 8.428759902845846e-07, + "loss": 0.5926, + "step": 26693 + }, + { + "epoch": 0.8181316660536962, + "grad_norm": 1.9535714290182369, + "learning_rate": 8.426002380666237e-07, + "loss": 0.5314, + "step": 26694 + }, + { + "epoch": 0.8181623145764374, + "grad_norm": 2.0151375274757135, + "learning_rate": 8.423245268125862e-07, + "loss": 0.6599, + "step": 26695 + }, + { + "epoch": 0.8181929630991787, + "grad_norm": 1.7469554831676775, + "learning_rate": 8.420488565251911e-07, + "loss": 0.5964, + "step": 26696 + }, + { + "epoch": 0.8182236116219198, + "grad_norm": 1.6371175260174957, + "learning_rate": 8.417732272071505e-07, + "loss": 0.5191, + "step": 26697 + }, + { + "epoch": 0.8182542601446611, + "grad_norm": 1.8723996464956039, + "learning_rate": 8.414976388611823e-07, + "loss": 0.6378, + "step": 26698 + }, + { + "epoch": 0.8182849086674022, + "grad_norm": 1.6340591763630972, + "learning_rate": 8.412220914900032e-07, + "loss": 0.5813, + "step": 26699 + }, + { + "epoch": 0.8183155571901435, + "grad_norm": 1.8258177279425782, + "learning_rate": 8.409465850963255e-07, + "loss": 0.5834, + "step": 26700 + }, + { + "epoch": 0.8183462057128846, + "grad_norm": 1.8409364102723285, + "learning_rate": 8.406711196828671e-07, + "loss": 0.5684, + "step": 26701 + }, + { + "epoch": 0.8183768542356259, + "grad_norm": 1.750931252659344, + "learning_rate": 8.403956952523384e-07, + "loss": 0.5197, + "step": 26702 + }, + { + "epoch": 0.818407502758367, + "grad_norm": 1.8245439199482394, + "learning_rate": 8.401203118074558e-07, + "loss": 0.567, + "step": 26703 + }, + { + "epoch": 0.8184381512811083, + "grad_norm": 2.268865105077229, + "learning_rate": 8.398449693509325e-07, + "loss": 0.5937, + "step": 26704 + }, + { + "epoch": 0.8184687998038495, + "grad_norm": 1.7978044936431377, + "learning_rate": 8.395696678854809e-07, + "loss": 
0.4472, + "step": 26705 + }, + { + "epoch": 0.8184994483265906, + "grad_norm": 1.6562823344586788, + "learning_rate": 8.392944074138132e-07, + "loss": 0.5303, + "step": 26706 + }, + { + "epoch": 0.8185300968493319, + "grad_norm": 2.05955930060119, + "learning_rate": 8.390191879386439e-07, + "loss": 0.546, + "step": 26707 + }, + { + "epoch": 0.818560745372073, + "grad_norm": 2.153264393844235, + "learning_rate": 8.387440094626815e-07, + "loss": 0.592, + "step": 26708 + }, + { + "epoch": 0.8185913938948143, + "grad_norm": 1.6270573836816247, + "learning_rate": 8.384688719886391e-07, + "loss": 0.5547, + "step": 26709 + }, + { + "epoch": 0.8186220424175554, + "grad_norm": 1.89767298617084, + "learning_rate": 8.381937755192293e-07, + "loss": 0.5303, + "step": 26710 + }, + { + "epoch": 0.8186526909402967, + "grad_norm": 1.8559305345816641, + "learning_rate": 8.379187200571598e-07, + "loss": 0.5552, + "step": 26711 + }, + { + "epoch": 0.8186833394630378, + "grad_norm": 1.7169568160325308, + "learning_rate": 8.376437056051429e-07, + "loss": 0.4882, + "step": 26712 + }, + { + "epoch": 0.8187139879857791, + "grad_norm": 0.8018944313732368, + "learning_rate": 8.373687321658853e-07, + "loss": 0.3815, + "step": 26713 + }, + { + "epoch": 0.8187446365085203, + "grad_norm": 2.089152009974782, + "learning_rate": 8.370937997421014e-07, + "loss": 0.6183, + "step": 26714 + }, + { + "epoch": 0.8187752850312615, + "grad_norm": 0.7730109224197056, + "learning_rate": 8.368189083364969e-07, + "loss": 0.3798, + "step": 26715 + }, + { + "epoch": 0.8188059335540027, + "grad_norm": 1.9844247261109582, + "learning_rate": 8.365440579517803e-07, + "loss": 0.6528, + "step": 26716 + }, + { + "epoch": 0.8188365820767439, + "grad_norm": 0.7679134142381215, + "learning_rate": 8.362692485906599e-07, + "loss": 0.4149, + "step": 26717 + }, + { + "epoch": 0.8188672305994851, + "grad_norm": 1.828566717386852, + "learning_rate": 8.35994480255845e-07, + "loss": 0.5547, + "step": 26718 + }, + { + "epoch": 0.8188978791222263, + "grad_norm": 0.8344417066549572, + "learning_rate": 8.35719752950041e-07, + "loss": 0.3973, + "step": 26719 + }, + { + "epoch": 0.8189285276449675, + "grad_norm": 1.9222084418996386, + "learning_rate": 8.354450666759556e-07, + "loss": 0.5963, + "step": 26720 + }, + { + "epoch": 0.8189591761677087, + "grad_norm": 1.773752744990228, + "learning_rate": 8.351704214362955e-07, + "loss": 0.5894, + "step": 26721 + }, + { + "epoch": 0.8189898246904499, + "grad_norm": 1.9355170938304906, + "learning_rate": 8.348958172337684e-07, + "loss": 0.6201, + "step": 26722 + }, + { + "epoch": 0.8190204732131912, + "grad_norm": 0.7671000962123068, + "learning_rate": 8.34621254071078e-07, + "loss": 0.4033, + "step": 26723 + }, + { + "epoch": 0.8190511217359323, + "grad_norm": 1.9052136138896487, + "learning_rate": 8.34346731950928e-07, + "loss": 0.5108, + "step": 26724 + }, + { + "epoch": 0.8190817702586736, + "grad_norm": 0.8341539326597527, + "learning_rate": 8.34072250876028e-07, + "loss": 0.4029, + "step": 26725 + }, + { + "epoch": 0.8191124187814147, + "grad_norm": 1.7863861476868006, + "learning_rate": 8.337978108490797e-07, + "loss": 0.544, + "step": 26726 + }, + { + "epoch": 0.819143067304156, + "grad_norm": 1.974640815368191, + "learning_rate": 8.335234118727864e-07, + "loss": 0.5169, + "step": 26727 + }, + { + "epoch": 0.8191737158268971, + "grad_norm": 0.7596912932758496, + "learning_rate": 8.332490539498533e-07, + "loss": 0.3977, + "step": 26728 + }, + { + "epoch": 0.8192043643496384, + "grad_norm": 1.7925429724644808, + 
"learning_rate": 8.329747370829844e-07, + "loss": 0.5605, + "step": 26729 + }, + { + "epoch": 0.8192350128723795, + "grad_norm": 1.6893538714449776, + "learning_rate": 8.327004612748802e-07, + "loss": 0.5544, + "step": 26730 + }, + { + "epoch": 0.8192656613951208, + "grad_norm": 2.1201574450618543, + "learning_rate": 8.324262265282446e-07, + "loss": 0.5366, + "step": 26731 + }, + { + "epoch": 0.819296309917862, + "grad_norm": 1.7919578836158088, + "learning_rate": 8.3215203284578e-07, + "loss": 0.5641, + "step": 26732 + }, + { + "epoch": 0.8193269584406032, + "grad_norm": 1.8790018010534575, + "learning_rate": 8.318778802301891e-07, + "loss": 0.5851, + "step": 26733 + }, + { + "epoch": 0.8193576069633444, + "grad_norm": 1.9965441661197467, + "learning_rate": 8.316037686841716e-07, + "loss": 0.5151, + "step": 26734 + }, + { + "epoch": 0.8193882554860856, + "grad_norm": 1.9438077464410968, + "learning_rate": 8.313296982104274e-07, + "loss": 0.6057, + "step": 26735 + }, + { + "epoch": 0.8194189040088268, + "grad_norm": 0.7657630571831957, + "learning_rate": 8.310556688116583e-07, + "loss": 0.3844, + "step": 26736 + }, + { + "epoch": 0.8194495525315679, + "grad_norm": 0.8027519143686455, + "learning_rate": 8.307816804905661e-07, + "loss": 0.4048, + "step": 26737 + }, + { + "epoch": 0.8194802010543092, + "grad_norm": 1.8414485119286692, + "learning_rate": 8.305077332498468e-07, + "loss": 0.5109, + "step": 26738 + }, + { + "epoch": 0.8195108495770503, + "grad_norm": 1.7564157441024273, + "learning_rate": 8.302338270922022e-07, + "loss": 0.5887, + "step": 26739 + }, + { + "epoch": 0.8195414980997916, + "grad_norm": 1.8872685056940586, + "learning_rate": 8.299599620203303e-07, + "loss": 0.5584, + "step": 26740 + }, + { + "epoch": 0.8195721466225327, + "grad_norm": 1.7068634548971988, + "learning_rate": 8.296861380369308e-07, + "loss": 0.629, + "step": 26741 + }, + { + "epoch": 0.819602795145274, + "grad_norm": 1.745252307458341, + "learning_rate": 8.294123551447014e-07, + "loss": 0.5583, + "step": 26742 + }, + { + "epoch": 0.8196334436680152, + "grad_norm": 1.9806372698750172, + "learning_rate": 8.291386133463363e-07, + "loss": 0.5324, + "step": 26743 + }, + { + "epoch": 0.8196640921907564, + "grad_norm": 2.2201762869095956, + "learning_rate": 8.288649126445381e-07, + "loss": 0.5366, + "step": 26744 + }, + { + "epoch": 0.8196947407134976, + "grad_norm": 1.8485362070916371, + "learning_rate": 8.285912530420015e-07, + "loss": 0.6075, + "step": 26745 + }, + { + "epoch": 0.8197253892362388, + "grad_norm": 1.7442101325196053, + "learning_rate": 8.283176345414212e-07, + "loss": 0.5822, + "step": 26746 + }, + { + "epoch": 0.81975603775898, + "grad_norm": 0.8003681330624549, + "learning_rate": 8.280440571454945e-07, + "loss": 0.4002, + "step": 26747 + }, + { + "epoch": 0.8197866862817212, + "grad_norm": 1.7958579448806726, + "learning_rate": 8.277705208569181e-07, + "loss": 0.4819, + "step": 26748 + }, + { + "epoch": 0.8198173348044624, + "grad_norm": 0.7820914256962461, + "learning_rate": 8.274970256783854e-07, + "loss": 0.3816, + "step": 26749 + }, + { + "epoch": 0.8198479833272037, + "grad_norm": 1.7472986377833533, + "learning_rate": 8.272235716125921e-07, + "loss": 0.5517, + "step": 26750 + }, + { + "epoch": 0.8198786318499448, + "grad_norm": 1.9249019963602192, + "learning_rate": 8.269501586622325e-07, + "loss": 0.5807, + "step": 26751 + }, + { + "epoch": 0.8199092803726861, + "grad_norm": 1.5975701429215652, + "learning_rate": 8.266767868300019e-07, + "loss": 0.5475, + "step": 26752 + }, + { + 
"epoch": 0.8199399288954272, + "grad_norm": 3.1623165677981055, + "learning_rate": 8.264034561185924e-07, + "loss": 0.6022, + "step": 26753 + }, + { + "epoch": 0.8199705774181685, + "grad_norm": 2.0372622501229833, + "learning_rate": 8.261301665306959e-07, + "loss": 0.6057, + "step": 26754 + }, + { + "epoch": 0.8200012259409096, + "grad_norm": 1.995940963202813, + "learning_rate": 8.258569180690085e-07, + "loss": 0.6533, + "step": 26755 + }, + { + "epoch": 0.8200318744636509, + "grad_norm": 1.7347260798433213, + "learning_rate": 8.255837107362213e-07, + "loss": 0.5795, + "step": 26756 + }, + { + "epoch": 0.820062522986392, + "grad_norm": 2.076935387572919, + "learning_rate": 8.253105445350245e-07, + "loss": 0.6366, + "step": 26757 + }, + { + "epoch": 0.8200931715091333, + "grad_norm": 2.0170889073375244, + "learning_rate": 8.25037419468111e-07, + "loss": 0.5975, + "step": 26758 + }, + { + "epoch": 0.8201238200318745, + "grad_norm": 1.8502941231716001, + "learning_rate": 8.247643355381718e-07, + "loss": 0.5254, + "step": 26759 + }, + { + "epoch": 0.8201544685546157, + "grad_norm": 1.813215384025071, + "learning_rate": 8.244912927478992e-07, + "loss": 0.5777, + "step": 26760 + }, + { + "epoch": 0.8201851170773569, + "grad_norm": 1.7136564109589645, + "learning_rate": 8.242182910999813e-07, + "loss": 0.5465, + "step": 26761 + }, + { + "epoch": 0.8202157656000981, + "grad_norm": 1.9012223188061617, + "learning_rate": 8.239453305971091e-07, + "loss": 0.5245, + "step": 26762 + }, + { + "epoch": 0.8202464141228393, + "grad_norm": 1.7286669800271637, + "learning_rate": 8.236724112419731e-07, + "loss": 0.6687, + "step": 26763 + }, + { + "epoch": 0.8202770626455805, + "grad_norm": 1.9334465600293445, + "learning_rate": 8.233995330372613e-07, + "loss": 0.5784, + "step": 26764 + }, + { + "epoch": 0.8203077111683217, + "grad_norm": 1.7773064095482802, + "learning_rate": 8.231266959856599e-07, + "loss": 0.5611, + "step": 26765 + }, + { + "epoch": 0.820338359691063, + "grad_norm": 1.8601324280244345, + "learning_rate": 8.228539000898627e-07, + "loss": 0.6051, + "step": 26766 + }, + { + "epoch": 0.8203690082138041, + "grad_norm": 2.0411955787733196, + "learning_rate": 8.225811453525534e-07, + "loss": 0.6034, + "step": 26767 + }, + { + "epoch": 0.8203996567365452, + "grad_norm": 1.9459830227237074, + "learning_rate": 8.223084317764219e-07, + "loss": 0.5422, + "step": 26768 + }, + { + "epoch": 0.8204303052592865, + "grad_norm": 1.91878999848975, + "learning_rate": 8.220357593641532e-07, + "loss": 0.6173, + "step": 26769 + }, + { + "epoch": 0.8204609537820277, + "grad_norm": 1.6816162477563732, + "learning_rate": 8.217631281184352e-07, + "loss": 0.6435, + "step": 26770 + }, + { + "epoch": 0.8204916023047689, + "grad_norm": 1.6768262439896517, + "learning_rate": 8.214905380419557e-07, + "loss": 0.5535, + "step": 26771 + }, + { + "epoch": 0.8205222508275101, + "grad_norm": 1.9639468941615037, + "learning_rate": 8.212179891373979e-07, + "loss": 0.5933, + "step": 26772 + }, + { + "epoch": 0.8205528993502513, + "grad_norm": 1.859934969440138, + "learning_rate": 8.20945481407448e-07, + "loss": 0.529, + "step": 26773 + }, + { + "epoch": 0.8205835478729925, + "grad_norm": 1.903699582014121, + "learning_rate": 8.206730148547926e-07, + "loss": 0.5985, + "step": 26774 + }, + { + "epoch": 0.8206141963957337, + "grad_norm": 1.800556521803119, + "learning_rate": 8.204005894821154e-07, + "loss": 0.4785, + "step": 26775 + }, + { + "epoch": 0.8206448449184749, + "grad_norm": 2.1313628498845874, + "learning_rate": 
8.201282052920984e-07, + "loss": 0.6696, + "step": 26776 + }, + { + "epoch": 0.8206754934412162, + "grad_norm": 0.8290045596994083, + "learning_rate": 8.198558622874303e-07, + "loss": 0.4157, + "step": 26777 + }, + { + "epoch": 0.8207061419639573, + "grad_norm": 1.847366575062571, + "learning_rate": 8.195835604707903e-07, + "loss": 0.5894, + "step": 26778 + }, + { + "epoch": 0.8207367904866986, + "grad_norm": 1.6897999343036556, + "learning_rate": 8.193112998448644e-07, + "loss": 0.585, + "step": 26779 + }, + { + "epoch": 0.8207674390094397, + "grad_norm": 1.7461062032752317, + "learning_rate": 8.190390804123327e-07, + "loss": 0.5574, + "step": 26780 + }, + { + "epoch": 0.820798087532181, + "grad_norm": 2.276259083310716, + "learning_rate": 8.187669021758788e-07, + "loss": 0.5786, + "step": 26781 + }, + { + "epoch": 0.8208287360549221, + "grad_norm": 1.9400858697419505, + "learning_rate": 8.184947651381853e-07, + "loss": 0.4588, + "step": 26782 + }, + { + "epoch": 0.8208593845776634, + "grad_norm": 1.7769930737481792, + "learning_rate": 8.182226693019318e-07, + "loss": 0.5755, + "step": 26783 + }, + { + "epoch": 0.8208900331004045, + "grad_norm": 1.9033869576730749, + "learning_rate": 8.179506146698008e-07, + "loss": 0.6212, + "step": 26784 + }, + { + "epoch": 0.8209206816231458, + "grad_norm": 1.7922663973889725, + "learning_rate": 8.176786012444727e-07, + "loss": 0.5383, + "step": 26785 + }, + { + "epoch": 0.820951330145887, + "grad_norm": 1.7904265618789983, + "learning_rate": 8.17406629028627e-07, + "loss": 0.5341, + "step": 26786 + }, + { + "epoch": 0.8209819786686282, + "grad_norm": 2.046905229370849, + "learning_rate": 8.171346980249445e-07, + "loss": 0.5795, + "step": 26787 + }, + { + "epoch": 0.8210126271913694, + "grad_norm": 2.0360597665169684, + "learning_rate": 8.168628082361035e-07, + "loss": 0.5629, + "step": 26788 + }, + { + "epoch": 0.8210432757141106, + "grad_norm": 1.738323766581816, + "learning_rate": 8.165909596647831e-07, + "loss": 0.5582, + "step": 26789 + }, + { + "epoch": 0.8210739242368518, + "grad_norm": 2.075429578883557, + "learning_rate": 8.163191523136638e-07, + "loss": 0.5148, + "step": 26790 + }, + { + "epoch": 0.821104572759593, + "grad_norm": 1.925194142522436, + "learning_rate": 8.160473861854206e-07, + "loss": 0.4905, + "step": 26791 + }, + { + "epoch": 0.8211352212823342, + "grad_norm": 1.910007724072935, + "learning_rate": 8.157756612827334e-07, + "loss": 0.5616, + "step": 26792 + }, + { + "epoch": 0.8211658698050754, + "grad_norm": 1.8618612497314289, + "learning_rate": 8.155039776082807e-07, + "loss": 0.5908, + "step": 26793 + }, + { + "epoch": 0.8211965183278166, + "grad_norm": 1.8937879247373468, + "learning_rate": 8.152323351647362e-07, + "loss": 0.5268, + "step": 26794 + }, + { + "epoch": 0.8212271668505579, + "grad_norm": 1.7072357669311216, + "learning_rate": 8.149607339547788e-07, + "loss": 0.4349, + "step": 26795 + }, + { + "epoch": 0.821257815373299, + "grad_norm": 1.9103027206527832, + "learning_rate": 8.146891739810847e-07, + "loss": 0.5469, + "step": 26796 + }, + { + "epoch": 0.8212884638960403, + "grad_norm": 2.1156127326475076, + "learning_rate": 8.144176552463285e-07, + "loss": 0.6219, + "step": 26797 + }, + { + "epoch": 0.8213191124187814, + "grad_norm": 1.8509093641782755, + "learning_rate": 8.141461777531867e-07, + "loss": 0.5274, + "step": 26798 + }, + { + "epoch": 0.8213497609415226, + "grad_norm": 1.8508064547125798, + "learning_rate": 8.138747415043324e-07, + "loss": 0.5176, + "step": 26799 + }, + { + "epoch": 
0.8213804094642638, + "grad_norm": 1.8424731430197818, + "learning_rate": 8.136033465024417e-07, + "loss": 0.5032, + "step": 26800 + }, + { + "epoch": 0.821411057987005, + "grad_norm": 1.8191058203853683, + "learning_rate": 8.133319927501893e-07, + "loss": 0.4764, + "step": 26801 + }, + { + "epoch": 0.8214417065097462, + "grad_norm": 2.0524650757812344, + "learning_rate": 8.130606802502467e-07, + "loss": 0.5061, + "step": 26802 + }, + { + "epoch": 0.8214723550324874, + "grad_norm": 1.7621188108086145, + "learning_rate": 8.127894090052884e-07, + "loss": 0.4932, + "step": 26803 + }, + { + "epoch": 0.8215030035552287, + "grad_norm": 1.6807028974759437, + "learning_rate": 8.125181790179892e-07, + "loss": 0.5417, + "step": 26804 + }, + { + "epoch": 0.8215336520779698, + "grad_norm": 1.806931279554893, + "learning_rate": 8.12246990291018e-07, + "loss": 0.5138, + "step": 26805 + }, + { + "epoch": 0.8215643006007111, + "grad_norm": 1.8083027940169654, + "learning_rate": 8.119758428270491e-07, + "loss": 0.5193, + "step": 26806 + }, + { + "epoch": 0.8215949491234522, + "grad_norm": 1.8291966623445661, + "learning_rate": 8.117047366287545e-07, + "loss": 0.5215, + "step": 26807 + }, + { + "epoch": 0.8216255976461935, + "grad_norm": 1.9236759958911602, + "learning_rate": 8.114336716988041e-07, + "loss": 0.5967, + "step": 26808 + }, + { + "epoch": 0.8216562461689346, + "grad_norm": 1.7765522422723692, + "learning_rate": 8.111626480398705e-07, + "loss": 0.4731, + "step": 26809 + }, + { + "epoch": 0.8216868946916759, + "grad_norm": 0.7864492324755724, + "learning_rate": 8.108916656546218e-07, + "loss": 0.3934, + "step": 26810 + }, + { + "epoch": 0.821717543214417, + "grad_norm": 1.777641493897877, + "learning_rate": 8.106207245457293e-07, + "loss": 0.566, + "step": 26811 + }, + { + "epoch": 0.8217481917371583, + "grad_norm": 0.8535573933538726, + "learning_rate": 8.103498247158636e-07, + "loss": 0.3941, + "step": 26812 + }, + { + "epoch": 0.8217788402598994, + "grad_norm": 2.007155641600989, + "learning_rate": 8.100789661676922e-07, + "loss": 0.5021, + "step": 26813 + }, + { + "epoch": 0.8218094887826407, + "grad_norm": 1.9096743828995661, + "learning_rate": 8.098081489038845e-07, + "loss": 0.6304, + "step": 26814 + }, + { + "epoch": 0.8218401373053819, + "grad_norm": 2.177888734475586, + "learning_rate": 8.095373729271111e-07, + "loss": 0.6201, + "step": 26815 + }, + { + "epoch": 0.8218707858281231, + "grad_norm": 1.9829046906373484, + "learning_rate": 8.09266638240036e-07, + "loss": 0.557, + "step": 26816 + }, + { + "epoch": 0.8219014343508643, + "grad_norm": 1.9452398459392795, + "learning_rate": 8.0899594484533e-07, + "loss": 0.5844, + "step": 26817 + }, + { + "epoch": 0.8219320828736055, + "grad_norm": 1.8992749974354752, + "learning_rate": 8.087252927456596e-07, + "loss": 0.6174, + "step": 26818 + }, + { + "epoch": 0.8219627313963467, + "grad_norm": 2.012008493258843, + "learning_rate": 8.084546819436906e-07, + "loss": 0.5572, + "step": 26819 + }, + { + "epoch": 0.8219933799190879, + "grad_norm": 1.684307622647225, + "learning_rate": 8.081841124420909e-07, + "loss": 0.5786, + "step": 26820 + }, + { + "epoch": 0.8220240284418291, + "grad_norm": 1.84615134762561, + "learning_rate": 8.079135842435232e-07, + "loss": 0.5394, + "step": 26821 + }, + { + "epoch": 0.8220546769645704, + "grad_norm": 1.9742648179559388, + "learning_rate": 8.076430973506583e-07, + "loss": 0.5459, + "step": 26822 + }, + { + "epoch": 0.8220853254873115, + "grad_norm": 1.9540235379100022, + "learning_rate": 8.073726517661584e-07, 
+ "loss": 0.5596, + "step": 26823 + }, + { + "epoch": 0.8221159740100528, + "grad_norm": 1.9442156389628402, + "learning_rate": 8.071022474926876e-07, + "loss": 0.6139, + "step": 26824 + }, + { + "epoch": 0.8221466225327939, + "grad_norm": 0.7849868592408685, + "learning_rate": 8.06831884532911e-07, + "loss": 0.401, + "step": 26825 + }, + { + "epoch": 0.8221772710555352, + "grad_norm": 1.8610562200201561, + "learning_rate": 8.065615628894935e-07, + "loss": 0.6004, + "step": 26826 + }, + { + "epoch": 0.8222079195782763, + "grad_norm": 1.9019300842699387, + "learning_rate": 8.06291282565097e-07, + "loss": 0.6202, + "step": 26827 + }, + { + "epoch": 0.8222385681010176, + "grad_norm": 1.7009406034588883, + "learning_rate": 8.060210435623855e-07, + "loss": 0.6361, + "step": 26828 + }, + { + "epoch": 0.8222692166237587, + "grad_norm": 2.0912822451570983, + "learning_rate": 8.05750845884023e-07, + "loss": 0.5844, + "step": 26829 + }, + { + "epoch": 0.8222998651464999, + "grad_norm": 0.787410672388245, + "learning_rate": 8.054806895326695e-07, + "loss": 0.3857, + "step": 26830 + }, + { + "epoch": 0.8223305136692411, + "grad_norm": 1.917987337946808, + "learning_rate": 8.052105745109889e-07, + "loss": 0.6343, + "step": 26831 + }, + { + "epoch": 0.8223611621919823, + "grad_norm": 2.009646355820297, + "learning_rate": 8.049405008216405e-07, + "loss": 0.6022, + "step": 26832 + }, + { + "epoch": 0.8223918107147236, + "grad_norm": 1.8473222967494418, + "learning_rate": 8.046704684672868e-07, + "loss": 0.5883, + "step": 26833 + }, + { + "epoch": 0.8224224592374647, + "grad_norm": 1.8496943577492957, + "learning_rate": 8.044004774505898e-07, + "loss": 0.5218, + "step": 26834 + }, + { + "epoch": 0.822453107760206, + "grad_norm": 1.8059605162308439, + "learning_rate": 8.041305277742073e-07, + "loss": 0.5155, + "step": 26835 + }, + { + "epoch": 0.8224837562829471, + "grad_norm": 1.7424460185654613, + "learning_rate": 8.038606194408e-07, + "loss": 0.5566, + "step": 26836 + }, + { + "epoch": 0.8225144048056884, + "grad_norm": 0.7974104718209553, + "learning_rate": 8.035907524530289e-07, + "loss": 0.3946, + "step": 26837 + }, + { + "epoch": 0.8225450533284295, + "grad_norm": 2.0306201017831493, + "learning_rate": 8.033209268135506e-07, + "loss": 0.5279, + "step": 26838 + }, + { + "epoch": 0.8225757018511708, + "grad_norm": 1.835403736703498, + "learning_rate": 8.03051142525026e-07, + "loss": 0.5885, + "step": 26839 + }, + { + "epoch": 0.8226063503739119, + "grad_norm": 0.8035346738997002, + "learning_rate": 8.027813995901101e-07, + "loss": 0.3951, + "step": 26840 + }, + { + "epoch": 0.8226369988966532, + "grad_norm": 1.907973160487721, + "learning_rate": 8.025116980114656e-07, + "loss": 0.5931, + "step": 26841 + }, + { + "epoch": 0.8226676474193944, + "grad_norm": 1.8071108629594332, + "learning_rate": 8.022420377917467e-07, + "loss": 0.5573, + "step": 26842 + }, + { + "epoch": 0.8226982959421356, + "grad_norm": 1.8730823330601734, + "learning_rate": 8.019724189336103e-07, + "loss": 0.5147, + "step": 26843 + }, + { + "epoch": 0.8227289444648768, + "grad_norm": 1.8184862424841985, + "learning_rate": 8.017028414397138e-07, + "loss": 0.5522, + "step": 26844 + }, + { + "epoch": 0.822759592987618, + "grad_norm": 0.8056130869882842, + "learning_rate": 8.014333053127144e-07, + "loss": 0.3981, + "step": 26845 + }, + { + "epoch": 0.8227902415103592, + "grad_norm": 1.6742951402989183, + "learning_rate": 8.011638105552655e-07, + "loss": 0.4766, + "step": 26846 + }, + { + "epoch": 0.8228208900331004, + "grad_norm": 
1.7575086033362006, + "learning_rate": 8.008943571700245e-07, + "loss": 0.5917, + "step": 26847 + }, + { + "epoch": 0.8228515385558416, + "grad_norm": 1.870480313710315, + "learning_rate": 8.006249451596454e-07, + "loss": 0.572, + "step": 26848 + }, + { + "epoch": 0.8228821870785828, + "grad_norm": 1.8479360857037048, + "learning_rate": 8.003555745267844e-07, + "loss": 0.5633, + "step": 26849 + }, + { + "epoch": 0.822912835601324, + "grad_norm": 1.6978010643248487, + "learning_rate": 8.000862452740943e-07, + "loss": 0.5478, + "step": 26850 + }, + { + "epoch": 0.8229434841240653, + "grad_norm": 1.9225419514351576, + "learning_rate": 7.998169574042269e-07, + "loss": 0.4767, + "step": 26851 + }, + { + "epoch": 0.8229741326468064, + "grad_norm": 1.8546254998514176, + "learning_rate": 7.995477109198397e-07, + "loss": 0.5785, + "step": 26852 + }, + { + "epoch": 0.8230047811695477, + "grad_norm": 2.0781878427546263, + "learning_rate": 7.992785058235841e-07, + "loss": 0.5502, + "step": 26853 + }, + { + "epoch": 0.8230354296922888, + "grad_norm": 1.8371299766167235, + "learning_rate": 7.990093421181106e-07, + "loss": 0.6349, + "step": 26854 + }, + { + "epoch": 0.8230660782150301, + "grad_norm": 1.8577605359497362, + "learning_rate": 7.987402198060734e-07, + "loss": 0.5538, + "step": 26855 + }, + { + "epoch": 0.8230967267377712, + "grad_norm": 1.9038222665252367, + "learning_rate": 7.984711388901246e-07, + "loss": 0.5803, + "step": 26856 + }, + { + "epoch": 0.8231273752605125, + "grad_norm": 1.942178559353822, + "learning_rate": 7.982020993729134e-07, + "loss": 0.5514, + "step": 26857 + }, + { + "epoch": 0.8231580237832536, + "grad_norm": 1.860200135425837, + "learning_rate": 7.979331012570923e-07, + "loss": 0.5962, + "step": 26858 + }, + { + "epoch": 0.8231886723059949, + "grad_norm": 1.8775390030405705, + "learning_rate": 7.97664144545311e-07, + "loss": 0.5218, + "step": 26859 + }, + { + "epoch": 0.8232193208287361, + "grad_norm": 2.152579857834674, + "learning_rate": 7.973952292402215e-07, + "loss": 0.6041, + "step": 26860 + }, + { + "epoch": 0.8232499693514772, + "grad_norm": 2.0536632621508364, + "learning_rate": 7.971263553444725e-07, + "loss": 0.5535, + "step": 26861 + }, + { + "epoch": 0.8232806178742185, + "grad_norm": 1.9088456745602655, + "learning_rate": 7.9685752286071e-07, + "loss": 0.5333, + "step": 26862 + }, + { + "epoch": 0.8233112663969596, + "grad_norm": 1.9147093703719176, + "learning_rate": 7.965887317915882e-07, + "loss": 0.5155, + "step": 26863 + }, + { + "epoch": 0.8233419149197009, + "grad_norm": 1.8557092402602502, + "learning_rate": 7.963199821397533e-07, + "loss": 0.5144, + "step": 26864 + }, + { + "epoch": 0.823372563442442, + "grad_norm": 2.09498466985198, + "learning_rate": 7.960512739078519e-07, + "loss": 0.5893, + "step": 26865 + }, + { + "epoch": 0.8234032119651833, + "grad_norm": 1.6769172436910593, + "learning_rate": 7.957826070985331e-07, + "loss": 0.5954, + "step": 26866 + }, + { + "epoch": 0.8234338604879244, + "grad_norm": 1.9538364632332874, + "learning_rate": 7.955139817144436e-07, + "loss": 0.487, + "step": 26867 + }, + { + "epoch": 0.8234645090106657, + "grad_norm": 1.7957855537262792, + "learning_rate": 7.952453977582325e-07, + "loss": 0.5633, + "step": 26868 + }, + { + "epoch": 0.8234951575334069, + "grad_norm": 1.9893228728286627, + "learning_rate": 7.949768552325426e-07, + "loss": 0.6763, + "step": 26869 + }, + { + "epoch": 0.8235258060561481, + "grad_norm": 2.0009662371936563, + "learning_rate": 7.947083541400213e-07, + "loss": 0.6338, + "step": 
26870 + }, + { + "epoch": 0.8235564545788893, + "grad_norm": 1.9630302009566136, + "learning_rate": 7.944398944833165e-07, + "loss": 0.5878, + "step": 26871 + }, + { + "epoch": 0.8235871031016305, + "grad_norm": 1.645076228892164, + "learning_rate": 7.941714762650709e-07, + "loss": 0.5366, + "step": 26872 + }, + { + "epoch": 0.8236177516243717, + "grad_norm": 1.9109970894909116, + "learning_rate": 7.939030994879277e-07, + "loss": 0.5903, + "step": 26873 + }, + { + "epoch": 0.8236484001471129, + "grad_norm": 2.041998199618427, + "learning_rate": 7.936347641545356e-07, + "loss": 0.5119, + "step": 26874 + }, + { + "epoch": 0.8236790486698541, + "grad_norm": 0.8012178996349157, + "learning_rate": 7.933664702675354e-07, + "loss": 0.3931, + "step": 26875 + }, + { + "epoch": 0.8237096971925953, + "grad_norm": 1.7759613800804153, + "learning_rate": 7.930982178295732e-07, + "loss": 0.6378, + "step": 26876 + }, + { + "epoch": 0.8237403457153365, + "grad_norm": 1.607181132414293, + "learning_rate": 7.928300068432887e-07, + "loss": 0.4423, + "step": 26877 + }, + { + "epoch": 0.8237709942380778, + "grad_norm": 1.6998491088438734, + "learning_rate": 7.925618373113275e-07, + "loss": 0.4569, + "step": 26878 + }, + { + "epoch": 0.8238016427608189, + "grad_norm": 1.9344235916904151, + "learning_rate": 7.922937092363315e-07, + "loss": 0.5877, + "step": 26879 + }, + { + "epoch": 0.8238322912835602, + "grad_norm": 1.7996216790268418, + "learning_rate": 7.920256226209411e-07, + "loss": 0.6045, + "step": 26880 + }, + { + "epoch": 0.8238629398063013, + "grad_norm": 0.8029985849723281, + "learning_rate": 7.917575774677994e-07, + "loss": 0.3988, + "step": 26881 + }, + { + "epoch": 0.8238935883290426, + "grad_norm": 1.9248492590090052, + "learning_rate": 7.914895737795475e-07, + "loss": 0.5625, + "step": 26882 + }, + { + "epoch": 0.8239242368517837, + "grad_norm": 1.7280297777506046, + "learning_rate": 7.912216115588262e-07, + "loss": 0.4764, + "step": 26883 + }, + { + "epoch": 0.823954885374525, + "grad_norm": 1.8253467316460903, + "learning_rate": 7.909536908082737e-07, + "loss": 0.6128, + "step": 26884 + }, + { + "epoch": 0.8239855338972661, + "grad_norm": 2.1002767228306665, + "learning_rate": 7.906858115305316e-07, + "loss": 0.5569, + "step": 26885 + }, + { + "epoch": 0.8240161824200074, + "grad_norm": 1.8760804687814179, + "learning_rate": 7.904179737282392e-07, + "loss": 0.5049, + "step": 26886 + }, + { + "epoch": 0.8240468309427486, + "grad_norm": 1.8446002894575675, + "learning_rate": 7.901501774040366e-07, + "loss": 0.5188, + "step": 26887 + }, + { + "epoch": 0.8240774794654898, + "grad_norm": 2.077106855416514, + "learning_rate": 7.8988242256056e-07, + "loss": 0.5716, + "step": 26888 + }, + { + "epoch": 0.824108127988231, + "grad_norm": 0.9419687618676986, + "learning_rate": 7.896147092004497e-07, + "loss": 0.3816, + "step": 26889 + }, + { + "epoch": 0.8241387765109722, + "grad_norm": 2.2482786781642394, + "learning_rate": 7.893470373263434e-07, + "loss": 0.6281, + "step": 26890 + }, + { + "epoch": 0.8241694250337134, + "grad_norm": 2.148644291959295, + "learning_rate": 7.890794069408786e-07, + "loss": 0.5737, + "step": 26891 + }, + { + "epoch": 0.8242000735564545, + "grad_norm": 1.7509267755985187, + "learning_rate": 7.888118180466897e-07, + "loss": 0.5502, + "step": 26892 + }, + { + "epoch": 0.8242307220791958, + "grad_norm": 1.7379259900269002, + "learning_rate": 7.885442706464175e-07, + "loss": 0.6597, + "step": 26893 + }, + { + "epoch": 0.8242613706019369, + "grad_norm": 1.8325900364785652, + 
"learning_rate": 7.882767647426947e-07, + "loss": 0.589, + "step": 26894 + }, + { + "epoch": 0.8242920191246782, + "grad_norm": 2.0241875912410405, + "learning_rate": 7.8800930033816e-07, + "loss": 0.5842, + "step": 26895 + }, + { + "epoch": 0.8243226676474193, + "grad_norm": 1.9466493407339007, + "learning_rate": 7.877418774354462e-07, + "loss": 0.5172, + "step": 26896 + }, + { + "epoch": 0.8243533161701606, + "grad_norm": 1.9869569449811317, + "learning_rate": 7.874744960371894e-07, + "loss": 0.6444, + "step": 26897 + }, + { + "epoch": 0.8243839646929018, + "grad_norm": 0.778012342893351, + "learning_rate": 7.872071561460254e-07, + "loss": 0.3896, + "step": 26898 + }, + { + "epoch": 0.824414613215643, + "grad_norm": 1.8872456452510094, + "learning_rate": 7.869398577645859e-07, + "loss": 0.4959, + "step": 26899 + }, + { + "epoch": 0.8244452617383842, + "grad_norm": 1.7071138013713334, + "learning_rate": 7.866726008955062e-07, + "loss": 0.5556, + "step": 26900 + }, + { + "epoch": 0.8244759102611254, + "grad_norm": 0.7852652684502421, + "learning_rate": 7.8640538554142e-07, + "loss": 0.4015, + "step": 26901 + }, + { + "epoch": 0.8245065587838666, + "grad_norm": 1.976555774045705, + "learning_rate": 7.861382117049599e-07, + "loss": 0.6232, + "step": 26902 + }, + { + "epoch": 0.8245372073066078, + "grad_norm": 0.7931974059180377, + "learning_rate": 7.858710793887558e-07, + "loss": 0.3942, + "step": 26903 + }, + { + "epoch": 0.824567855829349, + "grad_norm": 1.8176513829960987, + "learning_rate": 7.856039885954447e-07, + "loss": 0.5408, + "step": 26904 + }, + { + "epoch": 0.8245985043520903, + "grad_norm": 1.7350243330265205, + "learning_rate": 7.85336939327655e-07, + "loss": 0.5336, + "step": 26905 + }, + { + "epoch": 0.8246291528748314, + "grad_norm": 2.088809710537145, + "learning_rate": 7.850699315880195e-07, + "loss": 0.6044, + "step": 26906 + }, + { + "epoch": 0.8246598013975727, + "grad_norm": 1.9100731483549718, + "learning_rate": 7.848029653791673e-07, + "loss": 0.5226, + "step": 26907 + }, + { + "epoch": 0.8246904499203138, + "grad_norm": 0.7991406517915709, + "learning_rate": 7.8453604070373e-07, + "loss": 0.3887, + "step": 26908 + }, + { + "epoch": 0.8247210984430551, + "grad_norm": 0.7894578429870722, + "learning_rate": 7.842691575643385e-07, + "loss": 0.3891, + "step": 26909 + }, + { + "epoch": 0.8247517469657962, + "grad_norm": 0.8480039071327186, + "learning_rate": 7.840023159636206e-07, + "loss": 0.4124, + "step": 26910 + }, + { + "epoch": 0.8247823954885375, + "grad_norm": 1.8840281674110542, + "learning_rate": 7.83735515904207e-07, + "loss": 0.6009, + "step": 26911 + }, + { + "epoch": 0.8248130440112786, + "grad_norm": 1.9886196022800324, + "learning_rate": 7.834687573887273e-07, + "loss": 0.6006, + "step": 26912 + }, + { + "epoch": 0.8248436925340199, + "grad_norm": 1.9624505577549967, + "learning_rate": 7.832020404198071e-07, + "loss": 0.543, + "step": 26913 + }, + { + "epoch": 0.824874341056761, + "grad_norm": 2.1185450536495596, + "learning_rate": 7.829353650000765e-07, + "loss": 0.5048, + "step": 26914 + }, + { + "epoch": 0.8249049895795023, + "grad_norm": 1.8552391891057385, + "learning_rate": 7.826687311321635e-07, + "loss": 0.5613, + "step": 26915 + }, + { + "epoch": 0.8249356381022435, + "grad_norm": 1.814244611713134, + "learning_rate": 7.824021388186936e-07, + "loss": 0.5331, + "step": 26916 + }, + { + "epoch": 0.8249662866249847, + "grad_norm": 0.7658985626415958, + "learning_rate": 7.82135588062296e-07, + "loss": 0.3987, + "step": 26917 + }, + { + "epoch": 
0.8249969351477259, + "grad_norm": 2.003392581153981, + "learning_rate": 7.818690788655941e-07, + "loss": 0.6623, + "step": 26918 + }, + { + "epoch": 0.8250275836704671, + "grad_norm": 1.7866429218088336, + "learning_rate": 7.816026112312159e-07, + "loss": 0.5308, + "step": 26919 + }, + { + "epoch": 0.8250582321932083, + "grad_norm": 0.7714152281222768, + "learning_rate": 7.813361851617873e-07, + "loss": 0.3908, + "step": 26920 + }, + { + "epoch": 0.8250888807159495, + "grad_norm": 1.775889896371896, + "learning_rate": 7.810698006599316e-07, + "loss": 0.5607, + "step": 26921 + }, + { + "epoch": 0.8251195292386907, + "grad_norm": 1.8258298015389747, + "learning_rate": 7.808034577282741e-07, + "loss": 0.5714, + "step": 26922 + }, + { + "epoch": 0.8251501777614318, + "grad_norm": 1.74157015769171, + "learning_rate": 7.805371563694413e-07, + "loss": 0.501, + "step": 26923 + }, + { + "epoch": 0.8251808262841731, + "grad_norm": 0.7540863107203393, + "learning_rate": 7.802708965860545e-07, + "loss": 0.3848, + "step": 26924 + }, + { + "epoch": 0.8252114748069143, + "grad_norm": 0.7667060274227697, + "learning_rate": 7.80004678380738e-07, + "loss": 0.4013, + "step": 26925 + }, + { + "epoch": 0.8252421233296555, + "grad_norm": 1.988562671014952, + "learning_rate": 7.79738501756116e-07, + "loss": 0.6094, + "step": 26926 + }, + { + "epoch": 0.8252727718523967, + "grad_norm": 1.9029140219158294, + "learning_rate": 7.794723667148097e-07, + "loss": 0.589, + "step": 26927 + }, + { + "epoch": 0.8253034203751379, + "grad_norm": 2.1441318097461313, + "learning_rate": 7.79206273259443e-07, + "loss": 0.5543, + "step": 26928 + }, + { + "epoch": 0.8253340688978791, + "grad_norm": 2.04394021508776, + "learning_rate": 7.789402213926356e-07, + "loss": 0.55, + "step": 26929 + }, + { + "epoch": 0.8253647174206203, + "grad_norm": 1.807884830258026, + "learning_rate": 7.786742111170104e-07, + "loss": 0.5546, + "step": 26930 + }, + { + "epoch": 0.8253953659433615, + "grad_norm": 1.9999383696623938, + "learning_rate": 7.784082424351891e-07, + "loss": 0.5505, + "step": 26931 + }, + { + "epoch": 0.8254260144661028, + "grad_norm": 1.8855594992568152, + "learning_rate": 7.781423153497908e-07, + "loss": 0.4899, + "step": 26932 + }, + { + "epoch": 0.8254566629888439, + "grad_norm": 1.9070810680118948, + "learning_rate": 7.778764298634361e-07, + "loss": 0.5133, + "step": 26933 + }, + { + "epoch": 0.8254873115115852, + "grad_norm": 2.1481024133070883, + "learning_rate": 7.776105859787464e-07, + "loss": 0.5823, + "step": 26934 + }, + { + "epoch": 0.8255179600343263, + "grad_norm": 1.8866287159623882, + "learning_rate": 7.773447836983388e-07, + "loss": 0.5339, + "step": 26935 + }, + { + "epoch": 0.8255486085570676, + "grad_norm": 1.6463555401640373, + "learning_rate": 7.770790230248349e-07, + "loss": 0.493, + "step": 26936 + }, + { + "epoch": 0.8255792570798087, + "grad_norm": 1.728509578801977, + "learning_rate": 7.768133039608506e-07, + "loss": 0.5596, + "step": 26937 + }, + { + "epoch": 0.82560990560255, + "grad_norm": 1.9802161785722272, + "learning_rate": 7.765476265090049e-07, + "loss": 0.5888, + "step": 26938 + }, + { + "epoch": 0.8256405541252911, + "grad_norm": 1.969241387180846, + "learning_rate": 7.762819906719177e-07, + "loss": 0.604, + "step": 26939 + }, + { + "epoch": 0.8256712026480324, + "grad_norm": 1.8515333938131775, + "learning_rate": 7.760163964522033e-07, + "loss": 0.547, + "step": 26940 + }, + { + "epoch": 0.8257018511707735, + "grad_norm": 1.691540862519807, + "learning_rate": 7.757508438524803e-07, + 
"loss": 0.4382, + "step": 26941 + }, + { + "epoch": 0.8257324996935148, + "grad_norm": 1.873290094260009, + "learning_rate": 7.754853328753664e-07, + "loss": 0.6489, + "step": 26942 + }, + { + "epoch": 0.825763148216256, + "grad_norm": 1.7209913288933314, + "learning_rate": 7.752198635234748e-07, + "loss": 0.5373, + "step": 26943 + }, + { + "epoch": 0.8257937967389972, + "grad_norm": 1.8507040611040477, + "learning_rate": 7.749544357994232e-07, + "loss": 0.6253, + "step": 26944 + }, + { + "epoch": 0.8258244452617384, + "grad_norm": 0.7799396220796081, + "learning_rate": 7.746890497058273e-07, + "loss": 0.3974, + "step": 26945 + }, + { + "epoch": 0.8258550937844796, + "grad_norm": 1.7936012435140065, + "learning_rate": 7.744237052453007e-07, + "loss": 0.5479, + "step": 26946 + }, + { + "epoch": 0.8258857423072208, + "grad_norm": 1.845181697178688, + "learning_rate": 7.741584024204596e-07, + "loss": 0.6191, + "step": 26947 + }, + { + "epoch": 0.825916390829962, + "grad_norm": 1.7140088662237145, + "learning_rate": 7.73893141233914e-07, + "loss": 0.4785, + "step": 26948 + }, + { + "epoch": 0.8259470393527032, + "grad_norm": 1.7481273230310437, + "learning_rate": 7.736279216882836e-07, + "loss": 0.5211, + "step": 26949 + }, + { + "epoch": 0.8259776878754445, + "grad_norm": 1.7101046629793866, + "learning_rate": 7.733627437861784e-07, + "loss": 0.4934, + "step": 26950 + }, + { + "epoch": 0.8260083363981856, + "grad_norm": 1.949814465862903, + "learning_rate": 7.730976075302099e-07, + "loss": 0.5239, + "step": 26951 + }, + { + "epoch": 0.8260389849209269, + "grad_norm": 1.659473044025143, + "learning_rate": 7.72832512922993e-07, + "loss": 0.5011, + "step": 26952 + }, + { + "epoch": 0.826069633443668, + "grad_norm": 1.6243861418363668, + "learning_rate": 7.725674599671395e-07, + "loss": 0.4941, + "step": 26953 + }, + { + "epoch": 0.8261002819664092, + "grad_norm": 1.8421307375544915, + "learning_rate": 7.723024486652598e-07, + "loss": 0.5424, + "step": 26954 + }, + { + "epoch": 0.8261309304891504, + "grad_norm": 1.9743868710437258, + "learning_rate": 7.720374790199653e-07, + "loss": 0.606, + "step": 26955 + }, + { + "epoch": 0.8261615790118916, + "grad_norm": 1.9578534483234502, + "learning_rate": 7.717725510338686e-07, + "loss": 0.5977, + "step": 26956 + }, + { + "epoch": 0.8261922275346328, + "grad_norm": 1.9818897888541382, + "learning_rate": 7.715076647095776e-07, + "loss": 0.6282, + "step": 26957 + }, + { + "epoch": 0.826222876057374, + "grad_norm": 1.8053150186755627, + "learning_rate": 7.712428200497047e-07, + "loss": 0.6074, + "step": 26958 + }, + { + "epoch": 0.8262535245801153, + "grad_norm": 1.879326523598657, + "learning_rate": 7.709780170568559e-07, + "loss": 0.6571, + "step": 26959 + }, + { + "epoch": 0.8262841731028564, + "grad_norm": 2.0003145843184695, + "learning_rate": 7.70713255733645e-07, + "loss": 0.5773, + "step": 26960 + }, + { + "epoch": 0.8263148216255977, + "grad_norm": 1.806875016859727, + "learning_rate": 7.704485360826785e-07, + "loss": 0.5976, + "step": 26961 + }, + { + "epoch": 0.8263454701483388, + "grad_norm": 0.7819641057548848, + "learning_rate": 7.701838581065635e-07, + "loss": 0.4002, + "step": 26962 + }, + { + "epoch": 0.8263761186710801, + "grad_norm": 0.7805665990544595, + "learning_rate": 7.699192218079093e-07, + "loss": 0.3789, + "step": 26963 + }, + { + "epoch": 0.8264067671938212, + "grad_norm": 2.034019541089921, + "learning_rate": 7.696546271893252e-07, + "loss": 0.5336, + "step": 26964 + }, + { + "epoch": 0.8264374157165625, + "grad_norm": 
1.817739836831582, + "learning_rate": 7.693900742534144e-07, + "loss": 0.6288, + "step": 26965 + }, + { + "epoch": 0.8264680642393036, + "grad_norm": 2.0536500502653374, + "learning_rate": 7.691255630027855e-07, + "loss": 0.5468, + "step": 26966 + }, + { + "epoch": 0.8264987127620449, + "grad_norm": 1.8022837208206255, + "learning_rate": 7.688610934400453e-07, + "loss": 0.4753, + "step": 26967 + }, + { + "epoch": 0.826529361284786, + "grad_norm": 1.9439092687506352, + "learning_rate": 7.685966655678006e-07, + "loss": 0.5425, + "step": 26968 + }, + { + "epoch": 0.8265600098075273, + "grad_norm": 1.8411421508783359, + "learning_rate": 7.683322793886555e-07, + "loss": 0.5326, + "step": 26969 + }, + { + "epoch": 0.8265906583302685, + "grad_norm": 1.9148238217491245, + "learning_rate": 7.680679349052128e-07, + "loss": 0.6381, + "step": 26970 + }, + { + "epoch": 0.8266213068530097, + "grad_norm": 1.934191357567882, + "learning_rate": 7.678036321200821e-07, + "loss": 0.6373, + "step": 26971 + }, + { + "epoch": 0.8266519553757509, + "grad_norm": 1.888934070162858, + "learning_rate": 7.675393710358647e-07, + "loss": 0.6318, + "step": 26972 + }, + { + "epoch": 0.8266826038984921, + "grad_norm": 1.9955076052044105, + "learning_rate": 7.672751516551641e-07, + "loss": 0.6654, + "step": 26973 + }, + { + "epoch": 0.8267132524212333, + "grad_norm": 1.6480930773789375, + "learning_rate": 7.670109739805842e-07, + "loss": 0.515, + "step": 26974 + }, + { + "epoch": 0.8267439009439745, + "grad_norm": 1.8403101720099797, + "learning_rate": 7.667468380147281e-07, + "loss": 0.561, + "step": 26975 + }, + { + "epoch": 0.8267745494667157, + "grad_norm": 0.7441589819258513, + "learning_rate": 7.664827437601996e-07, + "loss": 0.3887, + "step": 26976 + }, + { + "epoch": 0.826805197989457, + "grad_norm": 0.8036851061367859, + "learning_rate": 7.66218691219599e-07, + "loss": 0.4031, + "step": 26977 + }, + { + "epoch": 0.8268358465121981, + "grad_norm": 1.9181068270865833, + "learning_rate": 7.659546803955287e-07, + "loss": 0.6152, + "step": 26978 + }, + { + "epoch": 0.8268664950349394, + "grad_norm": 1.8047623120330862, + "learning_rate": 7.656907112905915e-07, + "loss": 0.478, + "step": 26979 + }, + { + "epoch": 0.8268971435576805, + "grad_norm": 1.8621653866361234, + "learning_rate": 7.65426783907387e-07, + "loss": 0.6111, + "step": 26980 + }, + { + "epoch": 0.8269277920804218, + "grad_norm": 0.8141558243322671, + "learning_rate": 7.651628982485149e-07, + "loss": 0.4081, + "step": 26981 + }, + { + "epoch": 0.8269584406031629, + "grad_norm": 1.7508126236744666, + "learning_rate": 7.648990543165757e-07, + "loss": 0.5225, + "step": 26982 + }, + { + "epoch": 0.8269890891259042, + "grad_norm": 0.8206753514282721, + "learning_rate": 7.646352521141715e-07, + "loss": 0.381, + "step": 26983 + }, + { + "epoch": 0.8270197376486453, + "grad_norm": 0.9338947109298809, + "learning_rate": 7.643714916438982e-07, + "loss": 0.3773, + "step": 26984 + }, + { + "epoch": 0.8270503861713865, + "grad_norm": 1.906266947311379, + "learning_rate": 7.641077729083568e-07, + "loss": 0.6147, + "step": 26985 + }, + { + "epoch": 0.8270810346941277, + "grad_norm": 0.74424609747635, + "learning_rate": 7.638440959101451e-07, + "loss": 0.3755, + "step": 26986 + }, + { + "epoch": 0.8271116832168689, + "grad_norm": 2.08730646319104, + "learning_rate": 7.635804606518626e-07, + "loss": 0.7024, + "step": 26987 + }, + { + "epoch": 0.8271423317396102, + "grad_norm": 1.9720189782103228, + "learning_rate": 7.633168671361058e-07, + "loss": 0.5877, + "step": 26988 + 
}, + { + "epoch": 0.8271729802623513, + "grad_norm": 1.9521482067098421, + "learning_rate": 7.630533153654695e-07, + "loss": 0.5996, + "step": 26989 + }, + { + "epoch": 0.8272036287850926, + "grad_norm": 0.8014719830194103, + "learning_rate": 7.627898053425553e-07, + "loss": 0.3899, + "step": 26990 + }, + { + "epoch": 0.8272342773078337, + "grad_norm": 1.782507896871469, + "learning_rate": 7.62526337069957e-07, + "loss": 0.5498, + "step": 26991 + }, + { + "epoch": 0.827264925830575, + "grad_norm": 1.9462970070767065, + "learning_rate": 7.622629105502704e-07, + "loss": 0.5014, + "step": 26992 + }, + { + "epoch": 0.8272955743533161, + "grad_norm": 2.1778689519624423, + "learning_rate": 7.619995257860913e-07, + "loss": 0.593, + "step": 26993 + }, + { + "epoch": 0.8273262228760574, + "grad_norm": 1.9340635391034064, + "learning_rate": 7.617361827800152e-07, + "loss": 0.5569, + "step": 26994 + }, + { + "epoch": 0.8273568713987985, + "grad_norm": 1.8157152629845656, + "learning_rate": 7.614728815346378e-07, + "loss": 0.5533, + "step": 26995 + }, + { + "epoch": 0.8273875199215398, + "grad_norm": 1.7899626249999645, + "learning_rate": 7.612096220525517e-07, + "loss": 0.6011, + "step": 26996 + }, + { + "epoch": 0.827418168444281, + "grad_norm": 2.056100007066122, + "learning_rate": 7.609464043363513e-07, + "loss": 0.6266, + "step": 26997 + }, + { + "epoch": 0.8274488169670222, + "grad_norm": 1.7596797807265294, + "learning_rate": 7.606832283886323e-07, + "loss": 0.5422, + "step": 26998 + }, + { + "epoch": 0.8274794654897634, + "grad_norm": 1.6096922527433786, + "learning_rate": 7.604200942119861e-07, + "loss": 0.4807, + "step": 26999 + }, + { + "epoch": 0.8275101140125046, + "grad_norm": 2.036359801870182, + "learning_rate": 7.601570018090027e-07, + "loss": 0.6103, + "step": 27000 + }, + { + "epoch": 0.8275407625352458, + "grad_norm": 1.6417034874689387, + "learning_rate": 7.5989395118228e-07, + "loss": 0.6127, + "step": 27001 + }, + { + "epoch": 0.827571411057987, + "grad_norm": 1.7360696346559754, + "learning_rate": 7.596309423344055e-07, + "loss": 0.4807, + "step": 27002 + }, + { + "epoch": 0.8276020595807282, + "grad_norm": 1.9575558663100905, + "learning_rate": 7.593679752679733e-07, + "loss": 0.5932, + "step": 27003 + }, + { + "epoch": 0.8276327081034694, + "grad_norm": 2.0242778159816663, + "learning_rate": 7.591050499855729e-07, + "loss": 0.5824, + "step": 27004 + }, + { + "epoch": 0.8276633566262106, + "grad_norm": 1.8835197486971325, + "learning_rate": 7.588421664897949e-07, + "loss": 0.5354, + "step": 27005 + }, + { + "epoch": 0.8276940051489519, + "grad_norm": 1.7957669858931316, + "learning_rate": 7.585793247832318e-07, + "loss": 0.5252, + "step": 27006 + }, + { + "epoch": 0.827724653671693, + "grad_norm": 1.8601854885903688, + "learning_rate": 7.583165248684704e-07, + "loss": 0.5342, + "step": 27007 + }, + { + "epoch": 0.8277553021944343, + "grad_norm": 1.7293395137046594, + "learning_rate": 7.580537667481019e-07, + "loss": 0.5792, + "step": 27008 + }, + { + "epoch": 0.8277859507171754, + "grad_norm": 1.9778257814641649, + "learning_rate": 7.57791050424716e-07, + "loss": 0.5662, + "step": 27009 + }, + { + "epoch": 0.8278165992399167, + "grad_norm": 1.9798038268654716, + "learning_rate": 7.575283759009e-07, + "loss": 0.5843, + "step": 27010 + }, + { + "epoch": 0.8278472477626578, + "grad_norm": 1.9055418104643642, + "learning_rate": 7.572657431792402e-07, + "loss": 0.5269, + "step": 27011 + }, + { + "epoch": 0.8278778962853991, + "grad_norm": 1.6937037529965946, + "learning_rate": 
7.570031522623289e-07, + "loss": 0.5381, + "step": 27012 + }, + { + "epoch": 0.8279085448081402, + "grad_norm": 2.009742098225345, + "learning_rate": 7.567406031527502e-07, + "loss": 0.6465, + "step": 27013 + }, + { + "epoch": 0.8279391933308815, + "grad_norm": 2.116618695450478, + "learning_rate": 7.564780958530932e-07, + "loss": 0.6011, + "step": 27014 + }, + { + "epoch": 0.8279698418536227, + "grad_norm": 0.795047682922662, + "learning_rate": 7.562156303659419e-07, + "loss": 0.4023, + "step": 27015 + }, + { + "epoch": 0.8280004903763638, + "grad_norm": 1.874664512443071, + "learning_rate": 7.55953206693884e-07, + "loss": 0.5884, + "step": 27016 + }, + { + "epoch": 0.8280311388991051, + "grad_norm": 1.8060129581842579, + "learning_rate": 7.556908248395062e-07, + "loss": 0.5775, + "step": 27017 + }, + { + "epoch": 0.8280617874218462, + "grad_norm": 0.8503135813539435, + "learning_rate": 7.554284848053911e-07, + "loss": 0.4152, + "step": 27018 + }, + { + "epoch": 0.8280924359445875, + "grad_norm": 1.8503663945029998, + "learning_rate": 7.551661865941257e-07, + "loss": 0.4702, + "step": 27019 + }, + { + "epoch": 0.8281230844673286, + "grad_norm": 2.046090898499313, + "learning_rate": 7.54903930208295e-07, + "loss": 0.592, + "step": 27020 + }, + { + "epoch": 0.8281537329900699, + "grad_norm": 1.9344933524765133, + "learning_rate": 7.546417156504804e-07, + "loss": 0.667, + "step": 27021 + }, + { + "epoch": 0.828184381512811, + "grad_norm": 2.1046744649205222, + "learning_rate": 7.543795429232686e-07, + "loss": 0.5893, + "step": 27022 + }, + { + "epoch": 0.8282150300355523, + "grad_norm": 2.111230067285639, + "learning_rate": 7.541174120292405e-07, + "loss": 0.5974, + "step": 27023 + }, + { + "epoch": 0.8282456785582935, + "grad_norm": 2.100836040232562, + "learning_rate": 7.538553229709799e-07, + "loss": 0.5015, + "step": 27024 + }, + { + "epoch": 0.8282763270810347, + "grad_norm": 1.6418926995130538, + "learning_rate": 7.535932757510705e-07, + "loss": 0.5444, + "step": 27025 + }, + { + "epoch": 0.8283069756037759, + "grad_norm": 1.6970302472799477, + "learning_rate": 7.533312703720913e-07, + "loss": 0.5652, + "step": 27026 + }, + { + "epoch": 0.8283376241265171, + "grad_norm": 1.882851657560995, + "learning_rate": 7.530693068366263e-07, + "loss": 0.5362, + "step": 27027 + }, + { + "epoch": 0.8283682726492583, + "grad_norm": 2.00796655706747, + "learning_rate": 7.528073851472567e-07, + "loss": 0.6293, + "step": 27028 + }, + { + "epoch": 0.8283989211719995, + "grad_norm": 1.9511972888024642, + "learning_rate": 7.525455053065617e-07, + "loss": 0.6143, + "step": 27029 + }, + { + "epoch": 0.8284295696947407, + "grad_norm": 1.8390753538633913, + "learning_rate": 7.522836673171224e-07, + "loss": 0.5797, + "step": 27030 + }, + { + "epoch": 0.828460218217482, + "grad_norm": 1.928667399163506, + "learning_rate": 7.520218711815202e-07, + "loss": 0.6472, + "step": 27031 + }, + { + "epoch": 0.8284908667402231, + "grad_norm": 1.7050114220992099, + "learning_rate": 7.517601169023326e-07, + "loss": 0.6267, + "step": 27032 + }, + { + "epoch": 0.8285215152629644, + "grad_norm": 1.715570189875828, + "learning_rate": 7.514984044821405e-07, + "loss": 0.5294, + "step": 27033 + }, + { + "epoch": 0.8285521637857055, + "grad_norm": 1.8697644865134395, + "learning_rate": 7.512367339235205e-07, + "loss": 0.5584, + "step": 27034 + }, + { + "epoch": 0.8285828123084468, + "grad_norm": 0.8481464351877073, + "learning_rate": 7.509751052290515e-07, + "loss": 0.4189, + "step": 27035 + }, + { + "epoch": 0.8286134608311879, + 
"grad_norm": 2.2249360210201767, + "learning_rate": 7.507135184013137e-07, + "loss": 0.5094, + "step": 27036 + }, + { + "epoch": 0.8286441093539292, + "grad_norm": 1.7757487964820025, + "learning_rate": 7.504519734428817e-07, + "loss": 0.5372, + "step": 27037 + }, + { + "epoch": 0.8286747578766703, + "grad_norm": 1.8118449220462185, + "learning_rate": 7.501904703563334e-07, + "loss": 0.4987, + "step": 27038 + }, + { + "epoch": 0.8287054063994116, + "grad_norm": 1.8768924074289537, + "learning_rate": 7.499290091442468e-07, + "loss": 0.6534, + "step": 27039 + }, + { + "epoch": 0.8287360549221527, + "grad_norm": 1.9492956184277552, + "learning_rate": 7.496675898091965e-07, + "loss": 0.634, + "step": 27040 + }, + { + "epoch": 0.828766703444894, + "grad_norm": 1.7235938402020363, + "learning_rate": 7.494062123537588e-07, + "loss": 0.5663, + "step": 27041 + }, + { + "epoch": 0.8287973519676352, + "grad_norm": 2.0549126567459717, + "learning_rate": 7.491448767805098e-07, + "loss": 0.5888, + "step": 27042 + }, + { + "epoch": 0.8288280004903764, + "grad_norm": 1.8942006711848494, + "learning_rate": 7.488835830920232e-07, + "loss": 0.617, + "step": 27043 + }, + { + "epoch": 0.8288586490131176, + "grad_norm": 1.7788201892561764, + "learning_rate": 7.486223312908758e-07, + "loss": 0.5591, + "step": 27044 + }, + { + "epoch": 0.8288892975358588, + "grad_norm": 1.9172860328840091, + "learning_rate": 7.483611213796388e-07, + "loss": 0.5561, + "step": 27045 + }, + { + "epoch": 0.8289199460586, + "grad_norm": 1.8795427717282334, + "learning_rate": 7.480999533608874e-07, + "loss": 0.6275, + "step": 27046 + }, + { + "epoch": 0.8289505945813411, + "grad_norm": 1.9099408556357937, + "learning_rate": 7.478388272371967e-07, + "loss": 0.5634, + "step": 27047 + }, + { + "epoch": 0.8289812431040824, + "grad_norm": 2.106794912602991, + "learning_rate": 7.475777430111364e-07, + "loss": 0.6228, + "step": 27048 + }, + { + "epoch": 0.8290118916268235, + "grad_norm": 2.2177144885322986, + "learning_rate": 7.473167006852805e-07, + "loss": 0.6408, + "step": 27049 + }, + { + "epoch": 0.8290425401495648, + "grad_norm": 0.7923239185535111, + "learning_rate": 7.470557002622031e-07, + "loss": 0.3805, + "step": 27050 + }, + { + "epoch": 0.829073188672306, + "grad_norm": 1.9896291860210613, + "learning_rate": 7.46794741744472e-07, + "loss": 0.5651, + "step": 27051 + }, + { + "epoch": 0.8291038371950472, + "grad_norm": 1.7655781898481773, + "learning_rate": 7.465338251346616e-07, + "loss": 0.5944, + "step": 27052 + }, + { + "epoch": 0.8291344857177884, + "grad_norm": 1.657977206935957, + "learning_rate": 7.462729504353422e-07, + "loss": 0.5562, + "step": 27053 + }, + { + "epoch": 0.8291651342405296, + "grad_norm": 2.006003647997409, + "learning_rate": 7.460121176490826e-07, + "loss": 0.6315, + "step": 27054 + }, + { + "epoch": 0.8291957827632708, + "grad_norm": 1.7008234923090066, + "learning_rate": 7.45751326778455e-07, + "loss": 0.4715, + "step": 27055 + }, + { + "epoch": 0.829226431286012, + "grad_norm": 1.9683124782697226, + "learning_rate": 7.454905778260263e-07, + "loss": 0.5464, + "step": 27056 + }, + { + "epoch": 0.8292570798087532, + "grad_norm": 1.8407874586069586, + "learning_rate": 7.452298707943694e-07, + "loss": 0.5002, + "step": 27057 + }, + { + "epoch": 0.8292877283314944, + "grad_norm": 2.0613495619806823, + "learning_rate": 7.449692056860513e-07, + "loss": 0.7331, + "step": 27058 + }, + { + "epoch": 0.8293183768542356, + "grad_norm": 1.9119974480547943, + "learning_rate": 7.447085825036393e-07, + "loss": 0.6012, + 
"step": 27059 + }, + { + "epoch": 0.8293490253769769, + "grad_norm": 1.7202059854404148, + "learning_rate": 7.444480012497024e-07, + "loss": 0.5027, + "step": 27060 + }, + { + "epoch": 0.829379673899718, + "grad_norm": 1.744587332642698, + "learning_rate": 7.441874619268091e-07, + "loss": 0.5449, + "step": 27061 + }, + { + "epoch": 0.8294103224224593, + "grad_norm": 0.7468764551214591, + "learning_rate": 7.439269645375246e-07, + "loss": 0.389, + "step": 27062 + }, + { + "epoch": 0.8294409709452004, + "grad_norm": 1.8634346505894825, + "learning_rate": 7.436665090844169e-07, + "loss": 0.5943, + "step": 27063 + }, + { + "epoch": 0.8294716194679417, + "grad_norm": 2.143608391334463, + "learning_rate": 7.434060955700534e-07, + "loss": 0.6646, + "step": 27064 + }, + { + "epoch": 0.8295022679906828, + "grad_norm": 1.866921029968988, + "learning_rate": 7.431457239969969e-07, + "loss": 0.5511, + "step": 27065 + }, + { + "epoch": 0.8295329165134241, + "grad_norm": 0.7578479424297278, + "learning_rate": 7.428853943678166e-07, + "loss": 0.3937, + "step": 27066 + }, + { + "epoch": 0.8295635650361652, + "grad_norm": 1.8611308568233118, + "learning_rate": 7.426251066850742e-07, + "loss": 0.5536, + "step": 27067 + }, + { + "epoch": 0.8295942135589065, + "grad_norm": 2.0105700403726217, + "learning_rate": 7.423648609513356e-07, + "loss": 0.5906, + "step": 27068 + }, + { + "epoch": 0.8296248620816477, + "grad_norm": 1.9373454800553849, + "learning_rate": 7.42104657169167e-07, + "loss": 0.5129, + "step": 27069 + }, + { + "epoch": 0.8296555106043889, + "grad_norm": 2.0041265026227135, + "learning_rate": 7.418444953411297e-07, + "loss": 0.6814, + "step": 27070 + }, + { + "epoch": 0.8296861591271301, + "grad_norm": 1.7012229673804204, + "learning_rate": 7.415843754697876e-07, + "loss": 0.569, + "step": 27071 + }, + { + "epoch": 0.8297168076498713, + "grad_norm": 1.7917862694136741, + "learning_rate": 7.413242975577056e-07, + "loss": 0.6013, + "step": 27072 + }, + { + "epoch": 0.8297474561726125, + "grad_norm": 0.7884683239896739, + "learning_rate": 7.410642616074437e-07, + "loss": 0.3964, + "step": 27073 + }, + { + "epoch": 0.8297781046953537, + "grad_norm": 1.8069163471439214, + "learning_rate": 7.408042676215665e-07, + "loss": 0.5516, + "step": 27074 + }, + { + "epoch": 0.8298087532180949, + "grad_norm": 1.9425966099887115, + "learning_rate": 7.405443156026327e-07, + "loss": 0.5409, + "step": 27075 + }, + { + "epoch": 0.8298394017408361, + "grad_norm": 0.7601482081145735, + "learning_rate": 7.402844055532072e-07, + "loss": 0.3872, + "step": 27076 + }, + { + "epoch": 0.8298700502635773, + "grad_norm": 2.074613392161602, + "learning_rate": 7.400245374758496e-07, + "loss": 0.6123, + "step": 27077 + }, + { + "epoch": 0.8299006987863184, + "grad_norm": 0.8103017984853688, + "learning_rate": 7.397647113731194e-07, + "loss": 0.4058, + "step": 27078 + }, + { + "epoch": 0.8299313473090597, + "grad_norm": 2.0377052482511835, + "learning_rate": 7.395049272475769e-07, + "loss": 0.56, + "step": 27079 + }, + { + "epoch": 0.8299619958318009, + "grad_norm": 1.7261101330475286, + "learning_rate": 7.39245185101784e-07, + "loss": 0.5236, + "step": 27080 + }, + { + "epoch": 0.8299926443545421, + "grad_norm": 1.935853372259889, + "learning_rate": 7.389854849382972e-07, + "loss": 0.5285, + "step": 27081 + }, + { + "epoch": 0.8300232928772833, + "grad_norm": 1.7648472476347952, + "learning_rate": 7.38725826759677e-07, + "loss": 0.5554, + "step": 27082 + }, + { + "epoch": 0.8300539414000245, + "grad_norm": 2.0133963401944395, + 
"learning_rate": 7.38466210568482e-07, + "loss": 0.5439, + "step": 27083 + }, + { + "epoch": 0.8300845899227657, + "grad_norm": 2.503945128851562, + "learning_rate": 7.382066363672691e-07, + "loss": 0.5403, + "step": 27084 + }, + { + "epoch": 0.8301152384455069, + "grad_norm": 1.7074228580416035, + "learning_rate": 7.379471041585979e-07, + "loss": 0.5933, + "step": 27085 + }, + { + "epoch": 0.8301458869682481, + "grad_norm": 2.069379097013741, + "learning_rate": 7.376876139450217e-07, + "loss": 0.6167, + "step": 27086 + }, + { + "epoch": 0.8301765354909894, + "grad_norm": 1.7488548639355102, + "learning_rate": 7.374281657291022e-07, + "loss": 0.5211, + "step": 27087 + }, + { + "epoch": 0.8302071840137305, + "grad_norm": 1.9956195445521938, + "learning_rate": 7.371687595133942e-07, + "loss": 0.5865, + "step": 27088 + }, + { + "epoch": 0.8302378325364718, + "grad_norm": 1.8179270014484072, + "learning_rate": 7.369093953004513e-07, + "loss": 0.594, + "step": 27089 + }, + { + "epoch": 0.8302684810592129, + "grad_norm": 1.8718349321444825, + "learning_rate": 7.366500730928311e-07, + "loss": 0.5745, + "step": 27090 + }, + { + "epoch": 0.8302991295819542, + "grad_norm": 0.8199555911142876, + "learning_rate": 7.363907928930903e-07, + "loss": 0.4074, + "step": 27091 + }, + { + "epoch": 0.8303297781046953, + "grad_norm": 1.7307927147637407, + "learning_rate": 7.3613155470378e-07, + "loss": 0.5387, + "step": 27092 + }, + { + "epoch": 0.8303604266274366, + "grad_norm": 1.9158758129434357, + "learning_rate": 7.358723585274569e-07, + "loss": 0.5967, + "step": 27093 + }, + { + "epoch": 0.8303910751501777, + "grad_norm": 2.06954386778551, + "learning_rate": 7.356132043666741e-07, + "loss": 0.5837, + "step": 27094 + }, + { + "epoch": 0.830421723672919, + "grad_norm": 2.030652243092432, + "learning_rate": 7.353540922239865e-07, + "loss": 0.5604, + "step": 27095 + }, + { + "epoch": 0.8304523721956601, + "grad_norm": 1.670532815559836, + "learning_rate": 7.350950221019471e-07, + "loss": 0.3932, + "step": 27096 + }, + { + "epoch": 0.8304830207184014, + "grad_norm": 1.8700469814655827, + "learning_rate": 7.348359940031046e-07, + "loss": 0.5258, + "step": 27097 + }, + { + "epoch": 0.8305136692411426, + "grad_norm": 2.0449181208840863, + "learning_rate": 7.345770079300168e-07, + "loss": 0.6434, + "step": 27098 + }, + { + "epoch": 0.8305443177638838, + "grad_norm": 1.8170790559143042, + "learning_rate": 7.343180638852332e-07, + "loss": 0.5445, + "step": 27099 + }, + { + "epoch": 0.830574966286625, + "grad_norm": 2.0357713594272617, + "learning_rate": 7.340591618713039e-07, + "loss": 0.6805, + "step": 27100 + }, + { + "epoch": 0.8306056148093662, + "grad_norm": 2.090572491004008, + "learning_rate": 7.338003018907808e-07, + "loss": 0.5884, + "step": 27101 + }, + { + "epoch": 0.8306362633321074, + "grad_norm": 2.2747747012101525, + "learning_rate": 7.335414839462157e-07, + "loss": 0.5826, + "step": 27102 + }, + { + "epoch": 0.8306669118548486, + "grad_norm": 1.9210149694479501, + "learning_rate": 7.332827080401584e-07, + "loss": 0.6157, + "step": 27103 + }, + { + "epoch": 0.8306975603775898, + "grad_norm": 1.7832703269832895, + "learning_rate": 7.330239741751577e-07, + "loss": 0.574, + "step": 27104 + }, + { + "epoch": 0.830728208900331, + "grad_norm": 2.2186549601705714, + "learning_rate": 7.327652823537628e-07, + "loss": 0.5367, + "step": 27105 + }, + { + "epoch": 0.8307588574230722, + "grad_norm": 1.7838312493728787, + "learning_rate": 7.325066325785252e-07, + "loss": 0.5449, + "step": 27106 + }, + { + "epoch": 
0.8307895059458135, + "grad_norm": 1.7312787923555664, + "learning_rate": 7.322480248519915e-07, + "loss": 0.5252, + "step": 27107 + }, + { + "epoch": 0.8308201544685546, + "grad_norm": 1.8179298725468607, + "learning_rate": 7.319894591767074e-07, + "loss": 0.6424, + "step": 27108 + }, + { + "epoch": 0.8308508029912958, + "grad_norm": 1.8253414511010373, + "learning_rate": 7.317309355552254e-07, + "loss": 0.5611, + "step": 27109 + }, + { + "epoch": 0.830881451514037, + "grad_norm": 2.2667182472662684, + "learning_rate": 7.314724539900913e-07, + "loss": 0.6132, + "step": 27110 + }, + { + "epoch": 0.8309121000367782, + "grad_norm": 1.7626361548017964, + "learning_rate": 7.312140144838493e-07, + "loss": 0.5564, + "step": 27111 + }, + { + "epoch": 0.8309427485595194, + "grad_norm": 1.8790943136242626, + "learning_rate": 7.30955617039048e-07, + "loss": 0.558, + "step": 27112 + }, + { + "epoch": 0.8309733970822606, + "grad_norm": 1.8257798635278786, + "learning_rate": 7.306972616582336e-07, + "loss": 0.5481, + "step": 27113 + }, + { + "epoch": 0.8310040456050019, + "grad_norm": 1.9520604617656356, + "learning_rate": 7.304389483439528e-07, + "loss": 0.5966, + "step": 27114 + }, + { + "epoch": 0.831034694127743, + "grad_norm": 1.9878898088402088, + "learning_rate": 7.30180677098748e-07, + "loss": 0.6142, + "step": 27115 + }, + { + "epoch": 0.8310653426504843, + "grad_norm": 2.021501326336515, + "learning_rate": 7.299224479251649e-07, + "loss": 0.5919, + "step": 27116 + }, + { + "epoch": 0.8310959911732254, + "grad_norm": 1.9464411983853434, + "learning_rate": 7.296642608257503e-07, + "loss": 0.6097, + "step": 27117 + }, + { + "epoch": 0.8311266396959667, + "grad_norm": 1.9896298887228674, + "learning_rate": 7.294061158030463e-07, + "loss": 0.5885, + "step": 27118 + }, + { + "epoch": 0.8311572882187078, + "grad_norm": 2.0731514018461192, + "learning_rate": 7.291480128595951e-07, + "loss": 0.5807, + "step": 27119 + }, + { + "epoch": 0.8311879367414491, + "grad_norm": 2.0994682914604432, + "learning_rate": 7.288899519979414e-07, + "loss": 0.5559, + "step": 27120 + }, + { + "epoch": 0.8312185852641902, + "grad_norm": 2.0856576568892327, + "learning_rate": 7.286319332206276e-07, + "loss": 0.5435, + "step": 27121 + }, + { + "epoch": 0.8312492337869315, + "grad_norm": 1.8120001719419105, + "learning_rate": 7.283739565301978e-07, + "loss": 0.5564, + "step": 27122 + }, + { + "epoch": 0.8312798823096726, + "grad_norm": 1.9263578967002972, + "learning_rate": 7.281160219291911e-07, + "loss": 0.5964, + "step": 27123 + }, + { + "epoch": 0.8313105308324139, + "grad_norm": 0.8007490941791009, + "learning_rate": 7.278581294201504e-07, + "loss": 0.3952, + "step": 27124 + }, + { + "epoch": 0.8313411793551551, + "grad_norm": 1.8539562289201372, + "learning_rate": 7.276002790056175e-07, + "loss": 0.556, + "step": 27125 + }, + { + "epoch": 0.8313718278778963, + "grad_norm": 1.946409109670411, + "learning_rate": 7.273424706881321e-07, + "loss": 0.6542, + "step": 27126 + }, + { + "epoch": 0.8314024764006375, + "grad_norm": 1.7279657218888103, + "learning_rate": 7.270847044702322e-07, + "loss": 0.6122, + "step": 27127 + }, + { + "epoch": 0.8314331249233787, + "grad_norm": 1.9457108315561733, + "learning_rate": 7.268269803544625e-07, + "loss": 0.5664, + "step": 27128 + }, + { + "epoch": 0.8314637734461199, + "grad_norm": 2.030195268683539, + "learning_rate": 7.265692983433586e-07, + "loss": 0.5485, + "step": 27129 + }, + { + "epoch": 0.8314944219688611, + "grad_norm": 1.753175881135227, + "learning_rate": 
7.26311658439462e-07, + "loss": 0.535, + "step": 27130 + }, + { + "epoch": 0.8315250704916023, + "grad_norm": 1.7754014614832887, + "learning_rate": 7.260540606453092e-07, + "loss": 0.5679, + "step": 27131 + }, + { + "epoch": 0.8315557190143436, + "grad_norm": 1.7872076583506076, + "learning_rate": 7.257965049634391e-07, + "loss": 0.4957, + "step": 27132 + }, + { + "epoch": 0.8315863675370847, + "grad_norm": 1.8254578908015906, + "learning_rate": 7.255389913963906e-07, + "loss": 0.5975, + "step": 27133 + }, + { + "epoch": 0.831617016059826, + "grad_norm": 1.864864359762946, + "learning_rate": 7.252815199466994e-07, + "loss": 0.4786, + "step": 27134 + }, + { + "epoch": 0.8316476645825671, + "grad_norm": 1.9276790864530593, + "learning_rate": 7.250240906169026e-07, + "loss": 0.6146, + "step": 27135 + }, + { + "epoch": 0.8316783131053084, + "grad_norm": 0.7793348567330975, + "learning_rate": 7.247667034095385e-07, + "loss": 0.4083, + "step": 27136 + }, + { + "epoch": 0.8317089616280495, + "grad_norm": 1.8647357233504123, + "learning_rate": 7.245093583271423e-07, + "loss": 0.4812, + "step": 27137 + }, + { + "epoch": 0.8317396101507908, + "grad_norm": 1.8970471538652114, + "learning_rate": 7.242520553722466e-07, + "loss": 0.6408, + "step": 27138 + }, + { + "epoch": 0.8317702586735319, + "grad_norm": 1.6535594075325504, + "learning_rate": 7.239947945473919e-07, + "loss": 0.5253, + "step": 27139 + }, + { + "epoch": 0.8318009071962731, + "grad_norm": 1.6591934144715157, + "learning_rate": 7.237375758551096e-07, + "loss": 0.5436, + "step": 27140 + }, + { + "epoch": 0.8318315557190143, + "grad_norm": 0.7930853165369188, + "learning_rate": 7.234803992979356e-07, + "loss": 0.4151, + "step": 27141 + }, + { + "epoch": 0.8318622042417555, + "grad_norm": 1.867869179658435, + "learning_rate": 7.232232648784026e-07, + "loss": 0.6369, + "step": 27142 + }, + { + "epoch": 0.8318928527644968, + "grad_norm": 1.806361300529666, + "learning_rate": 7.229661725990455e-07, + "loss": 0.5179, + "step": 27143 + }, + { + "epoch": 0.8319235012872379, + "grad_norm": 1.999394655185593, + "learning_rate": 7.227091224623978e-07, + "loss": 0.5245, + "step": 27144 + }, + { + "epoch": 0.8319541498099792, + "grad_norm": 1.930972287255039, + "learning_rate": 7.2245211447099e-07, + "loss": 0.6381, + "step": 27145 + }, + { + "epoch": 0.8319847983327203, + "grad_norm": 1.9490083237405118, + "learning_rate": 7.221951486273566e-07, + "loss": 0.5974, + "step": 27146 + }, + { + "epoch": 0.8320154468554616, + "grad_norm": 2.065628333859209, + "learning_rate": 7.219382249340296e-07, + "loss": 0.5507, + "step": 27147 + }, + { + "epoch": 0.8320460953782027, + "grad_norm": 1.883571525781592, + "learning_rate": 7.216813433935388e-07, + "loss": 0.6131, + "step": 27148 + }, + { + "epoch": 0.832076743900944, + "grad_norm": 1.736149501924709, + "learning_rate": 7.214245040084167e-07, + "loss": 0.5946, + "step": 27149 + }, + { + "epoch": 0.8321073924236851, + "grad_norm": 0.7875802094176125, + "learning_rate": 7.211677067811945e-07, + "loss": 0.4152, + "step": 27150 + }, + { + "epoch": 0.8321380409464264, + "grad_norm": 0.8082260063558707, + "learning_rate": 7.209109517144008e-07, + "loss": 0.3989, + "step": 27151 + }, + { + "epoch": 0.8321686894691676, + "grad_norm": 2.064988187477585, + "learning_rate": 7.206542388105675e-07, + "loss": 0.5382, + "step": 27152 + }, + { + "epoch": 0.8321993379919088, + "grad_norm": 1.671177281757867, + "learning_rate": 7.203975680722214e-07, + "loss": 0.4914, + "step": 27153 + }, + { + "epoch": 0.83222998651465, + 
"grad_norm": 0.8039508548345711, + "learning_rate": 7.201409395018932e-07, + "loss": 0.394, + "step": 27154 + }, + { + "epoch": 0.8322606350373912, + "grad_norm": 1.6807950003418424, + "learning_rate": 7.198843531021127e-07, + "loss": 0.4554, + "step": 27155 + }, + { + "epoch": 0.8322912835601324, + "grad_norm": 1.814605986436508, + "learning_rate": 7.196278088754055e-07, + "loss": 0.5893, + "step": 27156 + }, + { + "epoch": 0.8323219320828736, + "grad_norm": 1.9912561042792816, + "learning_rate": 7.193713068243007e-07, + "loss": 0.6927, + "step": 27157 + }, + { + "epoch": 0.8323525806056148, + "grad_norm": 1.946453615392313, + "learning_rate": 7.191148469513265e-07, + "loss": 0.5308, + "step": 27158 + }, + { + "epoch": 0.832383229128356, + "grad_norm": 1.8054663365512238, + "learning_rate": 7.188584292590084e-07, + "loss": 0.5054, + "step": 27159 + }, + { + "epoch": 0.8324138776510972, + "grad_norm": 1.7709216349279377, + "learning_rate": 7.186020537498733e-07, + "loss": 0.5289, + "step": 27160 + }, + { + "epoch": 0.8324445261738385, + "grad_norm": 1.5197208381096825, + "learning_rate": 7.183457204264488e-07, + "loss": 0.5458, + "step": 27161 + }, + { + "epoch": 0.8324751746965796, + "grad_norm": 1.7753841027319355, + "learning_rate": 7.180894292912582e-07, + "loss": 0.545, + "step": 27162 + }, + { + "epoch": 0.8325058232193209, + "grad_norm": 1.977794013385123, + "learning_rate": 7.178331803468292e-07, + "loss": 0.6129, + "step": 27163 + }, + { + "epoch": 0.832536471742062, + "grad_norm": 2.0680395207512663, + "learning_rate": 7.175769735956844e-07, + "loss": 0.6657, + "step": 27164 + }, + { + "epoch": 0.8325671202648033, + "grad_norm": 1.6474868905101014, + "learning_rate": 7.173208090403494e-07, + "loss": 0.5431, + "step": 27165 + }, + { + "epoch": 0.8325977687875444, + "grad_norm": 1.8302118108842047, + "learning_rate": 7.170646866833491e-07, + "loss": 0.5146, + "step": 27166 + }, + { + "epoch": 0.8326284173102857, + "grad_norm": 1.5793151946496935, + "learning_rate": 7.168086065272056e-07, + "loss": 0.6292, + "step": 27167 + }, + { + "epoch": 0.8326590658330268, + "grad_norm": 1.649982988017637, + "learning_rate": 7.165525685744429e-07, + "loss": 0.512, + "step": 27168 + }, + { + "epoch": 0.8326897143557681, + "grad_norm": 2.105864401438965, + "learning_rate": 7.162965728275844e-07, + "loss": 0.6308, + "step": 27169 + }, + { + "epoch": 0.8327203628785093, + "grad_norm": 1.9288706386079035, + "learning_rate": 7.160406192891505e-07, + "loss": 0.6194, + "step": 27170 + }, + { + "epoch": 0.8327510114012504, + "grad_norm": 1.664259153899821, + "learning_rate": 7.157847079616658e-07, + "loss": 0.5632, + "step": 27171 + }, + { + "epoch": 0.8327816599239917, + "grad_norm": 1.7347129321409573, + "learning_rate": 7.1552883884765e-07, + "loss": 0.5425, + "step": 27172 + }, + { + "epoch": 0.8328123084467328, + "grad_norm": 1.9046965146823098, + "learning_rate": 7.152730119496243e-07, + "loss": 0.5944, + "step": 27173 + }, + { + "epoch": 0.8328429569694741, + "grad_norm": 1.8917289593733175, + "learning_rate": 7.150172272701111e-07, + "loss": 0.508, + "step": 27174 + }, + { + "epoch": 0.8328736054922152, + "grad_norm": 1.862695207672162, + "learning_rate": 7.14761484811628e-07, + "loss": 0.4917, + "step": 27175 + }, + { + "epoch": 0.8329042540149565, + "grad_norm": 2.1003692592653587, + "learning_rate": 7.145057845766967e-07, + "loss": 0.6349, + "step": 27176 + }, + { + "epoch": 0.8329349025376976, + "grad_norm": 1.8651624999535692, + "learning_rate": 7.142501265678376e-07, + "loss": 0.6283, + 
"step": 27177 + }, + { + "epoch": 0.8329655510604389, + "grad_norm": 0.7730036837484717, + "learning_rate": 7.139945107875673e-07, + "loss": 0.3882, + "step": 27178 + }, + { + "epoch": 0.83299619958318, + "grad_norm": 1.9844132523920655, + "learning_rate": 7.137389372384063e-07, + "loss": 0.541, + "step": 27179 + }, + { + "epoch": 0.8330268481059213, + "grad_norm": 1.8998314420312776, + "learning_rate": 7.134834059228729e-07, + "loss": 0.5764, + "step": 27180 + }, + { + "epoch": 0.8330574966286625, + "grad_norm": 2.1140805536411946, + "learning_rate": 7.13227916843483e-07, + "loss": 0.5656, + "step": 27181 + }, + { + "epoch": 0.8330881451514037, + "grad_norm": 1.782205151091349, + "learning_rate": 7.129724700027562e-07, + "loss": 0.5623, + "step": 27182 + }, + { + "epoch": 0.8331187936741449, + "grad_norm": 2.183339607432549, + "learning_rate": 7.127170654032068e-07, + "loss": 0.4354, + "step": 27183 + }, + { + "epoch": 0.8331494421968861, + "grad_norm": 1.8177434274691884, + "learning_rate": 7.124617030473552e-07, + "loss": 0.516, + "step": 27184 + }, + { + "epoch": 0.8331800907196273, + "grad_norm": 0.8138781703184309, + "learning_rate": 7.122063829377151e-07, + "loss": 0.404, + "step": 27185 + }, + { + "epoch": 0.8332107392423685, + "grad_norm": 2.0868799760811596, + "learning_rate": 7.119511050768019e-07, + "loss": 0.5801, + "step": 27186 + }, + { + "epoch": 0.8332413877651097, + "grad_norm": 1.7071083759045285, + "learning_rate": 7.116958694671316e-07, + "loss": 0.5716, + "step": 27187 + }, + { + "epoch": 0.833272036287851, + "grad_norm": 2.0426216864576374, + "learning_rate": 7.114406761112197e-07, + "loss": 0.6412, + "step": 27188 + }, + { + "epoch": 0.8333026848105921, + "grad_norm": 1.8190189966529262, + "learning_rate": 7.111855250115795e-07, + "loss": 0.4448, + "step": 27189 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.7693631923291128, + "learning_rate": 7.109304161707254e-07, + "loss": 0.4013, + "step": 27190 + }, + { + "epoch": 0.8333639818560745, + "grad_norm": 0.8749379288949245, + "learning_rate": 7.106753495911727e-07, + "loss": 0.395, + "step": 27191 + }, + { + "epoch": 0.8333946303788158, + "grad_norm": 2.109228270510071, + "learning_rate": 7.104203252754322e-07, + "loss": 0.5677, + "step": 27192 + }, + { + "epoch": 0.8334252789015569, + "grad_norm": 1.852229847912426, + "learning_rate": 7.101653432260186e-07, + "loss": 0.5654, + "step": 27193 + }, + { + "epoch": 0.8334559274242982, + "grad_norm": 1.8781337720008306, + "learning_rate": 7.099104034454413e-07, + "loss": 0.5221, + "step": 27194 + }, + { + "epoch": 0.8334865759470393, + "grad_norm": 1.8401838279397698, + "learning_rate": 7.096555059362164e-07, + "loss": 0.6366, + "step": 27195 + }, + { + "epoch": 0.8335172244697806, + "grad_norm": 1.88015983749179, + "learning_rate": 7.094006507008539e-07, + "loss": 0.5662, + "step": 27196 + }, + { + "epoch": 0.8335478729925218, + "grad_norm": 1.7231195044526868, + "learning_rate": 7.091458377418636e-07, + "loss": 0.6122, + "step": 27197 + }, + { + "epoch": 0.833578521515263, + "grad_norm": 1.6652703442562318, + "learning_rate": 7.088910670617572e-07, + "loss": 0.5262, + "step": 27198 + }, + { + "epoch": 0.8336091700380042, + "grad_norm": 1.8515555730266526, + "learning_rate": 7.086363386630457e-07, + "loss": 0.5788, + "step": 27199 + }, + { + "epoch": 0.8336398185607454, + "grad_norm": 1.7118144473627257, + "learning_rate": 7.083816525482373e-07, + "loss": 0.4739, + "step": 27200 + }, + { + "epoch": 0.8336704670834866, + "grad_norm": 1.9906714848885438, + 
"learning_rate": 7.081270087198428e-07, + "loss": 0.6243, + "step": 27201 + }, + { + "epoch": 0.8337011156062277, + "grad_norm": 1.6947994786905953, + "learning_rate": 7.078724071803711e-07, + "loss": 0.4703, + "step": 27202 + }, + { + "epoch": 0.833731764128969, + "grad_norm": 1.9035828979819267, + "learning_rate": 7.07617847932332e-07, + "loss": 0.5243, + "step": 27203 + }, + { + "epoch": 0.8337624126517101, + "grad_norm": 1.9150572377698336, + "learning_rate": 7.073633309782319e-07, + "loss": 0.5494, + "step": 27204 + }, + { + "epoch": 0.8337930611744514, + "grad_norm": 1.8305992896122196, + "learning_rate": 7.071088563205774e-07, + "loss": 0.5394, + "step": 27205 + }, + { + "epoch": 0.8338237096971925, + "grad_norm": 0.7823471062067606, + "learning_rate": 7.068544239618802e-07, + "loss": 0.3904, + "step": 27206 + }, + { + "epoch": 0.8338543582199338, + "grad_norm": 1.8639509179829747, + "learning_rate": 7.066000339046442e-07, + "loss": 0.5885, + "step": 27207 + }, + { + "epoch": 0.833885006742675, + "grad_norm": 1.7151981317768796, + "learning_rate": 7.063456861513756e-07, + "loss": 0.6436, + "step": 27208 + }, + { + "epoch": 0.8339156552654162, + "grad_norm": 1.8883357288634397, + "learning_rate": 7.060913807045816e-07, + "loss": 0.5089, + "step": 27209 + }, + { + "epoch": 0.8339463037881574, + "grad_norm": 1.5699571946928634, + "learning_rate": 7.058371175667683e-07, + "loss": 0.5062, + "step": 27210 + }, + { + "epoch": 0.8339769523108986, + "grad_norm": 5.936052494293772, + "learning_rate": 7.055828967404415e-07, + "loss": 0.614, + "step": 27211 + }, + { + "epoch": 0.8340076008336398, + "grad_norm": 2.0474363114250704, + "learning_rate": 7.053287182281038e-07, + "loss": 0.5813, + "step": 27212 + }, + { + "epoch": 0.834038249356381, + "grad_norm": 1.8971442853007554, + "learning_rate": 7.050745820322613e-07, + "loss": 0.5402, + "step": 27213 + }, + { + "epoch": 0.8340688978791222, + "grad_norm": 2.05452754491487, + "learning_rate": 7.048204881554188e-07, + "loss": 0.4354, + "step": 27214 + }, + { + "epoch": 0.8340995464018635, + "grad_norm": 0.7903643229529651, + "learning_rate": 7.045664366000787e-07, + "loss": 0.405, + "step": 27215 + }, + { + "epoch": 0.8341301949246046, + "grad_norm": 1.9768778762570782, + "learning_rate": 7.043124273687441e-07, + "loss": 0.6225, + "step": 27216 + }, + { + "epoch": 0.8341608434473459, + "grad_norm": 0.7486962573427021, + "learning_rate": 7.040584604639178e-07, + "loss": 0.3691, + "step": 27217 + }, + { + "epoch": 0.834191491970087, + "grad_norm": 0.7857064343197719, + "learning_rate": 7.038045358881041e-07, + "loss": 0.3911, + "step": 27218 + }, + { + "epoch": 0.8342221404928283, + "grad_norm": 2.174428448677365, + "learning_rate": 7.03550653643802e-07, + "loss": 0.6611, + "step": 27219 + }, + { + "epoch": 0.8342527890155694, + "grad_norm": 1.9265422599014839, + "learning_rate": 7.03296813733515e-07, + "loss": 0.5263, + "step": 27220 + }, + { + "epoch": 0.8342834375383107, + "grad_norm": 1.7148267479984358, + "learning_rate": 7.030430161597435e-07, + "loss": 0.5438, + "step": 27221 + }, + { + "epoch": 0.8343140860610518, + "grad_norm": 1.9600182838978506, + "learning_rate": 7.027892609249903e-07, + "loss": 0.6981, + "step": 27222 + }, + { + "epoch": 0.8343447345837931, + "grad_norm": 1.8621479780501682, + "learning_rate": 7.025355480317536e-07, + "loss": 0.5499, + "step": 27223 + }, + { + "epoch": 0.8343753831065343, + "grad_norm": 1.9880005395875777, + "learning_rate": 7.022818774825313e-07, + "loss": 0.6346, + "step": 27224 + }, + { + "epoch": 
0.8344060316292755, + "grad_norm": 1.8460165916292897, + "learning_rate": 7.020282492798275e-07, + "loss": 0.5667, + "step": 27225 + }, + { + "epoch": 0.8344366801520167, + "grad_norm": 2.0047130077884194, + "learning_rate": 7.017746634261391e-07, + "loss": 0.524, + "step": 27226 + }, + { + "epoch": 0.8344673286747579, + "grad_norm": 1.697822669763398, + "learning_rate": 7.015211199239641e-07, + "loss": 0.4613, + "step": 27227 + }, + { + "epoch": 0.8344979771974991, + "grad_norm": 1.7298512019821846, + "learning_rate": 7.012676187758006e-07, + "loss": 0.5516, + "step": 27228 + }, + { + "epoch": 0.8345286257202403, + "grad_norm": 1.924618470870334, + "learning_rate": 7.010141599841474e-07, + "loss": 0.6476, + "step": 27229 + }, + { + "epoch": 0.8345592742429815, + "grad_norm": 1.8727421156678832, + "learning_rate": 7.00760743551503e-07, + "loss": 0.6073, + "step": 27230 + }, + { + "epoch": 0.8345899227657227, + "grad_norm": 2.3762161366162298, + "learning_rate": 7.005073694803615e-07, + "loss": 0.6143, + "step": 27231 + }, + { + "epoch": 0.8346205712884639, + "grad_norm": 2.0148510671722577, + "learning_rate": 7.002540377732215e-07, + "loss": 0.6151, + "step": 27232 + }, + { + "epoch": 0.834651219811205, + "grad_norm": 1.8819219553480333, + "learning_rate": 7.000007484325788e-07, + "loss": 0.5414, + "step": 27233 + }, + { + "epoch": 0.8346818683339463, + "grad_norm": 2.031529198326926, + "learning_rate": 6.997475014609295e-07, + "loss": 0.5707, + "step": 27234 + }, + { + "epoch": 0.8347125168566875, + "grad_norm": 1.9809638227501372, + "learning_rate": 6.994942968607665e-07, + "loss": 0.7184, + "step": 27235 + }, + { + "epoch": 0.8347431653794287, + "grad_norm": 1.6923570532500285, + "learning_rate": 6.992411346345884e-07, + "loss": 0.4776, + "step": 27236 + }, + { + "epoch": 0.8347738139021699, + "grad_norm": 1.90625586661087, + "learning_rate": 6.989880147848865e-07, + "loss": 0.5784, + "step": 27237 + }, + { + "epoch": 0.8348044624249111, + "grad_norm": 0.7693777101695197, + "learning_rate": 6.987349373141572e-07, + "loss": 0.3943, + "step": 27238 + }, + { + "epoch": 0.8348351109476523, + "grad_norm": 1.9876387242254074, + "learning_rate": 6.984819022248923e-07, + "loss": 0.5759, + "step": 27239 + }, + { + "epoch": 0.8348657594703935, + "grad_norm": 2.128371977969533, + "learning_rate": 6.982289095195855e-07, + "loss": 0.6689, + "step": 27240 + }, + { + "epoch": 0.8348964079931347, + "grad_norm": 1.8253893514179806, + "learning_rate": 6.979759592007312e-07, + "loss": 0.5725, + "step": 27241 + }, + { + "epoch": 0.834927056515876, + "grad_norm": 1.7878063230573176, + "learning_rate": 6.977230512708194e-07, + "loss": 0.5866, + "step": 27242 + }, + { + "epoch": 0.8349577050386171, + "grad_norm": 0.8652139336508525, + "learning_rate": 6.974701857323429e-07, + "loss": 0.4061, + "step": 27243 + }, + { + "epoch": 0.8349883535613584, + "grad_norm": 1.830943706130405, + "learning_rate": 6.972173625877949e-07, + "loss": 0.6755, + "step": 27244 + }, + { + "epoch": 0.8350190020840995, + "grad_norm": 1.8100938231711683, + "learning_rate": 6.969645818396654e-07, + "loss": 0.5419, + "step": 27245 + }, + { + "epoch": 0.8350496506068408, + "grad_norm": 0.740892813518741, + "learning_rate": 6.967118434904424e-07, + "loss": 0.3695, + "step": 27246 + }, + { + "epoch": 0.8350802991295819, + "grad_norm": 1.9851882180861564, + "learning_rate": 6.964591475426208e-07, + "loss": 0.512, + "step": 27247 + }, + { + "epoch": 0.8351109476523232, + "grad_norm": 1.7829387792386227, + "learning_rate": 
6.962064939986868e-07, + "loss": 0.5993, + "step": 27248 + }, + { + "epoch": 0.8351415961750643, + "grad_norm": 2.025217885887236, + "learning_rate": 6.959538828611329e-07, + "loss": 0.5003, + "step": 27249 + }, + { + "epoch": 0.8351722446978056, + "grad_norm": 1.6188055320392496, + "learning_rate": 6.95701314132446e-07, + "loss": 0.4774, + "step": 27250 + }, + { + "epoch": 0.8352028932205467, + "grad_norm": 1.7106268833908407, + "learning_rate": 6.954487878151145e-07, + "loss": 0.52, + "step": 27251 + }, + { + "epoch": 0.835233541743288, + "grad_norm": 0.8243267006094858, + "learning_rate": 6.951963039116288e-07, + "loss": 0.3941, + "step": 27252 + }, + { + "epoch": 0.8352641902660292, + "grad_norm": 1.809520145881829, + "learning_rate": 6.949438624244748e-07, + "loss": 0.5505, + "step": 27253 + }, + { + "epoch": 0.8352948387887704, + "grad_norm": 2.0819951441040265, + "learning_rate": 6.946914633561397e-07, + "loss": 0.5934, + "step": 27254 + }, + { + "epoch": 0.8353254873115116, + "grad_norm": 2.017684426229085, + "learning_rate": 6.944391067091127e-07, + "loss": 0.5042, + "step": 27255 + }, + { + "epoch": 0.8353561358342528, + "grad_norm": 1.7281857635098408, + "learning_rate": 6.941867924858775e-07, + "loss": 0.4863, + "step": 27256 + }, + { + "epoch": 0.835386784356994, + "grad_norm": 1.8598532023914183, + "learning_rate": 6.93934520688922e-07, + "loss": 0.4608, + "step": 27257 + }, + { + "epoch": 0.8354174328797352, + "grad_norm": 0.7587459074892586, + "learning_rate": 6.936822913207319e-07, + "loss": 0.3775, + "step": 27258 + }, + { + "epoch": 0.8354480814024764, + "grad_norm": 1.8433183231578194, + "learning_rate": 6.93430104383791e-07, + "loss": 0.5048, + "step": 27259 + }, + { + "epoch": 0.8354787299252177, + "grad_norm": 0.8635099911774682, + "learning_rate": 6.931779598805865e-07, + "loss": 0.4021, + "step": 27260 + }, + { + "epoch": 0.8355093784479588, + "grad_norm": 1.8801751386233048, + "learning_rate": 6.929258578136005e-07, + "loss": 0.5814, + "step": 27261 + }, + { + "epoch": 0.8355400269707001, + "grad_norm": 0.8061463077394829, + "learning_rate": 6.926737981853177e-07, + "loss": 0.4234, + "step": 27262 + }, + { + "epoch": 0.8355706754934412, + "grad_norm": 1.9337414848145036, + "learning_rate": 6.924217809982231e-07, + "loss": 0.5576, + "step": 27263 + }, + { + "epoch": 0.8356013240161824, + "grad_norm": 1.8044130033690924, + "learning_rate": 6.921698062547983e-07, + "loss": 0.6371, + "step": 27264 + }, + { + "epoch": 0.8356319725389236, + "grad_norm": 1.901211422836862, + "learning_rate": 6.919178739575261e-07, + "loss": 0.5315, + "step": 27265 + }, + { + "epoch": 0.8356626210616648, + "grad_norm": 2.060088795472128, + "learning_rate": 6.916659841088908e-07, + "loss": 0.5958, + "step": 27266 + }, + { + "epoch": 0.835693269584406, + "grad_norm": 2.0204329426626675, + "learning_rate": 6.914141367113714e-07, + "loss": 0.5986, + "step": 27267 + }, + { + "epoch": 0.8357239181071472, + "grad_norm": 1.8634070846804855, + "learning_rate": 6.911623317674521e-07, + "loss": 0.5855, + "step": 27268 + }, + { + "epoch": 0.8357545666298885, + "grad_norm": 0.7819448911523152, + "learning_rate": 6.90910569279612e-07, + "loss": 0.4182, + "step": 27269 + }, + { + "epoch": 0.8357852151526296, + "grad_norm": 1.861173236318662, + "learning_rate": 6.906588492503325e-07, + "loss": 0.486, + "step": 27270 + }, + { + "epoch": 0.8358158636753709, + "grad_norm": 0.7803463319905231, + "learning_rate": 6.904071716820948e-07, + "loss": 0.3831, + "step": 27271 + }, + { + "epoch": 0.835846512198112, + 
"grad_norm": 2.281489035477965, + "learning_rate": 6.901555365773766e-07, + "loss": 0.5714, + "step": 27272 + }, + { + "epoch": 0.8358771607208533, + "grad_norm": 0.8305102209394917, + "learning_rate": 6.89903943938659e-07, + "loss": 0.3915, + "step": 27273 + }, + { + "epoch": 0.8359078092435944, + "grad_norm": 1.8514609187250444, + "learning_rate": 6.896523937684219e-07, + "loss": 0.5401, + "step": 27274 + }, + { + "epoch": 0.8359384577663357, + "grad_norm": 2.0189552645983966, + "learning_rate": 6.894008860691415e-07, + "loss": 0.6578, + "step": 27275 + }, + { + "epoch": 0.8359691062890768, + "grad_norm": 2.0978559795693372, + "learning_rate": 6.891494208432964e-07, + "loss": 0.6903, + "step": 27276 + }, + { + "epoch": 0.8359997548118181, + "grad_norm": 1.92175896739592, + "learning_rate": 6.888979980933669e-07, + "loss": 0.5037, + "step": 27277 + }, + { + "epoch": 0.8360304033345592, + "grad_norm": 1.8244348546493103, + "learning_rate": 6.88646617821827e-07, + "loss": 0.6405, + "step": 27278 + }, + { + "epoch": 0.8360610518573005, + "grad_norm": 1.9639379690025092, + "learning_rate": 6.883952800311561e-07, + "loss": 0.636, + "step": 27279 + }, + { + "epoch": 0.8360917003800417, + "grad_norm": 0.8114342124163293, + "learning_rate": 6.881439847238292e-07, + "loss": 0.3922, + "step": 27280 + }, + { + "epoch": 0.8361223489027829, + "grad_norm": 1.8200758059699478, + "learning_rate": 6.878927319023221e-07, + "loss": 0.5953, + "step": 27281 + }, + { + "epoch": 0.8361529974255241, + "grad_norm": 1.726198001229524, + "learning_rate": 6.876415215691124e-07, + "loss": 0.5792, + "step": 27282 + }, + { + "epoch": 0.8361836459482653, + "grad_norm": 2.176184216131103, + "learning_rate": 6.873903537266735e-07, + "loss": 0.6028, + "step": 27283 + }, + { + "epoch": 0.8362142944710065, + "grad_norm": 0.7992713667114314, + "learning_rate": 6.871392283774808e-07, + "loss": 0.3865, + "step": 27284 + }, + { + "epoch": 0.8362449429937477, + "grad_norm": 1.7728877811509014, + "learning_rate": 6.868881455240095e-07, + "loss": 0.5161, + "step": 27285 + }, + { + "epoch": 0.8362755915164889, + "grad_norm": 2.1243300740249222, + "learning_rate": 6.866371051687321e-07, + "loss": 0.5824, + "step": 27286 + }, + { + "epoch": 0.8363062400392302, + "grad_norm": 2.045954629707224, + "learning_rate": 6.863861073141226e-07, + "loss": 0.5713, + "step": 27287 + }, + { + "epoch": 0.8363368885619713, + "grad_norm": 1.9372624434638461, + "learning_rate": 6.861351519626558e-07, + "loss": 0.5527, + "step": 27288 + }, + { + "epoch": 0.8363675370847126, + "grad_norm": 1.7611734908483645, + "learning_rate": 6.858842391168019e-07, + "loss": 0.5291, + "step": 27289 + }, + { + "epoch": 0.8363981856074537, + "grad_norm": 1.6298337331750052, + "learning_rate": 6.856333687790357e-07, + "loss": 0.6243, + "step": 27290 + }, + { + "epoch": 0.836428834130195, + "grad_norm": 1.7815040078148647, + "learning_rate": 6.853825409518266e-07, + "loss": 0.5369, + "step": 27291 + }, + { + "epoch": 0.8364594826529361, + "grad_norm": 0.7834252973006502, + "learning_rate": 6.851317556376469e-07, + "loss": 0.4016, + "step": 27292 + }, + { + "epoch": 0.8364901311756774, + "grad_norm": 2.166208801109267, + "learning_rate": 6.848810128389699e-07, + "loss": 0.5118, + "step": 27293 + }, + { + "epoch": 0.8365207796984185, + "grad_norm": 1.9901589809572249, + "learning_rate": 6.846303125582626e-07, + "loss": 0.6316, + "step": 27294 + }, + { + "epoch": 0.8365514282211597, + "grad_norm": 1.99488589213486, + "learning_rate": 6.84379654797997e-07, + "loss": 0.6641, + 
"step": 27295 + }, + { + "epoch": 0.836582076743901, + "grad_norm": 2.5065148888324202, + "learning_rate": 6.841290395606443e-07, + "loss": 0.6066, + "step": 27296 + }, + { + "epoch": 0.8366127252666421, + "grad_norm": 1.9185496787234433, + "learning_rate": 6.838784668486708e-07, + "loss": 0.6355, + "step": 27297 + }, + { + "epoch": 0.8366433737893834, + "grad_norm": 0.7567154542075465, + "learning_rate": 6.836279366645477e-07, + "loss": 0.3908, + "step": 27298 + }, + { + "epoch": 0.8366740223121245, + "grad_norm": 1.6715213278577057, + "learning_rate": 6.833774490107437e-07, + "loss": 0.5536, + "step": 27299 + }, + { + "epoch": 0.8367046708348658, + "grad_norm": 2.538747918141515, + "learning_rate": 6.831270038897253e-07, + "loss": 0.5706, + "step": 27300 + }, + { + "epoch": 0.8367353193576069, + "grad_norm": 2.0251945906884368, + "learning_rate": 6.828766013039617e-07, + "loss": 0.6604, + "step": 27301 + }, + { + "epoch": 0.8367659678803482, + "grad_norm": 1.918182011984863, + "learning_rate": 6.826262412559176e-07, + "loss": 0.5871, + "step": 27302 + }, + { + "epoch": 0.8367966164030893, + "grad_norm": 0.830961535416196, + "learning_rate": 6.823759237480643e-07, + "loss": 0.4214, + "step": 27303 + }, + { + "epoch": 0.8368272649258306, + "grad_norm": 1.7373247061174752, + "learning_rate": 6.821256487828654e-07, + "loss": 0.5275, + "step": 27304 + }, + { + "epoch": 0.8368579134485717, + "grad_norm": 2.1980689779000095, + "learning_rate": 6.818754163627861e-07, + "loss": 0.5796, + "step": 27305 + }, + { + "epoch": 0.836888561971313, + "grad_norm": 2.041854791923555, + "learning_rate": 6.816252264902934e-07, + "loss": 0.5353, + "step": 27306 + }, + { + "epoch": 0.8369192104940542, + "grad_norm": 1.7810256433312575, + "learning_rate": 6.813750791678531e-07, + "loss": 0.5995, + "step": 27307 + }, + { + "epoch": 0.8369498590167954, + "grad_norm": 1.8909829079860319, + "learning_rate": 6.811249743979281e-07, + "loss": 0.529, + "step": 27308 + }, + { + "epoch": 0.8369805075395366, + "grad_norm": 1.6485860832398513, + "learning_rate": 6.808749121829839e-07, + "loss": 0.5376, + "step": 27309 + }, + { + "epoch": 0.8370111560622778, + "grad_norm": 1.6345686734578417, + "learning_rate": 6.806248925254844e-07, + "loss": 0.5602, + "step": 27310 + }, + { + "epoch": 0.837041804585019, + "grad_norm": 0.807778138943022, + "learning_rate": 6.803749154278938e-07, + "loss": 0.398, + "step": 27311 + }, + { + "epoch": 0.8370724531077602, + "grad_norm": 0.7883831991622529, + "learning_rate": 6.801249808926741e-07, + "loss": 0.3731, + "step": 27312 + }, + { + "epoch": 0.8371031016305014, + "grad_norm": 1.5428479154102488, + "learning_rate": 6.798750889222877e-07, + "loss": 0.5195, + "step": 27313 + }, + { + "epoch": 0.8371337501532427, + "grad_norm": 1.8283788543743011, + "learning_rate": 6.796252395191971e-07, + "loss": 0.5824, + "step": 27314 + }, + { + "epoch": 0.8371643986759838, + "grad_norm": 0.7734648020491443, + "learning_rate": 6.793754326858659e-07, + "loss": 0.3824, + "step": 27315 + }, + { + "epoch": 0.8371950471987251, + "grad_norm": 0.8336272436279156, + "learning_rate": 6.791256684247521e-07, + "loss": 0.4006, + "step": 27316 + }, + { + "epoch": 0.8372256957214662, + "grad_norm": 1.9509350994970835, + "learning_rate": 6.788759467383194e-07, + "loss": 0.5799, + "step": 27317 + }, + { + "epoch": 0.8372563442442075, + "grad_norm": 1.8542881465617056, + "learning_rate": 6.786262676290284e-07, + "loss": 0.6215, + "step": 27318 + }, + { + "epoch": 0.8372869927669486, + "grad_norm": 1.8497585155777663, + 
"learning_rate": 6.783766310993378e-07, + "loss": 0.6138, + "step": 27319 + }, + { + "epoch": 0.8373176412896899, + "grad_norm": 1.962658246279553, + "learning_rate": 6.781270371517084e-07, + "loss": 0.6404, + "step": 27320 + }, + { + "epoch": 0.837348289812431, + "grad_norm": 1.8641449296869432, + "learning_rate": 6.778774857885973e-07, + "loss": 0.6323, + "step": 27321 + }, + { + "epoch": 0.8373789383351723, + "grad_norm": 1.8529135336133296, + "learning_rate": 6.776279770124677e-07, + "loss": 0.5511, + "step": 27322 + }, + { + "epoch": 0.8374095868579134, + "grad_norm": 2.061693010652835, + "learning_rate": 6.773785108257752e-07, + "loss": 0.5703, + "step": 27323 + }, + { + "epoch": 0.8374402353806547, + "grad_norm": 0.7675139055937162, + "learning_rate": 6.771290872309771e-07, + "loss": 0.4008, + "step": 27324 + }, + { + "epoch": 0.8374708839033959, + "grad_norm": 0.8343115887320661, + "learning_rate": 6.768797062305321e-07, + "loss": 0.4154, + "step": 27325 + }, + { + "epoch": 0.837501532426137, + "grad_norm": 1.6991443270136999, + "learning_rate": 6.766303678268987e-07, + "loss": 0.633, + "step": 27326 + }, + { + "epoch": 0.8375321809488783, + "grad_norm": 2.080483574177966, + "learning_rate": 6.763810720225311e-07, + "loss": 0.5038, + "step": 27327 + }, + { + "epoch": 0.8375628294716194, + "grad_norm": 0.7521936685693151, + "learning_rate": 6.761318188198873e-07, + "loss": 0.3749, + "step": 27328 + }, + { + "epoch": 0.8375934779943607, + "grad_norm": 0.8050233488027767, + "learning_rate": 6.758826082214232e-07, + "loss": 0.4112, + "step": 27329 + }, + { + "epoch": 0.8376241265171018, + "grad_norm": 1.8430827250123227, + "learning_rate": 6.75633440229595e-07, + "loss": 0.57, + "step": 27330 + }, + { + "epoch": 0.8376547750398431, + "grad_norm": 1.9486956565320945, + "learning_rate": 6.75384314846857e-07, + "loss": 0.6776, + "step": 27331 + }, + { + "epoch": 0.8376854235625842, + "grad_norm": 0.7948440744340078, + "learning_rate": 6.751352320756615e-07, + "loss": 0.3947, + "step": 27332 + }, + { + "epoch": 0.8377160720853255, + "grad_norm": 1.9720075952074678, + "learning_rate": 6.748861919184673e-07, + "loss": 0.5968, + "step": 27333 + }, + { + "epoch": 0.8377467206080667, + "grad_norm": 1.8370532529321006, + "learning_rate": 6.746371943777263e-07, + "loss": 0.5163, + "step": 27334 + }, + { + "epoch": 0.8377773691308079, + "grad_norm": 1.8004812107481303, + "learning_rate": 6.743882394558903e-07, + "loss": 0.5412, + "step": 27335 + }, + { + "epoch": 0.8378080176535491, + "grad_norm": 1.6357612635358842, + "learning_rate": 6.741393271554142e-07, + "loss": 0.4821, + "step": 27336 + }, + { + "epoch": 0.8378386661762903, + "grad_norm": 0.8648215648132047, + "learning_rate": 6.738904574787497e-07, + "loss": 0.3895, + "step": 27337 + }, + { + "epoch": 0.8378693146990315, + "grad_norm": 1.7152649351925995, + "learning_rate": 6.73641630428351e-07, + "loss": 0.5608, + "step": 27338 + }, + { + "epoch": 0.8378999632217727, + "grad_norm": 1.8170270811254734, + "learning_rate": 6.733928460066669e-07, + "loss": 0.6068, + "step": 27339 + }, + { + "epoch": 0.8379306117445139, + "grad_norm": 1.8624441836623333, + "learning_rate": 6.731441042161496e-07, + "loss": 0.5853, + "step": 27340 + }, + { + "epoch": 0.8379612602672551, + "grad_norm": 1.8157389030888604, + "learning_rate": 6.728954050592523e-07, + "loss": 0.6393, + "step": 27341 + }, + { + "epoch": 0.8379919087899963, + "grad_norm": 0.7765985259349343, + "learning_rate": 6.726467485384236e-07, + "loss": 0.378, + "step": 27342 + }, + { + "epoch": 
0.8380225573127376, + "grad_norm": 1.7125716872988415, + "learning_rate": 6.723981346561115e-07, + "loss": 0.5318, + "step": 27343 + }, + { + "epoch": 0.8380532058354787, + "grad_norm": 2.134616715211792, + "learning_rate": 6.721495634147696e-07, + "loss": 0.5697, + "step": 27344 + }, + { + "epoch": 0.83808385435822, + "grad_norm": 1.7786747036261148, + "learning_rate": 6.719010348168459e-07, + "loss": 0.5718, + "step": 27345 + }, + { + "epoch": 0.8381145028809611, + "grad_norm": 2.0091284642709617, + "learning_rate": 6.716525488647874e-07, + "loss": 0.624, + "step": 27346 + }, + { + "epoch": 0.8381451514037024, + "grad_norm": 1.843261768290642, + "learning_rate": 6.714041055610437e-07, + "loss": 0.6434, + "step": 27347 + }, + { + "epoch": 0.8381757999264435, + "grad_norm": 2.1253017418804236, + "learning_rate": 6.71155704908063e-07, + "loss": 0.6353, + "step": 27348 + }, + { + "epoch": 0.8382064484491848, + "grad_norm": 1.8823654545390067, + "learning_rate": 6.709073469082938e-07, + "loss": 0.491, + "step": 27349 + }, + { + "epoch": 0.8382370969719259, + "grad_norm": 1.8416724732626844, + "learning_rate": 6.706590315641809e-07, + "loss": 0.5448, + "step": 27350 + }, + { + "epoch": 0.8382677454946672, + "grad_norm": 1.9523878127805037, + "learning_rate": 6.704107588781722e-07, + "loss": 0.5858, + "step": 27351 + }, + { + "epoch": 0.8382983940174084, + "grad_norm": 1.9190466930017434, + "learning_rate": 6.701625288527152e-07, + "loss": 0.6258, + "step": 27352 + }, + { + "epoch": 0.8383290425401496, + "grad_norm": 1.8209947339703387, + "learning_rate": 6.699143414902548e-07, + "loss": 0.5611, + "step": 27353 + }, + { + "epoch": 0.8383596910628908, + "grad_norm": 1.6885898346480057, + "learning_rate": 6.696661967932344e-07, + "loss": 0.5874, + "step": 27354 + }, + { + "epoch": 0.838390339585632, + "grad_norm": 2.000887543005191, + "learning_rate": 6.694180947641027e-07, + "loss": 0.6009, + "step": 27355 + }, + { + "epoch": 0.8384209881083732, + "grad_norm": 1.8642869318858795, + "learning_rate": 6.691700354053016e-07, + "loss": 0.5743, + "step": 27356 + }, + { + "epoch": 0.8384516366311143, + "grad_norm": 1.9062556011103744, + "learning_rate": 6.689220187192774e-07, + "loss": 0.5536, + "step": 27357 + }, + { + "epoch": 0.8384822851538556, + "grad_norm": 1.764445343052996, + "learning_rate": 6.686740447084711e-07, + "loss": 0.4657, + "step": 27358 + }, + { + "epoch": 0.8385129336765967, + "grad_norm": 1.856235564544759, + "learning_rate": 6.684261133753283e-07, + "loss": 0.5677, + "step": 27359 + }, + { + "epoch": 0.838543582199338, + "grad_norm": 1.9481585950167368, + "learning_rate": 6.681782247222923e-07, + "loss": 0.5841, + "step": 27360 + }, + { + "epoch": 0.8385742307220792, + "grad_norm": 1.8914919246171034, + "learning_rate": 6.679303787518032e-07, + "loss": 0.5548, + "step": 27361 + }, + { + "epoch": 0.8386048792448204, + "grad_norm": 1.912325355667656, + "learning_rate": 6.676825754663046e-07, + "loss": 0.557, + "step": 27362 + }, + { + "epoch": 0.8386355277675616, + "grad_norm": 0.79939172376257, + "learning_rate": 6.674348148682391e-07, + "loss": 0.3974, + "step": 27363 + }, + { + "epoch": 0.8386661762903028, + "grad_norm": 1.9858427626946327, + "learning_rate": 6.671870969600458e-07, + "loss": 0.6826, + "step": 27364 + }, + { + "epoch": 0.838696824813044, + "grad_norm": 0.7863002808539167, + "learning_rate": 6.669394217441677e-07, + "loss": 0.4034, + "step": 27365 + }, + { + "epoch": 0.8387274733357852, + "grad_norm": 2.168434527181981, + "learning_rate": 6.666917892230434e-07, + 
"loss": 0.5727, + "step": 27366 + }, + { + "epoch": 0.8387581218585264, + "grad_norm": 2.0065262243225246, + "learning_rate": 6.664441993991134e-07, + "loss": 0.7276, + "step": 27367 + }, + { + "epoch": 0.8387887703812676, + "grad_norm": 1.9176305749408051, + "learning_rate": 6.661966522748187e-07, + "loss": 0.4956, + "step": 27368 + }, + { + "epoch": 0.8388194189040088, + "grad_norm": 1.9222356284327002, + "learning_rate": 6.659491478525959e-07, + "loss": 0.6156, + "step": 27369 + }, + { + "epoch": 0.8388500674267501, + "grad_norm": 1.7459982212876128, + "learning_rate": 6.657016861348852e-07, + "loss": 0.6049, + "step": 27370 + }, + { + "epoch": 0.8388807159494912, + "grad_norm": 1.796519241856739, + "learning_rate": 6.654542671241254e-07, + "loss": 0.542, + "step": 27371 + }, + { + "epoch": 0.8389113644722325, + "grad_norm": 0.7913699514589612, + "learning_rate": 6.652068908227543e-07, + "loss": 0.3895, + "step": 27372 + }, + { + "epoch": 0.8389420129949736, + "grad_norm": 1.9236671468039603, + "learning_rate": 6.649595572332068e-07, + "loss": 0.6079, + "step": 27373 + }, + { + "epoch": 0.8389726615177149, + "grad_norm": 1.6156435766912853, + "learning_rate": 6.647122663579236e-07, + "loss": 0.5525, + "step": 27374 + }, + { + "epoch": 0.839003310040456, + "grad_norm": 2.0045537873839256, + "learning_rate": 6.644650181993389e-07, + "loss": 0.5461, + "step": 27375 + }, + { + "epoch": 0.8390339585631973, + "grad_norm": 1.8291969424122552, + "learning_rate": 6.642178127598908e-07, + "loss": 0.6734, + "step": 27376 + }, + { + "epoch": 0.8390646070859384, + "grad_norm": 1.9531069775571328, + "learning_rate": 6.639706500420124e-07, + "loss": 0.5587, + "step": 27377 + }, + { + "epoch": 0.8390952556086797, + "grad_norm": 2.141781412740373, + "learning_rate": 6.637235300481409e-07, + "loss": 0.5682, + "step": 27378 + }, + { + "epoch": 0.8391259041314209, + "grad_norm": 2.0066956445099766, + "learning_rate": 6.63476452780712e-07, + "loss": 0.4861, + "step": 27379 + }, + { + "epoch": 0.8391565526541621, + "grad_norm": 0.7935202617263462, + "learning_rate": 6.632294182421584e-07, + "loss": 0.4089, + "step": 27380 + }, + { + "epoch": 0.8391872011769033, + "grad_norm": 1.7359260860023258, + "learning_rate": 6.629824264349144e-07, + "loss": 0.549, + "step": 27381 + }, + { + "epoch": 0.8392178496996445, + "grad_norm": 1.9802353152777172, + "learning_rate": 6.627354773614159e-07, + "loss": 0.5691, + "step": 27382 + }, + { + "epoch": 0.8392484982223857, + "grad_norm": 1.7427574329022775, + "learning_rate": 6.624885710240931e-07, + "loss": 0.6125, + "step": 27383 + }, + { + "epoch": 0.8392791467451269, + "grad_norm": 1.8855209724902138, + "learning_rate": 6.6224170742538e-07, + "loss": 0.5418, + "step": 27384 + }, + { + "epoch": 0.8393097952678681, + "grad_norm": 1.8350375124018412, + "learning_rate": 6.619948865677107e-07, + "loss": 0.5469, + "step": 27385 + }, + { + "epoch": 0.8393404437906093, + "grad_norm": 1.8116306657963976, + "learning_rate": 6.617481084535143e-07, + "loss": 0.5606, + "step": 27386 + }, + { + "epoch": 0.8393710923133505, + "grad_norm": 1.9609173820468575, + "learning_rate": 6.615013730852249e-07, + "loss": 0.5922, + "step": 27387 + }, + { + "epoch": 0.8394017408360916, + "grad_norm": 0.772749483657047, + "learning_rate": 6.612546804652714e-07, + "loss": 0.3857, + "step": 27388 + }, + { + "epoch": 0.8394323893588329, + "grad_norm": 0.8127450458393577, + "learning_rate": 6.610080305960859e-07, + "loss": 0.4057, + "step": 27389 + }, + { + "epoch": 0.8394630378815741, + "grad_norm": 
2.2104939807673216, + "learning_rate": 6.607614234800997e-07, + "loss": 0.5583, + "step": 27390 + }, + { + "epoch": 0.8394936864043153, + "grad_norm": 1.7835263929923608, + "learning_rate": 6.605148591197396e-07, + "loss": 0.5412, + "step": 27391 + }, + { + "epoch": 0.8395243349270565, + "grad_norm": 0.791329892590439, + "learning_rate": 6.602683375174374e-07, + "loss": 0.4135, + "step": 27392 + }, + { + "epoch": 0.8395549834497977, + "grad_norm": 1.915585301940192, + "learning_rate": 6.600218586756229e-07, + "loss": 0.5877, + "step": 27393 + }, + { + "epoch": 0.8395856319725389, + "grad_norm": 1.8853566902697259, + "learning_rate": 6.597754225967223e-07, + "loss": 0.7321, + "step": 27394 + }, + { + "epoch": 0.8396162804952801, + "grad_norm": 0.7963256207833367, + "learning_rate": 6.595290292831647e-07, + "loss": 0.4051, + "step": 27395 + }, + { + "epoch": 0.8396469290180213, + "grad_norm": 1.8244341840850933, + "learning_rate": 6.592826787373791e-07, + "loss": 0.5662, + "step": 27396 + }, + { + "epoch": 0.8396775775407626, + "grad_norm": 2.1386053791827853, + "learning_rate": 6.590363709617914e-07, + "loss": 0.5256, + "step": 27397 + }, + { + "epoch": 0.8397082260635037, + "grad_norm": 1.7348184988387505, + "learning_rate": 6.587901059588298e-07, + "loss": 0.5182, + "step": 27398 + }, + { + "epoch": 0.839738874586245, + "grad_norm": 1.7285350512352684, + "learning_rate": 6.585438837309188e-07, + "loss": 0.5555, + "step": 27399 + }, + { + "epoch": 0.8397695231089861, + "grad_norm": 0.7978463804530903, + "learning_rate": 6.582977042804856e-07, + "loss": 0.4012, + "step": 27400 + }, + { + "epoch": 0.8398001716317274, + "grad_norm": 1.9070956411467663, + "learning_rate": 6.580515676099575e-07, + "loss": 0.5583, + "step": 27401 + }, + { + "epoch": 0.8398308201544685, + "grad_norm": 0.7831108992053814, + "learning_rate": 6.578054737217565e-07, + "loss": 0.3772, + "step": 27402 + }, + { + "epoch": 0.8398614686772098, + "grad_norm": 1.7779749631443789, + "learning_rate": 6.575594226183096e-07, + "loss": 0.499, + "step": 27403 + }, + { + "epoch": 0.8398921171999509, + "grad_norm": 1.7680222486002204, + "learning_rate": 6.573134143020421e-07, + "loss": 0.5617, + "step": 27404 + }, + { + "epoch": 0.8399227657226922, + "grad_norm": 1.964429662978589, + "learning_rate": 6.570674487753753e-07, + "loss": 0.5575, + "step": 27405 + }, + { + "epoch": 0.8399534142454333, + "grad_norm": 0.8042011276669911, + "learning_rate": 6.56821526040734e-07, + "loss": 0.3953, + "step": 27406 + }, + { + "epoch": 0.8399840627681746, + "grad_norm": 1.9372380242466556, + "learning_rate": 6.565756461005429e-07, + "loss": 0.592, + "step": 27407 + }, + { + "epoch": 0.8400147112909158, + "grad_norm": 2.162517674610064, + "learning_rate": 6.563298089572218e-07, + "loss": 0.6165, + "step": 27408 + }, + { + "epoch": 0.840045359813657, + "grad_norm": 1.7905504060237747, + "learning_rate": 6.560840146131958e-07, + "loss": 0.5477, + "step": 27409 + }, + { + "epoch": 0.8400760083363982, + "grad_norm": 1.707195996207775, + "learning_rate": 6.558382630708843e-07, + "loss": 0.4856, + "step": 27410 + }, + { + "epoch": 0.8401066568591394, + "grad_norm": 1.5352117670947798, + "learning_rate": 6.555925543327097e-07, + "loss": 0.5033, + "step": 27411 + }, + { + "epoch": 0.8401373053818806, + "grad_norm": 1.777530707465699, + "learning_rate": 6.553468884010949e-07, + "loss": 0.5389, + "step": 27412 + }, + { + "epoch": 0.8401679539046218, + "grad_norm": 1.8069718778110655, + "learning_rate": 6.551012652784572e-07, + "loss": 0.4622, + "step": 
27413 + }, + { + "epoch": 0.840198602427363, + "grad_norm": 1.9965163345924586, + "learning_rate": 6.548556849672183e-07, + "loss": 0.5806, + "step": 27414 + }, + { + "epoch": 0.8402292509501043, + "grad_norm": 1.8919004655157114, + "learning_rate": 6.546101474697996e-07, + "loss": 0.5458, + "step": 27415 + }, + { + "epoch": 0.8402598994728454, + "grad_norm": 0.795070923671218, + "learning_rate": 6.543646527886177e-07, + "loss": 0.3977, + "step": 27416 + }, + { + "epoch": 0.8402905479955867, + "grad_norm": 1.884297951120508, + "learning_rate": 6.541192009260938e-07, + "loss": 0.6213, + "step": 27417 + }, + { + "epoch": 0.8403211965183278, + "grad_norm": 2.1793846029586086, + "learning_rate": 6.538737918846444e-07, + "loss": 0.5362, + "step": 27418 + }, + { + "epoch": 0.840351845041069, + "grad_norm": 1.7771914318396982, + "learning_rate": 6.536284256666881e-07, + "loss": 0.6596, + "step": 27419 + }, + { + "epoch": 0.8403824935638102, + "grad_norm": 1.6728708630166385, + "learning_rate": 6.533831022746445e-07, + "loss": 0.5394, + "step": 27420 + }, + { + "epoch": 0.8404131420865514, + "grad_norm": 2.0387323489757976, + "learning_rate": 6.53137821710928e-07, + "loss": 0.5276, + "step": 27421 + }, + { + "epoch": 0.8404437906092926, + "grad_norm": 2.086171277655997, + "learning_rate": 6.52892583977957e-07, + "loss": 0.7222, + "step": 27422 + }, + { + "epoch": 0.8404744391320338, + "grad_norm": 1.7681511073467933, + "learning_rate": 6.526473890781487e-07, + "loss": 0.5767, + "step": 27423 + }, + { + "epoch": 0.840505087654775, + "grad_norm": 1.9336985255745196, + "learning_rate": 6.524022370139166e-07, + "loss": 0.6991, + "step": 27424 + }, + { + "epoch": 0.8405357361775162, + "grad_norm": 1.965203245359283, + "learning_rate": 6.521571277876782e-07, + "loss": 0.5929, + "step": 27425 + }, + { + "epoch": 0.8405663847002575, + "grad_norm": 1.985933590434382, + "learning_rate": 6.519120614018487e-07, + "loss": 0.5514, + "step": 27426 + }, + { + "epoch": 0.8405970332229986, + "grad_norm": 1.9902124779378663, + "learning_rate": 6.516670378588413e-07, + "loss": 0.5209, + "step": 27427 + }, + { + "epoch": 0.8406276817457399, + "grad_norm": 1.7617708687176135, + "learning_rate": 6.514220571610719e-07, + "loss": 0.5937, + "step": 27428 + }, + { + "epoch": 0.840658330268481, + "grad_norm": 1.8660434761101825, + "learning_rate": 6.511771193109517e-07, + "loss": 0.5864, + "step": 27429 + }, + { + "epoch": 0.8406889787912223, + "grad_norm": 0.8431198173393178, + "learning_rate": 6.509322243108978e-07, + "loss": 0.4087, + "step": 27430 + }, + { + "epoch": 0.8407196273139634, + "grad_norm": 1.9045697337191614, + "learning_rate": 6.506873721633223e-07, + "loss": 0.587, + "step": 27431 + }, + { + "epoch": 0.8407502758367047, + "grad_norm": 2.0307215211836653, + "learning_rate": 6.504425628706352e-07, + "loss": 0.6056, + "step": 27432 + }, + { + "epoch": 0.8407809243594458, + "grad_norm": 2.1153164138370775, + "learning_rate": 6.501977964352502e-07, + "loss": 0.6121, + "step": 27433 + }, + { + "epoch": 0.8408115728821871, + "grad_norm": 1.6799820432327828, + "learning_rate": 6.499530728595809e-07, + "loss": 0.531, + "step": 27434 + }, + { + "epoch": 0.8408422214049283, + "grad_norm": 1.9274633926437361, + "learning_rate": 6.497083921460356e-07, + "loss": 0.5128, + "step": 27435 + }, + { + "epoch": 0.8408728699276695, + "grad_norm": 1.7080989595693472, + "learning_rate": 6.494637542970267e-07, + "loss": 0.539, + "step": 27436 + }, + { + "epoch": 0.8409035184504107, + "grad_norm": 2.0027935538651755, + 
"learning_rate": 6.492191593149643e-07, + "loss": 0.61, + "step": 27437 + }, + { + "epoch": 0.8409341669731519, + "grad_norm": 2.2992715411142184, + "learning_rate": 6.489746072022601e-07, + "loss": 0.5774, + "step": 27438 + }, + { + "epoch": 0.8409648154958931, + "grad_norm": 1.8068702944195778, + "learning_rate": 6.487300979613226e-07, + "loss": 0.5599, + "step": 27439 + }, + { + "epoch": 0.8409954640186343, + "grad_norm": 1.9197103898594, + "learning_rate": 6.484856315945581e-07, + "loss": 0.5698, + "step": 27440 + }, + { + "epoch": 0.8410261125413755, + "grad_norm": 1.8844364010588295, + "learning_rate": 6.482412081043804e-07, + "loss": 0.546, + "step": 27441 + }, + { + "epoch": 0.8410567610641168, + "grad_norm": 1.7782227928068433, + "learning_rate": 6.479968274931952e-07, + "loss": 0.5241, + "step": 27442 + }, + { + "epoch": 0.8410874095868579, + "grad_norm": 2.0258376067567974, + "learning_rate": 6.477524897634102e-07, + "loss": 0.5295, + "step": 27443 + }, + { + "epoch": 0.8411180581095992, + "grad_norm": 1.9004356503354134, + "learning_rate": 6.475081949174334e-07, + "loss": 0.5655, + "step": 27444 + }, + { + "epoch": 0.8411487066323403, + "grad_norm": 1.6303173354175697, + "learning_rate": 6.472639429576732e-07, + "loss": 0.5419, + "step": 27445 + }, + { + "epoch": 0.8411793551550816, + "grad_norm": 2.0404304968691407, + "learning_rate": 6.470197338865336e-07, + "loss": 0.6846, + "step": 27446 + }, + { + "epoch": 0.8412100036778227, + "grad_norm": 1.6588725643951798, + "learning_rate": 6.467755677064225e-07, + "loss": 0.5184, + "step": 27447 + }, + { + "epoch": 0.841240652200564, + "grad_norm": 1.7079431937254517, + "learning_rate": 6.465314444197457e-07, + "loss": 0.5612, + "step": 27448 + }, + { + "epoch": 0.8412713007233051, + "grad_norm": 2.1022724557101977, + "learning_rate": 6.462873640289091e-07, + "loss": 0.6085, + "step": 27449 + }, + { + "epoch": 0.8413019492460463, + "grad_norm": 1.9606966368315786, + "learning_rate": 6.460433265363175e-07, + "loss": 0.5878, + "step": 27450 + }, + { + "epoch": 0.8413325977687875, + "grad_norm": 1.8540245670200104, + "learning_rate": 6.457993319443723e-07, + "loss": 0.5432, + "step": 27451 + }, + { + "epoch": 0.8413632462915287, + "grad_norm": 1.798406163570875, + "learning_rate": 6.455553802554832e-07, + "loss": 0.583, + "step": 27452 + }, + { + "epoch": 0.84139389481427, + "grad_norm": 2.8597423454045923, + "learning_rate": 6.453114714720504e-07, + "loss": 0.5739, + "step": 27453 + }, + { + "epoch": 0.8414245433370111, + "grad_norm": 2.2262065970299134, + "learning_rate": 6.450676055964772e-07, + "loss": 0.6077, + "step": 27454 + }, + { + "epoch": 0.8414551918597524, + "grad_norm": 1.9190844785266743, + "learning_rate": 6.448237826311671e-07, + "loss": 0.4577, + "step": 27455 + }, + { + "epoch": 0.8414858403824935, + "grad_norm": 0.8386856418913603, + "learning_rate": 6.445800025785226e-07, + "loss": 0.4162, + "step": 27456 + }, + { + "epoch": 0.8415164889052348, + "grad_norm": 0.7970353951384314, + "learning_rate": 6.443362654409469e-07, + "loss": 0.3929, + "step": 27457 + }, + { + "epoch": 0.8415471374279759, + "grad_norm": 0.8067082261824937, + "learning_rate": 6.44092571220839e-07, + "loss": 0.4157, + "step": 27458 + }, + { + "epoch": 0.8415777859507172, + "grad_norm": 2.0316500090301535, + "learning_rate": 6.438489199206017e-07, + "loss": 0.5069, + "step": 27459 + }, + { + "epoch": 0.8416084344734583, + "grad_norm": 1.8752670961264835, + "learning_rate": 6.436053115426366e-07, + "loss": 0.5985, + "step": 27460 + }, + { + "epoch": 
0.8416390829961996, + "grad_norm": 1.8643982376791381, + "learning_rate": 6.433617460893432e-07, + "loss": 0.5464, + "step": 27461 + }, + { + "epoch": 0.8416697315189408, + "grad_norm": 1.772741347388297, + "learning_rate": 6.431182235631201e-07, + "loss": 0.4789, + "step": 27462 + }, + { + "epoch": 0.841700380041682, + "grad_norm": 1.8668312877347382, + "learning_rate": 6.428747439663674e-07, + "loss": 0.5727, + "step": 27463 + }, + { + "epoch": 0.8417310285644232, + "grad_norm": 1.850154427036235, + "learning_rate": 6.42631307301485e-07, + "loss": 0.5853, + "step": 27464 + }, + { + "epoch": 0.8417616770871644, + "grad_norm": 1.8481080220330453, + "learning_rate": 6.423879135708728e-07, + "loss": 0.6709, + "step": 27465 + }, + { + "epoch": 0.8417923256099056, + "grad_norm": 1.8569052610717052, + "learning_rate": 6.421445627769258e-07, + "loss": 0.6294, + "step": 27466 + }, + { + "epoch": 0.8418229741326468, + "grad_norm": 1.8503826943718167, + "learning_rate": 6.419012549220433e-07, + "loss": 0.6287, + "step": 27467 + }, + { + "epoch": 0.841853622655388, + "grad_norm": 1.8613813340597456, + "learning_rate": 6.41657990008624e-07, + "loss": 0.5599, + "step": 27468 + }, + { + "epoch": 0.8418842711781293, + "grad_norm": 1.809033970289528, + "learning_rate": 6.414147680390637e-07, + "loss": 0.5188, + "step": 27469 + }, + { + "epoch": 0.8419149197008704, + "grad_norm": 2.179944460560894, + "learning_rate": 6.411715890157572e-07, + "loss": 0.6101, + "step": 27470 + }, + { + "epoch": 0.8419455682236117, + "grad_norm": 0.8011771784099166, + "learning_rate": 6.409284529411036e-07, + "loss": 0.382, + "step": 27471 + }, + { + "epoch": 0.8419762167463528, + "grad_norm": 2.025088669750203, + "learning_rate": 6.406853598174978e-07, + "loss": 0.6747, + "step": 27472 + }, + { + "epoch": 0.8420068652690941, + "grad_norm": 1.8928489442593897, + "learning_rate": 6.404423096473334e-07, + "loss": 0.5751, + "step": 27473 + }, + { + "epoch": 0.8420375137918352, + "grad_norm": 1.8845265359144745, + "learning_rate": 6.401993024330061e-07, + "loss": 0.5075, + "step": 27474 + }, + { + "epoch": 0.8420681623145765, + "grad_norm": 1.9594035166949166, + "learning_rate": 6.399563381769108e-07, + "loss": 0.5779, + "step": 27475 + }, + { + "epoch": 0.8420988108373176, + "grad_norm": 1.7144015032447628, + "learning_rate": 6.397134168814422e-07, + "loss": 0.5846, + "step": 27476 + }, + { + "epoch": 0.8421294593600589, + "grad_norm": 0.7669105144963545, + "learning_rate": 6.394705385489925e-07, + "loss": 0.3862, + "step": 27477 + }, + { + "epoch": 0.8421601078828, + "grad_norm": 1.9607181678034347, + "learning_rate": 6.392277031819544e-07, + "loss": 0.685, + "step": 27478 + }, + { + "epoch": 0.8421907564055413, + "grad_norm": 1.6600774554730993, + "learning_rate": 6.389849107827229e-07, + "loss": 0.5602, + "step": 27479 + }, + { + "epoch": 0.8422214049282825, + "grad_norm": 1.8648859227878074, + "learning_rate": 6.387421613536887e-07, + "loss": 0.5627, + "step": 27480 + }, + { + "epoch": 0.8422520534510236, + "grad_norm": 1.7159593181272994, + "learning_rate": 6.384994548972423e-07, + "loss": 0.5067, + "step": 27481 + }, + { + "epoch": 0.8422827019737649, + "grad_norm": 0.8108367309107255, + "learning_rate": 6.382567914157784e-07, + "loss": 0.3912, + "step": 27482 + }, + { + "epoch": 0.842313350496506, + "grad_norm": 0.7939335460649093, + "learning_rate": 6.38014170911685e-07, + "loss": 0.3936, + "step": 27483 + }, + { + "epoch": 0.8423439990192473, + "grad_norm": 2.1736161619311716, + "learning_rate": 6.377715933873557e-07, 
+ "loss": 0.5314, + "step": 27484 + }, + { + "epoch": 0.8423746475419884, + "grad_norm": 2.127861971952592, + "learning_rate": 6.375290588451777e-07, + "loss": 0.5966, + "step": 27485 + }, + { + "epoch": 0.8424052960647297, + "grad_norm": 1.870583074137742, + "learning_rate": 6.372865672875416e-07, + "loss": 0.5102, + "step": 27486 + }, + { + "epoch": 0.8424359445874708, + "grad_norm": 0.8175839675291952, + "learning_rate": 6.370441187168392e-07, + "loss": 0.4077, + "step": 27487 + }, + { + "epoch": 0.8424665931102121, + "grad_norm": 1.7852870622406751, + "learning_rate": 6.368017131354559e-07, + "loss": 0.5738, + "step": 27488 + }, + { + "epoch": 0.8424972416329533, + "grad_norm": 2.0406956676140244, + "learning_rate": 6.365593505457812e-07, + "loss": 0.6285, + "step": 27489 + }, + { + "epoch": 0.8425278901556945, + "grad_norm": 2.082287710749376, + "learning_rate": 6.363170309502054e-07, + "loss": 0.5409, + "step": 27490 + }, + { + "epoch": 0.8425585386784357, + "grad_norm": 2.1142005879437726, + "learning_rate": 6.360747543511131e-07, + "loss": 0.6184, + "step": 27491 + }, + { + "epoch": 0.8425891872011769, + "grad_norm": 2.174246981570346, + "learning_rate": 6.35832520750893e-07, + "loss": 0.5231, + "step": 27492 + }, + { + "epoch": 0.8426198357239181, + "grad_norm": 1.9162158088487409, + "learning_rate": 6.355903301519323e-07, + "loss": 0.5514, + "step": 27493 + }, + { + "epoch": 0.8426504842466593, + "grad_norm": 1.8503152689366047, + "learning_rate": 6.35348182556616e-07, + "loss": 0.5868, + "step": 27494 + }, + { + "epoch": 0.8426811327694005, + "grad_norm": 1.7803922427864762, + "learning_rate": 6.351060779673318e-07, + "loss": 0.5423, + "step": 27495 + }, + { + "epoch": 0.8427117812921417, + "grad_norm": 1.9419240774486495, + "learning_rate": 6.348640163864633e-07, + "loss": 0.589, + "step": 27496 + }, + { + "epoch": 0.8427424298148829, + "grad_norm": 0.7852189902059026, + "learning_rate": 6.346219978163964e-07, + "loss": 0.3992, + "step": 27497 + }, + { + "epoch": 0.8427730783376242, + "grad_norm": 1.9971739416619594, + "learning_rate": 6.34380022259517e-07, + "loss": 0.5598, + "step": 27498 + }, + { + "epoch": 0.8428037268603653, + "grad_norm": 2.043721228229904, + "learning_rate": 6.34138089718207e-07, + "loss": 0.6432, + "step": 27499 + }, + { + "epoch": 0.8428343753831066, + "grad_norm": 1.8038696300882253, + "learning_rate": 6.338962001948512e-07, + "loss": 0.4973, + "step": 27500 + }, + { + "epoch": 0.8428650239058477, + "grad_norm": 1.663003647222235, + "learning_rate": 6.336543536918349e-07, + "loss": 0.5528, + "step": 27501 + }, + { + "epoch": 0.842895672428589, + "grad_norm": 1.7564884590029965, + "learning_rate": 6.334125502115385e-07, + "loss": 0.5708, + "step": 27502 + }, + { + "epoch": 0.8429263209513301, + "grad_norm": 1.9223172316652817, + "learning_rate": 6.331707897563449e-07, + "loss": 0.5349, + "step": 27503 + }, + { + "epoch": 0.8429569694740714, + "grad_norm": 0.8044549935754616, + "learning_rate": 6.32929072328638e-07, + "loss": 0.3929, + "step": 27504 + }, + { + "epoch": 0.8429876179968125, + "grad_norm": 1.833331573234653, + "learning_rate": 6.326873979307973e-07, + "loss": 0.5357, + "step": 27505 + }, + { + "epoch": 0.8430182665195538, + "grad_norm": 1.8146689813109003, + "learning_rate": 6.324457665652062e-07, + "loss": 0.4302, + "step": 27506 + }, + { + "epoch": 0.843048915042295, + "grad_norm": 1.9684643283681786, + "learning_rate": 6.322041782342437e-07, + "loss": 0.6375, + "step": 27507 + }, + { + "epoch": 0.8430795635650362, + "grad_norm": 
0.817231055392326, + "learning_rate": 6.319626329402906e-07, + "loss": 0.3991, + "step": 27508 + }, + { + "epoch": 0.8431102120877774, + "grad_norm": 1.8359027048941345, + "learning_rate": 6.317211306857285e-07, + "loss": 0.5674, + "step": 27509 + }, + { + "epoch": 0.8431408606105186, + "grad_norm": 2.0925590828002574, + "learning_rate": 6.314796714729343e-07, + "loss": 0.6662, + "step": 27510 + }, + { + "epoch": 0.8431715091332598, + "grad_norm": 1.8243629591781942, + "learning_rate": 6.312382553042889e-07, + "loss": 0.5934, + "step": 27511 + }, + { + "epoch": 0.8432021576560009, + "grad_norm": 1.8624311563878952, + "learning_rate": 6.309968821821721e-07, + "loss": 0.5275, + "step": 27512 + }, + { + "epoch": 0.8432328061787422, + "grad_norm": 0.8375342096750676, + "learning_rate": 6.307555521089598e-07, + "loss": 0.4139, + "step": 27513 + }, + { + "epoch": 0.8432634547014833, + "grad_norm": 1.972547173337922, + "learning_rate": 6.305142650870316e-07, + "loss": 0.6349, + "step": 27514 + }, + { + "epoch": 0.8432941032242246, + "grad_norm": 1.900514348437833, + "learning_rate": 6.302730211187635e-07, + "loss": 0.5243, + "step": 27515 + }, + { + "epoch": 0.8433247517469658, + "grad_norm": 1.607309909534858, + "learning_rate": 6.300318202065337e-07, + "loss": 0.427, + "step": 27516 + }, + { + "epoch": 0.843355400269707, + "grad_norm": 1.7606678402340918, + "learning_rate": 6.297906623527189e-07, + "loss": 0.6262, + "step": 27517 + }, + { + "epoch": 0.8433860487924482, + "grad_norm": 2.0116332733975235, + "learning_rate": 6.295495475596941e-07, + "loss": 0.6338, + "step": 27518 + }, + { + "epoch": 0.8434166973151894, + "grad_norm": 1.9947774829309566, + "learning_rate": 6.293084758298356e-07, + "loss": 0.5532, + "step": 27519 + }, + { + "epoch": 0.8434473458379306, + "grad_norm": 2.0240460014720147, + "learning_rate": 6.290674471655206e-07, + "loss": 0.5498, + "step": 27520 + }, + { + "epoch": 0.8434779943606718, + "grad_norm": 1.7954115428133073, + "learning_rate": 6.288264615691209e-07, + "loss": 0.5442, + "step": 27521 + }, + { + "epoch": 0.843508642883413, + "grad_norm": 1.8497955418968104, + "learning_rate": 6.285855190430129e-07, + "loss": 0.4707, + "step": 27522 + }, + { + "epoch": 0.8435392914061542, + "grad_norm": 1.8557216015245186, + "learning_rate": 6.28344619589571e-07, + "loss": 0.5127, + "step": 27523 + }, + { + "epoch": 0.8435699399288954, + "grad_norm": 1.8160714555645654, + "learning_rate": 6.28103763211167e-07, + "loss": 0.518, + "step": 27524 + }, + { + "epoch": 0.8436005884516367, + "grad_norm": 1.8161347316533993, + "learning_rate": 6.278629499101763e-07, + "loss": 0.5456, + "step": 27525 + }, + { + "epoch": 0.8436312369743778, + "grad_norm": 1.5201208140370486, + "learning_rate": 6.276221796889692e-07, + "loss": 0.4868, + "step": 27526 + }, + { + "epoch": 0.8436618854971191, + "grad_norm": 1.8556156171590505, + "learning_rate": 6.2738145254992e-07, + "loss": 0.6415, + "step": 27527 + }, + { + "epoch": 0.8436925340198602, + "grad_norm": 1.7936393992724162, + "learning_rate": 6.271407684954012e-07, + "loss": 0.5553, + "step": 27528 + }, + { + "epoch": 0.8437231825426015, + "grad_norm": 0.7845988159658914, + "learning_rate": 6.269001275277819e-07, + "loss": 0.4005, + "step": 27529 + }, + { + "epoch": 0.8437538310653426, + "grad_norm": 1.8687103503109002, + "learning_rate": 6.266595296494349e-07, + "loss": 0.5686, + "step": 27530 + }, + { + "epoch": 0.8437844795880839, + "grad_norm": 0.7962057042931103, + "learning_rate": 6.264189748627314e-07, + "loss": 0.4096, + "step": 
27531 + }, + { + "epoch": 0.843815128110825, + "grad_norm": 1.721669496532375, + "learning_rate": 6.261784631700396e-07, + "loss": 0.5721, + "step": 27532 + }, + { + "epoch": 0.8438457766335663, + "grad_norm": 1.847543727287113, + "learning_rate": 6.25937994573731e-07, + "loss": 0.4385, + "step": 27533 + }, + { + "epoch": 0.8438764251563075, + "grad_norm": 1.941462270260146, + "learning_rate": 6.256975690761746e-07, + "loss": 0.5649, + "step": 27534 + }, + { + "epoch": 0.8439070736790487, + "grad_norm": 0.7955177293893299, + "learning_rate": 6.254571866797388e-07, + "loss": 0.4186, + "step": 27535 + }, + { + "epoch": 0.8439377222017899, + "grad_norm": 1.9404414350480077, + "learning_rate": 6.252168473867937e-07, + "loss": 0.5653, + "step": 27536 + }, + { + "epoch": 0.8439683707245311, + "grad_norm": 2.0715789728419813, + "learning_rate": 6.249765511997041e-07, + "loss": 0.6262, + "step": 27537 + }, + { + "epoch": 0.8439990192472723, + "grad_norm": 2.0692076992848647, + "learning_rate": 6.247362981208422e-07, + "loss": 0.5542, + "step": 27538 + }, + { + "epoch": 0.8440296677700135, + "grad_norm": 1.6933457998212949, + "learning_rate": 6.24496088152573e-07, + "loss": 0.4897, + "step": 27539 + }, + { + "epoch": 0.8440603162927547, + "grad_norm": 0.7768785321986683, + "learning_rate": 6.242559212972621e-07, + "loss": 0.4078, + "step": 27540 + }, + { + "epoch": 0.844090964815496, + "grad_norm": 1.9443066250285237, + "learning_rate": 6.240157975572775e-07, + "loss": 0.5759, + "step": 27541 + }, + { + "epoch": 0.8441216133382371, + "grad_norm": 2.0166592924501963, + "learning_rate": 6.237757169349856e-07, + "loss": 0.5735, + "step": 27542 + }, + { + "epoch": 0.8441522618609782, + "grad_norm": 0.8066772202795817, + "learning_rate": 6.235356794327507e-07, + "loss": 0.3862, + "step": 27543 + }, + { + "epoch": 0.8441829103837195, + "grad_norm": 1.9430968274436495, + "learning_rate": 6.232956850529381e-07, + "loss": 0.6254, + "step": 27544 + }, + { + "epoch": 0.8442135589064607, + "grad_norm": 0.7748626107654407, + "learning_rate": 6.230557337979126e-07, + "loss": 0.3799, + "step": 27545 + }, + { + "epoch": 0.8442442074292019, + "grad_norm": 1.7064225716806198, + "learning_rate": 6.228158256700407e-07, + "loss": 0.5326, + "step": 27546 + }, + { + "epoch": 0.8442748559519431, + "grad_norm": 1.8356842866395127, + "learning_rate": 6.225759606716841e-07, + "loss": 0.5422, + "step": 27547 + }, + { + "epoch": 0.8443055044746843, + "grad_norm": 1.7467184119284678, + "learning_rate": 6.223361388052041e-07, + "loss": 0.4946, + "step": 27548 + }, + { + "epoch": 0.8443361529974255, + "grad_norm": 1.7345143482233214, + "learning_rate": 6.22096360072969e-07, + "loss": 0.519, + "step": 27549 + }, + { + "epoch": 0.8443668015201667, + "grad_norm": 1.7027784152136196, + "learning_rate": 6.218566244773383e-07, + "loss": 0.5512, + "step": 27550 + }, + { + "epoch": 0.8443974500429079, + "grad_norm": 0.7734516451791712, + "learning_rate": 6.216169320206733e-07, + "loss": 0.3806, + "step": 27551 + }, + { + "epoch": 0.8444280985656492, + "grad_norm": 2.1413333256608444, + "learning_rate": 6.213772827053366e-07, + "loss": 0.5622, + "step": 27552 + }, + { + "epoch": 0.8444587470883903, + "grad_norm": 1.785124337951291, + "learning_rate": 6.211376765336913e-07, + "loss": 0.5927, + "step": 27553 + }, + { + "epoch": 0.8444893956111316, + "grad_norm": 1.9272066982421305, + "learning_rate": 6.20898113508096e-07, + "loss": 0.5238, + "step": 27554 + }, + { + "epoch": 0.8445200441338727, + "grad_norm": 1.904375454690035, + 
"learning_rate": 6.206585936309117e-07, + "loss": 0.5551, + "step": 27555 + }, + { + "epoch": 0.844550692656614, + "grad_norm": 2.2703597797438433, + "learning_rate": 6.204191169044987e-07, + "loss": 0.6663, + "step": 27556 + }, + { + "epoch": 0.8445813411793551, + "grad_norm": 0.7715704060389049, + "learning_rate": 6.201796833312179e-07, + "loss": 0.3845, + "step": 27557 + }, + { + "epoch": 0.8446119897020964, + "grad_norm": 2.015316752215406, + "learning_rate": 6.199402929134273e-07, + "loss": 0.5298, + "step": 27558 + }, + { + "epoch": 0.8446426382248375, + "grad_norm": 1.7546331672530653, + "learning_rate": 6.197009456534847e-07, + "loss": 0.5341, + "step": 27559 + }, + { + "epoch": 0.8446732867475788, + "grad_norm": 1.919588852069275, + "learning_rate": 6.194616415537496e-07, + "loss": 0.5916, + "step": 27560 + }, + { + "epoch": 0.84470393527032, + "grad_norm": 1.995304338694731, + "learning_rate": 6.192223806165809e-07, + "loss": 0.501, + "step": 27561 + }, + { + "epoch": 0.8447345837930612, + "grad_norm": 1.8968962647215828, + "learning_rate": 6.189831628443333e-07, + "loss": 0.5553, + "step": 27562 + }, + { + "epoch": 0.8447652323158024, + "grad_norm": 1.767804264328688, + "learning_rate": 6.187439882393659e-07, + "loss": 0.5461, + "step": 27563 + }, + { + "epoch": 0.8447958808385436, + "grad_norm": 1.6455310329641466, + "learning_rate": 6.185048568040347e-07, + "loss": 0.5776, + "step": 27564 + }, + { + "epoch": 0.8448265293612848, + "grad_norm": 1.9393455784610298, + "learning_rate": 6.182657685406979e-07, + "loss": 0.6124, + "step": 27565 + }, + { + "epoch": 0.844857177884026, + "grad_norm": 1.9887598796479191, + "learning_rate": 6.18026723451709e-07, + "loss": 0.6704, + "step": 27566 + }, + { + "epoch": 0.8448878264067672, + "grad_norm": 2.091697952551731, + "learning_rate": 6.177877215394218e-07, + "loss": 0.5568, + "step": 27567 + }, + { + "epoch": 0.8449184749295084, + "grad_norm": 2.1999232881863096, + "learning_rate": 6.17548762806196e-07, + "loss": 0.6039, + "step": 27568 + }, + { + "epoch": 0.8449491234522496, + "grad_norm": 2.006647770050431, + "learning_rate": 6.173098472543831e-07, + "loss": 0.5426, + "step": 27569 + }, + { + "epoch": 0.8449797719749909, + "grad_norm": 1.7588370152496557, + "learning_rate": 6.170709748863368e-07, + "loss": 0.5417, + "step": 27570 + }, + { + "epoch": 0.845010420497732, + "grad_norm": 2.0031671692996813, + "learning_rate": 6.168321457044119e-07, + "loss": 0.5371, + "step": 27571 + }, + { + "epoch": 0.8450410690204733, + "grad_norm": 1.8981685981265264, + "learning_rate": 6.165933597109608e-07, + "loss": 0.5593, + "step": 27572 + }, + { + "epoch": 0.8450717175432144, + "grad_norm": 1.9378216124561989, + "learning_rate": 6.163546169083384e-07, + "loss": 0.5249, + "step": 27573 + }, + { + "epoch": 0.8451023660659556, + "grad_norm": 1.9384873470857777, + "learning_rate": 6.161159172988939e-07, + "loss": 0.4928, + "step": 27574 + }, + { + "epoch": 0.8451330145886968, + "grad_norm": 1.866665325213252, + "learning_rate": 6.158772608849817e-07, + "loss": 0.5105, + "step": 27575 + }, + { + "epoch": 0.845163663111438, + "grad_norm": 1.7098863931110766, + "learning_rate": 6.156386476689529e-07, + "loss": 0.5319, + "step": 27576 + }, + { + "epoch": 0.8451943116341792, + "grad_norm": 0.7861496539998927, + "learning_rate": 6.154000776531588e-07, + "loss": 0.3794, + "step": 27577 + }, + { + "epoch": 0.8452249601569204, + "grad_norm": 1.9812826814705173, + "learning_rate": 6.151615508399472e-07, + "loss": 0.6315, + "step": 27578 + }, + { + "epoch": 
0.8452556086796617, + "grad_norm": 1.9037653716229186, + "learning_rate": 6.149230672316731e-07, + "loss": 0.6366, + "step": 27579 + }, + { + "epoch": 0.8452862572024028, + "grad_norm": 2.010433779803031, + "learning_rate": 6.146846268306839e-07, + "loss": 0.5959, + "step": 27580 + }, + { + "epoch": 0.8453169057251441, + "grad_norm": 1.8959404245222626, + "learning_rate": 6.144462296393277e-07, + "loss": 0.5362, + "step": 27581 + }, + { + "epoch": 0.8453475542478852, + "grad_norm": 1.9338537494654013, + "learning_rate": 6.142078756599551e-07, + "loss": 0.5963, + "step": 27582 + }, + { + "epoch": 0.8453782027706265, + "grad_norm": 2.015172596615257, + "learning_rate": 6.139695648949145e-07, + "loss": 0.5863, + "step": 27583 + }, + { + "epoch": 0.8454088512933676, + "grad_norm": 0.7384442900369069, + "learning_rate": 6.137312973465553e-07, + "loss": 0.3864, + "step": 27584 + }, + { + "epoch": 0.8454394998161089, + "grad_norm": 2.0965166719660524, + "learning_rate": 6.134930730172223e-07, + "loss": 0.6322, + "step": 27585 + }, + { + "epoch": 0.84547014833885, + "grad_norm": 1.922037570765306, + "learning_rate": 6.132548919092652e-07, + "loss": 0.6477, + "step": 27586 + }, + { + "epoch": 0.8455007968615913, + "grad_norm": 0.7708131377335563, + "learning_rate": 6.130167540250304e-07, + "loss": 0.3938, + "step": 27587 + }, + { + "epoch": 0.8455314453843324, + "grad_norm": 1.8525139059829883, + "learning_rate": 6.127786593668644e-07, + "loss": 0.452, + "step": 27588 + }, + { + "epoch": 0.8455620939070737, + "grad_norm": 1.9614217755221601, + "learning_rate": 6.125406079371104e-07, + "loss": 0.5815, + "step": 27589 + }, + { + "epoch": 0.8455927424298149, + "grad_norm": 4.3277183165194275, + "learning_rate": 6.123025997381182e-07, + "loss": 0.512, + "step": 27590 + }, + { + "epoch": 0.8456233909525561, + "grad_norm": 1.6729328106347234, + "learning_rate": 6.120646347722304e-07, + "loss": 0.5776, + "step": 27591 + }, + { + "epoch": 0.8456540394752973, + "grad_norm": 1.6851851740494104, + "learning_rate": 6.118267130417938e-07, + "loss": 0.5496, + "step": 27592 + }, + { + "epoch": 0.8456846879980385, + "grad_norm": 2.115514995683307, + "learning_rate": 6.115888345491499e-07, + "loss": 0.5749, + "step": 27593 + }, + { + "epoch": 0.8457153365207797, + "grad_norm": 0.829417990870581, + "learning_rate": 6.113509992966443e-07, + "loss": 0.423, + "step": 27594 + }, + { + "epoch": 0.8457459850435209, + "grad_norm": 1.731030428175851, + "learning_rate": 6.11113207286621e-07, + "loss": 0.5061, + "step": 27595 + }, + { + "epoch": 0.8457766335662621, + "grad_norm": 1.7901000183990088, + "learning_rate": 6.108754585214211e-07, + "loss": 0.5719, + "step": 27596 + }, + { + "epoch": 0.8458072820890034, + "grad_norm": 1.9819892663192578, + "learning_rate": 6.106377530033885e-07, + "loss": 0.5507, + "step": 27597 + }, + { + "epoch": 0.8458379306117445, + "grad_norm": 2.001934381582051, + "learning_rate": 6.104000907348662e-07, + "loss": 0.6114, + "step": 27598 + }, + { + "epoch": 0.8458685791344858, + "grad_norm": 1.9060077914395757, + "learning_rate": 6.101624717181953e-07, + "loss": 0.6113, + "step": 27599 + }, + { + "epoch": 0.8458992276572269, + "grad_norm": 1.9728557024781357, + "learning_rate": 6.099248959557141e-07, + "loss": 0.5297, + "step": 27600 + }, + { + "epoch": 0.8459298761799682, + "grad_norm": 1.8288803764895463, + "learning_rate": 6.096873634497685e-07, + "loss": 0.5977, + "step": 27601 + }, + { + "epoch": 0.8459605247027093, + "grad_norm": 0.8005900757223732, + "learning_rate": 
6.094498742026955e-07, + "loss": 0.42, + "step": 27602 + }, + { + "epoch": 0.8459911732254506, + "grad_norm": 1.8909297338815019, + "learning_rate": 6.092124282168377e-07, + "loss": 0.4976, + "step": 27603 + }, + { + "epoch": 0.8460218217481917, + "grad_norm": 1.8881240975982496, + "learning_rate": 6.089750254945314e-07, + "loss": 0.628, + "step": 27604 + }, + { + "epoch": 0.8460524702709329, + "grad_norm": 1.875179302820956, + "learning_rate": 6.087376660381183e-07, + "loss": 0.588, + "step": 27605 + }, + { + "epoch": 0.8460831187936741, + "grad_norm": 1.7610402783370476, + "learning_rate": 6.085003498499376e-07, + "loss": 0.4686, + "step": 27606 + }, + { + "epoch": 0.8461137673164153, + "grad_norm": 2.0261384578231616, + "learning_rate": 6.08263076932325e-07, + "loss": 0.5739, + "step": 27607 + }, + { + "epoch": 0.8461444158391566, + "grad_norm": 0.8370044739451207, + "learning_rate": 6.080258472876205e-07, + "loss": 0.3862, + "step": 27608 + }, + { + "epoch": 0.8461750643618977, + "grad_norm": 1.9160581058493837, + "learning_rate": 6.077886609181621e-07, + "loss": 0.6686, + "step": 27609 + }, + { + "epoch": 0.846205712884639, + "grad_norm": 1.8410109638543368, + "learning_rate": 6.075515178262841e-07, + "loss": 0.5764, + "step": 27610 + }, + { + "epoch": 0.8462363614073801, + "grad_norm": 1.6036624371023407, + "learning_rate": 6.073144180143265e-07, + "loss": 0.5382, + "step": 27611 + }, + { + "epoch": 0.8462670099301214, + "grad_norm": 1.8537455467066957, + "learning_rate": 6.070773614846226e-07, + "loss": 0.5075, + "step": 27612 + }, + { + "epoch": 0.8462976584528625, + "grad_norm": 1.970070845668852, + "learning_rate": 6.068403482395097e-07, + "loss": 0.5697, + "step": 27613 + }, + { + "epoch": 0.8463283069756038, + "grad_norm": 1.757485874740266, + "learning_rate": 6.066033782813241e-07, + "loss": 0.6001, + "step": 27614 + }, + { + "epoch": 0.846358955498345, + "grad_norm": 1.9340803395502735, + "learning_rate": 6.063664516123979e-07, + "loss": 0.5651, + "step": 27615 + }, + { + "epoch": 0.8463896040210862, + "grad_norm": 1.7963325758524114, + "learning_rate": 6.061295682350676e-07, + "loss": 0.5074, + "step": 27616 + }, + { + "epoch": 0.8464202525438274, + "grad_norm": 1.832816850847356, + "learning_rate": 6.058927281516675e-07, + "loss": 0.5702, + "step": 27617 + }, + { + "epoch": 0.8464509010665686, + "grad_norm": 1.9282537453161148, + "learning_rate": 6.056559313645299e-07, + "loss": 0.5577, + "step": 27618 + }, + { + "epoch": 0.8464815495893098, + "grad_norm": 1.8483384062602102, + "learning_rate": 6.054191778759888e-07, + "loss": 0.5795, + "step": 27619 + }, + { + "epoch": 0.846512198112051, + "grad_norm": 2.080497655647621, + "learning_rate": 6.051824676883777e-07, + "loss": 0.5498, + "step": 27620 + }, + { + "epoch": 0.8465428466347922, + "grad_norm": 2.0845365591663017, + "learning_rate": 6.049458008040276e-07, + "loss": 0.6552, + "step": 27621 + }, + { + "epoch": 0.8465734951575334, + "grad_norm": 1.951664809751996, + "learning_rate": 6.047091772252716e-07, + "loss": 0.5201, + "step": 27622 + }, + { + "epoch": 0.8466041436802746, + "grad_norm": 1.9252341216911757, + "learning_rate": 6.044725969544401e-07, + "loss": 0.6039, + "step": 27623 + }, + { + "epoch": 0.8466347922030159, + "grad_norm": 0.781562638183723, + "learning_rate": 6.042360599938646e-07, + "loss": 0.3848, + "step": 27624 + }, + { + "epoch": 0.846665440725757, + "grad_norm": 0.8089965762215849, + "learning_rate": 6.039995663458765e-07, + "loss": 0.4039, + "step": 27625 + }, + { + "epoch": 0.8466960892484983, 
+ "grad_norm": 1.6660721245783892, + "learning_rate": 6.037631160128049e-07, + "loss": 0.5967, + "step": 27626 + }, + { + "epoch": 0.8467267377712394, + "grad_norm": 1.8918036074783093, + "learning_rate": 6.035267089969798e-07, + "loss": 0.4804, + "step": 27627 + }, + { + "epoch": 0.8467573862939807, + "grad_norm": 2.1607844513719248, + "learning_rate": 6.032903453007322e-07, + "loss": 0.6914, + "step": 27628 + }, + { + "epoch": 0.8467880348167218, + "grad_norm": 0.7845617010080653, + "learning_rate": 6.030540249263889e-07, + "loss": 0.3858, + "step": 27629 + }, + { + "epoch": 0.8468186833394631, + "grad_norm": 1.87790567647957, + "learning_rate": 6.028177478762793e-07, + "loss": 0.5584, + "step": 27630 + }, + { + "epoch": 0.8468493318622042, + "grad_norm": 1.7903563664038087, + "learning_rate": 6.025815141527319e-07, + "loss": 0.4638, + "step": 27631 + }, + { + "epoch": 0.8468799803849455, + "grad_norm": 1.7157770562525254, + "learning_rate": 6.023453237580734e-07, + "loss": 0.4863, + "step": 27632 + }, + { + "epoch": 0.8469106289076866, + "grad_norm": 2.0414261731386256, + "learning_rate": 6.021091766946329e-07, + "loss": 0.5576, + "step": 27633 + }, + { + "epoch": 0.8469412774304279, + "grad_norm": 2.0390604284893383, + "learning_rate": 6.018730729647343e-07, + "loss": 0.6065, + "step": 27634 + }, + { + "epoch": 0.8469719259531691, + "grad_norm": 1.8570362027288694, + "learning_rate": 6.01637012570706e-07, + "loss": 0.5348, + "step": 27635 + }, + { + "epoch": 0.8470025744759102, + "grad_norm": 1.9617465006228685, + "learning_rate": 6.014009955148748e-07, + "loss": 0.6099, + "step": 27636 + }, + { + "epoch": 0.8470332229986515, + "grad_norm": 1.7807935330779519, + "learning_rate": 6.011650217995634e-07, + "loss": 0.5973, + "step": 27637 + }, + { + "epoch": 0.8470638715213926, + "grad_norm": 0.8193431836334945, + "learning_rate": 6.009290914270993e-07, + "loss": 0.3967, + "step": 27638 + }, + { + "epoch": 0.8470945200441339, + "grad_norm": 1.7312810288194294, + "learning_rate": 6.00693204399807e-07, + "loss": 0.466, + "step": 27639 + }, + { + "epoch": 0.847125168566875, + "grad_norm": 1.8717714021776446, + "learning_rate": 6.004573607200087e-07, + "loss": 0.6312, + "step": 27640 + }, + { + "epoch": 0.8471558170896163, + "grad_norm": 1.9265630103477975, + "learning_rate": 6.002215603900302e-07, + "loss": 0.6034, + "step": 27641 + }, + { + "epoch": 0.8471864656123574, + "grad_norm": 1.8929371664261165, + "learning_rate": 5.999858034121958e-07, + "loss": 0.6114, + "step": 27642 + }, + { + "epoch": 0.8472171141350987, + "grad_norm": 0.7473659513146266, + "learning_rate": 5.997500897888253e-07, + "loss": 0.3754, + "step": 27643 + }, + { + "epoch": 0.8472477626578399, + "grad_norm": 1.8896956139597734, + "learning_rate": 5.995144195222447e-07, + "loss": 0.479, + "step": 27644 + }, + { + "epoch": 0.8472784111805811, + "grad_norm": 1.9389748956627195, + "learning_rate": 5.992787926147731e-07, + "loss": 0.5817, + "step": 27645 + }, + { + "epoch": 0.8473090597033223, + "grad_norm": 1.7354516961880286, + "learning_rate": 5.990432090687332e-07, + "loss": 0.6207, + "step": 27646 + }, + { + "epoch": 0.8473397082260635, + "grad_norm": 1.869896638643898, + "learning_rate": 5.98807668886448e-07, + "loss": 0.4861, + "step": 27647 + }, + { + "epoch": 0.8473703567488047, + "grad_norm": 1.7214583412064366, + "learning_rate": 5.985721720702359e-07, + "loss": 0.5253, + "step": 27648 + }, + { + "epoch": 0.8474010052715459, + "grad_norm": 0.8621378083086043, + "learning_rate": 5.983367186224182e-07, + "loss": 
0.417, + "step": 27649 + }, + { + "epoch": 0.8474316537942871, + "grad_norm": 1.6976453504221116, + "learning_rate": 5.981013085453158e-07, + "loss": 0.5107, + "step": 27650 + }, + { + "epoch": 0.8474623023170283, + "grad_norm": 0.760471320759579, + "learning_rate": 5.978659418412469e-07, + "loss": 0.3963, + "step": 27651 + }, + { + "epoch": 0.8474929508397695, + "grad_norm": 1.8325599094428884, + "learning_rate": 5.976306185125314e-07, + "loss": 0.5258, + "step": 27652 + }, + { + "epoch": 0.8475235993625108, + "grad_norm": 1.8367885112450961, + "learning_rate": 5.973953385614883e-07, + "loss": 0.5462, + "step": 27653 + }, + { + "epoch": 0.8475542478852519, + "grad_norm": 2.0206085867334647, + "learning_rate": 5.971601019904344e-07, + "loss": 0.613, + "step": 27654 + }, + { + "epoch": 0.8475848964079932, + "grad_norm": 1.686938974910816, + "learning_rate": 5.969249088016899e-07, + "loss": 0.5593, + "step": 27655 + }, + { + "epoch": 0.8476155449307343, + "grad_norm": 1.8187435945821393, + "learning_rate": 5.966897589975695e-07, + "loss": 0.5576, + "step": 27656 + }, + { + "epoch": 0.8476461934534756, + "grad_norm": 0.7575621373263949, + "learning_rate": 5.964546525803916e-07, + "loss": 0.3937, + "step": 27657 + }, + { + "epoch": 0.8476768419762167, + "grad_norm": 1.8737697127544015, + "learning_rate": 5.962195895524742e-07, + "loss": 0.5619, + "step": 27658 + }, + { + "epoch": 0.847707490498958, + "grad_norm": 1.9150798803780624, + "learning_rate": 5.959845699161304e-07, + "loss": 0.6112, + "step": 27659 + }, + { + "epoch": 0.8477381390216991, + "grad_norm": 0.7796086681192073, + "learning_rate": 5.957495936736774e-07, + "loss": 0.3913, + "step": 27660 + }, + { + "epoch": 0.8477687875444404, + "grad_norm": 1.9533503278570592, + "learning_rate": 5.955146608274321e-07, + "loss": 0.5898, + "step": 27661 + }, + { + "epoch": 0.8477994360671816, + "grad_norm": 1.977721306694795, + "learning_rate": 5.952797713797065e-07, + "loss": 0.531, + "step": 27662 + }, + { + "epoch": 0.8478300845899228, + "grad_norm": 0.7541772167425208, + "learning_rate": 5.950449253328172e-07, + "loss": 0.3822, + "step": 27663 + }, + { + "epoch": 0.847860733112664, + "grad_norm": 0.8029495508841775, + "learning_rate": 5.948101226890752e-07, + "loss": 0.3907, + "step": 27664 + }, + { + "epoch": 0.8478913816354052, + "grad_norm": 1.985752108700521, + "learning_rate": 5.945753634507983e-07, + "loss": 0.5813, + "step": 27665 + }, + { + "epoch": 0.8479220301581464, + "grad_norm": 1.57138768543739, + "learning_rate": 5.943406476202973e-07, + "loss": 0.5264, + "step": 27666 + }, + { + "epoch": 0.8479526786808875, + "grad_norm": 0.8134743577197276, + "learning_rate": 5.941059751998846e-07, + "loss": 0.4004, + "step": 27667 + }, + { + "epoch": 0.8479833272036288, + "grad_norm": 1.6676968559529834, + "learning_rate": 5.938713461918727e-07, + "loss": 0.536, + "step": 27668 + }, + { + "epoch": 0.8480139757263699, + "grad_norm": 1.9382991855012124, + "learning_rate": 5.936367605985749e-07, + "loss": 0.5977, + "step": 27669 + }, + { + "epoch": 0.8480446242491112, + "grad_norm": 1.9349997090576767, + "learning_rate": 5.934022184223004e-07, + "loss": 0.5783, + "step": 27670 + }, + { + "epoch": 0.8480752727718524, + "grad_norm": 0.8158978269215204, + "learning_rate": 5.931677196653612e-07, + "loss": 0.4327, + "step": 27671 + }, + { + "epoch": 0.8481059212945936, + "grad_norm": 0.7858958794929292, + "learning_rate": 5.929332643300683e-07, + "loss": 0.398, + "step": 27672 + }, + { + "epoch": 0.8481365698173348, + "grad_norm": 
1.7875858684432218, + "learning_rate": 5.926988524187327e-07, + "loss": 0.591, + "step": 27673 + }, + { + "epoch": 0.848167218340076, + "grad_norm": 1.8293405926772008, + "learning_rate": 5.924644839336625e-07, + "loss": 0.5541, + "step": 27674 + }, + { + "epoch": 0.8481978668628172, + "grad_norm": 1.9594127426982566, + "learning_rate": 5.922301588771657e-07, + "loss": 0.5426, + "step": 27675 + }, + { + "epoch": 0.8482285153855584, + "grad_norm": 2.1987481779131106, + "learning_rate": 5.919958772515549e-07, + "loss": 0.6789, + "step": 27676 + }, + { + "epoch": 0.8482591639082996, + "grad_norm": 1.9427658029369046, + "learning_rate": 5.917616390591363e-07, + "loss": 0.589, + "step": 27677 + }, + { + "epoch": 0.8482898124310408, + "grad_norm": 1.7324630105705157, + "learning_rate": 5.915274443022179e-07, + "loss": 0.5546, + "step": 27678 + }, + { + "epoch": 0.848320460953782, + "grad_norm": 1.7974715719748426, + "learning_rate": 5.912932929831066e-07, + "loss": 0.588, + "step": 27679 + }, + { + "epoch": 0.8483511094765233, + "grad_norm": 2.130370414861186, + "learning_rate": 5.91059185104112e-07, + "loss": 0.5792, + "step": 27680 + }, + { + "epoch": 0.8483817579992644, + "grad_norm": 2.179908702489199, + "learning_rate": 5.908251206675386e-07, + "loss": 0.5539, + "step": 27681 + }, + { + "epoch": 0.8484124065220057, + "grad_norm": 1.8297738795841578, + "learning_rate": 5.90591099675693e-07, + "loss": 0.6192, + "step": 27682 + }, + { + "epoch": 0.8484430550447468, + "grad_norm": 1.7668037855680427, + "learning_rate": 5.903571221308813e-07, + "loss": 0.6006, + "step": 27683 + }, + { + "epoch": 0.8484737035674881, + "grad_norm": 1.9368119778971642, + "learning_rate": 5.901231880354108e-07, + "loss": 0.5766, + "step": 27684 + }, + { + "epoch": 0.8485043520902292, + "grad_norm": 1.6249033857407953, + "learning_rate": 5.898892973915843e-07, + "loss": 0.5651, + "step": 27685 + }, + { + "epoch": 0.8485350006129705, + "grad_norm": 1.9152522835086296, + "learning_rate": 5.896554502017049e-07, + "loss": 0.553, + "step": 27686 + }, + { + "epoch": 0.8485656491357116, + "grad_norm": 1.8890159780381086, + "learning_rate": 5.894216464680813e-07, + "loss": 0.5773, + "step": 27687 + }, + { + "epoch": 0.8485962976584529, + "grad_norm": 1.9723964830955163, + "learning_rate": 5.891878861930139e-07, + "loss": 0.5365, + "step": 27688 + }, + { + "epoch": 0.848626946181194, + "grad_norm": 0.7805097441535271, + "learning_rate": 5.889541693788064e-07, + "loss": 0.3948, + "step": 27689 + }, + { + "epoch": 0.8486575947039353, + "grad_norm": 2.145748824492041, + "learning_rate": 5.88720496027762e-07, + "loss": 0.5096, + "step": 27690 + }, + { + "epoch": 0.8486882432266765, + "grad_norm": 1.8444495325775205, + "learning_rate": 5.884868661421833e-07, + "loss": 0.5971, + "step": 27691 + }, + { + "epoch": 0.8487188917494177, + "grad_norm": 2.058245196465411, + "learning_rate": 5.882532797243734e-07, + "loss": 0.5204, + "step": 27692 + }, + { + "epoch": 0.8487495402721589, + "grad_norm": 1.9965831267821343, + "learning_rate": 5.88019736776631e-07, + "loss": 0.5907, + "step": 27693 + }, + { + "epoch": 0.8487801887949001, + "grad_norm": 1.8728707905441482, + "learning_rate": 5.877862373012599e-07, + "loss": 0.4511, + "step": 27694 + }, + { + "epoch": 0.8488108373176413, + "grad_norm": 1.7730318557425533, + "learning_rate": 5.875527813005604e-07, + "loss": 0.5906, + "step": 27695 + }, + { + "epoch": 0.8488414858403825, + "grad_norm": 2.019741080249192, + "learning_rate": 5.873193687768325e-07, + "loss": 0.5413, + "step": 27696 + 
}, + { + "epoch": 0.8488721343631237, + "grad_norm": 1.9559097681121327, + "learning_rate": 5.870859997323746e-07, + "loss": 0.5497, + "step": 27697 + }, + { + "epoch": 0.8489027828858648, + "grad_norm": 1.6890043562867523, + "learning_rate": 5.868526741694875e-07, + "loss": 0.6326, + "step": 27698 + }, + { + "epoch": 0.8489334314086061, + "grad_norm": 2.5696869501692294, + "learning_rate": 5.866193920904706e-07, + "loss": 0.626, + "step": 27699 + }, + { + "epoch": 0.8489640799313473, + "grad_norm": 1.941336131661566, + "learning_rate": 5.863861534976228e-07, + "loss": 0.6486, + "step": 27700 + }, + { + "epoch": 0.8489947284540885, + "grad_norm": 1.91176418377531, + "learning_rate": 5.861529583932402e-07, + "loss": 0.5783, + "step": 27701 + }, + { + "epoch": 0.8490253769768297, + "grad_norm": 2.097372211310911, + "learning_rate": 5.859198067796218e-07, + "loss": 0.6981, + "step": 27702 + }, + { + "epoch": 0.8490560254995709, + "grad_norm": 2.2159431286889806, + "learning_rate": 5.856866986590665e-07, + "loss": 0.6094, + "step": 27703 + }, + { + "epoch": 0.8490866740223121, + "grad_norm": 0.7877377993568054, + "learning_rate": 5.854536340338685e-07, + "loss": 0.3945, + "step": 27704 + }, + { + "epoch": 0.8491173225450533, + "grad_norm": 1.8939265768962772, + "learning_rate": 5.852206129063248e-07, + "loss": 0.6005, + "step": 27705 + }, + { + "epoch": 0.8491479710677945, + "grad_norm": 2.0623161169783324, + "learning_rate": 5.849876352787337e-07, + "loss": 0.6189, + "step": 27706 + }, + { + "epoch": 0.8491786195905358, + "grad_norm": 1.5790691688751022, + "learning_rate": 5.847547011533882e-07, + "loss": 0.4887, + "step": 27707 + }, + { + "epoch": 0.8492092681132769, + "grad_norm": 2.1152634070765504, + "learning_rate": 5.845218105325839e-07, + "loss": 0.6171, + "step": 27708 + }, + { + "epoch": 0.8492399166360182, + "grad_norm": 1.8864797608482688, + "learning_rate": 5.842889634186161e-07, + "loss": 0.6454, + "step": 27709 + }, + { + "epoch": 0.8492705651587593, + "grad_norm": 0.7864453389387144, + "learning_rate": 5.840561598137784e-07, + "loss": 0.3968, + "step": 27710 + }, + { + "epoch": 0.8493012136815006, + "grad_norm": 1.8675154077361096, + "learning_rate": 5.838233997203668e-07, + "loss": 0.5499, + "step": 27711 + }, + { + "epoch": 0.8493318622042417, + "grad_norm": 2.3117395040877264, + "learning_rate": 5.835906831406718e-07, + "loss": 0.6094, + "step": 27712 + }, + { + "epoch": 0.849362510726983, + "grad_norm": 1.966234396919756, + "learning_rate": 5.833580100769881e-07, + "loss": 0.5678, + "step": 27713 + }, + { + "epoch": 0.8493931592497241, + "grad_norm": 0.7998168801230532, + "learning_rate": 5.831253805316084e-07, + "loss": 0.3992, + "step": 27714 + }, + { + "epoch": 0.8494238077724654, + "grad_norm": 0.7720189223307833, + "learning_rate": 5.828927945068252e-07, + "loss": 0.3861, + "step": 27715 + }, + { + "epoch": 0.8494544562952066, + "grad_norm": 1.7283859872247707, + "learning_rate": 5.826602520049268e-07, + "loss": 0.5162, + "step": 27716 + }, + { + "epoch": 0.8494851048179478, + "grad_norm": 2.2630027352069573, + "learning_rate": 5.824277530282096e-07, + "loss": 0.6144, + "step": 27717 + }, + { + "epoch": 0.849515753340689, + "grad_norm": 1.7015692892789716, + "learning_rate": 5.821952975789608e-07, + "loss": 0.4706, + "step": 27718 + }, + { + "epoch": 0.8495464018634302, + "grad_norm": 1.9435570944443787, + "learning_rate": 5.819628856594733e-07, + "loss": 0.5116, + "step": 27719 + }, + { + "epoch": 0.8495770503861714, + "grad_norm": 1.8650032898682236, + 
"learning_rate": 5.817305172720344e-07, + "loss": 0.4974, + "step": 27720 + }, + { + "epoch": 0.8496076989089126, + "grad_norm": 0.8090217082764023, + "learning_rate": 5.814981924189356e-07, + "loss": 0.3911, + "step": 27721 + }, + { + "epoch": 0.8496383474316538, + "grad_norm": 1.9731505933994826, + "learning_rate": 5.812659111024666e-07, + "loss": 0.6017, + "step": 27722 + }, + { + "epoch": 0.849668995954395, + "grad_norm": 1.756774795645548, + "learning_rate": 5.810336733249139e-07, + "loss": 0.4757, + "step": 27723 + }, + { + "epoch": 0.8496996444771362, + "grad_norm": 0.7914947791145174, + "learning_rate": 5.808014790885674e-07, + "loss": 0.4151, + "step": 27724 + }, + { + "epoch": 0.8497302929998775, + "grad_norm": 1.9436554666756147, + "learning_rate": 5.805693283957154e-07, + "loss": 0.5275, + "step": 27725 + }, + { + "epoch": 0.8497609415226186, + "grad_norm": 1.9708624403733432, + "learning_rate": 5.803372212486436e-07, + "loss": 0.5553, + "step": 27726 + }, + { + "epoch": 0.8497915900453599, + "grad_norm": 1.7876219370599606, + "learning_rate": 5.801051576496402e-07, + "loss": 0.5472, + "step": 27727 + }, + { + "epoch": 0.849822238568101, + "grad_norm": 1.897012935871373, + "learning_rate": 5.798731376009925e-07, + "loss": 0.5601, + "step": 27728 + }, + { + "epoch": 0.8498528870908422, + "grad_norm": 2.1078340132096995, + "learning_rate": 5.796411611049846e-07, + "loss": 0.5432, + "step": 27729 + }, + { + "epoch": 0.8498835356135834, + "grad_norm": 2.064132668800935, + "learning_rate": 5.794092281639041e-07, + "loss": 0.5694, + "step": 27730 + }, + { + "epoch": 0.8499141841363246, + "grad_norm": 1.7904944893996957, + "learning_rate": 5.791773387800348e-07, + "loss": 0.5918, + "step": 27731 + }, + { + "epoch": 0.8499448326590658, + "grad_norm": 1.7285457631680574, + "learning_rate": 5.78945492955662e-07, + "loss": 0.5637, + "step": 27732 + }, + { + "epoch": 0.849975481181807, + "grad_norm": 1.896923839366322, + "learning_rate": 5.787136906930719e-07, + "loss": 0.5845, + "step": 27733 + }, + { + "epoch": 0.8500061297045483, + "grad_norm": 0.8015220894243973, + "learning_rate": 5.784819319945456e-07, + "loss": 0.381, + "step": 27734 + }, + { + "epoch": 0.8500367782272894, + "grad_norm": 1.756395244557813, + "learning_rate": 5.782502168623688e-07, + "loss": 0.5116, + "step": 27735 + }, + { + "epoch": 0.8500674267500307, + "grad_norm": 1.6861575021816086, + "learning_rate": 5.780185452988241e-07, + "loss": 0.5788, + "step": 27736 + }, + { + "epoch": 0.8500980752727718, + "grad_norm": 1.7981309282292355, + "learning_rate": 5.777869173061939e-07, + "loss": 0.6117, + "step": 27737 + }, + { + "epoch": 0.8501287237955131, + "grad_norm": 1.672176294165713, + "learning_rate": 5.7755533288676e-07, + "loss": 0.5471, + "step": 27738 + }, + { + "epoch": 0.8501593723182542, + "grad_norm": 1.717029833574175, + "learning_rate": 5.773237920428065e-07, + "loss": 0.6148, + "step": 27739 + }, + { + "epoch": 0.8501900208409955, + "grad_norm": 1.7931120247588148, + "learning_rate": 5.770922947766116e-07, + "loss": 0.5576, + "step": 27740 + }, + { + "epoch": 0.8502206693637366, + "grad_norm": 2.1518159046930245, + "learning_rate": 5.768608410904597e-07, + "loss": 0.5355, + "step": 27741 + }, + { + "epoch": 0.8502513178864779, + "grad_norm": 1.828389063217523, + "learning_rate": 5.766294309866283e-07, + "loss": 0.596, + "step": 27742 + }, + { + "epoch": 0.850281966409219, + "grad_norm": 1.8449576223826951, + "learning_rate": 5.763980644673989e-07, + "loss": 0.5177, + "step": 27743 + }, + { + "epoch": 
0.8503126149319603, + "grad_norm": 2.1930174819852017, + "learning_rate": 5.761667415350519e-07, + "loss": 0.5693, + "step": 27744 + }, + { + "epoch": 0.8503432634547015, + "grad_norm": 2.1933579958853318, + "learning_rate": 5.75935462191865e-07, + "loss": 0.6383, + "step": 27745 + }, + { + "epoch": 0.8503739119774427, + "grad_norm": 0.8081677478399539, + "learning_rate": 5.757042264401186e-07, + "loss": 0.4103, + "step": 27746 + }, + { + "epoch": 0.8504045605001839, + "grad_norm": 1.9610642117422317, + "learning_rate": 5.754730342820908e-07, + "loss": 0.5115, + "step": 27747 + }, + { + "epoch": 0.8504352090229251, + "grad_norm": 2.0655544763299507, + "learning_rate": 5.752418857200582e-07, + "loss": 0.5507, + "step": 27748 + }, + { + "epoch": 0.8504658575456663, + "grad_norm": 1.8494014615625292, + "learning_rate": 5.75010780756301e-07, + "loss": 0.6386, + "step": 27749 + }, + { + "epoch": 0.8504965060684075, + "grad_norm": 1.9299744669096466, + "learning_rate": 5.747797193930932e-07, + "loss": 0.6368, + "step": 27750 + }, + { + "epoch": 0.8505271545911487, + "grad_norm": 2.161730395310195, + "learning_rate": 5.745487016327134e-07, + "loss": 0.5848, + "step": 27751 + }, + { + "epoch": 0.85055780311389, + "grad_norm": 1.9192026995203342, + "learning_rate": 5.74317727477438e-07, + "loss": 0.549, + "step": 27752 + }, + { + "epoch": 0.8505884516366311, + "grad_norm": 1.9137113481240244, + "learning_rate": 5.740867969295422e-07, + "loss": 0.5795, + "step": 27753 + }, + { + "epoch": 0.8506191001593724, + "grad_norm": 2.0231775691357634, + "learning_rate": 5.73855909991301e-07, + "loss": 0.5634, + "step": 27754 + }, + { + "epoch": 0.8506497486821135, + "grad_norm": 1.9773692358749357, + "learning_rate": 5.736250666649911e-07, + "loss": 0.6527, + "step": 27755 + }, + { + "epoch": 0.8506803972048548, + "grad_norm": 1.9515193552756787, + "learning_rate": 5.733942669528852e-07, + "loss": 0.5444, + "step": 27756 + }, + { + "epoch": 0.8507110457275959, + "grad_norm": 1.9953494811265668, + "learning_rate": 5.731635108572581e-07, + "loss": 0.6217, + "step": 27757 + }, + { + "epoch": 0.8507416942503372, + "grad_norm": 0.8395444574715296, + "learning_rate": 5.729327983803845e-07, + "loss": 0.3812, + "step": 27758 + }, + { + "epoch": 0.8507723427730783, + "grad_norm": 1.6773287132397814, + "learning_rate": 5.727021295245356e-07, + "loss": 0.5046, + "step": 27759 + }, + { + "epoch": 0.8508029912958195, + "grad_norm": 1.9971348107740678, + "learning_rate": 5.724715042919865e-07, + "loss": 0.5628, + "step": 27760 + }, + { + "epoch": 0.8508336398185607, + "grad_norm": 1.837319224448863, + "learning_rate": 5.722409226850078e-07, + "loss": 0.4624, + "step": 27761 + }, + { + "epoch": 0.8508642883413019, + "grad_norm": 1.6956081699957126, + "learning_rate": 5.720103847058717e-07, + "loss": 0.5775, + "step": 27762 + }, + { + "epoch": 0.8508949368640432, + "grad_norm": 1.947850540931733, + "learning_rate": 5.717798903568517e-07, + "loss": 0.5553, + "step": 27763 + }, + { + "epoch": 0.8509255853867843, + "grad_norm": 2.1600593808953197, + "learning_rate": 5.71549439640216e-07, + "loss": 0.6475, + "step": 27764 + }, + { + "epoch": 0.8509562339095256, + "grad_norm": 2.125925037702949, + "learning_rate": 5.713190325582374e-07, + "loss": 0.6386, + "step": 27765 + }, + { + "epoch": 0.8509868824322667, + "grad_norm": 1.809697286683746, + "learning_rate": 5.710886691131856e-07, + "loss": 0.5803, + "step": 27766 + }, + { + "epoch": 0.851017530955008, + "grad_norm": 1.9443810032609612, + "learning_rate": 
5.708583493073299e-07, + "loss": 0.6381, + "step": 27767 + }, + { + "epoch": 0.8510481794777491, + "grad_norm": 1.7270270654195097, + "learning_rate": 5.706280731429404e-07, + "loss": 0.5351, + "step": 27768 + }, + { + "epoch": 0.8510788280004904, + "grad_norm": 1.6972328817815798, + "learning_rate": 5.70397840622286e-07, + "loss": 0.5099, + "step": 27769 + }, + { + "epoch": 0.8511094765232315, + "grad_norm": 1.7457827522850942, + "learning_rate": 5.701676517476345e-07, + "loss": 0.5213, + "step": 27770 + }, + { + "epoch": 0.8511401250459728, + "grad_norm": 1.7929237880515023, + "learning_rate": 5.699375065212553e-07, + "loss": 0.5635, + "step": 27771 + }, + { + "epoch": 0.851170773568714, + "grad_norm": 1.830615364602478, + "learning_rate": 5.697074049454138e-07, + "loss": 0.5769, + "step": 27772 + }, + { + "epoch": 0.8512014220914552, + "grad_norm": 1.7953051829462354, + "learning_rate": 5.694773470223807e-07, + "loss": 0.5904, + "step": 27773 + }, + { + "epoch": 0.8512320706141964, + "grad_norm": 2.119037959132655, + "learning_rate": 5.692473327544206e-07, + "loss": 0.6141, + "step": 27774 + }, + { + "epoch": 0.8512627191369376, + "grad_norm": 1.8362613671803072, + "learning_rate": 5.690173621437995e-07, + "loss": 0.5567, + "step": 27775 + }, + { + "epoch": 0.8512933676596788, + "grad_norm": 1.8176537951830851, + "learning_rate": 5.687874351927835e-07, + "loss": 0.5645, + "step": 27776 + }, + { + "epoch": 0.85132401618242, + "grad_norm": 0.7927902154035892, + "learning_rate": 5.685575519036402e-07, + "loss": 0.3848, + "step": 27777 + }, + { + "epoch": 0.8513546647051612, + "grad_norm": 1.8722403783018904, + "learning_rate": 5.683277122786318e-07, + "loss": 0.5853, + "step": 27778 + }, + { + "epoch": 0.8513853132279025, + "grad_norm": 1.7167918778221172, + "learning_rate": 5.680979163200246e-07, + "loss": 0.5463, + "step": 27779 + }, + { + "epoch": 0.8514159617506436, + "grad_norm": 1.9125131729566789, + "learning_rate": 5.678681640300837e-07, + "loss": 0.6374, + "step": 27780 + }, + { + "epoch": 0.8514466102733849, + "grad_norm": 1.9205289779174706, + "learning_rate": 5.676384554110703e-07, + "loss": 0.5481, + "step": 27781 + }, + { + "epoch": 0.851477258796126, + "grad_norm": 0.790468456300217, + "learning_rate": 5.674087904652509e-07, + "loss": 0.4055, + "step": 27782 + }, + { + "epoch": 0.8515079073188673, + "grad_norm": 2.0325037074454406, + "learning_rate": 5.671791691948842e-07, + "loss": 0.6093, + "step": 27783 + }, + { + "epoch": 0.8515385558416084, + "grad_norm": 1.7356844276514327, + "learning_rate": 5.669495916022377e-07, + "loss": 0.5646, + "step": 27784 + }, + { + "epoch": 0.8515692043643497, + "grad_norm": 1.9218718855143935, + "learning_rate": 5.667200576895709e-07, + "loss": 0.5519, + "step": 27785 + }, + { + "epoch": 0.8515998528870908, + "grad_norm": 0.7640853268656586, + "learning_rate": 5.664905674591448e-07, + "loss": 0.4127, + "step": 27786 + }, + { + "epoch": 0.8516305014098321, + "grad_norm": 1.8614473191967194, + "learning_rate": 5.662611209132219e-07, + "loss": 0.5825, + "step": 27787 + }, + { + "epoch": 0.8516611499325732, + "grad_norm": 1.757102618404352, + "learning_rate": 5.660317180540631e-07, + "loss": 0.5812, + "step": 27788 + }, + { + "epoch": 0.8516917984553145, + "grad_norm": 2.1956326880097965, + "learning_rate": 5.658023588839273e-07, + "loss": 0.6875, + "step": 27789 + }, + { + "epoch": 0.8517224469780557, + "grad_norm": 2.0117024924554654, + "learning_rate": 5.655730434050755e-07, + "loss": 0.6074, + "step": 27790 + }, + { + "epoch": 
0.8517530955007968, + "grad_norm": 1.9459883216742373, + "learning_rate": 5.653437716197669e-07, + "loss": 0.6068, + "step": 27791 + }, + { + "epoch": 0.8517837440235381, + "grad_norm": 1.8473477264174871, + "learning_rate": 5.651145435302618e-07, + "loss": 0.5449, + "step": 27792 + }, + { + "epoch": 0.8518143925462792, + "grad_norm": 2.0201071161387456, + "learning_rate": 5.648853591388181e-07, + "loss": 0.6104, + "step": 27793 + }, + { + "epoch": 0.8518450410690205, + "grad_norm": 1.8242840897757, + "learning_rate": 5.646562184476928e-07, + "loss": 0.6173, + "step": 27794 + }, + { + "epoch": 0.8518756895917616, + "grad_norm": 1.8208566933467596, + "learning_rate": 5.644271214591446e-07, + "loss": 0.5493, + "step": 27795 + }, + { + "epoch": 0.8519063381145029, + "grad_norm": 0.7884240569264828, + "learning_rate": 5.641980681754317e-07, + "loss": 0.4086, + "step": 27796 + }, + { + "epoch": 0.851936986637244, + "grad_norm": 2.077287299461355, + "learning_rate": 5.639690585988089e-07, + "loss": 0.5691, + "step": 27797 + }, + { + "epoch": 0.8519676351599853, + "grad_norm": 0.8392312516611843, + "learning_rate": 5.637400927315339e-07, + "loss": 0.3955, + "step": 27798 + }, + { + "epoch": 0.8519982836827265, + "grad_norm": 1.9064610786950118, + "learning_rate": 5.635111705758633e-07, + "loss": 0.6334, + "step": 27799 + }, + { + "epoch": 0.8520289322054677, + "grad_norm": 2.0701834688504857, + "learning_rate": 5.63282292134053e-07, + "loss": 0.6637, + "step": 27800 + }, + { + "epoch": 0.8520595807282089, + "grad_norm": 1.9766074162692906, + "learning_rate": 5.63053457408358e-07, + "loss": 0.5869, + "step": 27801 + }, + { + "epoch": 0.8520902292509501, + "grad_norm": 2.156063786986325, + "learning_rate": 5.6282466640103e-07, + "loss": 0.6628, + "step": 27802 + }, + { + "epoch": 0.8521208777736913, + "grad_norm": 2.083316477214528, + "learning_rate": 5.625959191143277e-07, + "loss": 0.5047, + "step": 27803 + }, + { + "epoch": 0.8521515262964325, + "grad_norm": 1.8770767296155761, + "learning_rate": 5.623672155505038e-07, + "loss": 0.5126, + "step": 27804 + }, + { + "epoch": 0.8521821748191737, + "grad_norm": 1.8514690295783036, + "learning_rate": 5.621385557118097e-07, + "loss": 0.554, + "step": 27805 + }, + { + "epoch": 0.852212823341915, + "grad_norm": 1.9088429207226107, + "learning_rate": 5.619099396004996e-07, + "loss": 0.6022, + "step": 27806 + }, + { + "epoch": 0.8522434718646561, + "grad_norm": 1.7885361397767214, + "learning_rate": 5.616813672188281e-07, + "loss": 0.6301, + "step": 27807 + }, + { + "epoch": 0.8522741203873974, + "grad_norm": 1.7594441444821023, + "learning_rate": 5.614528385690443e-07, + "loss": 0.5569, + "step": 27808 + }, + { + "epoch": 0.8523047689101385, + "grad_norm": 1.9923732895633235, + "learning_rate": 5.612243536534012e-07, + "loss": 0.6007, + "step": 27809 + }, + { + "epoch": 0.8523354174328798, + "grad_norm": 1.9150959173194342, + "learning_rate": 5.609959124741504e-07, + "loss": 0.5872, + "step": 27810 + }, + { + "epoch": 0.8523660659556209, + "grad_norm": 2.165542196649166, + "learning_rate": 5.60767515033544e-07, + "loss": 0.6212, + "step": 27811 + }, + { + "epoch": 0.8523967144783622, + "grad_norm": 1.993020748715085, + "learning_rate": 5.605391613338307e-07, + "loss": 0.5471, + "step": 27812 + }, + { + "epoch": 0.8524273630011033, + "grad_norm": 2.1667562204173656, + "learning_rate": 5.603108513772587e-07, + "loss": 0.5813, + "step": 27813 + }, + { + "epoch": 0.8524580115238446, + "grad_norm": 1.9680142130038163, + "learning_rate": 5.600825851660824e-07, 
+ "loss": 0.5959, + "step": 27814 + }, + { + "epoch": 0.8524886600465857, + "grad_norm": 0.7886970049878786, + "learning_rate": 5.598543627025483e-07, + "loss": 0.3753, + "step": 27815 + }, + { + "epoch": 0.852519308569327, + "grad_norm": 1.9075708367362132, + "learning_rate": 5.596261839889039e-07, + "loss": 0.5573, + "step": 27816 + }, + { + "epoch": 0.8525499570920682, + "grad_norm": 2.2641239024151925, + "learning_rate": 5.593980490273987e-07, + "loss": 0.6348, + "step": 27817 + }, + { + "epoch": 0.8525806056148094, + "grad_norm": 1.9081558075407186, + "learning_rate": 5.591699578202808e-07, + "loss": 0.6126, + "step": 27818 + }, + { + "epoch": 0.8526112541375506, + "grad_norm": 2.0559502822101083, + "learning_rate": 5.589419103697991e-07, + "loss": 0.6236, + "step": 27819 + }, + { + "epoch": 0.8526419026602918, + "grad_norm": 1.8151178782109465, + "learning_rate": 5.587139066781977e-07, + "loss": 0.5552, + "step": 27820 + }, + { + "epoch": 0.852672551183033, + "grad_norm": 1.6349831095245353, + "learning_rate": 5.584859467477243e-07, + "loss": 0.5097, + "step": 27821 + }, + { + "epoch": 0.8527031997057741, + "grad_norm": 1.8696042323740845, + "learning_rate": 5.582580305806262e-07, + "loss": 0.5822, + "step": 27822 + }, + { + "epoch": 0.8527338482285154, + "grad_norm": 1.869747583150493, + "learning_rate": 5.580301581791487e-07, + "loss": 0.5713, + "step": 27823 + }, + { + "epoch": 0.8527644967512565, + "grad_norm": 1.9863493967209165, + "learning_rate": 5.578023295455343e-07, + "loss": 0.5024, + "step": 27824 + }, + { + "epoch": 0.8527951452739978, + "grad_norm": 1.963925480583358, + "learning_rate": 5.575745446820325e-07, + "loss": 0.6021, + "step": 27825 + }, + { + "epoch": 0.852825793796739, + "grad_norm": 2.0241979907786978, + "learning_rate": 5.573468035908835e-07, + "loss": 0.5812, + "step": 27826 + }, + { + "epoch": 0.8528564423194802, + "grad_norm": 2.1646203255761396, + "learning_rate": 5.571191062743347e-07, + "loss": 0.567, + "step": 27827 + }, + { + "epoch": 0.8528870908422214, + "grad_norm": 1.8959360072411107, + "learning_rate": 5.568914527346269e-07, + "loss": 0.5318, + "step": 27828 + }, + { + "epoch": 0.8529177393649626, + "grad_norm": 1.8303386171377785, + "learning_rate": 5.566638429740051e-07, + "loss": 0.5302, + "step": 27829 + }, + { + "epoch": 0.8529483878877038, + "grad_norm": 1.9340454976018588, + "learning_rate": 5.564362769947118e-07, + "loss": 0.5473, + "step": 27830 + }, + { + "epoch": 0.852979036410445, + "grad_norm": 2.1754916064431145, + "learning_rate": 5.562087547989875e-07, + "loss": 0.5402, + "step": 27831 + }, + { + "epoch": 0.8530096849331862, + "grad_norm": 2.0175078887907043, + "learning_rate": 5.559812763890759e-07, + "loss": 0.4982, + "step": 27832 + }, + { + "epoch": 0.8530403334559274, + "grad_norm": 2.005851714490311, + "learning_rate": 5.557538417672187e-07, + "loss": 0.5636, + "step": 27833 + }, + { + "epoch": 0.8530709819786686, + "grad_norm": 1.7152815290544514, + "learning_rate": 5.555264509356556e-07, + "loss": 0.6147, + "step": 27834 + }, + { + "epoch": 0.8531016305014099, + "grad_norm": 1.6855206546789299, + "learning_rate": 5.55299103896626e-07, + "loss": 0.5727, + "step": 27835 + }, + { + "epoch": 0.853132279024151, + "grad_norm": 1.7986609062028074, + "learning_rate": 5.550718006523736e-07, + "loss": 0.4855, + "step": 27836 + }, + { + "epoch": 0.8531629275468923, + "grad_norm": 2.1247584034698215, + "learning_rate": 5.548445412051345e-07, + "loss": 0.6168, + "step": 27837 + }, + { + "epoch": 0.8531935760696334, + "grad_norm": 
2.0695182592930883, + "learning_rate": 5.546173255571508e-07, + "loss": 0.5734, + "step": 27838 + }, + { + "epoch": 0.8532242245923747, + "grad_norm": 1.8493165366182538, + "learning_rate": 5.543901537106594e-07, + "loss": 0.5742, + "step": 27839 + }, + { + "epoch": 0.8532548731151158, + "grad_norm": 1.8907634785068774, + "learning_rate": 5.541630256678987e-07, + "loss": 0.4976, + "step": 27840 + }, + { + "epoch": 0.8532855216378571, + "grad_norm": 1.812383415231276, + "learning_rate": 5.539359414311085e-07, + "loss": 0.5222, + "step": 27841 + }, + { + "epoch": 0.8533161701605982, + "grad_norm": 0.8113042710885422, + "learning_rate": 5.537089010025237e-07, + "loss": 0.4145, + "step": 27842 + }, + { + "epoch": 0.8533468186833395, + "grad_norm": 1.9426297645160489, + "learning_rate": 5.534819043843831e-07, + "loss": 0.6, + "step": 27843 + }, + { + "epoch": 0.8533774672060807, + "grad_norm": 0.8275805172393923, + "learning_rate": 5.532549515789237e-07, + "loss": 0.4066, + "step": 27844 + }, + { + "epoch": 0.8534081157288219, + "grad_norm": 1.8579607196750212, + "learning_rate": 5.530280425883805e-07, + "loss": 0.569, + "step": 27845 + }, + { + "epoch": 0.8534387642515631, + "grad_norm": 0.8325870518803236, + "learning_rate": 5.528011774149905e-07, + "loss": 0.3963, + "step": 27846 + }, + { + "epoch": 0.8534694127743043, + "grad_norm": 1.9957606571384463, + "learning_rate": 5.52574356060987e-07, + "loss": 0.5985, + "step": 27847 + }, + { + "epoch": 0.8535000612970455, + "grad_norm": 1.899531688746314, + "learning_rate": 5.52347578528607e-07, + "loss": 0.5386, + "step": 27848 + }, + { + "epoch": 0.8535307098197867, + "grad_norm": 1.7263503806897942, + "learning_rate": 5.521208448200849e-07, + "loss": 0.5088, + "step": 27849 + }, + { + "epoch": 0.8535613583425279, + "grad_norm": 2.089724923903295, + "learning_rate": 5.518941549376527e-07, + "loss": 0.5918, + "step": 27850 + }, + { + "epoch": 0.8535920068652691, + "grad_norm": 1.999550067428268, + "learning_rate": 5.51667508883546e-07, + "loss": 0.5159, + "step": 27851 + }, + { + "epoch": 0.8536226553880103, + "grad_norm": 2.103836695924699, + "learning_rate": 5.514409066599985e-07, + "loss": 0.6178, + "step": 27852 + }, + { + "epoch": 0.8536533039107514, + "grad_norm": 2.1539035313665686, + "learning_rate": 5.512143482692411e-07, + "loss": 0.668, + "step": 27853 + }, + { + "epoch": 0.8536839524334927, + "grad_norm": 1.9684140041291074, + "learning_rate": 5.509878337135066e-07, + "loss": 0.5407, + "step": 27854 + }, + { + "epoch": 0.8537146009562339, + "grad_norm": 1.8355732136412206, + "learning_rate": 5.507613629950287e-07, + "loss": 0.5306, + "step": 27855 + }, + { + "epoch": 0.8537452494789751, + "grad_norm": 1.633606767640871, + "learning_rate": 5.505349361160362e-07, + "loss": 0.5352, + "step": 27856 + }, + { + "epoch": 0.8537758980017163, + "grad_norm": 0.78508854021618, + "learning_rate": 5.503085530787628e-07, + "loss": 0.3892, + "step": 27857 + }, + { + "epoch": 0.8538065465244575, + "grad_norm": 1.8497546742364253, + "learning_rate": 5.500822138854361e-07, + "loss": 0.5692, + "step": 27858 + }, + { + "epoch": 0.8538371950471987, + "grad_norm": 1.9397445820896986, + "learning_rate": 5.498559185382885e-07, + "loss": 0.5881, + "step": 27859 + }, + { + "epoch": 0.8538678435699399, + "grad_norm": 1.7941391038921555, + "learning_rate": 5.496296670395501e-07, + "loss": 0.492, + "step": 27860 + }, + { + "epoch": 0.8538984920926811, + "grad_norm": 1.907521962961939, + "learning_rate": 5.494034593914476e-07, + "loss": 0.5801, + "step": 27861 + 
}, + { + "epoch": 0.8539291406154224, + "grad_norm": 2.1256528800900516, + "learning_rate": 5.491772955962122e-07, + "loss": 0.5739, + "step": 27862 + }, + { + "epoch": 0.8539597891381635, + "grad_norm": 2.017115499423188, + "learning_rate": 5.489511756560728e-07, + "loss": 0.6497, + "step": 27863 + }, + { + "epoch": 0.8539904376609048, + "grad_norm": 0.796535384966754, + "learning_rate": 5.487250995732546e-07, + "loss": 0.4123, + "step": 27864 + }, + { + "epoch": 0.8540210861836459, + "grad_norm": 0.8062855180904316, + "learning_rate": 5.484990673499874e-07, + "loss": 0.4013, + "step": 27865 + }, + { + "epoch": 0.8540517347063872, + "grad_norm": 1.9680087289351162, + "learning_rate": 5.482730789884987e-07, + "loss": 0.6, + "step": 27866 + }, + { + "epoch": 0.8540823832291283, + "grad_norm": 2.0399062010180433, + "learning_rate": 5.480471344910137e-07, + "loss": 0.6689, + "step": 27867 + }, + { + "epoch": 0.8541130317518696, + "grad_norm": 1.7847738129274915, + "learning_rate": 5.4782123385976e-07, + "loss": 0.6276, + "step": 27868 + }, + { + "epoch": 0.8541436802746107, + "grad_norm": 0.7795414227333223, + "learning_rate": 5.475953770969622e-07, + "loss": 0.3816, + "step": 27869 + }, + { + "epoch": 0.854174328797352, + "grad_norm": 2.073391062798166, + "learning_rate": 5.47369564204846e-07, + "loss": 0.5455, + "step": 27870 + }, + { + "epoch": 0.8542049773200932, + "grad_norm": 2.219822833701992, + "learning_rate": 5.471437951856378e-07, + "loss": 0.5232, + "step": 27871 + }, + { + "epoch": 0.8542356258428344, + "grad_norm": 0.7984988188065977, + "learning_rate": 5.469180700415605e-07, + "loss": 0.4059, + "step": 27872 + }, + { + "epoch": 0.8542662743655756, + "grad_norm": 1.9300957037188229, + "learning_rate": 5.466923887748382e-07, + "loss": 0.5246, + "step": 27873 + }, + { + "epoch": 0.8542969228883168, + "grad_norm": 1.8436806695081833, + "learning_rate": 5.464667513876965e-07, + "loss": 0.5477, + "step": 27874 + }, + { + "epoch": 0.854327571411058, + "grad_norm": 1.868907386329961, + "learning_rate": 5.462411578823562e-07, + "loss": 0.5655, + "step": 27875 + }, + { + "epoch": 0.8543582199337992, + "grad_norm": 1.9926627072862129, + "learning_rate": 5.460156082610418e-07, + "loss": 0.6513, + "step": 27876 + }, + { + "epoch": 0.8543888684565404, + "grad_norm": 1.7627181181121985, + "learning_rate": 5.457901025259759e-07, + "loss": 0.5737, + "step": 27877 + }, + { + "epoch": 0.8544195169792816, + "grad_norm": 1.9299523413434079, + "learning_rate": 5.455646406793785e-07, + "loss": 0.6263, + "step": 27878 + }, + { + "epoch": 0.8544501655020228, + "grad_norm": 1.7671843348446428, + "learning_rate": 5.453392227234739e-07, + "loss": 0.5712, + "step": 27879 + }, + { + "epoch": 0.8544808140247641, + "grad_norm": 1.7771959184535295, + "learning_rate": 5.451138486604796e-07, + "loss": 0.5746, + "step": 27880 + }, + { + "epoch": 0.8545114625475052, + "grad_norm": 1.7932738628796667, + "learning_rate": 5.4488851849262e-07, + "loss": 0.5622, + "step": 27881 + }, + { + "epoch": 0.8545421110702465, + "grad_norm": 2.0341942641140984, + "learning_rate": 5.44663232222114e-07, + "loss": 0.7567, + "step": 27882 + }, + { + "epoch": 0.8545727595929876, + "grad_norm": 1.9493664703785079, + "learning_rate": 5.444379898511803e-07, + "loss": 0.6639, + "step": 27883 + }, + { + "epoch": 0.8546034081157288, + "grad_norm": 1.722647863803046, + "learning_rate": 5.442127913820389e-07, + "loss": 0.5797, + "step": 27884 + }, + { + "epoch": 0.85463405663847, + "grad_norm": 1.8556713709568584, + "learning_rate": 
5.439876368169101e-07, + "loss": 0.4877, + "step": 27885 + }, + { + "epoch": 0.8546647051612112, + "grad_norm": 1.5995461140832214, + "learning_rate": 5.437625261580099e-07, + "loss": 0.4384, + "step": 27886 + }, + { + "epoch": 0.8546953536839524, + "grad_norm": 0.8298979604484785, + "learning_rate": 5.435374594075576e-07, + "loss": 0.3914, + "step": 27887 + }, + { + "epoch": 0.8547260022066936, + "grad_norm": 0.8104257198756297, + "learning_rate": 5.433124365677722e-07, + "loss": 0.3987, + "step": 27888 + }, + { + "epoch": 0.8547566507294349, + "grad_norm": 1.7935441703984205, + "learning_rate": 5.43087457640869e-07, + "loss": 0.4774, + "step": 27889 + }, + { + "epoch": 0.854787299252176, + "grad_norm": 1.685431639098922, + "learning_rate": 5.428625226290663e-07, + "loss": 0.5412, + "step": 27890 + }, + { + "epoch": 0.8548179477749173, + "grad_norm": 1.9593403432241216, + "learning_rate": 5.426376315345783e-07, + "loss": 0.5815, + "step": 27891 + }, + { + "epoch": 0.8548485962976584, + "grad_norm": 2.0230006801075544, + "learning_rate": 5.424127843596222e-07, + "loss": 0.5723, + "step": 27892 + }, + { + "epoch": 0.8548792448203997, + "grad_norm": 1.6941926145869581, + "learning_rate": 5.421879811064145e-07, + "loss": 0.5547, + "step": 27893 + }, + { + "epoch": 0.8549098933431408, + "grad_norm": 2.027330220764987, + "learning_rate": 5.419632217771681e-07, + "loss": 0.6236, + "step": 27894 + }, + { + "epoch": 0.8549405418658821, + "grad_norm": 2.1645184080162427, + "learning_rate": 5.417385063740987e-07, + "loss": 0.5785, + "step": 27895 + }, + { + "epoch": 0.8549711903886232, + "grad_norm": 2.0304434141569585, + "learning_rate": 5.41513834899422e-07, + "loss": 0.538, + "step": 27896 + }, + { + "epoch": 0.8550018389113645, + "grad_norm": 3.276105329077473, + "learning_rate": 5.412892073553489e-07, + "loss": 0.4834, + "step": 27897 + }, + { + "epoch": 0.8550324874341056, + "grad_norm": 1.9161428266863978, + "learning_rate": 5.410646237440947e-07, + "loss": 0.519, + "step": 27898 + }, + { + "epoch": 0.8550631359568469, + "grad_norm": 1.8413024360376078, + "learning_rate": 5.408400840678701e-07, + "loss": 0.5532, + "step": 27899 + }, + { + "epoch": 0.8550937844795881, + "grad_norm": 1.9225466986953557, + "learning_rate": 5.40615588328891e-07, + "loss": 0.5342, + "step": 27900 + }, + { + "epoch": 0.8551244330023293, + "grad_norm": 2.094874859667503, + "learning_rate": 5.403911365293674e-07, + "loss": 0.6111, + "step": 27901 + }, + { + "epoch": 0.8551550815250705, + "grad_norm": 1.8016378962967075, + "learning_rate": 5.401667286715096e-07, + "loss": 0.5363, + "step": 27902 + }, + { + "epoch": 0.8551857300478117, + "grad_norm": 1.7742498503199464, + "learning_rate": 5.399423647575308e-07, + "loss": 0.5254, + "step": 27903 + }, + { + "epoch": 0.8552163785705529, + "grad_norm": 2.2739361215952534, + "learning_rate": 5.397180447896416e-07, + "loss": 0.6136, + "step": 27904 + }, + { + "epoch": 0.8552470270932941, + "grad_norm": 1.7430305868874458, + "learning_rate": 5.394937687700508e-07, + "loss": 0.5511, + "step": 27905 + }, + { + "epoch": 0.8552776756160353, + "grad_norm": 1.661865721750616, + "learning_rate": 5.392695367009693e-07, + "loss": 0.4425, + "step": 27906 + }, + { + "epoch": 0.8553083241387766, + "grad_norm": 1.828349767536251, + "learning_rate": 5.390453485846065e-07, + "loss": 0.5751, + "step": 27907 + }, + { + "epoch": 0.8553389726615177, + "grad_norm": 1.9902772195586644, + "learning_rate": 5.388212044231716e-07, + "loss": 0.5854, + "step": 27908 + }, + { + "epoch": 
0.855369621184259, + "grad_norm": 2.189571026816787, + "learning_rate": 5.385971042188736e-07, + "loss": 0.5071, + "step": 27909 + }, + { + "epoch": 0.8554002697070001, + "grad_norm": 2.1432413120331395, + "learning_rate": 5.383730479739174e-07, + "loss": 0.56, + "step": 27910 + }, + { + "epoch": 0.8554309182297414, + "grad_norm": 1.9385765944044069, + "learning_rate": 5.381490356905155e-07, + "loss": 0.5283, + "step": 27911 + }, + { + "epoch": 0.8554615667524825, + "grad_norm": 0.7979678242890045, + "learning_rate": 5.379250673708725e-07, + "loss": 0.3904, + "step": 27912 + }, + { + "epoch": 0.8554922152752238, + "grad_norm": 1.8479023930094045, + "learning_rate": 5.377011430171941e-07, + "loss": 0.5693, + "step": 27913 + }, + { + "epoch": 0.8555228637979649, + "grad_norm": 1.9778884379240917, + "learning_rate": 5.374772626316887e-07, + "loss": 0.6578, + "step": 27914 + }, + { + "epoch": 0.8555535123207061, + "grad_norm": 0.8239932975746957, + "learning_rate": 5.372534262165624e-07, + "loss": 0.4041, + "step": 27915 + }, + { + "epoch": 0.8555841608434473, + "grad_norm": 1.944360286137296, + "learning_rate": 5.370296337740188e-07, + "loss": 0.587, + "step": 27916 + }, + { + "epoch": 0.8556148093661885, + "grad_norm": 2.490063793735923, + "learning_rate": 5.368058853062641e-07, + "loss": 0.5638, + "step": 27917 + }, + { + "epoch": 0.8556454578889298, + "grad_norm": 2.080106242577037, + "learning_rate": 5.36582180815503e-07, + "loss": 0.5281, + "step": 27918 + }, + { + "epoch": 0.8556761064116709, + "grad_norm": 2.048198518563076, + "learning_rate": 5.363585203039412e-07, + "loss": 0.6477, + "step": 27919 + }, + { + "epoch": 0.8557067549344122, + "grad_norm": 2.10395122854608, + "learning_rate": 5.361349037737801e-07, + "loss": 0.5819, + "step": 27920 + }, + { + "epoch": 0.8557374034571533, + "grad_norm": 0.7750416354897839, + "learning_rate": 5.359113312272224e-07, + "loss": 0.384, + "step": 27921 + }, + { + "epoch": 0.8557680519798946, + "grad_norm": 1.8228889505361503, + "learning_rate": 5.356878026664747e-07, + "loss": 0.614, + "step": 27922 + }, + { + "epoch": 0.8557987005026357, + "grad_norm": 1.9880748543859117, + "learning_rate": 5.354643180937368e-07, + "loss": 0.6007, + "step": 27923 + }, + { + "epoch": 0.855829349025377, + "grad_norm": 2.172241082990024, + "learning_rate": 5.352408775112111e-07, + "loss": 0.6486, + "step": 27924 + }, + { + "epoch": 0.8558599975481181, + "grad_norm": 0.9426171135069277, + "learning_rate": 5.350174809210989e-07, + "loss": 0.4118, + "step": 27925 + }, + { + "epoch": 0.8558906460708594, + "grad_norm": 2.007169387907579, + "learning_rate": 5.347941283256014e-07, + "loss": 0.5241, + "step": 27926 + }, + { + "epoch": 0.8559212945936006, + "grad_norm": 1.8707436460837936, + "learning_rate": 5.345708197269217e-07, + "loss": 0.5987, + "step": 27927 + }, + { + "epoch": 0.8559519431163418, + "grad_norm": 1.8692766677517025, + "learning_rate": 5.343475551272565e-07, + "loss": 0.5991, + "step": 27928 + }, + { + "epoch": 0.855982591639083, + "grad_norm": 2.090901680817255, + "learning_rate": 5.341243345288077e-07, + "loss": 0.6291, + "step": 27929 + }, + { + "epoch": 0.8560132401618242, + "grad_norm": 1.8410801556994723, + "learning_rate": 5.339011579337761e-07, + "loss": 0.5273, + "step": 27930 + }, + { + "epoch": 0.8560438886845654, + "grad_norm": 2.018024049746451, + "learning_rate": 5.336780253443579e-07, + "loss": 0.5894, + "step": 27931 + }, + { + "epoch": 0.8560745372073066, + "grad_norm": 2.149443959755096, + "learning_rate": 5.334549367627518e-07, + 
"loss": 0.501, + "step": 27932 + }, + { + "epoch": 0.8561051857300478, + "grad_norm": 2.050541225421511, + "learning_rate": 5.332318921911589e-07, + "loss": 0.5188, + "step": 27933 + }, + { + "epoch": 0.856135834252789, + "grad_norm": 0.7802440063309151, + "learning_rate": 5.33008891631775e-07, + "loss": 0.3896, + "step": 27934 + }, + { + "epoch": 0.8561664827755302, + "grad_norm": 1.4628227088687444, + "learning_rate": 5.327859350867959e-07, + "loss": 0.4974, + "step": 27935 + }, + { + "epoch": 0.8561971312982715, + "grad_norm": 1.9470374902621808, + "learning_rate": 5.325630225584206e-07, + "loss": 0.631, + "step": 27936 + }, + { + "epoch": 0.8562277798210126, + "grad_norm": 2.1587250398495863, + "learning_rate": 5.323401540488443e-07, + "loss": 0.5845, + "step": 27937 + }, + { + "epoch": 0.8562584283437539, + "grad_norm": 2.059076459678811, + "learning_rate": 5.32117329560265e-07, + "loss": 0.6148, + "step": 27938 + }, + { + "epoch": 0.856289076866495, + "grad_norm": 1.7492127847236305, + "learning_rate": 5.318945490948757e-07, + "loss": 0.5612, + "step": 27939 + }, + { + "epoch": 0.8563197253892363, + "grad_norm": 0.8333108702773458, + "learning_rate": 5.316718126548726e-07, + "loss": 0.418, + "step": 27940 + }, + { + "epoch": 0.8563503739119774, + "grad_norm": 1.8508211965771968, + "learning_rate": 5.314491202424515e-07, + "loss": 0.5642, + "step": 27941 + }, + { + "epoch": 0.8563810224347187, + "grad_norm": 0.7602896868234157, + "learning_rate": 5.312264718598053e-07, + "loss": 0.3724, + "step": 27942 + }, + { + "epoch": 0.8564116709574598, + "grad_norm": 1.9592704772518872, + "learning_rate": 5.310038675091273e-07, + "loss": 0.648, + "step": 27943 + }, + { + "epoch": 0.8564423194802011, + "grad_norm": 2.107759974506321, + "learning_rate": 5.307813071926116e-07, + "loss": 0.523, + "step": 27944 + }, + { + "epoch": 0.8564729680029423, + "grad_norm": 1.9520881425592007, + "learning_rate": 5.30558790912451e-07, + "loss": 0.6609, + "step": 27945 + }, + { + "epoch": 0.8565036165256834, + "grad_norm": 1.9120236761606986, + "learning_rate": 5.303363186708394e-07, + "loss": 0.6105, + "step": 27946 + }, + { + "epoch": 0.8565342650484247, + "grad_norm": 1.8367827225603515, + "learning_rate": 5.301138904699665e-07, + "loss": 0.635, + "step": 27947 + }, + { + "epoch": 0.8565649135711658, + "grad_norm": 1.905422386722036, + "learning_rate": 5.298915063120252e-07, + "loss": 0.585, + "step": 27948 + }, + { + "epoch": 0.8565955620939071, + "grad_norm": 2.6656968157061023, + "learning_rate": 5.296691661992081e-07, + "loss": 0.61, + "step": 27949 + }, + { + "epoch": 0.8566262106166482, + "grad_norm": 1.9380365533945436, + "learning_rate": 5.294468701337036e-07, + "loss": 0.6235, + "step": 27950 + }, + { + "epoch": 0.8566568591393895, + "grad_norm": 1.798958957565058, + "learning_rate": 5.292246181177014e-07, + "loss": 0.4886, + "step": 27951 + }, + { + "epoch": 0.8566875076621306, + "grad_norm": 1.9288485107777564, + "learning_rate": 5.290024101533952e-07, + "loss": 0.5417, + "step": 27952 + }, + { + "epoch": 0.8567181561848719, + "grad_norm": 1.7487058200979726, + "learning_rate": 5.287802462429708e-07, + "loss": 0.5167, + "step": 27953 + }, + { + "epoch": 0.856748804707613, + "grad_norm": 1.9039849804024467, + "learning_rate": 5.285581263886197e-07, + "loss": 0.5723, + "step": 27954 + }, + { + "epoch": 0.8567794532303543, + "grad_norm": 0.7649259339154661, + "learning_rate": 5.283360505925283e-07, + "loss": 0.3906, + "step": 27955 + }, + { + "epoch": 0.8568101017530955, + "grad_norm": 
1.7863727624170853, + "learning_rate": 5.281140188568862e-07, + "loss": 0.5739, + "step": 27956 + }, + { + "epoch": 0.8568407502758367, + "grad_norm": 0.7966182844846535, + "learning_rate": 5.27892031183882e-07, + "loss": 0.389, + "step": 27957 + }, + { + "epoch": 0.8568713987985779, + "grad_norm": 1.788915437297957, + "learning_rate": 5.276700875757002e-07, + "loss": 0.558, + "step": 27958 + }, + { + "epoch": 0.8569020473213191, + "grad_norm": 1.97104412847294, + "learning_rate": 5.274481880345301e-07, + "loss": 0.5545, + "step": 27959 + }, + { + "epoch": 0.8569326958440603, + "grad_norm": 2.07845622994804, + "learning_rate": 5.272263325625576e-07, + "loss": 0.5825, + "step": 27960 + }, + { + "epoch": 0.8569633443668015, + "grad_norm": 1.9936001958298706, + "learning_rate": 5.27004521161969e-07, + "loss": 0.5971, + "step": 27961 + }, + { + "epoch": 0.8569939928895427, + "grad_norm": 2.102110466882034, + "learning_rate": 5.267827538349474e-07, + "loss": 0.5428, + "step": 27962 + }, + { + "epoch": 0.857024641412284, + "grad_norm": 1.9135838833341303, + "learning_rate": 5.26561030583681e-07, + "loss": 0.6298, + "step": 27963 + }, + { + "epoch": 0.8570552899350251, + "grad_norm": 1.7966195167504917, + "learning_rate": 5.263393514103532e-07, + "loss": 0.6068, + "step": 27964 + }, + { + "epoch": 0.8570859384577664, + "grad_norm": 0.8069327071740833, + "learning_rate": 5.261177163171494e-07, + "loss": 0.4017, + "step": 27965 + }, + { + "epoch": 0.8571165869805075, + "grad_norm": 1.9995979981989673, + "learning_rate": 5.258961253062512e-07, + "loss": 0.5543, + "step": 27966 + }, + { + "epoch": 0.8571472355032488, + "grad_norm": 2.0323260826849077, + "learning_rate": 5.256745783798428e-07, + "loss": 0.5861, + "step": 27967 + }, + { + "epoch": 0.8571778840259899, + "grad_norm": 1.8668628395937394, + "learning_rate": 5.254530755401094e-07, + "loss": 0.6262, + "step": 27968 + }, + { + "epoch": 0.8572085325487312, + "grad_norm": 1.9445010022594906, + "learning_rate": 5.252316167892301e-07, + "loss": 0.5873, + "step": 27969 + }, + { + "epoch": 0.8572391810714723, + "grad_norm": 1.7937313526493759, + "learning_rate": 5.25010202129389e-07, + "loss": 0.5257, + "step": 27970 + }, + { + "epoch": 0.8572698295942136, + "grad_norm": 1.9252890452144158, + "learning_rate": 5.24788831562768e-07, + "loss": 0.5348, + "step": 27971 + }, + { + "epoch": 0.8573004781169548, + "grad_norm": 0.8155394872519661, + "learning_rate": 5.245675050915467e-07, + "loss": 0.3992, + "step": 27972 + }, + { + "epoch": 0.857331126639696, + "grad_norm": 1.9508139632217607, + "learning_rate": 5.243462227179069e-07, + "loss": 0.5984, + "step": 27973 + }, + { + "epoch": 0.8573617751624372, + "grad_norm": 1.6022474841543883, + "learning_rate": 5.241249844440299e-07, + "loss": 0.4854, + "step": 27974 + }, + { + "epoch": 0.8573924236851784, + "grad_norm": 1.9474551245827698, + "learning_rate": 5.239037902720939e-07, + "loss": 0.6314, + "step": 27975 + }, + { + "epoch": 0.8574230722079196, + "grad_norm": 1.8741228400555137, + "learning_rate": 5.2368264020428e-07, + "loss": 0.5568, + "step": 27976 + }, + { + "epoch": 0.8574537207306607, + "grad_norm": 1.8686385414868087, + "learning_rate": 5.234615342427651e-07, + "loss": 0.4981, + "step": 27977 + }, + { + "epoch": 0.857484369253402, + "grad_norm": 1.8544679206043544, + "learning_rate": 5.232404723897294e-07, + "loss": 0.6797, + "step": 27978 + }, + { + "epoch": 0.8575150177761431, + "grad_norm": 1.8371266798780979, + "learning_rate": 5.230194546473516e-07, + "loss": 0.6051, + "step": 27979 + 
}, + { + "epoch": 0.8575456662988844, + "grad_norm": 1.9827460386186604, + "learning_rate": 5.227984810178077e-07, + "loss": 0.5554, + "step": 27980 + }, + { + "epoch": 0.8575763148216256, + "grad_norm": 1.8870746735827173, + "learning_rate": 5.22577551503276e-07, + "loss": 0.5402, + "step": 27981 + }, + { + "epoch": 0.8576069633443668, + "grad_norm": 1.697712618568492, + "learning_rate": 5.223566661059338e-07, + "loss": 0.5526, + "step": 27982 + }, + { + "epoch": 0.857637611867108, + "grad_norm": 1.8846953184700528, + "learning_rate": 5.221358248279568e-07, + "loss": 0.6128, + "step": 27983 + }, + { + "epoch": 0.8576682603898492, + "grad_norm": 1.7390654448429395, + "learning_rate": 5.219150276715206e-07, + "loss": 0.6056, + "step": 27984 + }, + { + "epoch": 0.8576989089125904, + "grad_norm": 1.8280243360307538, + "learning_rate": 5.216942746388026e-07, + "loss": 0.6713, + "step": 27985 + }, + { + "epoch": 0.8577295574353316, + "grad_norm": 1.761328487959593, + "learning_rate": 5.214735657319758e-07, + "loss": 0.5473, + "step": 27986 + }, + { + "epoch": 0.8577602059580728, + "grad_norm": 0.8183739582548301, + "learning_rate": 5.212529009532164e-07, + "loss": 0.399, + "step": 27987 + }, + { + "epoch": 0.857790854480814, + "grad_norm": 2.210134957951464, + "learning_rate": 5.210322803046974e-07, + "loss": 0.5707, + "step": 27988 + }, + { + "epoch": 0.8578215030035552, + "grad_norm": 2.0281113186261646, + "learning_rate": 5.208117037885934e-07, + "loss": 0.5633, + "step": 27989 + }, + { + "epoch": 0.8578521515262965, + "grad_norm": 1.8886666673230068, + "learning_rate": 5.205911714070788e-07, + "loss": 0.6152, + "step": 27990 + }, + { + "epoch": 0.8578828000490376, + "grad_norm": 1.9943263588184623, + "learning_rate": 5.203706831623245e-07, + "loss": 0.4961, + "step": 27991 + }, + { + "epoch": 0.8579134485717789, + "grad_norm": 1.9675978144262134, + "learning_rate": 5.201502390565039e-07, + "loss": 0.5368, + "step": 27992 + }, + { + "epoch": 0.85794409709452, + "grad_norm": 0.8214366760008923, + "learning_rate": 5.1992983909179e-07, + "loss": 0.3949, + "step": 27993 + }, + { + "epoch": 0.8579747456172613, + "grad_norm": 0.7768187128326858, + "learning_rate": 5.197094832703531e-07, + "loss": 0.3859, + "step": 27994 + }, + { + "epoch": 0.8580053941400024, + "grad_norm": 0.7906105895758537, + "learning_rate": 5.194891715943656e-07, + "loss": 0.3965, + "step": 27995 + }, + { + "epoch": 0.8580360426627437, + "grad_norm": 1.8622881904902278, + "learning_rate": 5.19268904065997e-07, + "loss": 0.5757, + "step": 27996 + }, + { + "epoch": 0.8580666911854848, + "grad_norm": 1.78805863579303, + "learning_rate": 5.190486806874184e-07, + "loss": 0.5169, + "step": 27997 + }, + { + "epoch": 0.8580973397082261, + "grad_norm": 1.8643177015553962, + "learning_rate": 5.188285014608002e-07, + "loss": 0.5187, + "step": 27998 + }, + { + "epoch": 0.8581279882309673, + "grad_norm": 1.8032823948638323, + "learning_rate": 5.186083663883107e-07, + "loss": 0.6008, + "step": 27999 + }, + { + "epoch": 0.8581586367537085, + "grad_norm": 1.9372514826878737, + "learning_rate": 5.183882754721198e-07, + "loss": 0.5843, + "step": 28000 + }, + { + "epoch": 0.8581892852764497, + "grad_norm": 1.9707058647078748, + "learning_rate": 5.181682287143963e-07, + "loss": 0.6531, + "step": 28001 + }, + { + "epoch": 0.8582199337991909, + "grad_norm": 0.8096048581480915, + "learning_rate": 5.179482261173075e-07, + "loss": 0.4017, + "step": 28002 + }, + { + "epoch": 0.8582505823219321, + "grad_norm": 0.7750232963802235, + "learning_rate": 
5.177282676830214e-07, + "loss": 0.3896, + "step": 28003 + }, + { + "epoch": 0.8582812308446733, + "grad_norm": 0.7686253265338286, + "learning_rate": 5.175083534137065e-07, + "loss": 0.3916, + "step": 28004 + }, + { + "epoch": 0.8583118793674145, + "grad_norm": 2.0217516352393647, + "learning_rate": 5.172884833115277e-07, + "loss": 0.5907, + "step": 28005 + }, + { + "epoch": 0.8583425278901557, + "grad_norm": 1.6958262482514337, + "learning_rate": 5.170686573786532e-07, + "loss": 0.622, + "step": 28006 + }, + { + "epoch": 0.8583731764128969, + "grad_norm": 0.780664816016095, + "learning_rate": 5.168488756172463e-07, + "loss": 0.3949, + "step": 28007 + }, + { + "epoch": 0.858403824935638, + "grad_norm": 1.839513552892754, + "learning_rate": 5.166291380294769e-07, + "loss": 0.5627, + "step": 28008 + }, + { + "epoch": 0.8584344734583793, + "grad_norm": 1.6576196646206423, + "learning_rate": 5.164094446175072e-07, + "loss": 0.5833, + "step": 28009 + }, + { + "epoch": 0.8584651219811205, + "grad_norm": 2.1802951664964665, + "learning_rate": 5.161897953835015e-07, + "loss": 0.5971, + "step": 28010 + }, + { + "epoch": 0.8584957705038617, + "grad_norm": 0.7818560633993816, + "learning_rate": 5.159701903296255e-07, + "loss": 0.3847, + "step": 28011 + }, + { + "epoch": 0.8585264190266029, + "grad_norm": 1.6777202790191181, + "learning_rate": 5.157506294580428e-07, + "loss": 0.5869, + "step": 28012 + }, + { + "epoch": 0.8585570675493441, + "grad_norm": 1.8577162028044176, + "learning_rate": 5.155311127709156e-07, + "loss": 0.5834, + "step": 28013 + }, + { + "epoch": 0.8585877160720853, + "grad_norm": 1.7767847095958023, + "learning_rate": 5.153116402704083e-07, + "loss": 0.6097, + "step": 28014 + }, + { + "epoch": 0.8586183645948265, + "grad_norm": 2.0008914838155896, + "learning_rate": 5.150922119586832e-07, + "loss": 0.6332, + "step": 28015 + }, + { + "epoch": 0.8586490131175677, + "grad_norm": 1.705689806380536, + "learning_rate": 5.148728278379018e-07, + "loss": 0.5807, + "step": 28016 + }, + { + "epoch": 0.858679661640309, + "grad_norm": 0.7969791360930001, + "learning_rate": 5.146534879102267e-07, + "loss": 0.4123, + "step": 28017 + }, + { + "epoch": 0.8587103101630501, + "grad_norm": 1.886725413588049, + "learning_rate": 5.144341921778162e-07, + "loss": 0.5661, + "step": 28018 + }, + { + "epoch": 0.8587409586857914, + "grad_norm": 1.9852412386038965, + "learning_rate": 5.142149406428354e-07, + "loss": 0.5764, + "step": 28019 + }, + { + "epoch": 0.8587716072085325, + "grad_norm": 1.7765242526770706, + "learning_rate": 5.139957333074424e-07, + "loss": 0.6453, + "step": 28020 + }, + { + "epoch": 0.8588022557312738, + "grad_norm": 1.917765226844638, + "learning_rate": 5.137765701737962e-07, + "loss": 0.4987, + "step": 28021 + }, + { + "epoch": 0.8588329042540149, + "grad_norm": 1.6183597615143417, + "learning_rate": 5.135574512440572e-07, + "loss": 0.5402, + "step": 28022 + }, + { + "epoch": 0.8588635527767562, + "grad_norm": 1.832804470171407, + "learning_rate": 5.133383765203859e-07, + "loss": 0.5563, + "step": 28023 + }, + { + "epoch": 0.8588942012994973, + "grad_norm": 1.9507951378527155, + "learning_rate": 5.131193460049383e-07, + "loss": 0.5778, + "step": 28024 + }, + { + "epoch": 0.8589248498222386, + "grad_norm": 1.5462395116081793, + "learning_rate": 5.129003596998738e-07, + "loss": 0.4186, + "step": 28025 + }, + { + "epoch": 0.8589554983449798, + "grad_norm": 1.660866058948592, + "learning_rate": 5.126814176073508e-07, + "loss": 0.5471, + "step": 28026 + }, + { + "epoch": 
0.858986146867721, + "grad_norm": 1.9631639007499706, + "learning_rate": 5.124625197295263e-07, + "loss": 0.5952, + "step": 28027 + }, + { + "epoch": 0.8590167953904622, + "grad_norm": 1.8687316937493652, + "learning_rate": 5.122436660685565e-07, + "loss": 0.545, + "step": 28028 + }, + { + "epoch": 0.8590474439132034, + "grad_norm": 0.7963503830325269, + "learning_rate": 5.120248566265967e-07, + "loss": 0.373, + "step": 28029 + }, + { + "epoch": 0.8590780924359446, + "grad_norm": 2.0971551325881124, + "learning_rate": 5.11806091405806e-07, + "loss": 0.5937, + "step": 28030 + }, + { + "epoch": 0.8591087409586858, + "grad_norm": 1.9234675776433752, + "learning_rate": 5.11587370408338e-07, + "loss": 0.5395, + "step": 28031 + }, + { + "epoch": 0.859139389481427, + "grad_norm": 1.7807106507764678, + "learning_rate": 5.113686936363477e-07, + "loss": 0.4902, + "step": 28032 + }, + { + "epoch": 0.8591700380041682, + "grad_norm": 1.8802819207271224, + "learning_rate": 5.111500610919894e-07, + "loss": 0.5467, + "step": 28033 + }, + { + "epoch": 0.8592006865269094, + "grad_norm": 0.7616008815630758, + "learning_rate": 5.109314727774184e-07, + "loss": 0.3858, + "step": 28034 + }, + { + "epoch": 0.8592313350496507, + "grad_norm": 0.8202496384291486, + "learning_rate": 5.107129286947893e-07, + "loss": 0.4052, + "step": 28035 + }, + { + "epoch": 0.8592619835723918, + "grad_norm": 1.8186917742431181, + "learning_rate": 5.104944288462532e-07, + "loss": 0.5723, + "step": 28036 + }, + { + "epoch": 0.8592926320951331, + "grad_norm": 1.9319803801196285, + "learning_rate": 5.10275973233964e-07, + "loss": 0.6036, + "step": 28037 + }, + { + "epoch": 0.8593232806178742, + "grad_norm": 1.9065650437406247, + "learning_rate": 5.100575618600756e-07, + "loss": 0.5791, + "step": 28038 + }, + { + "epoch": 0.8593539291406154, + "grad_norm": 1.8923647552406924, + "learning_rate": 5.09839194726739e-07, + "loss": 0.481, + "step": 28039 + }, + { + "epoch": 0.8593845776633566, + "grad_norm": 1.7824252739139297, + "learning_rate": 5.096208718361045e-07, + "loss": 0.5555, + "step": 28040 + }, + { + "epoch": 0.8594152261860978, + "grad_norm": 1.8613608732376583, + "learning_rate": 5.094025931903246e-07, + "loss": 0.5569, + "step": 28041 + }, + { + "epoch": 0.859445874708839, + "grad_norm": 1.762864211106487, + "learning_rate": 5.091843587915507e-07, + "loss": 0.52, + "step": 28042 + }, + { + "epoch": 0.8594765232315802, + "grad_norm": 1.678032591701591, + "learning_rate": 5.089661686419318e-07, + "loss": 0.517, + "step": 28043 + }, + { + "epoch": 0.8595071717543215, + "grad_norm": 0.8274210212255301, + "learning_rate": 5.087480227436176e-07, + "loss": 0.4202, + "step": 28044 + }, + { + "epoch": 0.8595378202770626, + "grad_norm": 1.783205202985396, + "learning_rate": 5.085299210987587e-07, + "loss": 0.5842, + "step": 28045 + }, + { + "epoch": 0.8595684687998039, + "grad_norm": 1.839231478592405, + "learning_rate": 5.083118637095047e-07, + "loss": 0.5553, + "step": 28046 + }, + { + "epoch": 0.859599117322545, + "grad_norm": 0.8210705737401411, + "learning_rate": 5.080938505780031e-07, + "loss": 0.4084, + "step": 28047 + }, + { + "epoch": 0.8596297658452863, + "grad_norm": 2.187858095301255, + "learning_rate": 5.078758817064e-07, + "loss": 0.65, + "step": 28048 + }, + { + "epoch": 0.8596604143680274, + "grad_norm": 1.7506601377947013, + "learning_rate": 5.076579570968471e-07, + "loss": 0.5944, + "step": 28049 + }, + { + "epoch": 0.8596910628907687, + "grad_norm": 1.9731723893380568, + "learning_rate": 5.074400767514898e-07, + 
"loss": 0.6036, + "step": 28050 + }, + { + "epoch": 0.8597217114135098, + "grad_norm": 1.8036600911538743, + "learning_rate": 5.072222406724742e-07, + "loss": 0.5121, + "step": 28051 + }, + { + "epoch": 0.8597523599362511, + "grad_norm": 1.8059418979381807, + "learning_rate": 5.070044488619469e-07, + "loss": 0.5277, + "step": 28052 + }, + { + "epoch": 0.8597830084589922, + "grad_norm": 2.2120412778184155, + "learning_rate": 5.067867013220551e-07, + "loss": 0.654, + "step": 28053 + }, + { + "epoch": 0.8598136569817335, + "grad_norm": 2.0312631411792514, + "learning_rate": 5.065689980549438e-07, + "loss": 0.6657, + "step": 28054 + }, + { + "epoch": 0.8598443055044747, + "grad_norm": 1.8959523276273098, + "learning_rate": 5.063513390627572e-07, + "loss": 0.5037, + "step": 28055 + }, + { + "epoch": 0.8598749540272159, + "grad_norm": 1.900442137621867, + "learning_rate": 5.061337243476405e-07, + "loss": 0.5372, + "step": 28056 + }, + { + "epoch": 0.8599056025499571, + "grad_norm": 1.8947434557423664, + "learning_rate": 5.059161539117391e-07, + "loss": 0.5834, + "step": 28057 + }, + { + "epoch": 0.8599362510726983, + "grad_norm": 1.7589180244181917, + "learning_rate": 5.056986277571957e-07, + "loss": 0.5168, + "step": 28058 + }, + { + "epoch": 0.8599668995954395, + "grad_norm": 1.7739199698418475, + "learning_rate": 5.05481145886152e-07, + "loss": 0.5872, + "step": 28059 + }, + { + "epoch": 0.8599975481181807, + "grad_norm": 1.8481652138382372, + "learning_rate": 5.052637083007539e-07, + "loss": 0.5664, + "step": 28060 + }, + { + "epoch": 0.8600281966409219, + "grad_norm": 1.9339100829307332, + "learning_rate": 5.050463150031414e-07, + "loss": 0.5942, + "step": 28061 + }, + { + "epoch": 0.8600588451636632, + "grad_norm": 1.781782030018472, + "learning_rate": 5.048289659954591e-07, + "loss": 0.5231, + "step": 28062 + }, + { + "epoch": 0.8600894936864043, + "grad_norm": 1.7900954975867158, + "learning_rate": 5.046116612798463e-07, + "loss": 0.4891, + "step": 28063 + }, + { + "epoch": 0.8601201422091456, + "grad_norm": 1.9741281809895268, + "learning_rate": 5.04394400858445e-07, + "loss": 0.6071, + "step": 28064 + }, + { + "epoch": 0.8601507907318867, + "grad_norm": 1.9173161628636897, + "learning_rate": 5.041771847333965e-07, + "loss": 0.6537, + "step": 28065 + }, + { + "epoch": 0.860181439254628, + "grad_norm": 1.9684812679213175, + "learning_rate": 5.039600129068395e-07, + "loss": 0.5074, + "step": 28066 + }, + { + "epoch": 0.8602120877773691, + "grad_norm": 1.9517368003311728, + "learning_rate": 5.037428853809151e-07, + "loss": 0.5143, + "step": 28067 + }, + { + "epoch": 0.8602427363001104, + "grad_norm": 1.8814764673866455, + "learning_rate": 5.035258021577633e-07, + "loss": 0.5782, + "step": 28068 + }, + { + "epoch": 0.8602733848228515, + "grad_norm": 1.8105218117278212, + "learning_rate": 5.033087632395223e-07, + "loss": 0.5403, + "step": 28069 + }, + { + "epoch": 0.8603040333455927, + "grad_norm": 1.867744729180472, + "learning_rate": 5.030917686283287e-07, + "loss": 0.6015, + "step": 28070 + }, + { + "epoch": 0.860334681868334, + "grad_norm": 1.9905166860743262, + "learning_rate": 5.028748183263243e-07, + "loss": 0.6695, + "step": 28071 + }, + { + "epoch": 0.8603653303910751, + "grad_norm": 1.8957391055013773, + "learning_rate": 5.02657912335644e-07, + "loss": 0.6151, + "step": 28072 + }, + { + "epoch": 0.8603959789138164, + "grad_norm": 1.8713143973205264, + "learning_rate": 5.024410506584271e-07, + "loss": 0.495, + "step": 28073 + }, + { + "epoch": 0.8604266274365575, + "grad_norm": 
1.7766771866923883, + "learning_rate": 5.022242332968086e-07, + "loss": 0.5398, + "step": 28074 + }, + { + "epoch": 0.8604572759592988, + "grad_norm": 1.663765956120282, + "learning_rate": 5.020074602529251e-07, + "loss": 0.5604, + "step": 28075 + }, + { + "epoch": 0.8604879244820399, + "grad_norm": 1.8406659860029393, + "learning_rate": 5.017907315289139e-07, + "loss": 0.5457, + "step": 28076 + }, + { + "epoch": 0.8605185730047812, + "grad_norm": 1.682234509924205, + "learning_rate": 5.015740471269087e-07, + "loss": 0.524, + "step": 28077 + }, + { + "epoch": 0.8605492215275223, + "grad_norm": 1.9495255771847164, + "learning_rate": 5.013574070490452e-07, + "loss": 0.5759, + "step": 28078 + }, + { + "epoch": 0.8605798700502636, + "grad_norm": 1.8224448101410562, + "learning_rate": 5.011408112974592e-07, + "loss": 0.6532, + "step": 28079 + }, + { + "epoch": 0.8606105185730047, + "grad_norm": 1.7551419682423974, + "learning_rate": 5.00924259874283e-07, + "loss": 0.5473, + "step": 28080 + }, + { + "epoch": 0.860641167095746, + "grad_norm": 1.9639084312793897, + "learning_rate": 5.007077527816512e-07, + "loss": 0.5607, + "step": 28081 + }, + { + "epoch": 0.8606718156184872, + "grad_norm": 1.9950710303452037, + "learning_rate": 5.004912900216985e-07, + "loss": 0.6289, + "step": 28082 + }, + { + "epoch": 0.8607024641412284, + "grad_norm": 1.962883755754301, + "learning_rate": 5.002748715965549e-07, + "loss": 0.627, + "step": 28083 + }, + { + "epoch": 0.8607331126639696, + "grad_norm": 2.097796663117327, + "learning_rate": 5.000584975083556e-07, + "loss": 0.6666, + "step": 28084 + }, + { + "epoch": 0.8607637611867108, + "grad_norm": 1.7374305332282733, + "learning_rate": 4.998421677592297e-07, + "loss": 0.5493, + "step": 28085 + }, + { + "epoch": 0.860794409709452, + "grad_norm": 2.0682382866030333, + "learning_rate": 4.996258823513106e-07, + "loss": 0.6116, + "step": 28086 + }, + { + "epoch": 0.8608250582321932, + "grad_norm": 1.9707969785973711, + "learning_rate": 4.994096412867306e-07, + "loss": 0.5961, + "step": 28087 + }, + { + "epoch": 0.8608557067549344, + "grad_norm": 1.8863836456315133, + "learning_rate": 4.991934445676172e-07, + "loss": 0.6394, + "step": 28088 + }, + { + "epoch": 0.8608863552776757, + "grad_norm": 1.7507179116034584, + "learning_rate": 4.989772921961029e-07, + "loss": 0.5257, + "step": 28089 + }, + { + "epoch": 0.8609170038004168, + "grad_norm": 1.6503438181040497, + "learning_rate": 4.987611841743178e-07, + "loss": 0.5968, + "step": 28090 + }, + { + "epoch": 0.8609476523231581, + "grad_norm": 1.8858256821759296, + "learning_rate": 4.985451205043895e-07, + "loss": 0.5199, + "step": 28091 + }, + { + "epoch": 0.8609783008458992, + "grad_norm": 1.688705246853585, + "learning_rate": 4.983291011884489e-07, + "loss": 0.5479, + "step": 28092 + }, + { + "epoch": 0.8610089493686405, + "grad_norm": 2.020680052044759, + "learning_rate": 4.981131262286226e-07, + "loss": 0.6387, + "step": 28093 + }, + { + "epoch": 0.8610395978913816, + "grad_norm": 2.091936524235682, + "learning_rate": 4.978971956270389e-07, + "loss": 0.6053, + "step": 28094 + }, + { + "epoch": 0.8610702464141229, + "grad_norm": 2.244249245927105, + "learning_rate": 4.976813093858279e-07, + "loss": 0.609, + "step": 28095 + }, + { + "epoch": 0.861100894936864, + "grad_norm": 1.8188076367301111, + "learning_rate": 4.974654675071133e-07, + "loss": 0.4059, + "step": 28096 + }, + { + "epoch": 0.8611315434596053, + "grad_norm": 1.8649206399254523, + "learning_rate": 4.972496699930235e-07, + "loss": 0.5682, + "step": 28097 
+ }, + { + "epoch": 0.8611621919823464, + "grad_norm": 1.8890980430743756, + "learning_rate": 4.970339168456861e-07, + "loss": 0.5161, + "step": 28098 + }, + { + "epoch": 0.8611928405050877, + "grad_norm": 0.8329287974304102, + "learning_rate": 4.968182080672246e-07, + "loss": 0.3962, + "step": 28099 + }, + { + "epoch": 0.8612234890278289, + "grad_norm": 0.8246341655278457, + "learning_rate": 4.966025436597655e-07, + "loss": 0.4007, + "step": 28100 + }, + { + "epoch": 0.86125413755057, + "grad_norm": 2.0947561257292766, + "learning_rate": 4.963869236254343e-07, + "loss": 0.6092, + "step": 28101 + }, + { + "epoch": 0.8612847860733113, + "grad_norm": 1.7727152884871116, + "learning_rate": 4.961713479663549e-07, + "loss": 0.5774, + "step": 28102 + }, + { + "epoch": 0.8613154345960524, + "grad_norm": 2.2832659260008197, + "learning_rate": 4.959558166846518e-07, + "loss": 0.5921, + "step": 28103 + }, + { + "epoch": 0.8613460831187937, + "grad_norm": 1.8652771690610177, + "learning_rate": 4.957403297824476e-07, + "loss": 0.5207, + "step": 28104 + }, + { + "epoch": 0.8613767316415348, + "grad_norm": 1.7692729744393572, + "learning_rate": 4.955248872618667e-07, + "loss": 0.5078, + "step": 28105 + }, + { + "epoch": 0.8614073801642761, + "grad_norm": 1.8848915928859955, + "learning_rate": 4.953094891250326e-07, + "loss": 0.5228, + "step": 28106 + }, + { + "epoch": 0.8614380286870172, + "grad_norm": 0.8097374684490923, + "learning_rate": 4.950941353740651e-07, + "loss": 0.4144, + "step": 28107 + }, + { + "epoch": 0.8614686772097585, + "grad_norm": 2.0159108837010704, + "learning_rate": 4.948788260110882e-07, + "loss": 0.5479, + "step": 28108 + }, + { + "epoch": 0.8614993257324997, + "grad_norm": 0.8144993891887393, + "learning_rate": 4.946635610382239e-07, + "loss": 0.3964, + "step": 28109 + }, + { + "epoch": 0.8615299742552409, + "grad_norm": 0.7684025816778556, + "learning_rate": 4.944483404575911e-07, + "loss": 0.3991, + "step": 28110 + }, + { + "epoch": 0.8615606227779821, + "grad_norm": 1.9329224205115538, + "learning_rate": 4.942331642713116e-07, + "loss": 0.6016, + "step": 28111 + }, + { + "epoch": 0.8615912713007233, + "grad_norm": 1.8522362878487413, + "learning_rate": 4.940180324815069e-07, + "loss": 0.538, + "step": 28112 + }, + { + "epoch": 0.8616219198234645, + "grad_norm": 1.75838743869024, + "learning_rate": 4.938029450902943e-07, + "loss": 0.6497, + "step": 28113 + }, + { + "epoch": 0.8616525683462057, + "grad_norm": 0.799549057282173, + "learning_rate": 4.935879020997953e-07, + "loss": 0.3981, + "step": 28114 + }, + { + "epoch": 0.8616832168689469, + "grad_norm": 2.1235491199358965, + "learning_rate": 4.933729035121266e-07, + "loss": 0.5799, + "step": 28115 + }, + { + "epoch": 0.8617138653916881, + "grad_norm": 0.8044966630061703, + "learning_rate": 4.931579493294075e-07, + "loss": 0.3903, + "step": 28116 + }, + { + "epoch": 0.8617445139144293, + "grad_norm": 1.8330210140415613, + "learning_rate": 4.929430395537577e-07, + "loss": 0.533, + "step": 28117 + }, + { + "epoch": 0.8617751624371706, + "grad_norm": 1.9414492935210368, + "learning_rate": 4.927281741872919e-07, + "loss": 0.618, + "step": 28118 + }, + { + "epoch": 0.8618058109599117, + "grad_norm": 1.8493128389754978, + "learning_rate": 4.925133532321285e-07, + "loss": 0.5358, + "step": 28119 + }, + { + "epoch": 0.861836459482653, + "grad_norm": 1.8369775818217906, + "learning_rate": 4.922985766903859e-07, + "loss": 0.5303, + "step": 28120 + }, + { + "epoch": 0.8618671080053941, + "grad_norm": 2.072834260558551, + 
"learning_rate": 4.920838445641774e-07, + "loss": 0.5874, + "step": 28121 + }, + { + "epoch": 0.8618977565281354, + "grad_norm": 1.8621695449753632, + "learning_rate": 4.918691568556205e-07, + "loss": 0.6244, + "step": 28122 + }, + { + "epoch": 0.8619284050508765, + "grad_norm": 1.8837857569785366, + "learning_rate": 4.91654513566831e-07, + "loss": 0.5496, + "step": 28123 + }, + { + "epoch": 0.8619590535736178, + "grad_norm": 2.2442237042155773, + "learning_rate": 4.914399146999222e-07, + "loss": 0.6372, + "step": 28124 + }, + { + "epoch": 0.861989702096359, + "grad_norm": 1.8300553226801002, + "learning_rate": 4.912253602570105e-07, + "loss": 0.5638, + "step": 28125 + }, + { + "epoch": 0.8620203506191002, + "grad_norm": 1.9643374177131123, + "learning_rate": 4.910108502402067e-07, + "loss": 0.5599, + "step": 28126 + }, + { + "epoch": 0.8620509991418414, + "grad_norm": 1.9141215153582245, + "learning_rate": 4.907963846516289e-07, + "loss": 0.6382, + "step": 28127 + }, + { + "epoch": 0.8620816476645826, + "grad_norm": 1.8239292465737915, + "learning_rate": 4.905819634933878e-07, + "loss": 0.5038, + "step": 28128 + }, + { + "epoch": 0.8621122961873238, + "grad_norm": 1.9840378802948702, + "learning_rate": 4.903675867675956e-07, + "loss": 0.5854, + "step": 28129 + }, + { + "epoch": 0.862142944710065, + "grad_norm": 2.113333963400916, + "learning_rate": 4.901532544763654e-07, + "loss": 0.5674, + "step": 28130 + }, + { + "epoch": 0.8621735932328062, + "grad_norm": 1.7384007598624565, + "learning_rate": 4.899389666218101e-07, + "loss": 0.5057, + "step": 28131 + }, + { + "epoch": 0.8622042417555473, + "grad_norm": 1.7440245073420262, + "learning_rate": 4.897247232060392e-07, + "loss": 0.5782, + "step": 28132 + }, + { + "epoch": 0.8622348902782886, + "grad_norm": 1.8919524143913518, + "learning_rate": 4.895105242311643e-07, + "loss": 0.5322, + "step": 28133 + }, + { + "epoch": 0.8622655388010297, + "grad_norm": 1.798991394593926, + "learning_rate": 4.892963696992964e-07, + "loss": 0.5918, + "step": 28134 + }, + { + "epoch": 0.862296187323771, + "grad_norm": 2.1393770913725865, + "learning_rate": 4.890822596125466e-07, + "loss": 0.5521, + "step": 28135 + }, + { + "epoch": 0.8623268358465122, + "grad_norm": 2.2278922880947025, + "learning_rate": 4.888681939730233e-07, + "loss": 0.6141, + "step": 28136 + }, + { + "epoch": 0.8623574843692534, + "grad_norm": 2.0477558821873076, + "learning_rate": 4.886541727828348e-07, + "loss": 0.4943, + "step": 28137 + }, + { + "epoch": 0.8623881328919946, + "grad_norm": 1.737012157426301, + "learning_rate": 4.884401960440915e-07, + "loss": 0.5679, + "step": 28138 + }, + { + "epoch": 0.8624187814147358, + "grad_norm": 1.9589275695670543, + "learning_rate": 4.882262637589019e-07, + "loss": 0.6101, + "step": 28139 + }, + { + "epoch": 0.862449429937477, + "grad_norm": 1.9023883483944164, + "learning_rate": 4.880123759293725e-07, + "loss": 0.559, + "step": 28140 + }, + { + "epoch": 0.8624800784602182, + "grad_norm": 0.8014925840456232, + "learning_rate": 4.877985325576112e-07, + "loss": 0.4033, + "step": 28141 + }, + { + "epoch": 0.8625107269829594, + "grad_norm": 1.9383817613031995, + "learning_rate": 4.875847336457268e-07, + "loss": 0.5378, + "step": 28142 + }, + { + "epoch": 0.8625413755057006, + "grad_norm": 1.9325083496215933, + "learning_rate": 4.873709791958237e-07, + "loss": 0.5252, + "step": 28143 + }, + { + "epoch": 0.8625720240284418, + "grad_norm": 2.085821882675375, + "learning_rate": 4.871572692100096e-07, + "loss": 0.5524, + "step": 28144 + }, + { + 
"epoch": 0.8626026725511831, + "grad_norm": 1.9930205022879275, + "learning_rate": 4.86943603690388e-07, + "loss": 0.5176, + "step": 28145 + }, + { + "epoch": 0.8626333210739242, + "grad_norm": 1.90517259646636, + "learning_rate": 4.867299826390676e-07, + "loss": 0.5532, + "step": 28146 + }, + { + "epoch": 0.8626639695966655, + "grad_norm": 1.8684319309386945, + "learning_rate": 4.865164060581512e-07, + "loss": 0.5734, + "step": 28147 + }, + { + "epoch": 0.8626946181194066, + "grad_norm": 0.7613466458968321, + "learning_rate": 4.863028739497427e-07, + "loss": 0.3765, + "step": 28148 + }, + { + "epoch": 0.8627252666421479, + "grad_norm": 1.8335167734241287, + "learning_rate": 4.860893863159471e-07, + "loss": 0.6005, + "step": 28149 + }, + { + "epoch": 0.862755915164889, + "grad_norm": 2.06447231482471, + "learning_rate": 4.858759431588683e-07, + "loss": 0.5665, + "step": 28150 + }, + { + "epoch": 0.8627865636876303, + "grad_norm": 2.0271334472350784, + "learning_rate": 4.856625444806079e-07, + "loss": 0.5937, + "step": 28151 + }, + { + "epoch": 0.8628172122103714, + "grad_norm": 0.8136733750552472, + "learning_rate": 4.854491902832697e-07, + "loss": 0.4259, + "step": 28152 + }, + { + "epoch": 0.8628478607331127, + "grad_norm": 1.971341810857388, + "learning_rate": 4.852358805689556e-07, + "loss": 0.6691, + "step": 28153 + }, + { + "epoch": 0.8628785092558539, + "grad_norm": 1.735817161032438, + "learning_rate": 4.850226153397686e-07, + "loss": 0.5586, + "step": 28154 + }, + { + "epoch": 0.8629091577785951, + "grad_norm": 0.8136221504119284, + "learning_rate": 4.848093945978088e-07, + "loss": 0.3802, + "step": 28155 + }, + { + "epoch": 0.8629398063013363, + "grad_norm": 0.7858901029145279, + "learning_rate": 4.845962183451753e-07, + "loss": 0.4029, + "step": 28156 + }, + { + "epoch": 0.8629704548240775, + "grad_norm": 2.2173543817834105, + "learning_rate": 4.843830865839727e-07, + "loss": 0.5574, + "step": 28157 + }, + { + "epoch": 0.8630011033468187, + "grad_norm": 2.046596326551819, + "learning_rate": 4.841699993162985e-07, + "loss": 0.5478, + "step": 28158 + }, + { + "epoch": 0.8630317518695599, + "grad_norm": 1.6897939970024547, + "learning_rate": 4.839569565442525e-07, + "loss": 0.5444, + "step": 28159 + }, + { + "epoch": 0.8630624003923011, + "grad_norm": 2.237653767045334, + "learning_rate": 4.837439582699332e-07, + "loss": 0.5396, + "step": 28160 + }, + { + "epoch": 0.8630930489150423, + "grad_norm": 1.7736616177439852, + "learning_rate": 4.835310044954411e-07, + "loss": 0.652, + "step": 28161 + }, + { + "epoch": 0.8631236974377835, + "grad_norm": 1.8393004390512842, + "learning_rate": 4.833180952228738e-07, + "loss": 0.5293, + "step": 28162 + }, + { + "epoch": 0.8631543459605246, + "grad_norm": 2.087023588449109, + "learning_rate": 4.831052304543288e-07, + "loss": 0.5641, + "step": 28163 + }, + { + "epoch": 0.8631849944832659, + "grad_norm": 1.942928082684705, + "learning_rate": 4.82892410191903e-07, + "loss": 0.5277, + "step": 28164 + }, + { + "epoch": 0.8632156430060071, + "grad_norm": 2.0471846541480345, + "learning_rate": 4.826796344376955e-07, + "loss": 0.6514, + "step": 28165 + }, + { + "epoch": 0.8632462915287483, + "grad_norm": 0.7968092079718122, + "learning_rate": 4.824669031938007e-07, + "loss": 0.3817, + "step": 28166 + }, + { + "epoch": 0.8632769400514895, + "grad_norm": 0.7806246088653589, + "learning_rate": 4.822542164623139e-07, + "loss": 0.3969, + "step": 28167 + }, + { + "epoch": 0.8633075885742307, + "grad_norm": 1.794409373773614, + "learning_rate": 
4.820415742453343e-07, + "loss": 0.5712, + "step": 28168 + }, + { + "epoch": 0.8633382370969719, + "grad_norm": 1.5588732111089738, + "learning_rate": 4.818289765449546e-07, + "loss": 0.4564, + "step": 28169 + }, + { + "epoch": 0.8633688856197131, + "grad_norm": 1.8993773513613386, + "learning_rate": 4.816164233632692e-07, + "loss": 0.595, + "step": 28170 + }, + { + "epoch": 0.8633995341424543, + "grad_norm": 0.7452840670539739, + "learning_rate": 4.814039147023736e-07, + "loss": 0.382, + "step": 28171 + }, + { + "epoch": 0.8634301826651956, + "grad_norm": 0.8123720928102718, + "learning_rate": 4.811914505643612e-07, + "loss": 0.4131, + "step": 28172 + }, + { + "epoch": 0.8634608311879367, + "grad_norm": 1.837260308564238, + "learning_rate": 4.809790309513263e-07, + "loss": 0.5117, + "step": 28173 + }, + { + "epoch": 0.863491479710678, + "grad_norm": 1.9733036548857397, + "learning_rate": 4.807666558653601e-07, + "loss": 0.6257, + "step": 28174 + }, + { + "epoch": 0.8635221282334191, + "grad_norm": 2.0555333527972164, + "learning_rate": 4.805543253085571e-07, + "loss": 0.5803, + "step": 28175 + }, + { + "epoch": 0.8635527767561604, + "grad_norm": 2.036908845705834, + "learning_rate": 4.803420392830089e-07, + "loss": 0.5886, + "step": 28176 + }, + { + "epoch": 0.8635834252789015, + "grad_norm": 1.9627118351737896, + "learning_rate": 4.801297977908076e-07, + "loss": 0.5321, + "step": 28177 + }, + { + "epoch": 0.8636140738016428, + "grad_norm": 1.8626416241628079, + "learning_rate": 4.799176008340417e-07, + "loss": 0.592, + "step": 28178 + }, + { + "epoch": 0.8636447223243839, + "grad_norm": 1.7795862959471995, + "learning_rate": 4.797054484148061e-07, + "loss": 0.5539, + "step": 28179 + }, + { + "epoch": 0.8636753708471252, + "grad_norm": 1.8111866159796257, + "learning_rate": 4.794933405351881e-07, + "loss": 0.5439, + "step": 28180 + }, + { + "epoch": 0.8637060193698664, + "grad_norm": 1.9890736164946636, + "learning_rate": 4.792812771972799e-07, + "loss": 0.6109, + "step": 28181 + }, + { + "epoch": 0.8637366678926076, + "grad_norm": 1.8960378320980897, + "learning_rate": 4.790692584031692e-07, + "loss": 0.6165, + "step": 28182 + }, + { + "epoch": 0.8637673164153488, + "grad_norm": 1.8569296531756903, + "learning_rate": 4.788572841549461e-07, + "loss": 0.6128, + "step": 28183 + }, + { + "epoch": 0.86379796493809, + "grad_norm": 1.7474737821164774, + "learning_rate": 4.786453544546993e-07, + "loss": 0.6257, + "step": 28184 + }, + { + "epoch": 0.8638286134608312, + "grad_norm": 1.8843845194629454, + "learning_rate": 4.784334693045157e-07, + "loss": 0.5669, + "step": 28185 + }, + { + "epoch": 0.8638592619835724, + "grad_norm": 0.8338476953239702, + "learning_rate": 4.782216287064845e-07, + "loss": 0.4141, + "step": 28186 + }, + { + "epoch": 0.8638899105063136, + "grad_norm": 2.1722235759282875, + "learning_rate": 4.780098326626931e-07, + "loss": 0.6634, + "step": 28187 + }, + { + "epoch": 0.8639205590290548, + "grad_norm": 2.0374339814385674, + "learning_rate": 4.77798081175227e-07, + "loss": 0.4959, + "step": 28188 + }, + { + "epoch": 0.863951207551796, + "grad_norm": 1.9351051527851628, + "learning_rate": 4.775863742461745e-07, + "loss": 0.6001, + "step": 28189 + }, + { + "epoch": 0.8639818560745373, + "grad_norm": 1.9802778852049971, + "learning_rate": 4.773747118776196e-07, + "loss": 0.5649, + "step": 28190 + }, + { + "epoch": 0.8640125045972784, + "grad_norm": 1.728273203518851, + "learning_rate": 4.771630940716487e-07, + "loss": 0.5564, + "step": 28191 + }, + { + "epoch": 
0.8640431531200197, + "grad_norm": 1.8752614483022505, + "learning_rate": 4.769515208303483e-07, + "loss": 0.5795, + "step": 28192 + }, + { + "epoch": 0.8640738016427608, + "grad_norm": 2.1031860073483997, + "learning_rate": 4.7673999215580027e-07, + "loss": 0.591, + "step": 28193 + }, + { + "epoch": 0.864104450165502, + "grad_norm": 1.9262960289542805, + "learning_rate": 4.7652850805009086e-07, + "loss": 0.6185, + "step": 28194 + }, + { + "epoch": 0.8641350986882432, + "grad_norm": 1.8111907778472693, + "learning_rate": 4.763170685153046e-07, + "loss": 0.5882, + "step": 28195 + }, + { + "epoch": 0.8641657472109844, + "grad_norm": 2.449883529498196, + "learning_rate": 4.7610567355352356e-07, + "loss": 0.4959, + "step": 28196 + }, + { + "epoch": 0.8641963957337256, + "grad_norm": 2.0192341176627053, + "learning_rate": 4.758943231668284e-07, + "loss": 0.6948, + "step": 28197 + }, + { + "epoch": 0.8642270442564668, + "grad_norm": 1.8314397160757372, + "learning_rate": 4.7568301735730626e-07, + "loss": 0.5412, + "step": 28198 + }, + { + "epoch": 0.864257692779208, + "grad_norm": 1.8040474981582195, + "learning_rate": 4.754717561270361e-07, + "loss": 0.531, + "step": 28199 + }, + { + "epoch": 0.8642883413019492, + "grad_norm": 2.2031288190426253, + "learning_rate": 4.7526053947810127e-07, + "loss": 0.6148, + "step": 28200 + }, + { + "epoch": 0.8643189898246905, + "grad_norm": 0.7872286866100286, + "learning_rate": 4.750493674125811e-07, + "loss": 0.3982, + "step": 28201 + }, + { + "epoch": 0.8643496383474316, + "grad_norm": 2.111645981154864, + "learning_rate": 4.748382399325574e-07, + "loss": 0.5213, + "step": 28202 + }, + { + "epoch": 0.8643802868701729, + "grad_norm": 1.6859279290377065, + "learning_rate": 4.746271570401112e-07, + "loss": 0.5725, + "step": 28203 + }, + { + "epoch": 0.864410935392914, + "grad_norm": 2.0971429833919073, + "learning_rate": 4.744161187373203e-07, + "loss": 0.603, + "step": 28204 + }, + { + "epoch": 0.8644415839156553, + "grad_norm": 1.9969178777212837, + "learning_rate": 4.742051250262658e-07, + "loss": 0.5733, + "step": 28205 + }, + { + "epoch": 0.8644722324383964, + "grad_norm": 0.7599973008324132, + "learning_rate": 4.7399417590902663e-07, + "loss": 0.3847, + "step": 28206 + }, + { + "epoch": 0.8645028809611377, + "grad_norm": 1.8443352507358775, + "learning_rate": 4.737832713876805e-07, + "loss": 0.5357, + "step": 28207 + }, + { + "epoch": 0.8645335294838788, + "grad_norm": 1.6225661791914907, + "learning_rate": 4.7357241146430533e-07, + "loss": 0.437, + "step": 28208 + }, + { + "epoch": 0.8645641780066201, + "grad_norm": 0.7772027524350723, + "learning_rate": 4.7336159614098045e-07, + "loss": 0.3998, + "step": 28209 + }, + { + "epoch": 0.8645948265293613, + "grad_norm": 1.9592873945062281, + "learning_rate": 4.7315082541978085e-07, + "loss": 0.5263, + "step": 28210 + }, + { + "epoch": 0.8646254750521025, + "grad_norm": 1.96759810687807, + "learning_rate": 4.729400993027855e-07, + "loss": 0.5011, + "step": 28211 + }, + { + "epoch": 0.8646561235748437, + "grad_norm": 1.897593770204818, + "learning_rate": 4.7272941779206885e-07, + "loss": 0.5643, + "step": 28212 + }, + { + "epoch": 0.8646867720975849, + "grad_norm": 1.8524119676341848, + "learning_rate": 4.725187808897075e-07, + "loss": 0.5949, + "step": 28213 + }, + { + "epoch": 0.8647174206203261, + "grad_norm": 1.940473824929808, + "learning_rate": 4.723081885977776e-07, + "loss": 0.6193, + "step": 28214 + }, + { + "epoch": 0.8647480691430673, + "grad_norm": 1.6601587255212267, + "learning_rate": 
4.720976409183531e-07, + "loss": 0.5516, + "step": 28215 + }, + { + "epoch": 0.8647787176658085, + "grad_norm": 1.896504997033576, + "learning_rate": 4.718871378535089e-07, + "loss": 0.6239, + "step": 28216 + }, + { + "epoch": 0.8648093661885498, + "grad_norm": 0.8066759616230323, + "learning_rate": 4.716766794053201e-07, + "loss": 0.4071, + "step": 28217 + }, + { + "epoch": 0.8648400147112909, + "grad_norm": 2.017502225943168, + "learning_rate": 4.714662655758589e-07, + "loss": 0.6314, + "step": 28218 + }, + { + "epoch": 0.8648706632340322, + "grad_norm": 2.099183543042754, + "learning_rate": 4.7125589636719925e-07, + "loss": 0.6574, + "step": 28219 + }, + { + "epoch": 0.8649013117567733, + "grad_norm": 1.9651360314775408, + "learning_rate": 4.7104557178141495e-07, + "loss": 0.5996, + "step": 28220 + }, + { + "epoch": 0.8649319602795146, + "grad_norm": 1.9061798453329968, + "learning_rate": 4.708352918205761e-07, + "loss": 0.5205, + "step": 28221 + }, + { + "epoch": 0.8649626088022557, + "grad_norm": 2.1256594846699555, + "learning_rate": 4.706250564867576e-07, + "loss": 0.6234, + "step": 28222 + }, + { + "epoch": 0.864993257324997, + "grad_norm": 2.0817554393734063, + "learning_rate": 4.704148657820279e-07, + "loss": 0.5708, + "step": 28223 + }, + { + "epoch": 0.8650239058477381, + "grad_norm": 1.831876933065554, + "learning_rate": 4.7020471970845913e-07, + "loss": 0.5381, + "step": 28224 + }, + { + "epoch": 0.8650545543704793, + "grad_norm": 1.6971415631744933, + "learning_rate": 4.6999461826812363e-07, + "loss": 0.5676, + "step": 28225 + }, + { + "epoch": 0.8650852028932206, + "grad_norm": 1.753967995931039, + "learning_rate": 4.6978456146308915e-07, + "loss": 0.559, + "step": 28226 + }, + { + "epoch": 0.8651158514159617, + "grad_norm": 1.766489821813985, + "learning_rate": 4.695745492954268e-07, + "loss": 0.5268, + "step": 28227 + }, + { + "epoch": 0.865146499938703, + "grad_norm": 1.8111713132827794, + "learning_rate": 4.6936458176720603e-07, + "loss": 0.5091, + "step": 28228 + }, + { + "epoch": 0.8651771484614441, + "grad_norm": 1.9642752621739976, + "learning_rate": 4.691546588804946e-07, + "loss": 0.6314, + "step": 28229 + }, + { + "epoch": 0.8652077969841854, + "grad_norm": 2.020318749316134, + "learning_rate": 4.6894478063736147e-07, + "loss": 0.6102, + "step": 28230 + }, + { + "epoch": 0.8652384455069265, + "grad_norm": 1.7608106726520882, + "learning_rate": 4.6873494703987555e-07, + "loss": 0.5165, + "step": 28231 + }, + { + "epoch": 0.8652690940296678, + "grad_norm": 1.9139032053972727, + "learning_rate": 4.685251580901029e-07, + "loss": 0.498, + "step": 28232 + }, + { + "epoch": 0.8652997425524089, + "grad_norm": 1.7962899653658604, + "learning_rate": 4.683154137901125e-07, + "loss": 0.5851, + "step": 28233 + }, + { + "epoch": 0.8653303910751502, + "grad_norm": 1.732187140655045, + "learning_rate": 4.6810571414196817e-07, + "loss": 0.5823, + "step": 28234 + }, + { + "epoch": 0.8653610395978913, + "grad_norm": 1.9072288341406034, + "learning_rate": 4.6789605914773827e-07, + "loss": 0.5769, + "step": 28235 + }, + { + "epoch": 0.8653916881206326, + "grad_norm": 1.9666781032772709, + "learning_rate": 4.67686448809489e-07, + "loss": 0.5688, + "step": 28236 + }, + { + "epoch": 0.8654223366433738, + "grad_norm": 1.949472399003692, + "learning_rate": 4.674768831292836e-07, + "loss": 0.6122, + "step": 28237 + }, + { + "epoch": 0.865452985166115, + "grad_norm": 1.7498659490084607, + "learning_rate": 4.672673621091883e-07, + "loss": 0.5621, + "step": 28238 + }, + { + "epoch": 
0.8654836336888562, + "grad_norm": 1.8519404859070667, + "learning_rate": 4.670578857512681e-07, + "loss": 0.6421, + "step": 28239 + }, + { + "epoch": 0.8655142822115974, + "grad_norm": 1.8000147396636992, + "learning_rate": 4.668484540575857e-07, + "loss": 0.6135, + "step": 28240 + }, + { + "epoch": 0.8655449307343386, + "grad_norm": 2.169116277145488, + "learning_rate": 4.666390670302062e-07, + "loss": 0.5259, + "step": 28241 + }, + { + "epoch": 0.8655755792570798, + "grad_norm": 1.8351958731985154, + "learning_rate": 4.664297246711902e-07, + "loss": 0.5227, + "step": 28242 + }, + { + "epoch": 0.865606227779821, + "grad_norm": 2.048249987959955, + "learning_rate": 4.662204269826037e-07, + "loss": 0.6433, + "step": 28243 + }, + { + "epoch": 0.8656368763025623, + "grad_norm": 0.8389590099778653, + "learning_rate": 4.660111739665074e-07, + "loss": 0.403, + "step": 28244 + }, + { + "epoch": 0.8656675248253034, + "grad_norm": 1.7936629521600025, + "learning_rate": 4.658019656249624e-07, + "loss": 0.5674, + "step": 28245 + }, + { + "epoch": 0.8656981733480447, + "grad_norm": 2.0780309332605267, + "learning_rate": 4.6559280196003087e-07, + "loss": 0.5484, + "step": 28246 + }, + { + "epoch": 0.8657288218707858, + "grad_norm": 1.9615540696530573, + "learning_rate": 4.6538368297377403e-07, + "loss": 0.5786, + "step": 28247 + }, + { + "epoch": 0.8657594703935271, + "grad_norm": 1.7596128845857957, + "learning_rate": 4.6517460866825125e-07, + "loss": 0.5382, + "step": 28248 + }, + { + "epoch": 0.8657901189162682, + "grad_norm": 1.7671668336162956, + "learning_rate": 4.649655790455232e-07, + "loss": 0.6446, + "step": 28249 + }, + { + "epoch": 0.8658207674390095, + "grad_norm": 1.8647548547665247, + "learning_rate": 4.6475659410765097e-07, + "loss": 0.5777, + "step": 28250 + }, + { + "epoch": 0.8658514159617506, + "grad_norm": 1.7626158574997375, + "learning_rate": 4.645476538566912e-07, + "loss": 0.5349, + "step": 28251 + }, + { + "epoch": 0.8658820644844919, + "grad_norm": 0.7791847451286066, + "learning_rate": 4.643387582947051e-07, + "loss": 0.4168, + "step": 28252 + }, + { + "epoch": 0.865912713007233, + "grad_norm": 1.9776931725968276, + "learning_rate": 4.6412990742374766e-07, + "loss": 0.5883, + "step": 28253 + }, + { + "epoch": 0.8659433615299743, + "grad_norm": 1.826158020771826, + "learning_rate": 4.6392110124588055e-07, + "loss": 0.5192, + "step": 28254 + }, + { + "epoch": 0.8659740100527155, + "grad_norm": 1.7358558493382628, + "learning_rate": 4.6371233976315935e-07, + "loss": 0.6253, + "step": 28255 + }, + { + "epoch": 0.8660046585754566, + "grad_norm": 1.8492103240258269, + "learning_rate": 4.635036229776402e-07, + "loss": 0.5466, + "step": 28256 + }, + { + "epoch": 0.8660353070981979, + "grad_norm": 1.944321882177437, + "learning_rate": 4.6329495089138086e-07, + "loss": 0.5682, + "step": 28257 + }, + { + "epoch": 0.866065955620939, + "grad_norm": 1.9489769333758236, + "learning_rate": 4.6308632350643756e-07, + "loss": 0.6269, + "step": 28258 + }, + { + "epoch": 0.8660966041436803, + "grad_norm": 2.0977504336438586, + "learning_rate": 4.6287774082486523e-07, + "loss": 0.5464, + "step": 28259 + }, + { + "epoch": 0.8661272526664214, + "grad_norm": 1.6429431545545745, + "learning_rate": 4.62669202848719e-07, + "loss": 0.5261, + "step": 28260 + }, + { + "epoch": 0.8661579011891627, + "grad_norm": 1.9245650760019266, + "learning_rate": 4.624607095800543e-07, + "loss": 0.6369, + "step": 28261 + }, + { + "epoch": 0.8661885497119038, + "grad_norm": 1.760652021267608, + "learning_rate": 
4.622522610209257e-07, + "loss": 0.5426, + "step": 28262 + }, + { + "epoch": 0.8662191982346451, + "grad_norm": 0.792557753706329, + "learning_rate": 4.6204385717338705e-07, + "loss": 0.4069, + "step": 28263 + }, + { + "epoch": 0.8662498467573863, + "grad_norm": 1.8848405016821244, + "learning_rate": 4.6183549803948903e-07, + "loss": 0.487, + "step": 28264 + }, + { + "epoch": 0.8662804952801275, + "grad_norm": 1.9657120368633791, + "learning_rate": 4.6162718362128933e-07, + "loss": 0.558, + "step": 28265 + }, + { + "epoch": 0.8663111438028687, + "grad_norm": 2.0536803814676268, + "learning_rate": 4.6141891392083804e-07, + "loss": 0.6461, + "step": 28266 + }, + { + "epoch": 0.8663417923256099, + "grad_norm": 2.0024672851733314, + "learning_rate": 4.612106889401863e-07, + "loss": 0.5875, + "step": 28267 + }, + { + "epoch": 0.8663724408483511, + "grad_norm": 1.9333376752755436, + "learning_rate": 4.610025086813874e-07, + "loss": 0.5405, + "step": 28268 + }, + { + "epoch": 0.8664030893710923, + "grad_norm": 1.748048995272089, + "learning_rate": 4.6079437314649257e-07, + "loss": 0.4828, + "step": 28269 + }, + { + "epoch": 0.8664337378938335, + "grad_norm": 1.9661723815109375, + "learning_rate": 4.605862823375512e-07, + "loss": 0.5693, + "step": 28270 + }, + { + "epoch": 0.8664643864165747, + "grad_norm": 1.8817142318773101, + "learning_rate": 4.6037823625661504e-07, + "loss": 0.5191, + "step": 28271 + }, + { + "epoch": 0.8664950349393159, + "grad_norm": 1.7828811183366586, + "learning_rate": 4.601702349057335e-07, + "loss": 0.6342, + "step": 28272 + }, + { + "epoch": 0.8665256834620572, + "grad_norm": 2.064547336841913, + "learning_rate": 4.599622782869573e-07, + "loss": 0.6446, + "step": 28273 + }, + { + "epoch": 0.8665563319847983, + "grad_norm": 1.911624404233292, + "learning_rate": 4.5975436640233407e-07, + "loss": 0.5149, + "step": 28274 + }, + { + "epoch": 0.8665869805075396, + "grad_norm": 1.7051572843756109, + "learning_rate": 4.5954649925391116e-07, + "loss": 0.5562, + "step": 28275 + }, + { + "epoch": 0.8666176290302807, + "grad_norm": 1.6848807339766592, + "learning_rate": 4.593386768437402e-07, + "loss": 0.5001, + "step": 28276 + }, + { + "epoch": 0.866648277553022, + "grad_norm": 0.8104284657301735, + "learning_rate": 4.591308991738669e-07, + "loss": 0.4067, + "step": 28277 + }, + { + "epoch": 0.8666789260757631, + "grad_norm": 1.8165296056142135, + "learning_rate": 4.589231662463373e-07, + "loss": 0.5877, + "step": 28278 + }, + { + "epoch": 0.8667095745985044, + "grad_norm": 2.1268194303569117, + "learning_rate": 4.587154780632003e-07, + "loss": 0.5986, + "step": 28279 + }, + { + "epoch": 0.8667402231212455, + "grad_norm": 1.8182412145092366, + "learning_rate": 4.585078346265015e-07, + "loss": 0.5986, + "step": 28280 + }, + { + "epoch": 0.8667708716439868, + "grad_norm": 1.7138757820415467, + "learning_rate": 4.5830023593828764e-07, + "loss": 0.4622, + "step": 28281 + }, + { + "epoch": 0.866801520166728, + "grad_norm": 1.857125583442514, + "learning_rate": 4.5809268200060265e-07, + "loss": 0.5499, + "step": 28282 + }, + { + "epoch": 0.8668321686894692, + "grad_norm": 2.20058943715322, + "learning_rate": 4.578851728154932e-07, + "loss": 0.6394, + "step": 28283 + }, + { + "epoch": 0.8668628172122104, + "grad_norm": 1.8758507167766423, + "learning_rate": 4.576777083850037e-07, + "loss": 0.6612, + "step": 28284 + }, + { + "epoch": 0.8668934657349516, + "grad_norm": 1.9719453035416654, + "learning_rate": 4.5747028871117815e-07, + "loss": 0.6043, + "step": 28285 + }, + { + "epoch": 
0.8669241142576928, + "grad_norm": 0.8187881521823875, + "learning_rate": 4.572629137960588e-07, + "loss": 0.4186, + "step": 28286 + }, + { + "epoch": 0.8669547627804339, + "grad_norm": 1.8846806344794786, + "learning_rate": 4.570555836416907e-07, + "loss": 0.4989, + "step": 28287 + }, + { + "epoch": 0.8669854113031752, + "grad_norm": 1.957434831831752, + "learning_rate": 4.56848298250116e-07, + "loss": 0.6199, + "step": 28288 + }, + { + "epoch": 0.8670160598259163, + "grad_norm": 1.9530635097497462, + "learning_rate": 4.566410576233782e-07, + "loss": 0.5386, + "step": 28289 + }, + { + "epoch": 0.8670467083486576, + "grad_norm": 2.1044793452508777, + "learning_rate": 4.5643386176351777e-07, + "loss": 0.5946, + "step": 28290 + }, + { + "epoch": 0.8670773568713988, + "grad_norm": 1.9876557348664943, + "learning_rate": 4.562267106725776e-07, + "loss": 0.6604, + "step": 28291 + }, + { + "epoch": 0.86710800539414, + "grad_norm": 0.7743718740689581, + "learning_rate": 4.560196043525983e-07, + "loss": 0.4053, + "step": 28292 + }, + { + "epoch": 0.8671386539168812, + "grad_norm": 1.863423548221988, + "learning_rate": 4.5581254280562094e-07, + "loss": 0.6874, + "step": 28293 + }, + { + "epoch": 0.8671693024396224, + "grad_norm": 1.8739530660224657, + "learning_rate": 4.5560552603368334e-07, + "loss": 0.5977, + "step": 28294 + }, + { + "epoch": 0.8671999509623636, + "grad_norm": 1.824773325352809, + "learning_rate": 4.5539855403882895e-07, + "loss": 0.528, + "step": 28295 + }, + { + "epoch": 0.8672305994851048, + "grad_norm": 2.0648511043573587, + "learning_rate": 4.551916268230955e-07, + "loss": 0.6832, + "step": 28296 + }, + { + "epoch": 0.867261248007846, + "grad_norm": 1.8570721170136453, + "learning_rate": 4.549847443885208e-07, + "loss": 0.6454, + "step": 28297 + }, + { + "epoch": 0.8672918965305872, + "grad_norm": 1.8056287328451441, + "learning_rate": 4.5477790673714437e-07, + "loss": 0.6337, + "step": 28298 + }, + { + "epoch": 0.8673225450533284, + "grad_norm": 1.9244176130765334, + "learning_rate": 4.545711138710046e-07, + "loss": 0.4977, + "step": 28299 + }, + { + "epoch": 0.8673531935760697, + "grad_norm": 0.768658613751991, + "learning_rate": 4.543643657921387e-07, + "loss": 0.3865, + "step": 28300 + }, + { + "epoch": 0.8673838420988108, + "grad_norm": 1.9908347052178526, + "learning_rate": 4.5415766250258343e-07, + "loss": 0.6306, + "step": 28301 + }, + { + "epoch": 0.8674144906215521, + "grad_norm": 1.796563329561357, + "learning_rate": 4.53951004004376e-07, + "loss": 0.5126, + "step": 28302 + }, + { + "epoch": 0.8674451391442932, + "grad_norm": 2.1523382103053246, + "learning_rate": 4.5374439029955307e-07, + "loss": 0.7131, + "step": 28303 + }, + { + "epoch": 0.8674757876670345, + "grad_norm": 0.7847170824844732, + "learning_rate": 4.535378213901498e-07, + "loss": 0.3739, + "step": 28304 + }, + { + "epoch": 0.8675064361897756, + "grad_norm": 2.0712835247228556, + "learning_rate": 4.533312972781995e-07, + "loss": 0.5655, + "step": 28305 + }, + { + "epoch": 0.8675370847125169, + "grad_norm": 0.7752997478808696, + "learning_rate": 4.5312481796574157e-07, + "loss": 0.3788, + "step": 28306 + }, + { + "epoch": 0.867567733235258, + "grad_norm": 0.8147656532741624, + "learning_rate": 4.529183834548073e-07, + "loss": 0.4095, + "step": 28307 + }, + { + "epoch": 0.8675983817579993, + "grad_norm": 1.9506958486938328, + "learning_rate": 4.5271199374743226e-07, + "loss": 0.4997, + "step": 28308 + }, + { + "epoch": 0.8676290302807405, + "grad_norm": 1.73387594056595, + "learning_rate": 
4.5250564884564864e-07, + "loss": 0.5332, + "step": 28309 + }, + { + "epoch": 0.8676596788034817, + "grad_norm": 1.8065451915877204, + "learning_rate": 4.522993487514904e-07, + "loss": 0.5625, + "step": 28310 + }, + { + "epoch": 0.8676903273262229, + "grad_norm": 1.8278682154249914, + "learning_rate": 4.5209309346699093e-07, + "loss": 0.5557, + "step": 28311 + }, + { + "epoch": 0.8677209758489641, + "grad_norm": 2.174144037873075, + "learning_rate": 4.518868829941814e-07, + "loss": 0.5766, + "step": 28312 + }, + { + "epoch": 0.8677516243717053, + "grad_norm": 1.6714953086003448, + "learning_rate": 4.516807173350934e-07, + "loss": 0.5717, + "step": 28313 + }, + { + "epoch": 0.8677822728944465, + "grad_norm": 1.5419402901802692, + "learning_rate": 4.514745964917605e-07, + "loss": 0.5817, + "step": 28314 + }, + { + "epoch": 0.8678129214171877, + "grad_norm": 3.196264889646702, + "learning_rate": 4.512685204662115e-07, + "loss": 0.6384, + "step": 28315 + }, + { + "epoch": 0.867843569939929, + "grad_norm": 1.8146335150834418, + "learning_rate": 4.51062489260477e-07, + "loss": 0.6102, + "step": 28316 + }, + { + "epoch": 0.8678742184626701, + "grad_norm": 2.0606460437631937, + "learning_rate": 4.5085650287658875e-07, + "loss": 0.5983, + "step": 28317 + }, + { + "epoch": 0.8679048669854112, + "grad_norm": 1.9487458034276337, + "learning_rate": 4.506505613165746e-07, + "loss": 0.6123, + "step": 28318 + }, + { + "epoch": 0.8679355155081525, + "grad_norm": 1.9480493737009146, + "learning_rate": 4.5044466458246563e-07, + "loss": 0.659, + "step": 28319 + }, + { + "epoch": 0.8679661640308937, + "grad_norm": 1.6502801989892875, + "learning_rate": 4.50238812676288e-07, + "loss": 0.5825, + "step": 28320 + }, + { + "epoch": 0.8679968125536349, + "grad_norm": 1.7300308529628687, + "learning_rate": 4.500330056000718e-07, + "loss": 0.4266, + "step": 28321 + }, + { + "epoch": 0.8680274610763761, + "grad_norm": 2.1893154194723623, + "learning_rate": 4.498272433558454e-07, + "loss": 0.6423, + "step": 28322 + }, + { + "epoch": 0.8680581095991173, + "grad_norm": 1.9420693446644242, + "learning_rate": 4.4962152594563436e-07, + "loss": 0.5521, + "step": 28323 + }, + { + "epoch": 0.8680887581218585, + "grad_norm": 1.9311858366289971, + "learning_rate": 4.494158533714665e-07, + "loss": 0.6002, + "step": 28324 + }, + { + "epoch": 0.8681194066445997, + "grad_norm": 2.0525045997512437, + "learning_rate": 4.4921022563536974e-07, + "loss": 0.5191, + "step": 28325 + }, + { + "epoch": 0.8681500551673409, + "grad_norm": 2.1431351758776627, + "learning_rate": 4.4900464273936793e-07, + "loss": 0.6364, + "step": 28326 + }, + { + "epoch": 0.8681807036900822, + "grad_norm": 1.880914715789851, + "learning_rate": 4.487991046854878e-07, + "loss": 0.5748, + "step": 28327 + }, + { + "epoch": 0.8682113522128233, + "grad_norm": 1.7032695007148944, + "learning_rate": 4.4859361147575553e-07, + "loss": 0.4393, + "step": 28328 + }, + { + "epoch": 0.8682420007355646, + "grad_norm": 1.9361118642519863, + "learning_rate": 4.4838816311219445e-07, + "loss": 0.6154, + "step": 28329 + }, + { + "epoch": 0.8682726492583057, + "grad_norm": 0.7793396077418914, + "learning_rate": 4.4818275959682967e-07, + "loss": 0.3961, + "step": 28330 + }, + { + "epoch": 0.868303297781047, + "grad_norm": 2.0684760093531844, + "learning_rate": 4.4797740093168395e-07, + "loss": 0.5416, + "step": 28331 + }, + { + "epoch": 0.8683339463037881, + "grad_norm": 0.7723132006702144, + "learning_rate": 4.4777208711878186e-07, + "loss": 0.3966, + "step": 28332 + }, + { + "epoch": 
0.8683645948265294, + "grad_norm": 1.7140959037220134, + "learning_rate": 4.475668181601472e-07, + "loss": 0.5889, + "step": 28333 + }, + { + "epoch": 0.8683952433492705, + "grad_norm": 0.7795889615997111, + "learning_rate": 4.473615940578002e-07, + "loss": 0.3793, + "step": 28334 + }, + { + "epoch": 0.8684258918720118, + "grad_norm": 1.8971473310344842, + "learning_rate": 4.4715641481376414e-07, + "loss": 0.564, + "step": 28335 + }, + { + "epoch": 0.868456540394753, + "grad_norm": 1.652306317810122, + "learning_rate": 4.4695128043006187e-07, + "loss": 0.5021, + "step": 28336 + }, + { + "epoch": 0.8684871889174942, + "grad_norm": 1.965597528748953, + "learning_rate": 4.467461909087129e-07, + "loss": 0.6458, + "step": 28337 + }, + { + "epoch": 0.8685178374402354, + "grad_norm": 1.8158411367554645, + "learning_rate": 4.465411462517394e-07, + "loss": 0.5533, + "step": 28338 + }, + { + "epoch": 0.8685484859629766, + "grad_norm": 2.169994118816739, + "learning_rate": 4.463361464611604e-07, + "loss": 0.5951, + "step": 28339 + }, + { + "epoch": 0.8685791344857178, + "grad_norm": 0.7761031742048309, + "learning_rate": 4.46131191538996e-07, + "loss": 0.3818, + "step": 28340 + }, + { + "epoch": 0.868609783008459, + "grad_norm": 1.845896119955678, + "learning_rate": 4.459262814872672e-07, + "loss": 0.5577, + "step": 28341 + }, + { + "epoch": 0.8686404315312002, + "grad_norm": 0.7690665166289922, + "learning_rate": 4.457214163079915e-07, + "loss": 0.3885, + "step": 28342 + }, + { + "epoch": 0.8686710800539414, + "grad_norm": 1.8469319184559432, + "learning_rate": 4.455165960031876e-07, + "loss": 0.5589, + "step": 28343 + }, + { + "epoch": 0.8687017285766826, + "grad_norm": 1.9357732012266298, + "learning_rate": 4.4531182057487464e-07, + "loss": 0.5855, + "step": 28344 + }, + { + "epoch": 0.8687323770994239, + "grad_norm": 2.0376982494869496, + "learning_rate": 4.4510709002506924e-07, + "loss": 0.5641, + "step": 28345 + }, + { + "epoch": 0.868763025622165, + "grad_norm": 1.9432765580025997, + "learning_rate": 4.449024043557887e-07, + "loss": 0.6336, + "step": 28346 + }, + { + "epoch": 0.8687936741449063, + "grad_norm": 1.8204393339523515, + "learning_rate": 4.446977635690514e-07, + "loss": 0.5443, + "step": 28347 + }, + { + "epoch": 0.8688243226676474, + "grad_norm": 1.8663011523663247, + "learning_rate": 4.4449316766687177e-07, + "loss": 0.5353, + "step": 28348 + }, + { + "epoch": 0.8688549711903886, + "grad_norm": 2.8766479276333783, + "learning_rate": 4.442886166512672e-07, + "loss": 0.5259, + "step": 28349 + }, + { + "epoch": 0.8688856197131298, + "grad_norm": 1.914071115175663, + "learning_rate": 4.440841105242516e-07, + "loss": 0.5639, + "step": 28350 + }, + { + "epoch": 0.868916268235871, + "grad_norm": 1.739981624364872, + "learning_rate": 4.438796492878411e-07, + "loss": 0.5658, + "step": 28351 + }, + { + "epoch": 0.8689469167586122, + "grad_norm": 1.9293437711794323, + "learning_rate": 4.436752329440508e-07, + "loss": 0.6151, + "step": 28352 + }, + { + "epoch": 0.8689775652813534, + "grad_norm": 0.7774275950335542, + "learning_rate": 4.434708614948935e-07, + "loss": 0.3994, + "step": 28353 + }, + { + "epoch": 0.8690082138040947, + "grad_norm": 1.976724900623219, + "learning_rate": 4.432665349423837e-07, + "loss": 0.5571, + "step": 28354 + }, + { + "epoch": 0.8690388623268358, + "grad_norm": 2.0021257768728895, + "learning_rate": 4.430622532885354e-07, + "loss": 0.6175, + "step": 28355 + }, + { + "epoch": 0.8690695108495771, + "grad_norm": 1.7872235099940874, + "learning_rate": 
4.4285801653535964e-07, + "loss": 0.6164, + "step": 28356 + }, + { + "epoch": 0.8691001593723182, + "grad_norm": 1.8022408832447618, + "learning_rate": 4.4265382468486993e-07, + "loss": 0.6673, + "step": 28357 + }, + { + "epoch": 0.8691308078950595, + "grad_norm": 1.8848462899020524, + "learning_rate": 4.424496777390791e-07, + "loss": 0.6125, + "step": 28358 + }, + { + "epoch": 0.8691614564178006, + "grad_norm": 2.0865939821727704, + "learning_rate": 4.4224557569999715e-07, + "loss": 0.5096, + "step": 28359 + }, + { + "epoch": 0.8691921049405419, + "grad_norm": 1.756156620738281, + "learning_rate": 4.4204151856963586e-07, + "loss": 0.5798, + "step": 28360 + }, + { + "epoch": 0.869222753463283, + "grad_norm": 1.8292780671049151, + "learning_rate": 4.418375063500041e-07, + "loss": 0.6038, + "step": 28361 + }, + { + "epoch": 0.8692534019860243, + "grad_norm": 1.6999361474712384, + "learning_rate": 4.416335390431159e-07, + "loss": 0.5638, + "step": 28362 + }, + { + "epoch": 0.8692840505087654, + "grad_norm": 1.8223492498831226, + "learning_rate": 4.414296166509785e-07, + "loss": 0.5139, + "step": 28363 + }, + { + "epoch": 0.8693146990315067, + "grad_norm": 1.6822138062758232, + "learning_rate": 4.412257391756003e-07, + "loss": 0.5349, + "step": 28364 + }, + { + "epoch": 0.8693453475542479, + "grad_norm": 2.157308368935073, + "learning_rate": 4.410219066189919e-07, + "loss": 0.5809, + "step": 28365 + }, + { + "epoch": 0.8693759960769891, + "grad_norm": 1.7719584518800096, + "learning_rate": 4.408181189831612e-07, + "loss": 0.5514, + "step": 28366 + }, + { + "epoch": 0.8694066445997303, + "grad_norm": 1.8863934818225854, + "learning_rate": 4.4061437627011597e-07, + "loss": 0.5935, + "step": 28367 + }, + { + "epoch": 0.8694372931224715, + "grad_norm": 1.9410762277963207, + "learning_rate": 4.4041067848186347e-07, + "loss": 0.6213, + "step": 28368 + }, + { + "epoch": 0.8694679416452127, + "grad_norm": 2.001419266769122, + "learning_rate": 4.402070256204111e-07, + "loss": 0.5255, + "step": 28369 + }, + { + "epoch": 0.8694985901679539, + "grad_norm": 0.7773428766707529, + "learning_rate": 4.4000341768776654e-07, + "loss": 0.4017, + "step": 28370 + }, + { + "epoch": 0.8695292386906951, + "grad_norm": 2.1492758161559316, + "learning_rate": 4.39799854685935e-07, + "loss": 0.5456, + "step": 28371 + }, + { + "epoch": 0.8695598872134364, + "grad_norm": 2.291324891059189, + "learning_rate": 4.3959633661692145e-07, + "loss": 0.5682, + "step": 28372 + }, + { + "epoch": 0.8695905357361775, + "grad_norm": 0.8209381561689241, + "learning_rate": 4.3939286348273215e-07, + "loss": 0.418, + "step": 28373 + }, + { + "epoch": 0.8696211842589188, + "grad_norm": 2.246373559126548, + "learning_rate": 4.391894352853726e-07, + "loss": 0.6507, + "step": 28374 + }, + { + "epoch": 0.8696518327816599, + "grad_norm": 0.7709723903244764, + "learning_rate": 4.389860520268457e-07, + "loss": 0.3611, + "step": 28375 + }, + { + "epoch": 0.8696824813044012, + "grad_norm": 2.085490463882064, + "learning_rate": 4.3878271370915606e-07, + "loss": 0.6171, + "step": 28376 + }, + { + "epoch": 0.8697131298271423, + "grad_norm": 1.9992563694849683, + "learning_rate": 4.3857942033430857e-07, + "loss": 0.4708, + "step": 28377 + }, + { + "epoch": 0.8697437783498836, + "grad_norm": 1.8101787884893628, + "learning_rate": 4.3837617190430393e-07, + "loss": 0.5177, + "step": 28378 + }, + { + "epoch": 0.8697744268726247, + "grad_norm": 1.865528534568364, + "learning_rate": 4.3817296842114667e-07, + "loss": 0.5949, + "step": 28379 + }, + { + "epoch": 
0.8698050753953659, + "grad_norm": 0.7376603636636875, + "learning_rate": 4.379698098868368e-07, + "loss": 0.3964, + "step": 28380 + }, + { + "epoch": 0.8698357239181072, + "grad_norm": 1.8880701481828857, + "learning_rate": 4.3776669630338e-07, + "loss": 0.6349, + "step": 28381 + }, + { + "epoch": 0.8698663724408483, + "grad_norm": 1.8004986795714322, + "learning_rate": 4.375636276727746e-07, + "loss": 0.5446, + "step": 28382 + }, + { + "epoch": 0.8698970209635896, + "grad_norm": 2.00687373668341, + "learning_rate": 4.373606039970213e-07, + "loss": 0.6044, + "step": 28383 + }, + { + "epoch": 0.8699276694863307, + "grad_norm": 1.7531425135506729, + "learning_rate": 4.3715762527812125e-07, + "loss": 0.5686, + "step": 28384 + }, + { + "epoch": 0.869958318009072, + "grad_norm": 1.7873757036223032, + "learning_rate": 4.3695469151807555e-07, + "loss": 0.5702, + "step": 28385 + }, + { + "epoch": 0.8699889665318131, + "grad_norm": 1.7985477524216829, + "learning_rate": 4.3675180271888217e-07, + "loss": 0.5842, + "step": 28386 + }, + { + "epoch": 0.8700196150545544, + "grad_norm": 2.072856857923916, + "learning_rate": 4.365489588825406e-07, + "loss": 0.5167, + "step": 28387 + }, + { + "epoch": 0.8700502635772955, + "grad_norm": 1.9072052719831805, + "learning_rate": 4.3634616001105024e-07, + "loss": 0.631, + "step": 28388 + }, + { + "epoch": 0.8700809121000368, + "grad_norm": 0.7794479466527564, + "learning_rate": 4.3614340610640905e-07, + "loss": 0.3924, + "step": 28389 + }, + { + "epoch": 0.870111560622778, + "grad_norm": 1.919609137560368, + "learning_rate": 4.3594069717061484e-07, + "loss": 0.5096, + "step": 28390 + }, + { + "epoch": 0.8701422091455192, + "grad_norm": 1.7124113978518103, + "learning_rate": 4.3573803320566264e-07, + "loss": 0.5394, + "step": 28391 + }, + { + "epoch": 0.8701728576682604, + "grad_norm": 1.779672062819054, + "learning_rate": 4.355354142135537e-07, + "loss": 0.546, + "step": 28392 + }, + { + "epoch": 0.8702035061910016, + "grad_norm": 2.021980314544473, + "learning_rate": 4.35332840196282e-07, + "loss": 0.5516, + "step": 28393 + }, + { + "epoch": 0.8702341547137428, + "grad_norm": 1.6582118332978353, + "learning_rate": 4.35130311155843e-07, + "loss": 0.5009, + "step": 28394 + }, + { + "epoch": 0.870264803236484, + "grad_norm": 1.78049269131355, + "learning_rate": 4.349278270942325e-07, + "loss": 0.4851, + "step": 28395 + }, + { + "epoch": 0.8702954517592252, + "grad_norm": 1.9785830842739236, + "learning_rate": 4.347253880134467e-07, + "loss": 0.5767, + "step": 28396 + }, + { + "epoch": 0.8703261002819664, + "grad_norm": 2.0292269423319182, + "learning_rate": 4.3452299391548047e-07, + "loss": 0.573, + "step": 28397 + }, + { + "epoch": 0.8703567488047076, + "grad_norm": 1.883271830554814, + "learning_rate": 4.343206448023263e-07, + "loss": 0.5894, + "step": 28398 + }, + { + "epoch": 0.8703873973274489, + "grad_norm": 1.9725748063374529, + "learning_rate": 4.3411834067597913e-07, + "loss": 0.6114, + "step": 28399 + }, + { + "epoch": 0.87041804585019, + "grad_norm": 1.9507111180805312, + "learning_rate": 4.33916081538433e-07, + "loss": 0.5184, + "step": 28400 + }, + { + "epoch": 0.8704486943729313, + "grad_norm": 1.8743131535633903, + "learning_rate": 4.3371386739167966e-07, + "loss": 0.6432, + "step": 28401 + }, + { + "epoch": 0.8704793428956724, + "grad_norm": 1.9729234950927246, + "learning_rate": 4.335116982377108e-07, + "loss": 0.6259, + "step": 28402 + }, + { + "epoch": 0.8705099914184137, + "grad_norm": 2.0648388703335434, + "learning_rate": 
4.333095740785209e-07, + "loss": 0.5698, + "step": 28403 + }, + { + "epoch": 0.8705406399411548, + "grad_norm": 1.7716098135557778, + "learning_rate": 4.331074949161002e-07, + "loss": 0.5485, + "step": 28404 + }, + { + "epoch": 0.8705712884638961, + "grad_norm": 2.0247368113115343, + "learning_rate": 4.329054607524391e-07, + "loss": 0.5599, + "step": 28405 + }, + { + "epoch": 0.8706019369866372, + "grad_norm": 1.8170838418839357, + "learning_rate": 4.3270347158952894e-07, + "loss": 0.6316, + "step": 28406 + }, + { + "epoch": 0.8706325855093785, + "grad_norm": 2.1534829158241466, + "learning_rate": 4.325015274293598e-07, + "loss": 0.6245, + "step": 28407 + }, + { + "epoch": 0.8706632340321196, + "grad_norm": 1.8297470783816658, + "learning_rate": 4.3229962827392336e-07, + "loss": 0.5446, + "step": 28408 + }, + { + "epoch": 0.8706938825548609, + "grad_norm": 1.9108860509776073, + "learning_rate": 4.320977741252058e-07, + "loss": 0.593, + "step": 28409 + }, + { + "epoch": 0.8707245310776021, + "grad_norm": 1.900422931730685, + "learning_rate": 4.318959649851978e-07, + "loss": 0.6213, + "step": 28410 + }, + { + "epoch": 0.8707551796003432, + "grad_norm": 1.994633054041629, + "learning_rate": 4.3169420085588885e-07, + "loss": 0.5409, + "step": 28411 + }, + { + "epoch": 0.8707858281230845, + "grad_norm": 1.9585770581902275, + "learning_rate": 4.3149248173926575e-07, + "loss": 0.6389, + "step": 28412 + }, + { + "epoch": 0.8708164766458256, + "grad_norm": 1.502370835837105, + "learning_rate": 4.31290807637314e-07, + "loss": 0.5713, + "step": 28413 + }, + { + "epoch": 0.8708471251685669, + "grad_norm": 1.822520557069089, + "learning_rate": 4.3108917855202494e-07, + "loss": 0.6198, + "step": 28414 + }, + { + "epoch": 0.870877773691308, + "grad_norm": 1.887797914773563, + "learning_rate": 4.308875944853824e-07, + "loss": 0.4772, + "step": 28415 + }, + { + "epoch": 0.8709084222140493, + "grad_norm": 1.7575030146738069, + "learning_rate": 4.3068605543937434e-07, + "loss": 0.5541, + "step": 28416 + }, + { + "epoch": 0.8709390707367904, + "grad_norm": 2.271174111306202, + "learning_rate": 4.304845614159842e-07, + "loss": 0.5924, + "step": 28417 + }, + { + "epoch": 0.8709697192595317, + "grad_norm": 0.8022515892749192, + "learning_rate": 4.3028311241719964e-07, + "loss": 0.3681, + "step": 28418 + }, + { + "epoch": 0.8710003677822729, + "grad_norm": 1.8939310612701872, + "learning_rate": 4.3008170844500543e-07, + "loss": 0.6034, + "step": 28419 + }, + { + "epoch": 0.8710310163050141, + "grad_norm": 2.0314588610521724, + "learning_rate": 4.2988034950138424e-07, + "loss": 0.6652, + "step": 28420 + }, + { + "epoch": 0.8710616648277553, + "grad_norm": 0.791030585026227, + "learning_rate": 4.2967903558832125e-07, + "loss": 0.4144, + "step": 28421 + }, + { + "epoch": 0.8710923133504965, + "grad_norm": 2.0839548032123782, + "learning_rate": 4.294777667078015e-07, + "loss": 0.5522, + "step": 28422 + }, + { + "epoch": 0.8711229618732377, + "grad_norm": 1.9054205830552728, + "learning_rate": 4.292765428618051e-07, + "loss": 0.5842, + "step": 28423 + }, + { + "epoch": 0.8711536103959789, + "grad_norm": 1.8191745639564987, + "learning_rate": 4.2907536405231767e-07, + "loss": 0.5732, + "step": 28424 + }, + { + "epoch": 0.8711842589187201, + "grad_norm": 2.2079690048857783, + "learning_rate": 4.288742302813192e-07, + "loss": 0.6078, + "step": 28425 + }, + { + "epoch": 0.8712149074414614, + "grad_norm": 1.8514979881199647, + "learning_rate": 4.2867314155079275e-07, + "loss": 0.6187, + "step": 28426 + }, + { + "epoch": 
0.8712455559642025, + "grad_norm": 1.7045772736969624, + "learning_rate": 4.284720978627205e-07, + "loss": 0.5432, + "step": 28427 + }, + { + "epoch": 0.8712762044869438, + "grad_norm": 1.811597202972157, + "learning_rate": 4.282710992190814e-07, + "loss": 0.586, + "step": 28428 + }, + { + "epoch": 0.8713068530096849, + "grad_norm": 1.9541988072946659, + "learning_rate": 4.280701456218567e-07, + "loss": 0.597, + "step": 28429 + }, + { + "epoch": 0.8713375015324262, + "grad_norm": 2.112376305248507, + "learning_rate": 4.2786923707302755e-07, + "loss": 0.6151, + "step": 28430 + }, + { + "epoch": 0.8713681500551673, + "grad_norm": 1.791375944836535, + "learning_rate": 4.2766837357457235e-07, + "loss": 0.6283, + "step": 28431 + }, + { + "epoch": 0.8713987985779086, + "grad_norm": 0.8181718399436338, + "learning_rate": 4.2746755512846904e-07, + "loss": 0.3914, + "step": 28432 + }, + { + "epoch": 0.8714294471006497, + "grad_norm": 1.5856801931103683, + "learning_rate": 4.2726678173669935e-07, + "loss": 0.5391, + "step": 28433 + }, + { + "epoch": 0.871460095623391, + "grad_norm": 2.026997048161017, + "learning_rate": 4.270660534012394e-07, + "loss": 0.5611, + "step": 28434 + }, + { + "epoch": 0.8714907441461321, + "grad_norm": 1.9273653448111776, + "learning_rate": 4.2686537012406883e-07, + "loss": 0.5726, + "step": 28435 + }, + { + "epoch": 0.8715213926688734, + "grad_norm": 1.8120418592847507, + "learning_rate": 4.2666473190716264e-07, + "loss": 0.5523, + "step": 28436 + }, + { + "epoch": 0.8715520411916146, + "grad_norm": 0.8069922894954162, + "learning_rate": 4.2646413875249925e-07, + "loss": 0.4113, + "step": 28437 + }, + { + "epoch": 0.8715826897143558, + "grad_norm": 1.7859287298889504, + "learning_rate": 4.2626359066205546e-07, + "loss": 0.5367, + "step": 28438 + }, + { + "epoch": 0.871613338237097, + "grad_norm": 1.8126184426738974, + "learning_rate": 4.2606308763780577e-07, + "loss": 0.6245, + "step": 28439 + }, + { + "epoch": 0.8716439867598382, + "grad_norm": 1.8045836389070664, + "learning_rate": 4.25862629681727e-07, + "loss": 0.5668, + "step": 28440 + }, + { + "epoch": 0.8716746352825794, + "grad_norm": 1.7620004423417748, + "learning_rate": 4.2566221679579524e-07, + "loss": 0.512, + "step": 28441 + }, + { + "epoch": 0.8717052838053205, + "grad_norm": 1.981230268123318, + "learning_rate": 4.2546184898198285e-07, + "loss": 0.5704, + "step": 28442 + }, + { + "epoch": 0.8717359323280618, + "grad_norm": 0.7919695371385532, + "learning_rate": 4.2526152624226494e-07, + "loss": 0.4023, + "step": 28443 + }, + { + "epoch": 0.8717665808508029, + "grad_norm": 0.8106642882437706, + "learning_rate": 4.250612485786171e-07, + "loss": 0.3942, + "step": 28444 + }, + { + "epoch": 0.8717972293735442, + "grad_norm": 1.8911393477179914, + "learning_rate": 4.2486101599301054e-07, + "loss": 0.5404, + "step": 28445 + }, + { + "epoch": 0.8718278778962854, + "grad_norm": 0.7994878040324852, + "learning_rate": 4.246608284874193e-07, + "loss": 0.3992, + "step": 28446 + }, + { + "epoch": 0.8718585264190266, + "grad_norm": 1.909554752042239, + "learning_rate": 4.2446068606381507e-07, + "loss": 0.5398, + "step": 28447 + }, + { + "epoch": 0.8718891749417678, + "grad_norm": 1.8434638920541802, + "learning_rate": 4.2426058872417074e-07, + "loss": 0.5488, + "step": 28448 + }, + { + "epoch": 0.871919823464509, + "grad_norm": 1.8397102541615842, + "learning_rate": 4.2406053647045807e-07, + "loss": 0.6821, + "step": 28449 + }, + { + "epoch": 0.8719504719872502, + "grad_norm": 1.7337514292440082, + "learning_rate": 
4.238605293046466e-07, + "loss": 0.5948, + "step": 28450 + }, + { + "epoch": 0.8719811205099914, + "grad_norm": 2.1135940870081424, + "learning_rate": 4.2366056722870865e-07, + "loss": 0.6076, + "step": 28451 + }, + { + "epoch": 0.8720117690327326, + "grad_norm": 1.9866123078630697, + "learning_rate": 4.234606502446148e-07, + "loss": 0.5532, + "step": 28452 + }, + { + "epoch": 0.8720424175554738, + "grad_norm": 1.7925454268658576, + "learning_rate": 4.23260778354333e-07, + "loss": 0.6157, + "step": 28453 + }, + { + "epoch": 0.872073066078215, + "grad_norm": 1.8511481570618884, + "learning_rate": 4.2306095155983387e-07, + "loss": 0.5837, + "step": 28454 + }, + { + "epoch": 0.8721037146009563, + "grad_norm": 0.8208886532975138, + "learning_rate": 4.2286116986308747e-07, + "loss": 0.4042, + "step": 28455 + }, + { + "epoch": 0.8721343631236974, + "grad_norm": 1.8125427449575247, + "learning_rate": 4.2266143326605947e-07, + "loss": 0.6246, + "step": 28456 + }, + { + "epoch": 0.8721650116464387, + "grad_norm": 0.84118287267903, + "learning_rate": 4.224617417707211e-07, + "loss": 0.4009, + "step": 28457 + }, + { + "epoch": 0.8721956601691798, + "grad_norm": 2.186176144203049, + "learning_rate": 4.222620953790374e-07, + "loss": 0.594, + "step": 28458 + }, + { + "epoch": 0.8722263086919211, + "grad_norm": 1.8565820319183741, + "learning_rate": 4.2206249409297627e-07, + "loss": 0.4792, + "step": 28459 + }, + { + "epoch": 0.8722569572146622, + "grad_norm": 1.847845993921749, + "learning_rate": 4.218629379145056e-07, + "loss": 0.5498, + "step": 28460 + }, + { + "epoch": 0.8722876057374035, + "grad_norm": 1.9498662337121542, + "learning_rate": 4.2166342684558994e-07, + "loss": 0.6142, + "step": 28461 + }, + { + "epoch": 0.8723182542601446, + "grad_norm": 1.7337086978469214, + "learning_rate": 4.214639608881965e-07, + "loss": 0.4886, + "step": 28462 + }, + { + "epoch": 0.8723489027828859, + "grad_norm": 1.7956063929466541, + "learning_rate": 4.212645400442905e-07, + "loss": 0.6163, + "step": 28463 + }, + { + "epoch": 0.872379551305627, + "grad_norm": 2.202798959570389, + "learning_rate": 4.210651643158353e-07, + "loss": 0.6395, + "step": 28464 + }, + { + "epoch": 0.8724101998283683, + "grad_norm": 1.8594391467135425, + "learning_rate": 4.2086583370479717e-07, + "loss": 0.5995, + "step": 28465 + }, + { + "epoch": 0.8724408483511095, + "grad_norm": 2.0166845142549175, + "learning_rate": 4.2066654821314e-07, + "loss": 0.6347, + "step": 28466 + }, + { + "epoch": 0.8724714968738507, + "grad_norm": 1.8731679535296564, + "learning_rate": 4.204673078428267e-07, + "loss": 0.5618, + "step": 28467 + }, + { + "epoch": 0.8725021453965919, + "grad_norm": 2.014228536214626, + "learning_rate": 4.202681125958213e-07, + "loss": 0.647, + "step": 28468 + }, + { + "epoch": 0.8725327939193331, + "grad_norm": 1.8275168368119663, + "learning_rate": 4.20068962474085e-07, + "loss": 0.5922, + "step": 28469 + }, + { + "epoch": 0.8725634424420743, + "grad_norm": 1.6808810958154117, + "learning_rate": 4.198698574795812e-07, + "loss": 0.5199, + "step": 28470 + }, + { + "epoch": 0.8725940909648155, + "grad_norm": 1.8303096831153032, + "learning_rate": 4.196707976142722e-07, + "loss": 0.6053, + "step": 28471 + }, + { + "epoch": 0.8726247394875567, + "grad_norm": 1.8378162110328669, + "learning_rate": 4.1947178288011815e-07, + "loss": 0.569, + "step": 28472 + }, + { + "epoch": 0.8726553880102978, + "grad_norm": 1.8725087681282977, + "learning_rate": 4.1927281327908074e-07, + "loss": 0.5609, + "step": 28473 + }, + { + "epoch": 
0.8726860365330391, + "grad_norm": 1.9033147543610138, + "learning_rate": 4.1907388881312074e-07, + "loss": 0.5692, + "step": 28474 + }, + { + "epoch": 0.8727166850557803, + "grad_norm": 1.9734838013296043, + "learning_rate": 4.1887500948419755e-07, + "loss": 0.6413, + "step": 28475 + }, + { + "epoch": 0.8727473335785215, + "grad_norm": 2.020612821646127, + "learning_rate": 4.186761752942714e-07, + "loss": 0.5622, + "step": 28476 + }, + { + "epoch": 0.8727779821012627, + "grad_norm": 1.6598785719686158, + "learning_rate": 4.1847738624530007e-07, + "loss": 0.5792, + "step": 28477 + }, + { + "epoch": 0.8728086306240039, + "grad_norm": 1.6988596625873382, + "learning_rate": 4.1827864233924374e-07, + "loss": 0.5156, + "step": 28478 + }, + { + "epoch": 0.8728392791467451, + "grad_norm": 1.7937064166418775, + "learning_rate": 4.180799435780608e-07, + "loss": 0.5725, + "step": 28479 + }, + { + "epoch": 0.8728699276694863, + "grad_norm": 1.8594297800863606, + "learning_rate": 4.1788128996370803e-07, + "loss": 0.536, + "step": 28480 + }, + { + "epoch": 0.8729005761922275, + "grad_norm": 2.067931434074301, + "learning_rate": 4.176826814981427e-07, + "loss": 0.5862, + "step": 28481 + }, + { + "epoch": 0.8729312247149688, + "grad_norm": 2.1157266422332914, + "learning_rate": 4.174841181833239e-07, + "loss": 0.6257, + "step": 28482 + }, + { + "epoch": 0.8729618732377099, + "grad_norm": 1.7558390528246972, + "learning_rate": 4.17285600021205e-07, + "loss": 0.5367, + "step": 28483 + }, + { + "epoch": 0.8729925217604512, + "grad_norm": 2.0567643127280824, + "learning_rate": 4.170871270137439e-07, + "loss": 0.5877, + "step": 28484 + }, + { + "epoch": 0.8730231702831923, + "grad_norm": 0.7770677287296838, + "learning_rate": 4.168886991628968e-07, + "loss": 0.389, + "step": 28485 + }, + { + "epoch": 0.8730538188059336, + "grad_norm": 1.8259644136071163, + "learning_rate": 4.166903164706171e-07, + "loss": 0.4971, + "step": 28486 + }, + { + "epoch": 0.8730844673286747, + "grad_norm": 1.9007211514579296, + "learning_rate": 4.164919789388616e-07, + "loss": 0.6337, + "step": 28487 + }, + { + "epoch": 0.873115115851416, + "grad_norm": 1.719444939705008, + "learning_rate": 4.16293686569581e-07, + "loss": 0.5125, + "step": 28488 + }, + { + "epoch": 0.8731457643741571, + "grad_norm": 2.0318545608468264, + "learning_rate": 4.160954393647337e-07, + "loss": 0.5604, + "step": 28489 + }, + { + "epoch": 0.8731764128968984, + "grad_norm": 1.5685017078028551, + "learning_rate": 4.1589723732627094e-07, + "loss": 0.5406, + "step": 28490 + }, + { + "epoch": 0.8732070614196396, + "grad_norm": 0.7854912590025549, + "learning_rate": 4.156990804561445e-07, + "loss": 0.3843, + "step": 28491 + }, + { + "epoch": 0.8732377099423808, + "grad_norm": 1.9256879630163608, + "learning_rate": 4.155009687563083e-07, + "loss": 0.5413, + "step": 28492 + }, + { + "epoch": 0.873268358465122, + "grad_norm": 1.6581952331449623, + "learning_rate": 4.1530290222871474e-07, + "loss": 0.5016, + "step": 28493 + }, + { + "epoch": 0.8732990069878632, + "grad_norm": 1.777733617098108, + "learning_rate": 4.151048808753133e-07, + "loss": 0.5678, + "step": 28494 + }, + { + "epoch": 0.8733296555106044, + "grad_norm": 1.8314525513029678, + "learning_rate": 4.1490690469805694e-07, + "loss": 0.5277, + "step": 28495 + }, + { + "epoch": 0.8733603040333456, + "grad_norm": 1.7785925142614771, + "learning_rate": 4.147089736988963e-07, + "loss": 0.444, + "step": 28496 + }, + { + "epoch": 0.8733909525560868, + "grad_norm": 1.7552580095050128, + "learning_rate": 
4.14511087879782e-07, + "loss": 0.5511, + "step": 28497 + }, + { + "epoch": 0.873421601078828, + "grad_norm": 1.9851937754686404, + "learning_rate": 4.1431324724266306e-07, + "loss": 0.6125, + "step": 28498 + }, + { + "epoch": 0.8734522496015692, + "grad_norm": 1.8070112721260987, + "learning_rate": 4.141154517894874e-07, + "loss": 0.5121, + "step": 28499 + }, + { + "epoch": 0.8734828981243105, + "grad_norm": 1.849361587178073, + "learning_rate": 4.139177015222073e-07, + "loss": 0.514, + "step": 28500 + }, + { + "epoch": 0.8735135466470516, + "grad_norm": 1.8672816409607664, + "learning_rate": 4.137199964427696e-07, + "loss": 0.5951, + "step": 28501 + }, + { + "epoch": 0.8735441951697929, + "grad_norm": 1.9225832680220958, + "learning_rate": 4.13522336553121e-07, + "loss": 0.5515, + "step": 28502 + }, + { + "epoch": 0.873574843692534, + "grad_norm": 1.804546700646448, + "learning_rate": 4.1332472185521054e-07, + "loss": 0.5704, + "step": 28503 + }, + { + "epoch": 0.8736054922152752, + "grad_norm": 0.7850288899714027, + "learning_rate": 4.131271523509861e-07, + "loss": 0.3912, + "step": 28504 + }, + { + "epoch": 0.8736361407380164, + "grad_norm": 1.7529941372354603, + "learning_rate": 4.129296280423928e-07, + "loss": 0.5131, + "step": 28505 + }, + { + "epoch": 0.8736667892607576, + "grad_norm": 2.146993243528726, + "learning_rate": 4.127321489313768e-07, + "loss": 0.5517, + "step": 28506 + }, + { + "epoch": 0.8736974377834988, + "grad_norm": 1.9676076759959955, + "learning_rate": 4.1253471501988495e-07, + "loss": 0.5586, + "step": 28507 + }, + { + "epoch": 0.87372808630624, + "grad_norm": 1.6390550987831647, + "learning_rate": 4.1233732630986343e-07, + "loss": 0.4864, + "step": 28508 + }, + { + "epoch": 0.8737587348289813, + "grad_norm": 1.9014859012962506, + "learning_rate": 4.121399828032557e-07, + "loss": 0.6927, + "step": 28509 + }, + { + "epoch": 0.8737893833517224, + "grad_norm": 2.0516673028126986, + "learning_rate": 4.1194268450200526e-07, + "loss": 0.6012, + "step": 28510 + }, + { + "epoch": 0.8738200318744637, + "grad_norm": 1.9910468706831568, + "learning_rate": 4.1174543140805877e-07, + "loss": 0.5261, + "step": 28511 + }, + { + "epoch": 0.8738506803972048, + "grad_norm": 2.0641978615650283, + "learning_rate": 4.1154822352335864e-07, + "loss": 0.6269, + "step": 28512 + }, + { + "epoch": 0.8738813289199461, + "grad_norm": 1.883614405056083, + "learning_rate": 4.1135106084984724e-07, + "loss": 0.6107, + "step": 28513 + }, + { + "epoch": 0.8739119774426872, + "grad_norm": 1.8965498605039195, + "learning_rate": 4.111539433894679e-07, + "loss": 0.5727, + "step": 28514 + }, + { + "epoch": 0.8739426259654285, + "grad_norm": 2.0647318730918514, + "learning_rate": 4.109568711441625e-07, + "loss": 0.4878, + "step": 28515 + }, + { + "epoch": 0.8739732744881696, + "grad_norm": 1.9602466390819049, + "learning_rate": 4.1075984411587387e-07, + "loss": 0.5929, + "step": 28516 + }, + { + "epoch": 0.8740039230109109, + "grad_norm": 3.5554531550627986, + "learning_rate": 4.105628623065422e-07, + "loss": 0.5483, + "step": 28517 + }, + { + "epoch": 0.874034571533652, + "grad_norm": 1.9264598155320019, + "learning_rate": 4.1036592571810916e-07, + "loss": 0.5593, + "step": 28518 + }, + { + "epoch": 0.8740652200563933, + "grad_norm": 1.8499360772949482, + "learning_rate": 4.1016903435251554e-07, + "loss": 0.5518, + "step": 28519 + }, + { + "epoch": 0.8740958685791345, + "grad_norm": 2.1898438956860353, + "learning_rate": 4.099721882117008e-07, + "loss": 0.571, + "step": 28520 + }, + { + "epoch": 
0.8741265171018757, + "grad_norm": 1.7835124860448361, + "learning_rate": 4.0977538729760344e-07, + "loss": 0.578, + "step": 28521 + }, + { + "epoch": 0.8741571656246169, + "grad_norm": 1.8258993909344359, + "learning_rate": 4.0957863161216416e-07, + "loss": 0.4309, + "step": 28522 + }, + { + "epoch": 0.8741878141473581, + "grad_norm": 1.8500655081090234, + "learning_rate": 4.0938192115732076e-07, + "loss": 0.4575, + "step": 28523 + }, + { + "epoch": 0.8742184626700993, + "grad_norm": 1.6674994936635172, + "learning_rate": 4.091852559350129e-07, + "loss": 0.4811, + "step": 28524 + }, + { + "epoch": 0.8742491111928405, + "grad_norm": 1.6605242702093668, + "learning_rate": 4.089886359471762e-07, + "loss": 0.48, + "step": 28525 + }, + { + "epoch": 0.8742797597155817, + "grad_norm": 0.7964520067435842, + "learning_rate": 4.087920611957491e-07, + "loss": 0.3903, + "step": 28526 + }, + { + "epoch": 0.874310408238323, + "grad_norm": 1.6790604661681048, + "learning_rate": 4.085955316826695e-07, + "loss": 0.5116, + "step": 28527 + }, + { + "epoch": 0.8743410567610641, + "grad_norm": 1.8702888423837958, + "learning_rate": 4.083990474098731e-07, + "loss": 0.5834, + "step": 28528 + }, + { + "epoch": 0.8743717052838054, + "grad_norm": 0.8170670005339304, + "learning_rate": 4.0820260837929394e-07, + "loss": 0.4019, + "step": 28529 + }, + { + "epoch": 0.8744023538065465, + "grad_norm": 1.6021882059726413, + "learning_rate": 4.080062145928709e-07, + "loss": 0.5756, + "step": 28530 + }, + { + "epoch": 0.8744330023292878, + "grad_norm": 1.6061929836709272, + "learning_rate": 4.078098660525376e-07, + "loss": 0.5157, + "step": 28531 + }, + { + "epoch": 0.8744636508520289, + "grad_norm": 1.976499150153628, + "learning_rate": 4.0761356276022736e-07, + "loss": 0.6313, + "step": 28532 + }, + { + "epoch": 0.8744942993747702, + "grad_norm": 1.7267820645027552, + "learning_rate": 4.0741730471787646e-07, + "loss": 0.5633, + "step": 28533 + }, + { + "epoch": 0.8745249478975113, + "grad_norm": 2.153272910782335, + "learning_rate": 4.072210919274172e-07, + "loss": 0.6828, + "step": 28534 + }, + { + "epoch": 0.8745555964202525, + "grad_norm": 0.8325228779861654, + "learning_rate": 4.0702492439078534e-07, + "loss": 0.3918, + "step": 28535 + }, + { + "epoch": 0.8745862449429938, + "grad_norm": 0.8332539140635591, + "learning_rate": 4.068288021099104e-07, + "loss": 0.4027, + "step": 28536 + }, + { + "epoch": 0.8746168934657349, + "grad_norm": 1.8174150013018862, + "learning_rate": 4.0663272508672693e-07, + "loss": 0.5719, + "step": 28537 + }, + { + "epoch": 0.8746475419884762, + "grad_norm": 0.7741668866568195, + "learning_rate": 4.0643669332316726e-07, + "loss": 0.3988, + "step": 28538 + }, + { + "epoch": 0.8746781905112173, + "grad_norm": 0.7860783648284349, + "learning_rate": 4.062407068211621e-07, + "loss": 0.3814, + "step": 28539 + }, + { + "epoch": 0.8747088390339586, + "grad_norm": 1.9392658278813566, + "learning_rate": 4.0604476558264106e-07, + "loss": 0.6111, + "step": 28540 + }, + { + "epoch": 0.8747394875566997, + "grad_norm": 1.7845759590409187, + "learning_rate": 4.0584886960953806e-07, + "loss": 0.4796, + "step": 28541 + }, + { + "epoch": 0.874770136079441, + "grad_norm": 1.7778729453279085, + "learning_rate": 4.0565301890378053e-07, + "loss": 0.5178, + "step": 28542 + }, + { + "epoch": 0.8748007846021821, + "grad_norm": 1.895063310863793, + "learning_rate": 4.0545721346730017e-07, + "loss": 0.5696, + "step": 28543 + }, + { + "epoch": 0.8748314331249234, + "grad_norm": 1.8046680833049307, + "learning_rate": 
4.0526145330202494e-07, + "loss": 0.496, + "step": 28544 + }, + { + "epoch": 0.8748620816476645, + "grad_norm": 1.919196922439976, + "learning_rate": 4.0506573840988386e-07, + "loss": 0.508, + "step": 28545 + }, + { + "epoch": 0.8748927301704058, + "grad_norm": 1.85107958419398, + "learning_rate": 4.0487006879280646e-07, + "loss": 0.4704, + "step": 28546 + }, + { + "epoch": 0.874923378693147, + "grad_norm": 2.194559984248375, + "learning_rate": 4.0467444445271953e-07, + "loss": 0.7296, + "step": 28547 + }, + { + "epoch": 0.8749540272158882, + "grad_norm": 0.8253558819678073, + "learning_rate": 4.0447886539155103e-07, + "loss": 0.3839, + "step": 28548 + }, + { + "epoch": 0.8749846757386294, + "grad_norm": 2.0593942717548317, + "learning_rate": 4.042833316112288e-07, + "loss": 0.595, + "step": 28549 + }, + { + "epoch": 0.8750153242613706, + "grad_norm": 2.2438360138857774, + "learning_rate": 4.04087843113678e-07, + "loss": 0.6133, + "step": 28550 + }, + { + "epoch": 0.8750459727841118, + "grad_norm": 2.05057105855926, + "learning_rate": 4.038923999008254e-07, + "loss": 0.597, + "step": 28551 + }, + { + "epoch": 0.875076621306853, + "grad_norm": 1.8368679150254887, + "learning_rate": 4.036970019745978e-07, + "loss": 0.5196, + "step": 28552 + }, + { + "epoch": 0.8751072698295942, + "grad_norm": 1.841590001210223, + "learning_rate": 4.0350164933691925e-07, + "loss": 0.5896, + "step": 28553 + }, + { + "epoch": 0.8751379183523355, + "grad_norm": 1.8180044665139126, + "learning_rate": 4.0330634198971543e-07, + "loss": 0.5065, + "step": 28554 + }, + { + "epoch": 0.8751685668750766, + "grad_norm": 1.9322044454072396, + "learning_rate": 4.031110799349097e-07, + "loss": 0.5727, + "step": 28555 + }, + { + "epoch": 0.8751992153978179, + "grad_norm": 1.8206304872380672, + "learning_rate": 4.029158631744262e-07, + "loss": 0.5521, + "step": 28556 + }, + { + "epoch": 0.875229863920559, + "grad_norm": 1.6553009952562856, + "learning_rate": 4.0272069171019055e-07, + "loss": 0.553, + "step": 28557 + }, + { + "epoch": 0.8752605124433003, + "grad_norm": 2.0818394066172856, + "learning_rate": 4.025255655441229e-07, + "loss": 0.575, + "step": 28558 + }, + { + "epoch": 0.8752911609660414, + "grad_norm": 1.904576748172883, + "learning_rate": 4.0233048467814727e-07, + "loss": 0.5416, + "step": 28559 + }, + { + "epoch": 0.8753218094887827, + "grad_norm": 1.892772847483345, + "learning_rate": 4.0213544911418653e-07, + "loss": 0.5221, + "step": 28560 + }, + { + "epoch": 0.8753524580115238, + "grad_norm": 2.0323713883948837, + "learning_rate": 4.0194045885416034e-07, + "loss": 0.5666, + "step": 28561 + }, + { + "epoch": 0.8753831065342651, + "grad_norm": 0.8815213284286773, + "learning_rate": 4.017455138999921e-07, + "loss": 0.3977, + "step": 28562 + }, + { + "epoch": 0.8754137550570062, + "grad_norm": 0.7998810106081091, + "learning_rate": 4.0155061425360187e-07, + "loss": 0.3856, + "step": 28563 + }, + { + "epoch": 0.8754444035797475, + "grad_norm": 1.7938751603547363, + "learning_rate": 4.013557599169099e-07, + "loss": 0.5125, + "step": 28564 + }, + { + "epoch": 0.8754750521024887, + "grad_norm": 0.7907428060443402, + "learning_rate": 4.0116095089183684e-07, + "loss": 0.4028, + "step": 28565 + }, + { + "epoch": 0.8755057006252298, + "grad_norm": 1.8887757700831473, + "learning_rate": 4.0096618718030055e-07, + "loss": 0.5925, + "step": 28566 + }, + { + "epoch": 0.8755363491479711, + "grad_norm": 1.7889467865510136, + "learning_rate": 4.0077146878422126e-07, + "loss": 0.5272, + "step": 28567 + }, + { + "epoch": 
0.8755669976707122, + "grad_norm": 1.7910166023257996, + "learning_rate": 4.005767957055179e-07, + "loss": 0.5074, + "step": 28568 + }, + { + "epoch": 0.8755976461934535, + "grad_norm": 1.948533413342609, + "learning_rate": 4.0038216794610786e-07, + "loss": 0.6092, + "step": 28569 + }, + { + "epoch": 0.8756282947161946, + "grad_norm": 1.755596664035591, + "learning_rate": 4.00187585507909e-07, + "loss": 0.5087, + "step": 28570 + }, + { + "epoch": 0.8756589432389359, + "grad_norm": 2.1523898517967077, + "learning_rate": 3.999930483928399e-07, + "loss": 0.5441, + "step": 28571 + }, + { + "epoch": 0.875689591761677, + "grad_norm": 0.7710270263608361, + "learning_rate": 3.9979855660281505e-07, + "loss": 0.3861, + "step": 28572 + }, + { + "epoch": 0.8757202402844183, + "grad_norm": 1.8835387058408029, + "learning_rate": 3.9960411013975296e-07, + "loss": 0.6246, + "step": 28573 + }, + { + "epoch": 0.8757508888071595, + "grad_norm": 1.9634616195432704, + "learning_rate": 3.9940970900556766e-07, + "loss": 0.5229, + "step": 28574 + }, + { + "epoch": 0.8757815373299007, + "grad_norm": 0.8378185025721835, + "learning_rate": 3.9921535320217583e-07, + "loss": 0.4132, + "step": 28575 + }, + { + "epoch": 0.8758121858526419, + "grad_norm": 0.8249699044916637, + "learning_rate": 3.990210427314933e-07, + "loss": 0.3988, + "step": 28576 + }, + { + "epoch": 0.8758428343753831, + "grad_norm": 0.7960869575842036, + "learning_rate": 3.9882677759543244e-07, + "loss": 0.3842, + "step": 28577 + }, + { + "epoch": 0.8758734828981243, + "grad_norm": 2.1130744015252305, + "learning_rate": 3.986325577959088e-07, + "loss": 0.5837, + "step": 28578 + }, + { + "epoch": 0.8759041314208655, + "grad_norm": 2.0025303384253674, + "learning_rate": 3.9843838333483654e-07, + "loss": 0.5918, + "step": 28579 + }, + { + "epoch": 0.8759347799436067, + "grad_norm": 1.9394243672535454, + "learning_rate": 3.9824425421412736e-07, + "loss": 0.5312, + "step": 28580 + }, + { + "epoch": 0.875965428466348, + "grad_norm": 1.927388770623665, + "learning_rate": 3.980501704356954e-07, + "loss": 0.5231, + "step": 28581 + }, + { + "epoch": 0.8759960769890891, + "grad_norm": 1.8107041828413353, + "learning_rate": 3.978561320014529e-07, + "loss": 0.5877, + "step": 28582 + }, + { + "epoch": 0.8760267255118304, + "grad_norm": 1.7130265859009517, + "learning_rate": 3.9766213891331116e-07, + "loss": 0.5752, + "step": 28583 + }, + { + "epoch": 0.8760573740345715, + "grad_norm": 1.7194195183626166, + "learning_rate": 3.97468191173182e-07, + "loss": 0.6878, + "step": 28584 + }, + { + "epoch": 0.8760880225573128, + "grad_norm": 1.8720298102008681, + "learning_rate": 3.9727428878297613e-07, + "loss": 0.4849, + "step": 28585 + }, + { + "epoch": 0.8761186710800539, + "grad_norm": 1.926789568533385, + "learning_rate": 3.970804317446042e-07, + "loss": 0.5603, + "step": 28586 + }, + { + "epoch": 0.8761493196027952, + "grad_norm": 1.9024047037174858, + "learning_rate": 3.9688662005997747e-07, + "loss": 0.6175, + "step": 28587 + }, + { + "epoch": 0.8761799681255363, + "grad_norm": 2.33860405179312, + "learning_rate": 3.966928537310033e-07, + "loss": 0.5185, + "step": 28588 + }, + { + "epoch": 0.8762106166482776, + "grad_norm": 2.090444999443609, + "learning_rate": 3.9649913275959295e-07, + "loss": 0.5518, + "step": 28589 + }, + { + "epoch": 0.8762412651710187, + "grad_norm": 1.9467490072500608, + "learning_rate": 3.963054571476549e-07, + "loss": 0.6936, + "step": 28590 + }, + { + "epoch": 0.87627191369376, + "grad_norm": 1.7422027858122942, + "learning_rate": 
3.9611182689709595e-07, + "loss": 0.5258, + "step": 28591 + }, + { + "epoch": 0.8763025622165012, + "grad_norm": 2.1617962363347254, + "learning_rate": 3.959182420098256e-07, + "loss": 0.6624, + "step": 28592 + }, + { + "epoch": 0.8763332107392424, + "grad_norm": 0.7766676222837043, + "learning_rate": 3.957247024877514e-07, + "loss": 0.4038, + "step": 28593 + }, + { + "epoch": 0.8763638592619836, + "grad_norm": 1.9093880150315368, + "learning_rate": 3.955312083327795e-07, + "loss": 0.564, + "step": 28594 + }, + { + "epoch": 0.8763945077847248, + "grad_norm": 2.0893129984500227, + "learning_rate": 3.9533775954681664e-07, + "loss": 0.6426, + "step": 28595 + }, + { + "epoch": 0.876425156307466, + "grad_norm": 1.8074033844418291, + "learning_rate": 3.9514435613176805e-07, + "loss": 0.5087, + "step": 28596 + }, + { + "epoch": 0.8764558048302071, + "grad_norm": 1.883215288371888, + "learning_rate": 3.9495099808954165e-07, + "loss": 0.5619, + "step": 28597 + }, + { + "epoch": 0.8764864533529484, + "grad_norm": 2.0369303582956846, + "learning_rate": 3.947576854220414e-07, + "loss": 0.6997, + "step": 28598 + }, + { + "epoch": 0.8765171018756895, + "grad_norm": 2.080779025764892, + "learning_rate": 3.945644181311709e-07, + "loss": 0.614, + "step": 28599 + }, + { + "epoch": 0.8765477503984308, + "grad_norm": 1.8736932153733006, + "learning_rate": 3.9437119621883626e-07, + "loss": 0.6716, + "step": 28600 + }, + { + "epoch": 0.876578398921172, + "grad_norm": 2.0756611248571635, + "learning_rate": 3.9417801968694045e-07, + "loss": 0.597, + "step": 28601 + }, + { + "epoch": 0.8766090474439132, + "grad_norm": 1.9602126788893912, + "learning_rate": 3.93984888537387e-07, + "loss": 0.5831, + "step": 28602 + }, + { + "epoch": 0.8766396959666544, + "grad_norm": 1.8607465900441884, + "learning_rate": 3.9379180277207885e-07, + "loss": 0.5817, + "step": 28603 + }, + { + "epoch": 0.8766703444893956, + "grad_norm": 1.778294574926346, + "learning_rate": 3.935987623929183e-07, + "loss": 0.5569, + "step": 28604 + }, + { + "epoch": 0.8767009930121368, + "grad_norm": 1.6383400992585653, + "learning_rate": 3.934057674018088e-07, + "loss": 0.5704, + "step": 28605 + }, + { + "epoch": 0.876731641534878, + "grad_norm": 1.6655790619985433, + "learning_rate": 3.9321281780065055e-07, + "loss": 0.4637, + "step": 28606 + }, + { + "epoch": 0.8767622900576192, + "grad_norm": 1.8258757280855795, + "learning_rate": 3.9301991359134373e-07, + "loss": 0.5582, + "step": 28607 + }, + { + "epoch": 0.8767929385803604, + "grad_norm": 1.7530781334138448, + "learning_rate": 3.928270547757923e-07, + "loss": 0.4821, + "step": 28608 + }, + { + "epoch": 0.8768235871031016, + "grad_norm": 1.8126796032537338, + "learning_rate": 3.926342413558948e-07, + "loss": 0.5374, + "step": 28609 + }, + { + "epoch": 0.8768542356258429, + "grad_norm": 0.839209353299235, + "learning_rate": 3.9244147333354965e-07, + "loss": 0.4045, + "step": 28610 + }, + { + "epoch": 0.876884884148584, + "grad_norm": 0.8009438053919635, + "learning_rate": 3.922487507106576e-07, + "loss": 0.3961, + "step": 28611 + }, + { + "epoch": 0.8769155326713253, + "grad_norm": 1.9280272066580546, + "learning_rate": 3.920560734891188e-07, + "loss": 0.5762, + "step": 28612 + }, + { + "epoch": 0.8769461811940664, + "grad_norm": 1.772360863317973, + "learning_rate": 3.9186344167082945e-07, + "loss": 0.5579, + "step": 28613 + }, + { + "epoch": 0.8769768297168077, + "grad_norm": 1.951019569509041, + "learning_rate": 3.9167085525768864e-07, + "loss": 0.5514, + "step": 28614 + }, + { + "epoch": 
0.8770074782395488, + "grad_norm": 1.722571332345179, + "learning_rate": 3.9147831425159375e-07, + "loss": 0.5678, + "step": 28615 + }, + { + "epoch": 0.8770381267622901, + "grad_norm": 1.7991939084380286, + "learning_rate": 3.9128581865444325e-07, + "loss": 0.6054, + "step": 28616 + }, + { + "epoch": 0.8770687752850312, + "grad_norm": 1.7167695601388506, + "learning_rate": 3.9109336846813285e-07, + "loss": 0.5697, + "step": 28617 + }, + { + "epoch": 0.8770994238077725, + "grad_norm": 1.8085282970740801, + "learning_rate": 3.9090096369455763e-07, + "loss": 0.6332, + "step": 28618 + }, + { + "epoch": 0.8771300723305137, + "grad_norm": 0.7648950714029354, + "learning_rate": 3.907086043356145e-07, + "loss": 0.3803, + "step": 28619 + }, + { + "epoch": 0.8771607208532549, + "grad_norm": 1.7501520607672756, + "learning_rate": 3.905162903932003e-07, + "loss": 0.4708, + "step": 28620 + }, + { + "epoch": 0.8771913693759961, + "grad_norm": 1.8235969889683532, + "learning_rate": 3.903240218692067e-07, + "loss": 0.5023, + "step": 28621 + }, + { + "epoch": 0.8772220178987373, + "grad_norm": 2.1039777276628, + "learning_rate": 3.9013179876553067e-07, + "loss": 0.5946, + "step": 28622 + }, + { + "epoch": 0.8772526664214785, + "grad_norm": 1.7463051485868906, + "learning_rate": 3.89939621084065e-07, + "loss": 0.5059, + "step": 28623 + }, + { + "epoch": 0.8772833149442197, + "grad_norm": 0.7949072578176334, + "learning_rate": 3.89747488826705e-07, + "loss": 0.3954, + "step": 28624 + }, + { + "epoch": 0.8773139634669609, + "grad_norm": 1.8859333993370182, + "learning_rate": 3.895554019953424e-07, + "loss": 0.5404, + "step": 28625 + }, + { + "epoch": 0.8773446119897022, + "grad_norm": 1.897529234396919, + "learning_rate": 3.893633605918684e-07, + "loss": 0.6056, + "step": 28626 + }, + { + "epoch": 0.8773752605124433, + "grad_norm": 1.9495623648020872, + "learning_rate": 3.8917136461817884e-07, + "loss": 0.5991, + "step": 28627 + }, + { + "epoch": 0.8774059090351845, + "grad_norm": 0.8380679074054705, + "learning_rate": 3.889794140761632e-07, + "loss": 0.4032, + "step": 28628 + }, + { + "epoch": 0.8774365575579257, + "grad_norm": 1.940466733106242, + "learning_rate": 3.887875089677123e-07, + "loss": 0.6857, + "step": 28629 + }, + { + "epoch": 0.8774672060806669, + "grad_norm": 1.7292116285376096, + "learning_rate": 3.8859564929471793e-07, + "loss": 0.4962, + "step": 28630 + }, + { + "epoch": 0.8774978546034081, + "grad_norm": 1.8747783532306632, + "learning_rate": 3.8840383505907186e-07, + "loss": 0.6546, + "step": 28631 + }, + { + "epoch": 0.8775285031261493, + "grad_norm": 1.85140065201563, + "learning_rate": 3.882120662626615e-07, + "loss": 0.5722, + "step": 28632 + }, + { + "epoch": 0.8775591516488905, + "grad_norm": 1.8962019566077848, + "learning_rate": 3.8802034290737756e-07, + "loss": 0.6268, + "step": 28633 + }, + { + "epoch": 0.8775898001716317, + "grad_norm": 1.6211382735301616, + "learning_rate": 3.8782866499510905e-07, + "loss": 0.553, + "step": 28634 + }, + { + "epoch": 0.877620448694373, + "grad_norm": 2.040001787999705, + "learning_rate": 3.876370325277462e-07, + "loss": 0.5465, + "step": 28635 + }, + { + "epoch": 0.8776510972171141, + "grad_norm": 2.0771438882224555, + "learning_rate": 3.874454455071752e-07, + "loss": 0.5257, + "step": 28636 + }, + { + "epoch": 0.8776817457398554, + "grad_norm": 1.9047006177411019, + "learning_rate": 3.8725390393528293e-07, + "loss": 0.5441, + "step": 28637 + }, + { + "epoch": 0.8777123942625965, + "grad_norm": 2.043849201039589, + "learning_rate": 
3.870624078139601e-07, + "loss": 0.505, + "step": 28638 + }, + { + "epoch": 0.8777430427853378, + "grad_norm": 0.8478700898295707, + "learning_rate": 3.8687095714509124e-07, + "loss": 0.4035, + "step": 28639 + }, + { + "epoch": 0.8777736913080789, + "grad_norm": 1.7354483815085093, + "learning_rate": 3.866795519305622e-07, + "loss": 0.5901, + "step": 28640 + }, + { + "epoch": 0.8778043398308202, + "grad_norm": 1.9885937292483566, + "learning_rate": 3.864881921722602e-07, + "loss": 0.6422, + "step": 28641 + }, + { + "epoch": 0.8778349883535613, + "grad_norm": 1.7703094564735964, + "learning_rate": 3.862968778720705e-07, + "loss": 0.5754, + "step": 28642 + }, + { + "epoch": 0.8778656368763026, + "grad_norm": 2.0166942781549153, + "learning_rate": 3.861056090318788e-07, + "loss": 0.5813, + "step": 28643 + }, + { + "epoch": 0.8778962853990437, + "grad_norm": 0.7747948184275141, + "learning_rate": 3.859143856535685e-07, + "loss": 0.3973, + "step": 28644 + }, + { + "epoch": 0.877926933921785, + "grad_norm": 1.9288011156285276, + "learning_rate": 3.8572320773902436e-07, + "loss": 0.5783, + "step": 28645 + }, + { + "epoch": 0.8779575824445262, + "grad_norm": 1.9167163336920736, + "learning_rate": 3.855320752901304e-07, + "loss": 0.4913, + "step": 28646 + }, + { + "epoch": 0.8779882309672674, + "grad_norm": 1.6754211804500023, + "learning_rate": 3.8534098830877e-07, + "loss": 0.5082, + "step": 28647 + }, + { + "epoch": 0.8780188794900086, + "grad_norm": 1.8905743481265345, + "learning_rate": 3.8514994679682395e-07, + "loss": 0.5445, + "step": 28648 + }, + { + "epoch": 0.8780495280127498, + "grad_norm": 1.9981142036857087, + "learning_rate": 3.849589507561774e-07, + "loss": 0.5569, + "step": 28649 + }, + { + "epoch": 0.878080176535491, + "grad_norm": 1.786346709990749, + "learning_rate": 3.8476800018871054e-07, + "loss": 0.5844, + "step": 28650 + }, + { + "epoch": 0.8781108250582322, + "grad_norm": 1.635121087308965, + "learning_rate": 3.8457709509630623e-07, + "loss": 0.5122, + "step": 28651 + }, + { + "epoch": 0.8781414735809734, + "grad_norm": 2.0776141179145076, + "learning_rate": 3.843862354808442e-07, + "loss": 0.6369, + "step": 28652 + }, + { + "epoch": 0.8781721221037146, + "grad_norm": 1.6931006954703982, + "learning_rate": 3.8419542134420505e-07, + "loss": 0.5761, + "step": 28653 + }, + { + "epoch": 0.8782027706264558, + "grad_norm": 1.9920919694285253, + "learning_rate": 3.840046526882707e-07, + "loss": 0.5645, + "step": 28654 + }, + { + "epoch": 0.8782334191491971, + "grad_norm": 1.8867598209472833, + "learning_rate": 3.838139295149185e-07, + "loss": 0.5384, + "step": 28655 + }, + { + "epoch": 0.8782640676719382, + "grad_norm": 1.90788293644906, + "learning_rate": 3.8362325182602857e-07, + "loss": 0.5065, + "step": 28656 + }, + { + "epoch": 0.8782947161946795, + "grad_norm": 0.8457584020639773, + "learning_rate": 3.834326196234811e-07, + "loss": 0.3844, + "step": 28657 + }, + { + "epoch": 0.8783253647174206, + "grad_norm": 0.8082008753162495, + "learning_rate": 3.8324203290915296e-07, + "loss": 0.4137, + "step": 28658 + }, + { + "epoch": 0.8783560132401618, + "grad_norm": 1.9327191575897111, + "learning_rate": 3.8305149168492094e-07, + "loss": 0.5981, + "step": 28659 + }, + { + "epoch": 0.878386661762903, + "grad_norm": 2.1636284620834942, + "learning_rate": 3.8286099595266525e-07, + "loss": 0.606, + "step": 28660 + }, + { + "epoch": 0.8784173102856442, + "grad_norm": 1.9296002266692271, + "learning_rate": 3.82670545714261e-07, + "loss": 0.5839, + "step": 28661 + }, + { + "epoch": 
0.8784479588083854, + "grad_norm": 2.0137824891129084, + "learning_rate": 3.824801409715856e-07, + "loss": 0.5233, + "step": 28662 + }, + { + "epoch": 0.8784786073311266, + "grad_norm": 0.7861777748957325, + "learning_rate": 3.822897817265142e-07, + "loss": 0.3758, + "step": 28663 + }, + { + "epoch": 0.8785092558538679, + "grad_norm": 1.8284567839891666, + "learning_rate": 3.820994679809231e-07, + "loss": 0.5257, + "step": 28664 + }, + { + "epoch": 0.878539904376609, + "grad_norm": 2.0499404181695393, + "learning_rate": 3.819091997366886e-07, + "loss": 0.5541, + "step": 28665 + }, + { + "epoch": 0.8785705528993503, + "grad_norm": 1.7701758500819516, + "learning_rate": 3.8171897699568304e-07, + "loss": 0.5057, + "step": 28666 + }, + { + "epoch": 0.8786012014220914, + "grad_norm": 2.2337439992277326, + "learning_rate": 3.815287997597822e-07, + "loss": 0.6079, + "step": 28667 + }, + { + "epoch": 0.8786318499448327, + "grad_norm": 1.8377616480674557, + "learning_rate": 3.813386680308606e-07, + "loss": 0.587, + "step": 28668 + }, + { + "epoch": 0.8786624984675738, + "grad_norm": 0.7856482793618613, + "learning_rate": 3.811485818107902e-07, + "loss": 0.3898, + "step": 28669 + }, + { + "epoch": 0.8786931469903151, + "grad_norm": 0.8103713473320187, + "learning_rate": 3.809585411014455e-07, + "loss": 0.4006, + "step": 28670 + }, + { + "epoch": 0.8787237955130562, + "grad_norm": 2.220966797884925, + "learning_rate": 3.807685459046967e-07, + "loss": 0.6069, + "step": 28671 + }, + { + "epoch": 0.8787544440357975, + "grad_norm": 2.0403875724840908, + "learning_rate": 3.805785962224179e-07, + "loss": 0.6161, + "step": 28672 + }, + { + "epoch": 0.8787850925585386, + "grad_norm": 1.74247791314156, + "learning_rate": 3.803886920564809e-07, + "loss": 0.5208, + "step": 28673 + }, + { + "epoch": 0.8788157410812799, + "grad_norm": 1.9008269470759558, + "learning_rate": 3.8019883340875473e-07, + "loss": 0.5644, + "step": 28674 + }, + { + "epoch": 0.8788463896040211, + "grad_norm": 1.9855662219089805, + "learning_rate": 3.800090202811119e-07, + "loss": 0.5423, + "step": 28675 + }, + { + "epoch": 0.8788770381267623, + "grad_norm": 2.016081590042351, + "learning_rate": 3.798192526754235e-07, + "loss": 0.5774, + "step": 28676 + }, + { + "epoch": 0.8789076866495035, + "grad_norm": 0.8152179330679686, + "learning_rate": 3.7962953059355655e-07, + "loss": 0.4004, + "step": 28677 + }, + { + "epoch": 0.8789383351722447, + "grad_norm": 0.8274141786441599, + "learning_rate": 3.794398540373823e-07, + "loss": 0.4082, + "step": 28678 + }, + { + "epoch": 0.8789689836949859, + "grad_norm": 1.8678418062935422, + "learning_rate": 3.7925022300877026e-07, + "loss": 0.5495, + "step": 28679 + }, + { + "epoch": 0.8789996322177271, + "grad_norm": 0.787990135128531, + "learning_rate": 3.7906063750958734e-07, + "loss": 0.3959, + "step": 28680 + }, + { + "epoch": 0.8790302807404683, + "grad_norm": 2.1205458325993405, + "learning_rate": 3.7887109754170315e-07, + "loss": 0.6378, + "step": 28681 + }, + { + "epoch": 0.8790609292632096, + "grad_norm": 2.1871172877980003, + "learning_rate": 3.786816031069829e-07, + "loss": 0.5921, + "step": 28682 + }, + { + "epoch": 0.8790915777859507, + "grad_norm": 0.7727992531107527, + "learning_rate": 3.7849215420729615e-07, + "loss": 0.3933, + "step": 28683 + }, + { + "epoch": 0.879122226308692, + "grad_norm": 1.8580957446322786, + "learning_rate": 3.7830275084450865e-07, + "loss": 0.5687, + "step": 28684 + }, + { + "epoch": 0.8791528748314331, + "grad_norm": 2.1993350821903057, + "learning_rate": 
3.7811339302048667e-07, + "loss": 0.6198, + "step": 28685 + }, + { + "epoch": 0.8791835233541744, + "grad_norm": 1.926523070763217, + "learning_rate": 3.779240807370954e-07, + "loss": 0.4534, + "step": 28686 + }, + { + "epoch": 0.8792141718769155, + "grad_norm": 1.9049899806660453, + "learning_rate": 3.777348139962017e-07, + "loss": 0.5943, + "step": 28687 + }, + { + "epoch": 0.8792448203996568, + "grad_norm": 1.8523038710671043, + "learning_rate": 3.775455927996685e-07, + "loss": 0.549, + "step": 28688 + }, + { + "epoch": 0.8792754689223979, + "grad_norm": 1.8362006145416376, + "learning_rate": 3.7735641714936157e-07, + "loss": 0.5756, + "step": 28689 + }, + { + "epoch": 0.8793061174451391, + "grad_norm": 0.8357687895272043, + "learning_rate": 3.7716728704714547e-07, + "loss": 0.397, + "step": 28690 + }, + { + "epoch": 0.8793367659678804, + "grad_norm": 1.9320185927780136, + "learning_rate": 3.7697820249488204e-07, + "loss": 0.6062, + "step": 28691 + }, + { + "epoch": 0.8793674144906215, + "grad_norm": 1.9016900403544386, + "learning_rate": 3.7678916349443596e-07, + "loss": 0.5816, + "step": 28692 + }, + { + "epoch": 0.8793980630133628, + "grad_norm": 1.7727728125633548, + "learning_rate": 3.766001700476685e-07, + "loss": 0.4832, + "step": 28693 + }, + { + "epoch": 0.8794287115361039, + "grad_norm": 0.836379787546283, + "learning_rate": 3.7641122215644254e-07, + "loss": 0.4233, + "step": 28694 + }, + { + "epoch": 0.8794593600588452, + "grad_norm": 1.828253286808137, + "learning_rate": 3.7622231982262057e-07, + "loss": 0.5328, + "step": 28695 + }, + { + "epoch": 0.8794900085815863, + "grad_norm": 1.9533856320382135, + "learning_rate": 3.760334630480622e-07, + "loss": 0.5689, + "step": 28696 + }, + { + "epoch": 0.8795206571043276, + "grad_norm": 1.7271509583432687, + "learning_rate": 3.7584465183462925e-07, + "loss": 0.6388, + "step": 28697 + }, + { + "epoch": 0.8795513056270687, + "grad_norm": 1.6831638501165624, + "learning_rate": 3.7565588618418305e-07, + "loss": 0.5335, + "step": 28698 + }, + { + "epoch": 0.87958195414981, + "grad_norm": 1.8697398250655242, + "learning_rate": 3.7546716609858146e-07, + "loss": 0.4819, + "step": 28699 + }, + { + "epoch": 0.8796126026725511, + "grad_norm": 2.0946422037290917, + "learning_rate": 3.752784915796853e-07, + "loss": 0.5713, + "step": 28700 + }, + { + "epoch": 0.8796432511952924, + "grad_norm": 1.9423215841640047, + "learning_rate": 3.750898626293542e-07, + "loss": 0.5407, + "step": 28701 + }, + { + "epoch": 0.8796738997180336, + "grad_norm": 1.980125689941204, + "learning_rate": 3.749012792494455e-07, + "loss": 0.5967, + "step": 28702 + }, + { + "epoch": 0.8797045482407748, + "grad_norm": 1.9305597236955747, + "learning_rate": 3.7471274144181836e-07, + "loss": 0.5181, + "step": 28703 + }, + { + "epoch": 0.879735196763516, + "grad_norm": 0.8284579845497634, + "learning_rate": 3.745242492083284e-07, + "loss": 0.3986, + "step": 28704 + }, + { + "epoch": 0.8797658452862572, + "grad_norm": 1.6920985899321654, + "learning_rate": 3.743358025508359e-07, + "loss": 0.4979, + "step": 28705 + }, + { + "epoch": 0.8797964938089984, + "grad_norm": 1.8138472998738, + "learning_rate": 3.7414740147119653e-07, + "loss": 0.5117, + "step": 28706 + }, + { + "epoch": 0.8798271423317396, + "grad_norm": 1.9563525400146624, + "learning_rate": 3.739590459712661e-07, + "loss": 0.6968, + "step": 28707 + }, + { + "epoch": 0.8798577908544808, + "grad_norm": 1.9401918228882624, + "learning_rate": 3.7377073605290024e-07, + "loss": 0.5585, + "step": 28708 + }, + { + "epoch": 
0.879888439377222, + "grad_norm": 0.7671270140037109, + "learning_rate": 3.7358247171795593e-07, + "loss": 0.3757, + "step": 28709 + }, + { + "epoch": 0.8799190878999632, + "grad_norm": 1.8648831386187026, + "learning_rate": 3.7339425296828603e-07, + "loss": 0.5166, + "step": 28710 + }, + { + "epoch": 0.8799497364227045, + "grad_norm": 0.8272022189306802, + "learning_rate": 3.732060798057469e-07, + "loss": 0.4086, + "step": 28711 + }, + { + "epoch": 0.8799803849454456, + "grad_norm": 1.9835747244398148, + "learning_rate": 3.730179522321925e-07, + "loss": 0.6336, + "step": 28712 + }, + { + "epoch": 0.8800110334681869, + "grad_norm": 1.881261610624436, + "learning_rate": 3.728298702494754e-07, + "loss": 0.6102, + "step": 28713 + }, + { + "epoch": 0.880041681990928, + "grad_norm": 1.7924441152720367, + "learning_rate": 3.726418338594506e-07, + "loss": 0.6137, + "step": 28714 + }, + { + "epoch": 0.8800723305136693, + "grad_norm": 1.9618572109663626, + "learning_rate": 3.724538430639685e-07, + "loss": 0.6133, + "step": 28715 + }, + { + "epoch": 0.8801029790364104, + "grad_norm": 1.6935987896811795, + "learning_rate": 3.72265897864883e-07, + "loss": 0.4688, + "step": 28716 + }, + { + "epoch": 0.8801336275591517, + "grad_norm": 2.2274022460144987, + "learning_rate": 3.7207799826404603e-07, + "loss": 0.6668, + "step": 28717 + }, + { + "epoch": 0.8801642760818928, + "grad_norm": 1.969976875493802, + "learning_rate": 3.7189014426330826e-07, + "loss": 0.5101, + "step": 28718 + }, + { + "epoch": 0.8801949246046341, + "grad_norm": 1.6073796392664388, + "learning_rate": 3.717023358645211e-07, + "loss": 0.5466, + "step": 28719 + }, + { + "epoch": 0.8802255731273753, + "grad_norm": 2.901334750032524, + "learning_rate": 3.715145730695358e-07, + "loss": 0.6581, + "step": 28720 + }, + { + "epoch": 0.8802562216501164, + "grad_norm": 1.8775649042123852, + "learning_rate": 3.713268558802008e-07, + "loss": 0.6057, + "step": 28721 + }, + { + "epoch": 0.8802868701728577, + "grad_norm": 1.9238624143637164, + "learning_rate": 3.711391842983675e-07, + "loss": 0.5615, + "step": 28722 + }, + { + "epoch": 0.8803175186955988, + "grad_norm": 2.176400808407551, + "learning_rate": 3.709515583258821e-07, + "loss": 0.614, + "step": 28723 + }, + { + "epoch": 0.8803481672183401, + "grad_norm": 1.775779786499469, + "learning_rate": 3.707639779645972e-07, + "loss": 0.5571, + "step": 28724 + }, + { + "epoch": 0.8803788157410812, + "grad_norm": 2.0335720706665117, + "learning_rate": 3.705764432163594e-07, + "loss": 0.6735, + "step": 28725 + }, + { + "epoch": 0.8804094642638225, + "grad_norm": 1.9631997642102665, + "learning_rate": 3.703889540830158e-07, + "loss": 0.6659, + "step": 28726 + }, + { + "epoch": 0.8804401127865636, + "grad_norm": 1.915842997921532, + "learning_rate": 3.702015105664142e-07, + "loss": 0.5834, + "step": 28727 + }, + { + "epoch": 0.8804707613093049, + "grad_norm": 0.8276815564017774, + "learning_rate": 3.70014112668402e-07, + "loss": 0.4054, + "step": 28728 + }, + { + "epoch": 0.8805014098320461, + "grad_norm": 1.866710535062723, + "learning_rate": 3.698267603908251e-07, + "loss": 0.5859, + "step": 28729 + }, + { + "epoch": 0.8805320583547873, + "grad_norm": 1.686565157036055, + "learning_rate": 3.696394537355297e-07, + "loss": 0.6184, + "step": 28730 + }, + { + "epoch": 0.8805627068775285, + "grad_norm": 1.9753286092151998, + "learning_rate": 3.694521927043615e-07, + "loss": 0.532, + "step": 28731 + }, + { + "epoch": 0.8805933554002697, + "grad_norm": 0.8676250781119224, + "learning_rate": 
3.6926497729916633e-07, + "loss": 0.4112, + "step": 28732 + }, + { + "epoch": 0.8806240039230109, + "grad_norm": 1.7681684091971814, + "learning_rate": 3.6907780752178877e-07, + "loss": 0.5796, + "step": 28733 + }, + { + "epoch": 0.8806546524457521, + "grad_norm": 1.854193857205674, + "learning_rate": 3.688906833740702e-07, + "loss": 0.6069, + "step": 28734 + }, + { + "epoch": 0.8806853009684933, + "grad_norm": 1.8807925215372578, + "learning_rate": 3.687036048578585e-07, + "loss": 0.5478, + "step": 28735 + }, + { + "epoch": 0.8807159494912346, + "grad_norm": 1.857412665902949, + "learning_rate": 3.6851657197499503e-07, + "loss": 0.5486, + "step": 28736 + }, + { + "epoch": 0.8807465980139757, + "grad_norm": 1.8235242840077106, + "learning_rate": 3.683295847273216e-07, + "loss": 0.5621, + "step": 28737 + }, + { + "epoch": 0.880777246536717, + "grad_norm": 1.873898328508797, + "learning_rate": 3.6814264311668235e-07, + "loss": 0.5863, + "step": 28738 + }, + { + "epoch": 0.8808078950594581, + "grad_norm": 1.8119015292619471, + "learning_rate": 3.6795574714491966e-07, + "loss": 0.5839, + "step": 28739 + }, + { + "epoch": 0.8808385435821994, + "grad_norm": 1.832670729006786, + "learning_rate": 3.677688968138732e-07, + "loss": 0.5537, + "step": 28740 + }, + { + "epoch": 0.8808691921049405, + "grad_norm": 1.8036037922177797, + "learning_rate": 3.675820921253848e-07, + "loss": 0.605, + "step": 28741 + }, + { + "epoch": 0.8808998406276818, + "grad_norm": 2.09180250508167, + "learning_rate": 3.673953330812952e-07, + "loss": 0.5706, + "step": 28742 + }, + { + "epoch": 0.8809304891504229, + "grad_norm": 1.8864035593826776, + "learning_rate": 3.6720861968344567e-07, + "loss": 0.552, + "step": 28743 + }, + { + "epoch": 0.8809611376731642, + "grad_norm": 1.9843688743780672, + "learning_rate": 3.670219519336754e-07, + "loss": 0.6263, + "step": 28744 + }, + { + "epoch": 0.8809917861959053, + "grad_norm": 1.7957007271572696, + "learning_rate": 3.668353298338212e-07, + "loss": 0.4439, + "step": 28745 + }, + { + "epoch": 0.8810224347186466, + "grad_norm": 1.7321538272331478, + "learning_rate": 3.6664875338572546e-07, + "loss": 0.5879, + "step": 28746 + }, + { + "epoch": 0.8810530832413878, + "grad_norm": 0.8866726226103738, + "learning_rate": 3.664622225912251e-07, + "loss": 0.3799, + "step": 28747 + }, + { + "epoch": 0.881083731764129, + "grad_norm": 1.6689793025235733, + "learning_rate": 3.662757374521575e-07, + "loss": 0.4367, + "step": 28748 + }, + { + "epoch": 0.8811143802868702, + "grad_norm": 2.0026028466858596, + "learning_rate": 3.660892979703601e-07, + "loss": 0.6128, + "step": 28749 + }, + { + "epoch": 0.8811450288096114, + "grad_norm": 2.382804308761761, + "learning_rate": 3.6590290414767084e-07, + "loss": 0.6187, + "step": 28750 + }, + { + "epoch": 0.8811756773323526, + "grad_norm": 1.7842423248816037, + "learning_rate": 3.6571655598592715e-07, + "loss": 0.5277, + "step": 28751 + }, + { + "epoch": 0.8812063258550937, + "grad_norm": 1.8616925126481485, + "learning_rate": 3.6553025348696256e-07, + "loss": 0.5391, + "step": 28752 + }, + { + "epoch": 0.881236974377835, + "grad_norm": 1.967178995547843, + "learning_rate": 3.6534399665261454e-07, + "loss": 0.5347, + "step": 28753 + }, + { + "epoch": 0.8812676229005761, + "grad_norm": 2.0353380393673466, + "learning_rate": 3.6515778548471824e-07, + "loss": 0.6862, + "step": 28754 + }, + { + "epoch": 0.8812982714233174, + "grad_norm": 1.7115062218121333, + "learning_rate": 3.6497161998510833e-07, + "loss": 0.5563, + "step": 28755 + }, + { + "epoch": 
0.8813289199460586, + "grad_norm": 0.7984657244504502, + "learning_rate": 3.6478550015561775e-07, + "loss": 0.3913, + "step": 28756 + }, + { + "epoch": 0.8813595684687998, + "grad_norm": 1.8938628193713065, + "learning_rate": 3.6459942599808285e-07, + "loss": 0.5538, + "step": 28757 + }, + { + "epoch": 0.881390216991541, + "grad_norm": 2.0420679599026457, + "learning_rate": 3.6441339751433546e-07, + "loss": 0.6609, + "step": 28758 + }, + { + "epoch": 0.8814208655142822, + "grad_norm": 0.8133192110145753, + "learning_rate": 3.6422741470620913e-07, + "loss": 0.3926, + "step": 28759 + }, + { + "epoch": 0.8814515140370234, + "grad_norm": 2.009151228080707, + "learning_rate": 3.640414775755358e-07, + "loss": 0.612, + "step": 28760 + }, + { + "epoch": 0.8814821625597646, + "grad_norm": 1.9214175772367494, + "learning_rate": 3.638555861241477e-07, + "loss": 0.5839, + "step": 28761 + }, + { + "epoch": 0.8815128110825058, + "grad_norm": 2.013735503512586, + "learning_rate": 3.636697403538775e-07, + "loss": 0.6683, + "step": 28762 + }, + { + "epoch": 0.881543459605247, + "grad_norm": 2.0654752080217755, + "learning_rate": 3.634839402665552e-07, + "loss": 0.657, + "step": 28763 + }, + { + "epoch": 0.8815741081279882, + "grad_norm": 1.8920365167550655, + "learning_rate": 3.632981858640117e-07, + "loss": 0.6192, + "step": 28764 + }, + { + "epoch": 0.8816047566507295, + "grad_norm": 1.7853245716555908, + "learning_rate": 3.6311247714807815e-07, + "loss": 0.5018, + "step": 28765 + }, + { + "epoch": 0.8816354051734706, + "grad_norm": 1.8082568639729952, + "learning_rate": 3.6292681412058384e-07, + "loss": 0.5401, + "step": 28766 + }, + { + "epoch": 0.8816660536962119, + "grad_norm": 1.7489058104067154, + "learning_rate": 3.6274119678335775e-07, + "loss": 0.4869, + "step": 28767 + }, + { + "epoch": 0.881696702218953, + "grad_norm": 1.6757718934339796, + "learning_rate": 3.625556251382284e-07, + "loss": 0.5698, + "step": 28768 + }, + { + "epoch": 0.8817273507416943, + "grad_norm": 1.780129618047362, + "learning_rate": 3.623700991870255e-07, + "loss": 0.5713, + "step": 28769 + }, + { + "epoch": 0.8817579992644354, + "grad_norm": 1.6677466341643037, + "learning_rate": 3.6218461893157753e-07, + "loss": 0.5268, + "step": 28770 + }, + { + "epoch": 0.8817886477871767, + "grad_norm": 2.190968620810849, + "learning_rate": 3.619991843737097e-07, + "loss": 0.6182, + "step": 28771 + }, + { + "epoch": 0.8818192963099178, + "grad_norm": 1.8910636015217857, + "learning_rate": 3.618137955152512e-07, + "loss": 0.5437, + "step": 28772 + }, + { + "epoch": 0.8818499448326591, + "grad_norm": 2.009655853401205, + "learning_rate": 3.616284523580288e-07, + "loss": 0.4857, + "step": 28773 + }, + { + "epoch": 0.8818805933554003, + "grad_norm": 1.8902532956983387, + "learning_rate": 3.6144315490386774e-07, + "loss": 0.5697, + "step": 28774 + }, + { + "epoch": 0.8819112418781415, + "grad_norm": 2.067741063158282, + "learning_rate": 3.6125790315459263e-07, + "loss": 0.5112, + "step": 28775 + }, + { + "epoch": 0.8819418904008827, + "grad_norm": 2.226554097577595, + "learning_rate": 3.610726971120321e-07, + "loss": 0.5417, + "step": 28776 + }, + { + "epoch": 0.8819725389236239, + "grad_norm": 1.798781168071757, + "learning_rate": 3.608875367780079e-07, + "loss": 0.5881, + "step": 28777 + }, + { + "epoch": 0.8820031874463651, + "grad_norm": 0.7788541994912409, + "learning_rate": 3.60702422154347e-07, + "loss": 0.3932, + "step": 28778 + }, + { + "epoch": 0.8820338359691063, + "grad_norm": 1.8342983211406627, + "learning_rate": 
3.605173532428713e-07, + "loss": 0.5374, + "step": 28779 + }, + { + "epoch": 0.8820644844918475, + "grad_norm": 0.7776914066212202, + "learning_rate": 3.6033233004540534e-07, + "loss": 0.3689, + "step": 28780 + }, + { + "epoch": 0.8820951330145888, + "grad_norm": 1.727256293463583, + "learning_rate": 3.601473525637728e-07, + "loss": 0.4558, + "step": 28781 + }, + { + "epoch": 0.8821257815373299, + "grad_norm": 1.9851573388290902, + "learning_rate": 3.599624207997943e-07, + "loss": 0.5936, + "step": 28782 + }, + { + "epoch": 0.882156430060071, + "grad_norm": 1.6275421444551401, + "learning_rate": 3.597775347552934e-07, + "loss": 0.4869, + "step": 28783 + }, + { + "epoch": 0.8821870785828123, + "grad_norm": 1.994222214473163, + "learning_rate": 3.5959269443209267e-07, + "loss": 0.6772, + "step": 28784 + }, + { + "epoch": 0.8822177271055535, + "grad_norm": 1.984930805907903, + "learning_rate": 3.5940789983201274e-07, + "loss": 0.561, + "step": 28785 + }, + { + "epoch": 0.8822483756282947, + "grad_norm": 1.9489416869155025, + "learning_rate": 3.592231509568722e-07, + "loss": 0.6048, + "step": 28786 + }, + { + "epoch": 0.8822790241510359, + "grad_norm": 1.8471957452957724, + "learning_rate": 3.5903844780849464e-07, + "loss": 0.5571, + "step": 28787 + }, + { + "epoch": 0.8823096726737771, + "grad_norm": 1.9744799542796496, + "learning_rate": 3.588537903886985e-07, + "loss": 0.536, + "step": 28788 + }, + { + "epoch": 0.8823403211965183, + "grad_norm": 1.8743903840009741, + "learning_rate": 3.5866917869930405e-07, + "loss": 0.5832, + "step": 28789 + }, + { + "epoch": 0.8823709697192595, + "grad_norm": 1.8841929792843322, + "learning_rate": 3.584846127421288e-07, + "loss": 0.5645, + "step": 28790 + }, + { + "epoch": 0.8824016182420007, + "grad_norm": 1.8474784889677964, + "learning_rate": 3.583000925189922e-07, + "loss": 0.5825, + "step": 28791 + }, + { + "epoch": 0.882432266764742, + "grad_norm": 1.6535382532827703, + "learning_rate": 3.5811561803171304e-07, + "loss": 0.5076, + "step": 28792 + }, + { + "epoch": 0.8824629152874831, + "grad_norm": 1.7494997793043927, + "learning_rate": 3.5793118928210803e-07, + "loss": 0.6175, + "step": 28793 + }, + { + "epoch": 0.8824935638102244, + "grad_norm": 1.7351823851702375, + "learning_rate": 3.577468062719941e-07, + "loss": 0.5056, + "step": 28794 + }, + { + "epoch": 0.8825242123329655, + "grad_norm": 1.8884016962544816, + "learning_rate": 3.5756246900319034e-07, + "loss": 0.5803, + "step": 28795 + }, + { + "epoch": 0.8825548608557068, + "grad_norm": 1.9147949171360419, + "learning_rate": 3.573781774775098e-07, + "loss": 0.5443, + "step": 28796 + }, + { + "epoch": 0.8825855093784479, + "grad_norm": 1.7490303609308058, + "learning_rate": 3.571939316967704e-07, + "loss": 0.4961, + "step": 28797 + }, + { + "epoch": 0.8826161579011892, + "grad_norm": 2.0063971150148636, + "learning_rate": 3.5700973166278795e-07, + "loss": 0.4422, + "step": 28798 + }, + { + "epoch": 0.8826468064239303, + "grad_norm": 1.8363055889167468, + "learning_rate": 3.5682557737737546e-07, + "loss": 0.5604, + "step": 28799 + }, + { + "epoch": 0.8826774549466716, + "grad_norm": 2.2661767134366273, + "learning_rate": 3.566414688423492e-07, + "loss": 0.603, + "step": 28800 + }, + { + "epoch": 0.8827081034694128, + "grad_norm": 1.8172771417720226, + "learning_rate": 3.564574060595222e-07, + "loss": 0.6139, + "step": 28801 + }, + { + "epoch": 0.882738751992154, + "grad_norm": 2.1046985053688476, + "learning_rate": 3.562733890307085e-07, + "loss": 0.6322, + "step": 28802 + }, + { + "epoch": 
0.8827694005148952, + "grad_norm": 1.7611099061785351, + "learning_rate": 3.5608941775772175e-07, + "loss": 0.5673, + "step": 28803 + }, + { + "epoch": 0.8828000490376364, + "grad_norm": 1.8005234476534968, + "learning_rate": 3.559054922423738e-07, + "loss": 0.5233, + "step": 28804 + }, + { + "epoch": 0.8828306975603776, + "grad_norm": 1.8592668848409324, + "learning_rate": 3.5572161248647705e-07, + "loss": 0.5564, + "step": 28805 + }, + { + "epoch": 0.8828613460831188, + "grad_norm": 1.8058981345156169, + "learning_rate": 3.5553777849184403e-07, + "loss": 0.5305, + "step": 28806 + }, + { + "epoch": 0.88289199460586, + "grad_norm": 0.8211679223280414, + "learning_rate": 3.5535399026028537e-07, + "loss": 0.4089, + "step": 28807 + }, + { + "epoch": 0.8829226431286012, + "grad_norm": 1.980427760587801, + "learning_rate": 3.55170247793612e-07, + "loss": 0.6311, + "step": 28808 + }, + { + "epoch": 0.8829532916513424, + "grad_norm": 1.965863230604797, + "learning_rate": 3.549865510936351e-07, + "loss": 0.4957, + "step": 28809 + }, + { + "epoch": 0.8829839401740837, + "grad_norm": 1.852388692206327, + "learning_rate": 3.548029001621639e-07, + "loss": 0.5436, + "step": 28810 + }, + { + "epoch": 0.8830145886968248, + "grad_norm": 1.68373977531125, + "learning_rate": 3.5461929500100857e-07, + "loss": 0.6034, + "step": 28811 + }, + { + "epoch": 0.8830452372195661, + "grad_norm": 1.8620072593752222, + "learning_rate": 3.5443573561197763e-07, + "loss": 0.5407, + "step": 28812 + }, + { + "epoch": 0.8830758857423072, + "grad_norm": 1.9631275936011328, + "learning_rate": 3.542522219968797e-07, + "loss": 0.5708, + "step": 28813 + }, + { + "epoch": 0.8831065342650484, + "grad_norm": 1.9674215604491727, + "learning_rate": 3.5406875415752386e-07, + "loss": 0.6657, + "step": 28814 + }, + { + "epoch": 0.8831371827877896, + "grad_norm": 0.8198461739639469, + "learning_rate": 3.5388533209571696e-07, + "loss": 0.4027, + "step": 28815 + }, + { + "epoch": 0.8831678313105308, + "grad_norm": 1.7679954629778254, + "learning_rate": 3.537019558132665e-07, + "loss": 0.6446, + "step": 28816 + }, + { + "epoch": 0.883198479833272, + "grad_norm": 0.7910478703215732, + "learning_rate": 3.535186253119799e-07, + "loss": 0.3878, + "step": 28817 + }, + { + "epoch": 0.8832291283560132, + "grad_norm": 1.846256719689068, + "learning_rate": 3.5333534059366294e-07, + "loss": 0.5606, + "step": 28818 + }, + { + "epoch": 0.8832597768787545, + "grad_norm": 1.6809401970156777, + "learning_rate": 3.5315210166012195e-07, + "loss": 0.4687, + "step": 28819 + }, + { + "epoch": 0.8832904254014956, + "grad_norm": 1.9053788703378949, + "learning_rate": 3.5296890851316154e-07, + "loss": 0.6112, + "step": 28820 + }, + { + "epoch": 0.8833210739242369, + "grad_norm": 1.83080199997713, + "learning_rate": 3.5278576115458817e-07, + "loss": 0.5752, + "step": 28821 + }, + { + "epoch": 0.883351722446978, + "grad_norm": 1.8968304433218397, + "learning_rate": 3.5260265958620586e-07, + "loss": 0.5796, + "step": 28822 + }, + { + "epoch": 0.8833823709697193, + "grad_norm": 1.8832574771480757, + "learning_rate": 3.524196038098182e-07, + "loss": 0.6575, + "step": 28823 + }, + { + "epoch": 0.8834130194924604, + "grad_norm": 0.782139397514332, + "learning_rate": 3.5223659382722875e-07, + "loss": 0.3735, + "step": 28824 + }, + { + "epoch": 0.8834436680152017, + "grad_norm": 0.7928637330362747, + "learning_rate": 3.520536296402427e-07, + "loss": 0.4074, + "step": 28825 + }, + { + "epoch": 0.8834743165379428, + "grad_norm": 1.772679758004928, + "learning_rate": 
3.518707112506603e-07, + "loss": 0.5388, + "step": 28826 + }, + { + "epoch": 0.8835049650606841, + "grad_norm": 0.8173263080504118, + "learning_rate": 3.516878386602857e-07, + "loss": 0.393, + "step": 28827 + }, + { + "epoch": 0.8835356135834253, + "grad_norm": 1.8597009420455732, + "learning_rate": 3.5150501187092013e-07, + "loss": 0.5731, + "step": 28828 + }, + { + "epoch": 0.8835662621061665, + "grad_norm": 2.0687004105726645, + "learning_rate": 3.51322230884365e-07, + "loss": 0.6489, + "step": 28829 + }, + { + "epoch": 0.8835969106289077, + "grad_norm": 1.7138204036981992, + "learning_rate": 3.511394957024217e-07, + "loss": 0.5719, + "step": 28830 + }, + { + "epoch": 0.8836275591516489, + "grad_norm": 1.828252531449769, + "learning_rate": 3.5095680632688867e-07, + "loss": 0.5439, + "step": 28831 + }, + { + "epoch": 0.8836582076743901, + "grad_norm": 1.7788381100468185, + "learning_rate": 3.5077416275956956e-07, + "loss": 0.5687, + "step": 28832 + }, + { + "epoch": 0.8836888561971313, + "grad_norm": 1.882352535504446, + "learning_rate": 3.5059156500226235e-07, + "loss": 0.6606, + "step": 28833 + }, + { + "epoch": 0.8837195047198725, + "grad_norm": 1.995701671943256, + "learning_rate": 3.50409013056765e-07, + "loss": 0.5905, + "step": 28834 + }, + { + "epoch": 0.8837501532426137, + "grad_norm": 2.13693122972275, + "learning_rate": 3.5022650692487725e-07, + "loss": 0.6525, + "step": 28835 + }, + { + "epoch": 0.8837808017653549, + "grad_norm": 1.9727400734218603, + "learning_rate": 3.500440466083982e-07, + "loss": 0.6329, + "step": 28836 + }, + { + "epoch": 0.8838114502880962, + "grad_norm": 1.8771275641335072, + "learning_rate": 3.498616321091242e-07, + "loss": 0.5518, + "step": 28837 + }, + { + "epoch": 0.8838420988108373, + "grad_norm": 0.7774440170531215, + "learning_rate": 3.4967926342885317e-07, + "loss": 0.385, + "step": 28838 + }, + { + "epoch": 0.8838727473335786, + "grad_norm": 1.962377751595147, + "learning_rate": 3.4949694056938324e-07, + "loss": 0.5575, + "step": 28839 + }, + { + "epoch": 0.8839033958563197, + "grad_norm": 2.1261043690467005, + "learning_rate": 3.49314663532509e-07, + "loss": 0.5667, + "step": 28840 + }, + { + "epoch": 0.883934044379061, + "grad_norm": 0.7904967840475449, + "learning_rate": 3.4913243232002846e-07, + "loss": 0.3915, + "step": 28841 + }, + { + "epoch": 0.8839646929018021, + "grad_norm": 2.067308413262351, + "learning_rate": 3.489502469337336e-07, + "loss": 0.5153, + "step": 28842 + }, + { + "epoch": 0.8839953414245434, + "grad_norm": 1.9126517591964742, + "learning_rate": 3.48768107375424e-07, + "loss": 0.668, + "step": 28843 + }, + { + "epoch": 0.8840259899472845, + "grad_norm": 1.5955394830864174, + "learning_rate": 3.485860136468927e-07, + "loss": 0.5652, + "step": 28844 + }, + { + "epoch": 0.8840566384700257, + "grad_norm": 1.8063278358713157, + "learning_rate": 3.4840396574993217e-07, + "loss": 0.5163, + "step": 28845 + }, + { + "epoch": 0.884087286992767, + "grad_norm": 0.7693458593811393, + "learning_rate": 3.4822196368633767e-07, + "loss": 0.3842, + "step": 28846 + }, + { + "epoch": 0.8841179355155081, + "grad_norm": 1.9821480139421235, + "learning_rate": 3.480400074579032e-07, + "loss": 0.5644, + "step": 28847 + }, + { + "epoch": 0.8841485840382494, + "grad_norm": 1.672274829778345, + "learning_rate": 3.4785809706642027e-07, + "loss": 0.5223, + "step": 28848 + }, + { + "epoch": 0.8841792325609905, + "grad_norm": 0.7847595729962399, + "learning_rate": 3.476762325136812e-07, + "loss": 0.3993, + "step": 28849 + }, + { + "epoch": 
0.8842098810837318, + "grad_norm": 2.034225151133813, + "learning_rate": 3.4749441380147906e-07, + "loss": 0.5457, + "step": 28850 + }, + { + "epoch": 0.8842405296064729, + "grad_norm": 0.8204576689309776, + "learning_rate": 3.4731264093160574e-07, + "loss": 0.4003, + "step": 28851 + }, + { + "epoch": 0.8842711781292142, + "grad_norm": 1.8804280609348552, + "learning_rate": 3.4713091390585096e-07, + "loss": 0.5687, + "step": 28852 + }, + { + "epoch": 0.8843018266519553, + "grad_norm": 1.9931024259135914, + "learning_rate": 3.469492327260043e-07, + "loss": 0.6538, + "step": 28853 + }, + { + "epoch": 0.8843324751746966, + "grad_norm": 1.9682166329601585, + "learning_rate": 3.4676759739385946e-07, + "loss": 0.6266, + "step": 28854 + }, + { + "epoch": 0.8843631236974377, + "grad_norm": 0.8267891163079384, + "learning_rate": 3.465860079112032e-07, + "loss": 0.4041, + "step": 28855 + }, + { + "epoch": 0.884393772220179, + "grad_norm": 1.7953336036854568, + "learning_rate": 3.464044642798259e-07, + "loss": 0.5081, + "step": 28856 + }, + { + "epoch": 0.8844244207429202, + "grad_norm": 1.9567778560834692, + "learning_rate": 3.4622296650151545e-07, + "loss": 0.5861, + "step": 28857 + }, + { + "epoch": 0.8844550692656614, + "grad_norm": 1.8398090213649063, + "learning_rate": 3.460415145780605e-07, + "loss": 0.5528, + "step": 28858 + }, + { + "epoch": 0.8844857177884026, + "grad_norm": 1.9457008929825945, + "learning_rate": 3.4586010851125063e-07, + "loss": 0.6117, + "step": 28859 + }, + { + "epoch": 0.8845163663111438, + "grad_norm": 2.2291741268521874, + "learning_rate": 3.4567874830287116e-07, + "loss": 0.63, + "step": 28860 + }, + { + "epoch": 0.884547014833885, + "grad_norm": 2.1157566526763643, + "learning_rate": 3.454974339547096e-07, + "loss": 0.5535, + "step": 28861 + }, + { + "epoch": 0.8845776633566262, + "grad_norm": 1.7927765601942867, + "learning_rate": 3.453161654685533e-07, + "loss": 0.5273, + "step": 28862 + }, + { + "epoch": 0.8846083118793674, + "grad_norm": 1.9320617828684248, + "learning_rate": 3.451349428461881e-07, + "loss": 0.6576, + "step": 28863 + }, + { + "epoch": 0.8846389604021087, + "grad_norm": 1.865323521632812, + "learning_rate": 3.449537660893987e-07, + "loss": 0.6145, + "step": 28864 + }, + { + "epoch": 0.8846696089248498, + "grad_norm": 2.3065430776986737, + "learning_rate": 3.447726351999703e-07, + "loss": 0.6012, + "step": 28865 + }, + { + "epoch": 0.8847002574475911, + "grad_norm": 1.9973336625309321, + "learning_rate": 3.4459155017968925e-07, + "loss": 0.6007, + "step": 28866 + }, + { + "epoch": 0.8847309059703322, + "grad_norm": 1.909393612781189, + "learning_rate": 3.4441051103033807e-07, + "loss": 0.5537, + "step": 28867 + }, + { + "epoch": 0.8847615544930735, + "grad_norm": 1.7650707471802451, + "learning_rate": 3.442295177537014e-07, + "loss": 0.5647, + "step": 28868 + }, + { + "epoch": 0.8847922030158146, + "grad_norm": 2.0584953006811415, + "learning_rate": 3.4404857035156226e-07, + "loss": 0.6598, + "step": 28869 + }, + { + "epoch": 0.8848228515385559, + "grad_norm": 1.7557727576856832, + "learning_rate": 3.438676688257053e-07, + "loss": 0.5618, + "step": 28870 + }, + { + "epoch": 0.884853500061297, + "grad_norm": 1.8551261737663953, + "learning_rate": 3.4368681317791086e-07, + "loss": 0.575, + "step": 28871 + }, + { + "epoch": 0.8848841485840383, + "grad_norm": 2.190673437447005, + "learning_rate": 3.4350600340996023e-07, + "loss": 0.6167, + "step": 28872 + }, + { + "epoch": 0.8849147971067794, + "grad_norm": 2.1372939959444537, + "learning_rate": 
3.433252395236381e-07, + "loss": 0.6255, + "step": 28873 + }, + { + "epoch": 0.8849454456295207, + "grad_norm": 2.4132063876566807, + "learning_rate": 3.4314452152072354e-07, + "loss": 0.5104, + "step": 28874 + }, + { + "epoch": 0.8849760941522619, + "grad_norm": 0.7939650877787471, + "learning_rate": 3.4296384940299687e-07, + "loss": 0.3863, + "step": 28875 + }, + { + "epoch": 0.885006742675003, + "grad_norm": 1.7978663813028786, + "learning_rate": 3.427832231722389e-07, + "loss": 0.4622, + "step": 28876 + }, + { + "epoch": 0.8850373911977443, + "grad_norm": 0.8110800643459795, + "learning_rate": 3.4260264283022926e-07, + "loss": 0.3931, + "step": 28877 + }, + { + "epoch": 0.8850680397204854, + "grad_norm": 2.175052734635789, + "learning_rate": 3.4242210837874876e-07, + "loss": 0.5285, + "step": 28878 + }, + { + "epoch": 0.8850986882432267, + "grad_norm": 1.9469960441262322, + "learning_rate": 3.422416198195738e-07, + "loss": 0.5769, + "step": 28879 + }, + { + "epoch": 0.8851293367659678, + "grad_norm": 1.773107667561283, + "learning_rate": 3.420611771544835e-07, + "loss": 0.5835, + "step": 28880 + }, + { + "epoch": 0.8851599852887091, + "grad_norm": 1.8112891464844663, + "learning_rate": 3.418807803852575e-07, + "loss": 0.5168, + "step": 28881 + }, + { + "epoch": 0.8851906338114502, + "grad_norm": 1.7863973956866381, + "learning_rate": 3.4170042951367224e-07, + "loss": 0.5863, + "step": 28882 + }, + { + "epoch": 0.8852212823341915, + "grad_norm": 0.8310185698966569, + "learning_rate": 3.415201245415023e-07, + "loss": 0.4072, + "step": 28883 + }, + { + "epoch": 0.8852519308569327, + "grad_norm": 0.8023775671561039, + "learning_rate": 3.4133986547052855e-07, + "loss": 0.408, + "step": 28884 + }, + { + "epoch": 0.8852825793796739, + "grad_norm": 1.7234956815432487, + "learning_rate": 3.4115965230252404e-07, + "loss": 0.4582, + "step": 28885 + }, + { + "epoch": 0.8853132279024151, + "grad_norm": 0.7965915084710973, + "learning_rate": 3.4097948503926613e-07, + "loss": 0.3807, + "step": 28886 + }, + { + "epoch": 0.8853438764251563, + "grad_norm": 1.9625676510415193, + "learning_rate": 3.407993636825291e-07, + "loss": 0.5672, + "step": 28887 + }, + { + "epoch": 0.8853745249478975, + "grad_norm": 2.054292024234447, + "learning_rate": 3.406192882340875e-07, + "loss": 0.6071, + "step": 28888 + }, + { + "epoch": 0.8854051734706387, + "grad_norm": 1.8978879122807937, + "learning_rate": 3.4043925869571724e-07, + "loss": 0.6476, + "step": 28889 + }, + { + "epoch": 0.8854358219933799, + "grad_norm": 1.6962235919246855, + "learning_rate": 3.4025927506919075e-07, + "loss": 0.5493, + "step": 28890 + }, + { + "epoch": 0.8854664705161212, + "grad_norm": 2.027507379750177, + "learning_rate": 3.4007933735628163e-07, + "loss": 0.6042, + "step": 28891 + }, + { + "epoch": 0.8854971190388623, + "grad_norm": 0.7869686397740563, + "learning_rate": 3.398994455587634e-07, + "loss": 0.3783, + "step": 28892 + }, + { + "epoch": 0.8855277675616036, + "grad_norm": 1.786588942627675, + "learning_rate": 3.397195996784092e-07, + "loss": 0.5943, + "step": 28893 + }, + { + "epoch": 0.8855584160843447, + "grad_norm": 2.0178439308050002, + "learning_rate": 3.39539799716988e-07, + "loss": 0.5535, + "step": 28894 + }, + { + "epoch": 0.885589064607086, + "grad_norm": 1.9429443235307005, + "learning_rate": 3.3936004567627523e-07, + "loss": 0.491, + "step": 28895 + }, + { + "epoch": 0.8856197131298271, + "grad_norm": 0.781733764586263, + "learning_rate": 3.391803375580394e-07, + "loss": 0.3989, + "step": 28896 + }, + { + "epoch": 
0.8856503616525684, + "grad_norm": 1.9036958046392785, + "learning_rate": 3.3900067536405346e-07, + "loss": 0.5095, + "step": 28897 + }, + { + "epoch": 0.8856810101753095, + "grad_norm": 1.8960075811741655, + "learning_rate": 3.3882105909608497e-07, + "loss": 0.5598, + "step": 28898 + }, + { + "epoch": 0.8857116586980508, + "grad_norm": 1.8747398927554595, + "learning_rate": 3.386414887559059e-07, + "loss": 0.5753, + "step": 28899 + }, + { + "epoch": 0.885742307220792, + "grad_norm": 0.7869018956181375, + "learning_rate": 3.384619643452852e-07, + "loss": 0.394, + "step": 28900 + }, + { + "epoch": 0.8857729557435332, + "grad_norm": 0.846780397097301, + "learning_rate": 3.3828248586599113e-07, + "loss": 0.3931, + "step": 28901 + }, + { + "epoch": 0.8858036042662744, + "grad_norm": 1.8375768587223384, + "learning_rate": 3.381030533197921e-07, + "loss": 0.5829, + "step": 28902 + }, + { + "epoch": 0.8858342527890156, + "grad_norm": 1.6789877550686059, + "learning_rate": 3.379236667084573e-07, + "loss": 0.5472, + "step": 28903 + }, + { + "epoch": 0.8858649013117568, + "grad_norm": 1.8937611057691655, + "learning_rate": 3.377443260337532e-07, + "loss": 0.5513, + "step": 28904 + }, + { + "epoch": 0.885895549834498, + "grad_norm": 1.864353241627941, + "learning_rate": 3.375650312974466e-07, + "loss": 0.5458, + "step": 28905 + }, + { + "epoch": 0.8859261983572392, + "grad_norm": 0.7894753702202391, + "learning_rate": 3.3738578250130547e-07, + "loss": 0.3951, + "step": 28906 + }, + { + "epoch": 0.8859568468799803, + "grad_norm": 1.9401732883897658, + "learning_rate": 3.372065796470947e-07, + "loss": 0.4783, + "step": 28907 + }, + { + "epoch": 0.8859874954027216, + "grad_norm": 1.7914869221825485, + "learning_rate": 3.370274227365811e-07, + "loss": 0.5285, + "step": 28908 + }, + { + "epoch": 0.8860181439254627, + "grad_norm": 0.8514817196136353, + "learning_rate": 3.3684831177152876e-07, + "loss": 0.4054, + "step": 28909 + }, + { + "epoch": 0.886048792448204, + "grad_norm": 1.943011324296612, + "learning_rate": 3.3666924675370307e-07, + "loss": 0.548, + "step": 28910 + }, + { + "epoch": 0.8860794409709452, + "grad_norm": 1.6601629472334165, + "learning_rate": 3.3649022768486917e-07, + "loss": 0.4874, + "step": 28911 + }, + { + "epoch": 0.8861100894936864, + "grad_norm": 2.0867875253624706, + "learning_rate": 3.363112545667896e-07, + "loss": 0.5768, + "step": 28912 + }, + { + "epoch": 0.8861407380164276, + "grad_norm": 1.8289127227589266, + "learning_rate": 3.361323274012279e-07, + "loss": 0.5296, + "step": 28913 + }, + { + "epoch": 0.8861713865391688, + "grad_norm": 1.881912974710792, + "learning_rate": 3.359534461899494e-07, + "loss": 0.5932, + "step": 28914 + }, + { + "epoch": 0.88620203506191, + "grad_norm": 2.511327577627848, + "learning_rate": 3.3577461093471376e-07, + "loss": 0.7267, + "step": 28915 + }, + { + "epoch": 0.8862326835846512, + "grad_norm": 1.8299224541224273, + "learning_rate": 3.3559582163728456e-07, + "loss": 0.5824, + "step": 28916 + }, + { + "epoch": 0.8862633321073924, + "grad_norm": 1.9875105439824137, + "learning_rate": 3.3541707829942314e-07, + "loss": 0.5115, + "step": 28917 + }, + { + "epoch": 0.8862939806301336, + "grad_norm": 2.0945536108247786, + "learning_rate": 3.352383809228904e-07, + "loss": 0.536, + "step": 28918 + }, + { + "epoch": 0.8863246291528748, + "grad_norm": 2.091546213430536, + "learning_rate": 3.350597295094482e-07, + "loss": 0.5445, + "step": 28919 + }, + { + "epoch": 0.8863552776756161, + "grad_norm": 1.770522094459845, + "learning_rate": 
3.348811240608552e-07, + "loss": 0.5274, + "step": 28920 + }, + { + "epoch": 0.8863859261983572, + "grad_norm": 1.8453384353000615, + "learning_rate": 3.347025645788726e-07, + "loss": 0.5821, + "step": 28921 + }, + { + "epoch": 0.8864165747210985, + "grad_norm": 1.7944564772210196, + "learning_rate": 3.345240510652592e-07, + "loss": 0.6545, + "step": 28922 + }, + { + "epoch": 0.8864472232438396, + "grad_norm": 1.7774518904237924, + "learning_rate": 3.3434558352177403e-07, + "loss": 0.5611, + "step": 28923 + }, + { + "epoch": 0.8864778717665809, + "grad_norm": 2.4397661572656024, + "learning_rate": 3.341671619501752e-07, + "loss": 0.5324, + "step": 28924 + }, + { + "epoch": 0.886508520289322, + "grad_norm": 2.029662475303819, + "learning_rate": 3.339887863522223e-07, + "loss": 0.6394, + "step": 28925 + }, + { + "epoch": 0.8865391688120633, + "grad_norm": 1.9791284753478031, + "learning_rate": 3.338104567296707e-07, + "loss": 0.6311, + "step": 28926 + }, + { + "epoch": 0.8865698173348044, + "grad_norm": 1.8556122112597473, + "learning_rate": 3.33632173084279e-07, + "loss": 0.5795, + "step": 28927 + }, + { + "epoch": 0.8866004658575457, + "grad_norm": 1.7899587560376602, + "learning_rate": 3.334539354178029e-07, + "loss": 0.5795, + "step": 28928 + }, + { + "epoch": 0.8866311143802869, + "grad_norm": 1.7032223850657624, + "learning_rate": 3.3327574373199946e-07, + "loss": 0.463, + "step": 28929 + }, + { + "epoch": 0.8866617629030281, + "grad_norm": 0.8047722578422246, + "learning_rate": 3.3309759802862496e-07, + "loss": 0.3936, + "step": 28930 + }, + { + "epoch": 0.8866924114257693, + "grad_norm": 1.9358634163365862, + "learning_rate": 3.329194983094325e-07, + "loss": 0.6525, + "step": 28931 + }, + { + "epoch": 0.8867230599485105, + "grad_norm": 1.9387194155016003, + "learning_rate": 3.3274144457617897e-07, + "loss": 0.6184, + "step": 28932 + }, + { + "epoch": 0.8867537084712517, + "grad_norm": 1.7129824238132496, + "learning_rate": 3.3256343683061854e-07, + "loss": 0.5308, + "step": 28933 + }, + { + "epoch": 0.8867843569939929, + "grad_norm": 2.0211389259699284, + "learning_rate": 3.3238547507450425e-07, + "loss": 0.5194, + "step": 28934 + }, + { + "epoch": 0.8868150055167341, + "grad_norm": 1.929366615997866, + "learning_rate": 3.3220755930959025e-07, + "loss": 0.511, + "step": 28935 + }, + { + "epoch": 0.8868456540394754, + "grad_norm": 0.8635747213541562, + "learning_rate": 3.3202968953763015e-07, + "loss": 0.432, + "step": 28936 + }, + { + "epoch": 0.8868763025622165, + "grad_norm": 0.8145113125074753, + "learning_rate": 3.3185186576037474e-07, + "loss": 0.406, + "step": 28937 + }, + { + "epoch": 0.8869069510849577, + "grad_norm": 1.6964154624893713, + "learning_rate": 3.316740879795782e-07, + "loss": 0.4908, + "step": 28938 + }, + { + "epoch": 0.8869375996076989, + "grad_norm": 1.6828082602539274, + "learning_rate": 3.3149635619699026e-07, + "loss": 0.6121, + "step": 28939 + }, + { + "epoch": 0.8869682481304401, + "grad_norm": 1.9328464872176194, + "learning_rate": 3.3131867041436394e-07, + "loss": 0.6544, + "step": 28940 + }, + { + "epoch": 0.8869988966531813, + "grad_norm": 2.042680729067915, + "learning_rate": 3.3114103063345006e-07, + "loss": 0.5893, + "step": 28941 + }, + { + "epoch": 0.8870295451759225, + "grad_norm": 1.9634249516322158, + "learning_rate": 3.3096343685599717e-07, + "loss": 0.5555, + "step": 28942 + }, + { + "epoch": 0.8870601936986637, + "grad_norm": 1.9292713315578298, + "learning_rate": 3.3078588908375565e-07, + "loss": 0.5311, + "step": 28943 + }, + { + "epoch": 
0.8870908422214049, + "grad_norm": 2.055311608315351, + "learning_rate": 3.3060838731847676e-07, + "loss": 0.5996, + "step": 28944 + }, + { + "epoch": 0.8871214907441461, + "grad_norm": 1.9378832775888086, + "learning_rate": 3.3043093156190754e-07, + "loss": 0.6266, + "step": 28945 + }, + { + "epoch": 0.8871521392668873, + "grad_norm": 1.9378447417456952, + "learning_rate": 3.302535218157965e-07, + "loss": 0.4873, + "step": 28946 + }, + { + "epoch": 0.8871827877896286, + "grad_norm": 2.046594704974327, + "learning_rate": 3.300761580818934e-07, + "loss": 0.6141, + "step": 28947 + }, + { + "epoch": 0.8872134363123697, + "grad_norm": 1.9856485673844226, + "learning_rate": 3.298988403619441e-07, + "loss": 0.5827, + "step": 28948 + }, + { + "epoch": 0.887244084835111, + "grad_norm": 2.0527820538927246, + "learning_rate": 3.297215686576971e-07, + "loss": 0.6251, + "step": 28949 + }, + { + "epoch": 0.8872747333578521, + "grad_norm": 1.761532070881029, + "learning_rate": 3.2954434297089775e-07, + "loss": 0.5667, + "step": 28950 + }, + { + "epoch": 0.8873053818805934, + "grad_norm": 0.7923468555954134, + "learning_rate": 3.29367163303293e-07, + "loss": 0.3953, + "step": 28951 + }, + { + "epoch": 0.8873360304033345, + "grad_norm": 1.9258018995010464, + "learning_rate": 3.2919002965662915e-07, + "loss": 0.5356, + "step": 28952 + }, + { + "epoch": 0.8873666789260758, + "grad_norm": 2.094367373066047, + "learning_rate": 3.2901294203265046e-07, + "loss": 0.5081, + "step": 28953 + }, + { + "epoch": 0.8873973274488169, + "grad_norm": 1.94350636010397, + "learning_rate": 3.288359004331021e-07, + "loss": 0.4917, + "step": 28954 + }, + { + "epoch": 0.8874279759715582, + "grad_norm": 1.6638563857690718, + "learning_rate": 3.2865890485972995e-07, + "loss": 0.4631, + "step": 28955 + }, + { + "epoch": 0.8874586244942994, + "grad_norm": 2.0108908995063657, + "learning_rate": 3.2848195531427594e-07, + "loss": 0.5789, + "step": 28956 + }, + { + "epoch": 0.8874892730170406, + "grad_norm": 0.8166272419140229, + "learning_rate": 3.2830505179848425e-07, + "loss": 0.4039, + "step": 28957 + }, + { + "epoch": 0.8875199215397818, + "grad_norm": 2.008877045289778, + "learning_rate": 3.281281943140985e-07, + "loss": 0.557, + "step": 28958 + }, + { + "epoch": 0.887550570062523, + "grad_norm": 2.013818900083544, + "learning_rate": 3.279513828628611e-07, + "loss": 0.5376, + "step": 28959 + }, + { + "epoch": 0.8875812185852642, + "grad_norm": 2.2027441896565816, + "learning_rate": 3.2777461744651516e-07, + "loss": 0.4342, + "step": 28960 + }, + { + "epoch": 0.8876118671080054, + "grad_norm": 1.7382378856645497, + "learning_rate": 3.2759789806679987e-07, + "loss": 0.3804, + "step": 28961 + }, + { + "epoch": 0.8876425156307466, + "grad_norm": 2.0172973810891657, + "learning_rate": 3.2742122472545825e-07, + "loss": 0.4881, + "step": 28962 + }, + { + "epoch": 0.8876731641534878, + "grad_norm": 1.8309814259854873, + "learning_rate": 3.272445974242311e-07, + "loss": 0.4639, + "step": 28963 + }, + { + "epoch": 0.887703812676229, + "grad_norm": 2.125981181742931, + "learning_rate": 3.2706801616485816e-07, + "loss": 0.6077, + "step": 28964 + }, + { + "epoch": 0.8877344611989703, + "grad_norm": 1.6922378419601438, + "learning_rate": 3.268914809490797e-07, + "loss": 0.5768, + "step": 28965 + }, + { + "epoch": 0.8877651097217114, + "grad_norm": 1.9668405635706747, + "learning_rate": 3.267149917786361e-07, + "loss": 0.6488, + "step": 28966 + }, + { + "epoch": 0.8877957582444527, + "grad_norm": 2.2172711913884875, + "learning_rate": 
3.2653854865526414e-07, + "loss": 0.5656, + "step": 28967 + }, + { + "epoch": 0.8878264067671938, + "grad_norm": 0.8306824386344192, + "learning_rate": 3.263621515807047e-07, + "loss": 0.4059, + "step": 28968 + }, + { + "epoch": 0.887857055289935, + "grad_norm": 1.7830071241762653, + "learning_rate": 3.2618580055669313e-07, + "loss": 0.4889, + "step": 28969 + }, + { + "epoch": 0.8878877038126762, + "grad_norm": 2.147971835047823, + "learning_rate": 3.2600949558497076e-07, + "loss": 0.5706, + "step": 28970 + }, + { + "epoch": 0.8879183523354174, + "grad_norm": 1.8091317963017624, + "learning_rate": 3.2583323666727174e-07, + "loss": 0.5305, + "step": 28971 + }, + { + "epoch": 0.8879490008581586, + "grad_norm": 1.770124908448919, + "learning_rate": 3.256570238053336e-07, + "loss": 0.5202, + "step": 28972 + }, + { + "epoch": 0.8879796493808998, + "grad_norm": 1.772519720013148, + "learning_rate": 3.2548085700089273e-07, + "loss": 0.5971, + "step": 28973 + }, + { + "epoch": 0.888010297903641, + "grad_norm": 1.827547184666738, + "learning_rate": 3.2530473625568606e-07, + "loss": 0.5218, + "step": 28974 + }, + { + "epoch": 0.8880409464263822, + "grad_norm": 1.6985786578021274, + "learning_rate": 3.251286615714466e-07, + "loss": 0.5276, + "step": 28975 + }, + { + "epoch": 0.8880715949491235, + "grad_norm": 1.932631515907652, + "learning_rate": 3.2495263294991084e-07, + "loss": 0.4807, + "step": 28976 + }, + { + "epoch": 0.8881022434718646, + "grad_norm": 1.7831731810877096, + "learning_rate": 3.247766503928129e-07, + "loss": 0.6862, + "step": 28977 + }, + { + "epoch": 0.8881328919946059, + "grad_norm": 1.9914966363737243, + "learning_rate": 3.246007139018875e-07, + "loss": 0.5119, + "step": 28978 + }, + { + "epoch": 0.888163540517347, + "grad_norm": 1.8728180396198404, + "learning_rate": 3.244248234788677e-07, + "loss": 0.5554, + "step": 28979 + }, + { + "epoch": 0.8881941890400883, + "grad_norm": 1.7548979336185042, + "learning_rate": 3.242489791254849e-07, + "loss": 0.5391, + "step": 28980 + }, + { + "epoch": 0.8882248375628294, + "grad_norm": 1.8842432314026316, + "learning_rate": 3.2407318084347494e-07, + "loss": 0.5522, + "step": 28981 + }, + { + "epoch": 0.8882554860855707, + "grad_norm": 1.7273461888424073, + "learning_rate": 3.238974286345681e-07, + "loss": 0.5947, + "step": 28982 + }, + { + "epoch": 0.8882861346083119, + "grad_norm": 1.8567774365782619, + "learning_rate": 3.2372172250049513e-07, + "loss": 0.4994, + "step": 28983 + }, + { + "epoch": 0.8883167831310531, + "grad_norm": 2.1664242591325142, + "learning_rate": 3.2354606244298925e-07, + "loss": 0.592, + "step": 28984 + }, + { + "epoch": 0.8883474316537943, + "grad_norm": 2.036312011180408, + "learning_rate": 3.233704484637801e-07, + "loss": 0.6024, + "step": 28985 + }, + { + "epoch": 0.8883780801765355, + "grad_norm": 2.37341662414432, + "learning_rate": 3.23194880564599e-07, + "loss": 0.6261, + "step": 28986 + }, + { + "epoch": 0.8884087286992767, + "grad_norm": 1.869144205545394, + "learning_rate": 3.2301935874717527e-07, + "loss": 0.5828, + "step": 28987 + }, + { + "epoch": 0.8884393772220179, + "grad_norm": 2.0904654542411545, + "learning_rate": 3.22843883013238e-07, + "loss": 0.602, + "step": 28988 + }, + { + "epoch": 0.8884700257447591, + "grad_norm": 0.8221834292918085, + "learning_rate": 3.2266845336451747e-07, + "loss": 0.406, + "step": 28989 + }, + { + "epoch": 0.8885006742675003, + "grad_norm": 1.8373531497045148, + "learning_rate": 3.224930698027412e-07, + "loss": 0.4296, + "step": 28990 + }, + { + "epoch": 
0.8885313227902415, + "grad_norm": 1.7790869350149203, + "learning_rate": 3.223177323296367e-07, + "loss": 0.5711, + "step": 28991 + }, + { + "epoch": 0.8885619713129828, + "grad_norm": 2.042836921636191, + "learning_rate": 3.2214244094693313e-07, + "loss": 0.6017, + "step": 28992 + }, + { + "epoch": 0.8885926198357239, + "grad_norm": 1.9260500781727263, + "learning_rate": 3.2196719565635747e-07, + "loss": 0.5262, + "step": 28993 + }, + { + "epoch": 0.8886232683584652, + "grad_norm": 2.0898561325409175, + "learning_rate": 3.21791996459635e-07, + "loss": 0.6384, + "step": 28994 + }, + { + "epoch": 0.8886539168812063, + "grad_norm": 1.8887583581646403, + "learning_rate": 3.2161684335849317e-07, + "loss": 0.5277, + "step": 28995 + }, + { + "epoch": 0.8886845654039476, + "grad_norm": 1.802089003101817, + "learning_rate": 3.2144173635465735e-07, + "loss": 0.6024, + "step": 28996 + }, + { + "epoch": 0.8887152139266887, + "grad_norm": 1.7671756943673864, + "learning_rate": 3.2126667544985393e-07, + "loss": 0.5258, + "step": 28997 + }, + { + "epoch": 0.88874586244943, + "grad_norm": 1.8620330679648993, + "learning_rate": 3.210916606458064e-07, + "loss": 0.5131, + "step": 28998 + }, + { + "epoch": 0.8887765109721711, + "grad_norm": 1.7945501055686455, + "learning_rate": 3.2091669194424025e-07, + "loss": 0.5931, + "step": 28999 + }, + { + "epoch": 0.8888071594949123, + "grad_norm": 1.9171455808119584, + "learning_rate": 3.207417693468795e-07, + "loss": 0.5592, + "step": 29000 + }, + { + "epoch": 0.8888378080176536, + "grad_norm": 2.0091207894390366, + "learning_rate": 3.205668928554473e-07, + "loss": 0.5969, + "step": 29001 + }, + { + "epoch": 0.8888684565403947, + "grad_norm": 1.921490533473004, + "learning_rate": 3.203920624716661e-07, + "loss": 0.5341, + "step": 29002 + }, + { + "epoch": 0.888899105063136, + "grad_norm": 1.8625515422114902, + "learning_rate": 3.20217278197259e-07, + "loss": 0.5358, + "step": 29003 + }, + { + "epoch": 0.8889297535858771, + "grad_norm": 2.161902899053662, + "learning_rate": 3.200425400339485e-07, + "loss": 0.6238, + "step": 29004 + }, + { + "epoch": 0.8889604021086184, + "grad_norm": 1.906457101398361, + "learning_rate": 3.198678479834572e-07, + "loss": 0.5454, + "step": 29005 + }, + { + "epoch": 0.8889910506313595, + "grad_norm": 1.8476450823910626, + "learning_rate": 3.1969320204750467e-07, + "loss": 0.5123, + "step": 29006 + }, + { + "epoch": 0.8890216991541008, + "grad_norm": 0.8304897094001467, + "learning_rate": 3.1951860222781296e-07, + "loss": 0.3916, + "step": 29007 + }, + { + "epoch": 0.8890523476768419, + "grad_norm": 1.7467573206064493, + "learning_rate": 3.1934404852610235e-07, + "loss": 0.5088, + "step": 29008 + }, + { + "epoch": 0.8890829961995832, + "grad_norm": 1.8034219281689097, + "learning_rate": 3.191695409440915e-07, + "loss": 0.5993, + "step": 29009 + }, + { + "epoch": 0.8891136447223243, + "grad_norm": 2.089903090224437, + "learning_rate": 3.1899507948350115e-07, + "loss": 0.5931, + "step": 29010 + }, + { + "epoch": 0.8891442932450656, + "grad_norm": 1.911871664044322, + "learning_rate": 3.1882066414605063e-07, + "loss": 0.616, + "step": 29011 + }, + { + "epoch": 0.8891749417678068, + "grad_norm": 2.1514097287572715, + "learning_rate": 3.186462949334568e-07, + "loss": 0.6283, + "step": 29012 + }, + { + "epoch": 0.889205590290548, + "grad_norm": 1.8428092231976299, + "learning_rate": 3.1847197184743997e-07, + "loss": 0.5612, + "step": 29013 + }, + { + "epoch": 0.8892362388132892, + "grad_norm": 1.7428099748469208, + "learning_rate": 
3.182976948897154e-07, + "loss": 0.6234, + "step": 29014 + }, + { + "epoch": 0.8892668873360304, + "grad_norm": 0.788031371430566, + "learning_rate": 3.1812346406200176e-07, + "loss": 0.3847, + "step": 29015 + }, + { + "epoch": 0.8892975358587716, + "grad_norm": 0.7983602238097871, + "learning_rate": 3.179492793660166e-07, + "loss": 0.3868, + "step": 29016 + }, + { + "epoch": 0.8893281843815128, + "grad_norm": 1.9169360978470897, + "learning_rate": 3.1777514080347404e-07, + "loss": 0.5938, + "step": 29017 + }, + { + "epoch": 0.889358832904254, + "grad_norm": 1.9821030864245914, + "learning_rate": 3.176010483760911e-07, + "loss": 0.5517, + "step": 29018 + }, + { + "epoch": 0.8893894814269953, + "grad_norm": 1.8239884348417124, + "learning_rate": 3.174270020855835e-07, + "loss": 0.6049, + "step": 29019 + }, + { + "epoch": 0.8894201299497364, + "grad_norm": 1.8948023963069922, + "learning_rate": 3.1725300193366615e-07, + "loss": 0.5663, + "step": 29020 + }, + { + "epoch": 0.8894507784724777, + "grad_norm": 2.11583418751898, + "learning_rate": 3.1707904792205144e-07, + "loss": 0.5727, + "step": 29021 + }, + { + "epoch": 0.8894814269952188, + "grad_norm": 2.0200462801505346, + "learning_rate": 3.1690514005245643e-07, + "loss": 0.4937, + "step": 29022 + }, + { + "epoch": 0.8895120755179601, + "grad_norm": 0.804056955381687, + "learning_rate": 3.16731278326593e-07, + "loss": 0.4196, + "step": 29023 + }, + { + "epoch": 0.8895427240407012, + "grad_norm": 1.9012160592573941, + "learning_rate": 3.165574627461748e-07, + "loss": 0.6114, + "step": 29024 + }, + { + "epoch": 0.8895733725634425, + "grad_norm": 2.215780892412796, + "learning_rate": 3.1638369331291386e-07, + "loss": 0.6153, + "step": 29025 + }, + { + "epoch": 0.8896040210861836, + "grad_norm": 1.8093519057098022, + "learning_rate": 3.162099700285226e-07, + "loss": 0.5269, + "step": 29026 + }, + { + "epoch": 0.8896346696089249, + "grad_norm": 0.8148135057690322, + "learning_rate": 3.160362928947136e-07, + "loss": 0.3978, + "step": 29027 + }, + { + "epoch": 0.889665318131666, + "grad_norm": 1.9768876263859065, + "learning_rate": 3.158626619131966e-07, + "loss": 0.5589, + "step": 29028 + }, + { + "epoch": 0.8896959666544073, + "grad_norm": 2.0131924274979327, + "learning_rate": 3.156890770856835e-07, + "loss": 0.6318, + "step": 29029 + }, + { + "epoch": 0.8897266151771485, + "grad_norm": 2.1017357849129197, + "learning_rate": 3.1551553841388526e-07, + "loss": 0.5904, + "step": 29030 + }, + { + "epoch": 0.8897572636998896, + "grad_norm": 2.0284941997617576, + "learning_rate": 3.153420458995099e-07, + "loss": 0.5554, + "step": 29031 + }, + { + "epoch": 0.8897879122226309, + "grad_norm": 1.9980857692772616, + "learning_rate": 3.1516859954426826e-07, + "loss": 0.5814, + "step": 29032 + }, + { + "epoch": 0.889818560745372, + "grad_norm": 1.9596234964670158, + "learning_rate": 3.1499519934986956e-07, + "loss": 0.5922, + "step": 29033 + }, + { + "epoch": 0.8898492092681133, + "grad_norm": 1.7405249199104411, + "learning_rate": 3.148218453180213e-07, + "loss": 0.4613, + "step": 29034 + }, + { + "epoch": 0.8898798577908544, + "grad_norm": 1.920755636925017, + "learning_rate": 3.1464853745043324e-07, + "loss": 0.6221, + "step": 29035 + }, + { + "epoch": 0.8899105063135957, + "grad_norm": 1.888297534274532, + "learning_rate": 3.1447527574881064e-07, + "loss": 0.5136, + "step": 29036 + }, + { + "epoch": 0.8899411548363368, + "grad_norm": 1.7202633860026595, + "learning_rate": 3.143020602148622e-07, + "loss": 0.449, + "step": 29037 + }, + { + "epoch": 
0.8899718033590781, + "grad_norm": 1.9204764591776966, + "learning_rate": 3.141288908502954e-07, + "loss": 0.5757, + "step": 29038 + }, + { + "epoch": 0.8900024518818193, + "grad_norm": 1.5465270505194502, + "learning_rate": 3.139557676568145e-07, + "loss": 0.5137, + "step": 29039 + }, + { + "epoch": 0.8900331004045605, + "grad_norm": 1.8811209703919567, + "learning_rate": 3.137826906361263e-07, + "loss": 0.6456, + "step": 29040 + }, + { + "epoch": 0.8900637489273017, + "grad_norm": 2.1534825086166496, + "learning_rate": 3.136096597899374e-07, + "loss": 0.6616, + "step": 29041 + }, + { + "epoch": 0.8900943974500429, + "grad_norm": 1.7485997858353872, + "learning_rate": 3.134366751199508e-07, + "loss": 0.4567, + "step": 29042 + }, + { + "epoch": 0.8901250459727841, + "grad_norm": 1.942530294622304, + "learning_rate": 3.132637366278718e-07, + "loss": 0.6234, + "step": 29043 + }, + { + "epoch": 0.8901556944955253, + "grad_norm": 2.0840710337432444, + "learning_rate": 3.130908443154046e-07, + "loss": 0.6145, + "step": 29044 + }, + { + "epoch": 0.8901863430182665, + "grad_norm": 2.003964987378896, + "learning_rate": 3.129179981842523e-07, + "loss": 0.6154, + "step": 29045 + }, + { + "epoch": 0.8902169915410078, + "grad_norm": 1.8042121574256427, + "learning_rate": 3.1274519823611847e-07, + "loss": 0.5217, + "step": 29046 + }, + { + "epoch": 0.8902476400637489, + "grad_norm": 1.696645716707758, + "learning_rate": 3.125724444727052e-07, + "loss": 0.4659, + "step": 29047 + }, + { + "epoch": 0.8902782885864902, + "grad_norm": 1.7879813558286557, + "learning_rate": 3.123997368957149e-07, + "loss": 0.5545, + "step": 29048 + }, + { + "epoch": 0.8903089371092313, + "grad_norm": 1.7967369302949938, + "learning_rate": 3.122270755068502e-07, + "loss": 0.5414, + "step": 29049 + }, + { + "epoch": 0.8903395856319726, + "grad_norm": 1.8356361261350387, + "learning_rate": 3.1205446030781016e-07, + "loss": 0.527, + "step": 29050 + }, + { + "epoch": 0.8903702341547137, + "grad_norm": 0.8025951028634561, + "learning_rate": 3.1188189130029747e-07, + "loss": 0.3852, + "step": 29051 + }, + { + "epoch": 0.890400882677455, + "grad_norm": 2.113555099035457, + "learning_rate": 3.1170936848601285e-07, + "loss": 0.6082, + "step": 29052 + }, + { + "epoch": 0.8904315312001961, + "grad_norm": 2.229597634416197, + "learning_rate": 3.1153689186665446e-07, + "loss": 0.6389, + "step": 29053 + }, + { + "epoch": 0.8904621797229374, + "grad_norm": 1.779915903584385, + "learning_rate": 3.1136446144392376e-07, + "loss": 0.6339, + "step": 29054 + }, + { + "epoch": 0.8904928282456785, + "grad_norm": 1.873885063833857, + "learning_rate": 3.1119207721951704e-07, + "loss": 0.6297, + "step": 29055 + }, + { + "epoch": 0.8905234767684198, + "grad_norm": 0.778540263541164, + "learning_rate": 3.1101973919513526e-07, + "loss": 0.4071, + "step": 29056 + }, + { + "epoch": 0.890554125291161, + "grad_norm": 1.900882466875336, + "learning_rate": 3.108474473724765e-07, + "loss": 0.6163, + "step": 29057 + }, + { + "epoch": 0.8905847738139022, + "grad_norm": 2.312170950173978, + "learning_rate": 3.1067520175323605e-07, + "loss": 0.5841, + "step": 29058 + }, + { + "epoch": 0.8906154223366434, + "grad_norm": 0.7882235436507042, + "learning_rate": 3.105030023391137e-07, + "loss": 0.4023, + "step": 29059 + }, + { + "epoch": 0.8906460708593846, + "grad_norm": 1.8420308313375806, + "learning_rate": 3.103308491318052e-07, + "loss": 0.5237, + "step": 29060 + }, + { + "epoch": 0.8906767193821258, + "grad_norm": 2.2746020298710885, + "learning_rate": 
3.10158742133006e-07, + "loss": 0.6106, + "step": 29061 + }, + { + "epoch": 0.8907073679048669, + "grad_norm": 1.7930235891997015, + "learning_rate": 3.0998668134441304e-07, + "loss": 0.531, + "step": 29062 + }, + { + "epoch": 0.8907380164276082, + "grad_norm": 1.6758337130074699, + "learning_rate": 3.098146667677215e-07, + "loss": 0.5795, + "step": 29063 + }, + { + "epoch": 0.8907686649503493, + "grad_norm": 2.0439412275828377, + "learning_rate": 3.096426984046258e-07, + "loss": 0.6527, + "step": 29064 + }, + { + "epoch": 0.8907993134730906, + "grad_norm": 0.7809036132738257, + "learning_rate": 3.0947077625682165e-07, + "loss": 0.3947, + "step": 29065 + }, + { + "epoch": 0.8908299619958318, + "grad_norm": 2.047623762514005, + "learning_rate": 3.09298900326e-07, + "loss": 0.5016, + "step": 29066 + }, + { + "epoch": 0.890860610518573, + "grad_norm": 1.8739739279239578, + "learning_rate": 3.0912707061385825e-07, + "loss": 0.5731, + "step": 29067 + }, + { + "epoch": 0.8908912590413142, + "grad_norm": 1.8912788588067229, + "learning_rate": 3.0895528712208745e-07, + "loss": 0.604, + "step": 29068 + }, + { + "epoch": 0.8909219075640554, + "grad_norm": 1.8012962057587318, + "learning_rate": 3.0878354985238e-07, + "loss": 0.5528, + "step": 29069 + }, + { + "epoch": 0.8909525560867966, + "grad_norm": 5.660167287507456, + "learning_rate": 3.0861185880642854e-07, + "loss": 0.535, + "step": 29070 + }, + { + "epoch": 0.8909832046095378, + "grad_norm": 0.8435329984968566, + "learning_rate": 3.084402139859249e-07, + "loss": 0.4062, + "step": 29071 + }, + { + "epoch": 0.891013853132279, + "grad_norm": 1.75176519682702, + "learning_rate": 3.082686153925601e-07, + "loss": 0.5145, + "step": 29072 + }, + { + "epoch": 0.8910445016550202, + "grad_norm": 1.9622440558930174, + "learning_rate": 3.08097063028025e-07, + "loss": 0.6916, + "step": 29073 + }, + { + "epoch": 0.8910751501777614, + "grad_norm": 1.9037043542293641, + "learning_rate": 3.0792555689401093e-07, + "loss": 0.6755, + "step": 29074 + }, + { + "epoch": 0.8911057987005027, + "grad_norm": 0.8182673684337977, + "learning_rate": 3.0775409699220547e-07, + "loss": 0.3975, + "step": 29075 + }, + { + "epoch": 0.8911364472232438, + "grad_norm": 0.7957894712751551, + "learning_rate": 3.0758268332430064e-07, + "loss": 0.3935, + "step": 29076 + }, + { + "epoch": 0.8911670957459851, + "grad_norm": 1.9131489390268377, + "learning_rate": 3.074113158919828e-07, + "loss": 0.5719, + "step": 29077 + }, + { + "epoch": 0.8911977442687262, + "grad_norm": 1.731359894890742, + "learning_rate": 3.0723999469694344e-07, + "loss": 0.5622, + "step": 29078 + }, + { + "epoch": 0.8912283927914675, + "grad_norm": 1.8324879652583121, + "learning_rate": 3.0706871974086893e-07, + "loss": 0.6708, + "step": 29079 + }, + { + "epoch": 0.8912590413142086, + "grad_norm": 1.8481956450359893, + "learning_rate": 3.068974910254463e-07, + "loss": 0.6312, + "step": 29080 + }, + { + "epoch": 0.8912896898369499, + "grad_norm": 2.013728598186218, + "learning_rate": 3.0672630855236363e-07, + "loss": 0.6066, + "step": 29081 + }, + { + "epoch": 0.891320338359691, + "grad_norm": 1.7653955724070913, + "learning_rate": 3.065551723233079e-07, + "loss": 0.6142, + "step": 29082 + }, + { + "epoch": 0.8913509868824323, + "grad_norm": 2.0579864933538126, + "learning_rate": 3.063840823399644e-07, + "loss": 0.5995, + "step": 29083 + }, + { + "epoch": 0.8913816354051735, + "grad_norm": 1.8819989620211557, + "learning_rate": 3.062130386040196e-07, + "loss": 0.5736, + "step": 29084 + }, + { + "epoch": 
0.8914122839279147, + "grad_norm": 0.760247279150852, + "learning_rate": 3.060420411171583e-07, + "loss": 0.3878, + "step": 29085 + }, + { + "epoch": 0.8914429324506559, + "grad_norm": 1.7399464088786423, + "learning_rate": 3.0587108988106684e-07, + "loss": 0.5433, + "step": 29086 + }, + { + "epoch": 0.8914735809733971, + "grad_norm": 1.8215361117418338, + "learning_rate": 3.0570018489742836e-07, + "loss": 0.5644, + "step": 29087 + }, + { + "epoch": 0.8915042294961383, + "grad_norm": 1.8112935206004073, + "learning_rate": 3.05529326167926e-07, + "loss": 0.5636, + "step": 29088 + }, + { + "epoch": 0.8915348780188795, + "grad_norm": 0.7722763693873629, + "learning_rate": 3.053585136942455e-07, + "loss": 0.3922, + "step": 29089 + }, + { + "epoch": 0.8915655265416207, + "grad_norm": 1.8068013609766076, + "learning_rate": 3.0518774747806844e-07, + "loss": 0.5822, + "step": 29090 + }, + { + "epoch": 0.891596175064362, + "grad_norm": 1.7437084341977913, + "learning_rate": 3.0501702752107733e-07, + "loss": 0.5051, + "step": 29091 + }, + { + "epoch": 0.8916268235871031, + "grad_norm": 1.8893678939385854, + "learning_rate": 3.0484635382495465e-07, + "loss": 0.5418, + "step": 29092 + }, + { + "epoch": 0.8916574721098443, + "grad_norm": 2.1250981240522115, + "learning_rate": 3.0467572639138243e-07, + "loss": 0.5524, + "step": 29093 + }, + { + "epoch": 0.8916881206325855, + "grad_norm": 1.862285832252163, + "learning_rate": 3.045051452220421e-07, + "loss": 0.4989, + "step": 29094 + }, + { + "epoch": 0.8917187691553267, + "grad_norm": 1.7210389435392401, + "learning_rate": 3.043346103186129e-07, + "loss": 0.4968, + "step": 29095 + }, + { + "epoch": 0.8917494176780679, + "grad_norm": 1.8843111826785408, + "learning_rate": 3.0416412168277675e-07, + "loss": 0.4861, + "step": 29096 + }, + { + "epoch": 0.8917800662008091, + "grad_norm": 1.8439909751909624, + "learning_rate": 3.039936793162135e-07, + "loss": 0.5393, + "step": 29097 + }, + { + "epoch": 0.8918107147235503, + "grad_norm": 1.8607791386070653, + "learning_rate": 3.038232832206023e-07, + "loss": 0.5473, + "step": 29098 + }, + { + "epoch": 0.8918413632462915, + "grad_norm": 1.851444998668086, + "learning_rate": 3.036529333976207e-07, + "loss": 0.5557, + "step": 29099 + }, + { + "epoch": 0.8918720117690327, + "grad_norm": 1.940725448335634, + "learning_rate": 3.034826298489485e-07, + "loss": 0.5947, + "step": 29100 + }, + { + "epoch": 0.8919026602917739, + "grad_norm": 1.8156469737731344, + "learning_rate": 3.033123725762643e-07, + "loss": 0.536, + "step": 29101 + }, + { + "epoch": 0.8919333088145152, + "grad_norm": 1.7901395963721274, + "learning_rate": 3.0314216158124465e-07, + "loss": 0.5151, + "step": 29102 + }, + { + "epoch": 0.8919639573372563, + "grad_norm": 1.8531344533473035, + "learning_rate": 3.0297199686556646e-07, + "loss": 0.6332, + "step": 29103 + }, + { + "epoch": 0.8919946058599976, + "grad_norm": 1.8693745806484297, + "learning_rate": 3.0280187843090723e-07, + "loss": 0.631, + "step": 29104 + }, + { + "epoch": 0.8920252543827387, + "grad_norm": 1.775778540585314, + "learning_rate": 3.026318062789441e-07, + "loss": 0.5561, + "step": 29105 + }, + { + "epoch": 0.89205590290548, + "grad_norm": 0.7879474360736394, + "learning_rate": 3.024617804113511e-07, + "loss": 0.38, + "step": 29106 + }, + { + "epoch": 0.8920865514282211, + "grad_norm": 1.8397481148291779, + "learning_rate": 3.022918008298026e-07, + "loss": 0.6542, + "step": 29107 + }, + { + "epoch": 0.8921171999509624, + "grad_norm": 1.9532605321943324, + "learning_rate": 
3.021218675359766e-07, + "loss": 0.6371, + "step": 29108 + }, + { + "epoch": 0.8921478484737035, + "grad_norm": 2.12472425838271, + "learning_rate": 3.0195198053154574e-07, + "loss": 0.5302, + "step": 29109 + }, + { + "epoch": 0.8921784969964448, + "grad_norm": 2.151523792697309, + "learning_rate": 3.017821398181836e-07, + "loss": 0.6679, + "step": 29110 + }, + { + "epoch": 0.892209145519186, + "grad_norm": 1.7802675762365676, + "learning_rate": 3.016123453975639e-07, + "loss": 0.5777, + "step": 29111 + }, + { + "epoch": 0.8922397940419272, + "grad_norm": 2.019278242405207, + "learning_rate": 3.0144259727135974e-07, + "loss": 0.6369, + "step": 29112 + }, + { + "epoch": 0.8922704425646684, + "grad_norm": 0.7978264733601866, + "learning_rate": 3.0127289544124473e-07, + "loss": 0.3975, + "step": 29113 + }, + { + "epoch": 0.8923010910874096, + "grad_norm": 0.7843679513968835, + "learning_rate": 3.0110323990888924e-07, + "loss": 0.4109, + "step": 29114 + }, + { + "epoch": 0.8923317396101508, + "grad_norm": 2.0003811489430188, + "learning_rate": 3.0093363067596635e-07, + "loss": 0.5375, + "step": 29115 + }, + { + "epoch": 0.892362388132892, + "grad_norm": 2.3979415505086807, + "learning_rate": 3.00764067744147e-07, + "loss": 0.6925, + "step": 29116 + }, + { + "epoch": 0.8923930366556332, + "grad_norm": 2.046790509762005, + "learning_rate": 3.005945511151015e-07, + "loss": 0.5894, + "step": 29117 + }, + { + "epoch": 0.8924236851783744, + "grad_norm": 2.1330396100493947, + "learning_rate": 3.0042508079049905e-07, + "loss": 0.5789, + "step": 29118 + }, + { + "epoch": 0.8924543337011156, + "grad_norm": 1.695988361021494, + "learning_rate": 3.002556567720122e-07, + "loss": 0.5981, + "step": 29119 + }, + { + "epoch": 0.8924849822238569, + "grad_norm": 1.8576940022463726, + "learning_rate": 3.0008627906130796e-07, + "loss": 0.4326, + "step": 29120 + }, + { + "epoch": 0.892515630746598, + "grad_norm": 1.8072933178672075, + "learning_rate": 2.999169476600572e-07, + "loss": 0.5781, + "step": 29121 + }, + { + "epoch": 0.8925462792693393, + "grad_norm": 1.9878373394393731, + "learning_rate": 2.997476625699258e-07, + "loss": 0.5775, + "step": 29122 + }, + { + "epoch": 0.8925769277920804, + "grad_norm": 0.7551825418678626, + "learning_rate": 2.9957842379258417e-07, + "loss": 0.3779, + "step": 29123 + }, + { + "epoch": 0.8926075763148216, + "grad_norm": 2.1106463596058616, + "learning_rate": 2.9940923132969923e-07, + "loss": 0.4977, + "step": 29124 + }, + { + "epoch": 0.8926382248375628, + "grad_norm": 1.9490424406592117, + "learning_rate": 2.992400851829375e-07, + "loss": 0.5468, + "step": 29125 + }, + { + "epoch": 0.892668873360304, + "grad_norm": 2.0190871609522563, + "learning_rate": 2.990709853539653e-07, + "loss": 0.5203, + "step": 29126 + }, + { + "epoch": 0.8926995218830452, + "grad_norm": 1.849929607364662, + "learning_rate": 2.9890193184445085e-07, + "loss": 0.4791, + "step": 29127 + }, + { + "epoch": 0.8927301704057864, + "grad_norm": 1.7776841157213055, + "learning_rate": 2.987329246560583e-07, + "loss": 0.452, + "step": 29128 + }, + { + "epoch": 0.8927608189285277, + "grad_norm": 1.7629829775079375, + "learning_rate": 2.985639637904514e-07, + "loss": 0.5082, + "step": 29129 + }, + { + "epoch": 0.8927914674512688, + "grad_norm": 2.004821386564947, + "learning_rate": 2.9839504924929875e-07, + "loss": 0.7131, + "step": 29130 + }, + { + "epoch": 0.8928221159740101, + "grad_norm": 2.0175278610542002, + "learning_rate": 2.9822618103426127e-07, + "loss": 0.6452, + "step": 29131 + }, + { + "epoch": 
0.8928527644967512, + "grad_norm": 1.743905806220782, + "learning_rate": 2.980573591470054e-07, + "loss": 0.5293, + "step": 29132 + }, + { + "epoch": 0.8928834130194925, + "grad_norm": 2.0840923914410587, + "learning_rate": 2.97888583589192e-07, + "loss": 0.606, + "step": 29133 + }, + { + "epoch": 0.8929140615422336, + "grad_norm": 2.0929372379167317, + "learning_rate": 2.9771985436248594e-07, + "loss": 0.6084, + "step": 29134 + }, + { + "epoch": 0.8929447100649749, + "grad_norm": 1.9062881504459506, + "learning_rate": 2.975511714685503e-07, + "loss": 0.5753, + "step": 29135 + }, + { + "epoch": 0.892975358587716, + "grad_norm": 1.7931682456394555, + "learning_rate": 2.9738253490904477e-07, + "loss": 0.4972, + "step": 29136 + }, + { + "epoch": 0.8930060071104573, + "grad_norm": 1.7849139978955413, + "learning_rate": 2.9721394468563316e-07, + "loss": 0.5459, + "step": 29137 + }, + { + "epoch": 0.8930366556331985, + "grad_norm": 1.731882349280884, + "learning_rate": 2.970454007999757e-07, + "loss": 0.5928, + "step": 29138 + }, + { + "epoch": 0.8930673041559397, + "grad_norm": 1.9459694425965892, + "learning_rate": 2.9687690325373273e-07, + "loss": 0.6124, + "step": 29139 + }, + { + "epoch": 0.8930979526786809, + "grad_norm": 2.2927261224590576, + "learning_rate": 2.9670845204856523e-07, + "loss": 0.6055, + "step": 29140 + }, + { + "epoch": 0.8931286012014221, + "grad_norm": 2.113591511800739, + "learning_rate": 2.9654004718613347e-07, + "loss": 0.4909, + "step": 29141 + }, + { + "epoch": 0.8931592497241633, + "grad_norm": 1.7317605629901862, + "learning_rate": 2.9637168866809505e-07, + "loss": 0.5824, + "step": 29142 + }, + { + "epoch": 0.8931898982469045, + "grad_norm": 0.7935615034272031, + "learning_rate": 2.962033764961109e-07, + "loss": 0.4156, + "step": 29143 + }, + { + "epoch": 0.8932205467696457, + "grad_norm": 1.9429123687689625, + "learning_rate": 2.960351106718373e-07, + "loss": 0.592, + "step": 29144 + }, + { + "epoch": 0.893251195292387, + "grad_norm": 0.7995429373810334, + "learning_rate": 2.958668911969337e-07, + "loss": 0.3921, + "step": 29145 + }, + { + "epoch": 0.8932818438151281, + "grad_norm": 1.9974081335530944, + "learning_rate": 2.9569871807305806e-07, + "loss": 0.575, + "step": 29146 + }, + { + "epoch": 0.8933124923378694, + "grad_norm": 1.8534141453188069, + "learning_rate": 2.9553059130186526e-07, + "loss": 0.4686, + "step": 29147 + }, + { + "epoch": 0.8933431408606105, + "grad_norm": 2.0681246870041665, + "learning_rate": 2.9536251088501387e-07, + "loss": 0.6325, + "step": 29148 + }, + { + "epoch": 0.8933737893833518, + "grad_norm": 0.8318179429243185, + "learning_rate": 2.951944768241594e-07, + "loss": 0.4049, + "step": 29149 + }, + { + "epoch": 0.8934044379060929, + "grad_norm": 1.9459011675732532, + "learning_rate": 2.9502648912095756e-07, + "loss": 0.6117, + "step": 29150 + }, + { + "epoch": 0.8934350864288342, + "grad_norm": 1.8120695544157384, + "learning_rate": 2.948585477770638e-07, + "loss": 0.6158, + "step": 29151 + }, + { + "epoch": 0.8934657349515753, + "grad_norm": 1.6901622106541945, + "learning_rate": 2.946906527941318e-07, + "loss": 0.5333, + "step": 29152 + }, + { + "epoch": 0.8934963834743166, + "grad_norm": 1.9849151021722908, + "learning_rate": 2.945228041738174e-07, + "loss": 0.6179, + "step": 29153 + }, + { + "epoch": 0.8935270319970577, + "grad_norm": 1.8234653085981025, + "learning_rate": 2.9435500191777377e-07, + "loss": 0.5916, + "step": 29154 + }, + { + "epoch": 0.8935576805197989, + "grad_norm": 2.32639326314753, + "learning_rate": 
2.94187246027654e-07, + "loss": 0.575, + "step": 29155 + }, + { + "epoch": 0.8935883290425402, + "grad_norm": 1.8152939640847385, + "learning_rate": 2.9401953650511073e-07, + "loss": 0.5808, + "step": 29156 + }, + { + "epoch": 0.8936189775652813, + "grad_norm": 1.8973742186914775, + "learning_rate": 2.9385187335179864e-07, + "loss": 0.5113, + "step": 29157 + }, + { + "epoch": 0.8936496260880226, + "grad_norm": 1.8170855523471412, + "learning_rate": 2.936842565693665e-07, + "loss": 0.5646, + "step": 29158 + }, + { + "epoch": 0.8936802746107637, + "grad_norm": 1.8107254084085909, + "learning_rate": 2.935166861594685e-07, + "loss": 0.5518, + "step": 29159 + }, + { + "epoch": 0.893710923133505, + "grad_norm": 0.7779192417365765, + "learning_rate": 2.9334916212375495e-07, + "loss": 0.3755, + "step": 29160 + }, + { + "epoch": 0.8937415716562461, + "grad_norm": 1.818033833948246, + "learning_rate": 2.9318168446387574e-07, + "loss": 0.5613, + "step": 29161 + }, + { + "epoch": 0.8937722201789874, + "grad_norm": 1.8466878349879683, + "learning_rate": 2.9301425318148223e-07, + "loss": 0.5513, + "step": 29162 + }, + { + "epoch": 0.8938028687017285, + "grad_norm": 1.895656318863321, + "learning_rate": 2.9284686827822316e-07, + "loss": 0.5464, + "step": 29163 + }, + { + "epoch": 0.8938335172244698, + "grad_norm": 0.8124790528849958, + "learning_rate": 2.926795297557483e-07, + "loss": 0.3996, + "step": 29164 + }, + { + "epoch": 0.893864165747211, + "grad_norm": 2.0808319135081925, + "learning_rate": 2.925122376157069e-07, + "loss": 0.617, + "step": 29165 + }, + { + "epoch": 0.8938948142699522, + "grad_norm": 2.065602852269105, + "learning_rate": 2.9234499185974594e-07, + "loss": 0.6226, + "step": 29166 + }, + { + "epoch": 0.8939254627926934, + "grad_norm": 2.0209563888364475, + "learning_rate": 2.9217779248951474e-07, + "loss": 0.5325, + "step": 29167 + }, + { + "epoch": 0.8939561113154346, + "grad_norm": 1.7494424696336512, + "learning_rate": 2.920106395066613e-07, + "loss": 0.4379, + "step": 29168 + }, + { + "epoch": 0.8939867598381758, + "grad_norm": 0.7816735913288129, + "learning_rate": 2.918435329128305e-07, + "loss": 0.387, + "step": 29169 + }, + { + "epoch": 0.894017408360917, + "grad_norm": 1.839286701142163, + "learning_rate": 2.916764727096699e-07, + "loss": 0.5455, + "step": 29170 + }, + { + "epoch": 0.8940480568836582, + "grad_norm": 1.9623114928759546, + "learning_rate": 2.915094588988265e-07, + "loss": 0.6433, + "step": 29171 + }, + { + "epoch": 0.8940787054063994, + "grad_norm": 1.902332919442307, + "learning_rate": 2.913424914819446e-07, + "loss": 0.484, + "step": 29172 + }, + { + "epoch": 0.8941093539291406, + "grad_norm": 1.8395367307213528, + "learning_rate": 2.911755704606706e-07, + "loss": 0.531, + "step": 29173 + }, + { + "epoch": 0.8941400024518819, + "grad_norm": 1.932102156047284, + "learning_rate": 2.9100869583664757e-07, + "loss": 0.6666, + "step": 29174 + }, + { + "epoch": 0.894170650974623, + "grad_norm": 1.7579167944973295, + "learning_rate": 2.9084186761152044e-07, + "loss": 0.5993, + "step": 29175 + }, + { + "epoch": 0.8942012994973643, + "grad_norm": 1.6977004324811318, + "learning_rate": 2.906750857869345e-07, + "loss": 0.4819, + "step": 29176 + }, + { + "epoch": 0.8942319480201054, + "grad_norm": 1.607898155034489, + "learning_rate": 2.905083503645312e-07, + "loss": 0.5302, + "step": 29177 + }, + { + "epoch": 0.8942625965428467, + "grad_norm": 2.065490854046644, + "learning_rate": 2.9034166134595365e-07, + "loss": 0.5915, + "step": 29178 + }, + { + "epoch": 
0.8942932450655878, + "grad_norm": 0.8337618925793387, + "learning_rate": 2.901750187328456e-07, + "loss": 0.3922, + "step": 29179 + }, + { + "epoch": 0.8943238935883291, + "grad_norm": 1.852287106284008, + "learning_rate": 2.900084225268474e-07, + "loss": 0.5532, + "step": 29180 + }, + { + "epoch": 0.8943545421110702, + "grad_norm": 1.99731106632739, + "learning_rate": 2.8984187272960154e-07, + "loss": 0.5493, + "step": 29181 + }, + { + "epoch": 0.8943851906338115, + "grad_norm": 2.0384550865378626, + "learning_rate": 2.896753693427495e-07, + "loss": 0.5482, + "step": 29182 + }, + { + "epoch": 0.8944158391565527, + "grad_norm": 1.7431788271994622, + "learning_rate": 2.8950891236793065e-07, + "loss": 0.6085, + "step": 29183 + }, + { + "epoch": 0.8944464876792939, + "grad_norm": 1.965659846171309, + "learning_rate": 2.893425018067864e-07, + "loss": 0.6226, + "step": 29184 + }, + { + "epoch": 0.8944771362020351, + "grad_norm": 1.7194839635842138, + "learning_rate": 2.891761376609542e-07, + "loss": 0.5803, + "step": 29185 + }, + { + "epoch": 0.8945077847247762, + "grad_norm": 2.0722777436861, + "learning_rate": 2.890098199320762e-07, + "loss": 0.568, + "step": 29186 + }, + { + "epoch": 0.8945384332475175, + "grad_norm": 1.7574609629894091, + "learning_rate": 2.888435486217894e-07, + "loss": 0.5745, + "step": 29187 + }, + { + "epoch": 0.8945690817702586, + "grad_norm": 1.7025819730196563, + "learning_rate": 2.8867732373173254e-07, + "loss": 0.5256, + "step": 29188 + }, + { + "epoch": 0.8945997302929999, + "grad_norm": 2.150190023216781, + "learning_rate": 2.885111452635431e-07, + "loss": 0.5359, + "step": 29189 + }, + { + "epoch": 0.894630378815741, + "grad_norm": 1.8895932279435248, + "learning_rate": 2.883450132188598e-07, + "loss": 0.4764, + "step": 29190 + }, + { + "epoch": 0.8946610273384823, + "grad_norm": 1.8295830356906857, + "learning_rate": 2.881789275993174e-07, + "loss": 0.5367, + "step": 29191 + }, + { + "epoch": 0.8946916758612234, + "grad_norm": 2.0132890986415335, + "learning_rate": 2.8801288840655415e-07, + "loss": 0.5814, + "step": 29192 + }, + { + "epoch": 0.8947223243839647, + "grad_norm": 1.6692377853725935, + "learning_rate": 2.878468956422054e-07, + "loss": 0.4817, + "step": 29193 + }, + { + "epoch": 0.8947529729067059, + "grad_norm": 1.903340151074622, + "learning_rate": 2.876809493079075e-07, + "loss": 0.5632, + "step": 29194 + }, + { + "epoch": 0.8947836214294471, + "grad_norm": 1.8148722186912343, + "learning_rate": 2.8751504940529474e-07, + "loss": 0.5567, + "step": 29195 + }, + { + "epoch": 0.8948142699521883, + "grad_norm": 1.8339618281746743, + "learning_rate": 2.873491959360014e-07, + "loss": 0.5302, + "step": 29196 + }, + { + "epoch": 0.8948449184749295, + "grad_norm": 2.004410379949535, + "learning_rate": 2.8718338890166177e-07, + "loss": 0.5647, + "step": 29197 + }, + { + "epoch": 0.8948755669976707, + "grad_norm": 1.7552343209555843, + "learning_rate": 2.8701762830391167e-07, + "loss": 0.5813, + "step": 29198 + }, + { + "epoch": 0.8949062155204119, + "grad_norm": 1.7657694078515431, + "learning_rate": 2.8685191414438096e-07, + "loss": 0.5854, + "step": 29199 + }, + { + "epoch": 0.8949368640431531, + "grad_norm": 2.05976046332123, + "learning_rate": 2.86686246424705e-07, + "loss": 0.6052, + "step": 29200 + }, + { + "epoch": 0.8949675125658944, + "grad_norm": 2.1476777095853268, + "learning_rate": 2.865206251465158e-07, + "loss": 0.5821, + "step": 29201 + }, + { + "epoch": 0.8949981610886355, + "grad_norm": 1.817901783728755, + "learning_rate": 
2.863550503114443e-07, + "loss": 0.5035, + "step": 29202 + }, + { + "epoch": 0.8950288096113768, + "grad_norm": 2.0068907009382655, + "learning_rate": 2.861895219211236e-07, + "loss": 0.6799, + "step": 29203 + }, + { + "epoch": 0.8950594581341179, + "grad_norm": 1.6164625222059577, + "learning_rate": 2.860240399771813e-07, + "loss": 0.452, + "step": 29204 + }, + { + "epoch": 0.8950901066568592, + "grad_norm": 1.9457819500615179, + "learning_rate": 2.8585860448125226e-07, + "loss": 0.5259, + "step": 29205 + }, + { + "epoch": 0.8951207551796003, + "grad_norm": 1.8595995052565346, + "learning_rate": 2.856932154349645e-07, + "loss": 0.5104, + "step": 29206 + }, + { + "epoch": 0.8951514037023416, + "grad_norm": 1.9350729220612155, + "learning_rate": 2.855278728399469e-07, + "loss": 0.518, + "step": 29207 + }, + { + "epoch": 0.8951820522250827, + "grad_norm": 1.8947473120231615, + "learning_rate": 2.853625766978296e-07, + "loss": 0.6532, + "step": 29208 + }, + { + "epoch": 0.895212700747824, + "grad_norm": 2.168787595011027, + "learning_rate": 2.8519732701024204e-07, + "loss": 0.6003, + "step": 29209 + }, + { + "epoch": 0.8952433492705651, + "grad_norm": 1.8387736345263717, + "learning_rate": 2.850321237788101e-07, + "loss": 0.5956, + "step": 29210 + }, + { + "epoch": 0.8952739977933064, + "grad_norm": 1.8707079157983724, + "learning_rate": 2.8486696700516345e-07, + "loss": 0.5085, + "step": 29211 + }, + { + "epoch": 0.8953046463160476, + "grad_norm": 2.1716392442499823, + "learning_rate": 2.8470185669092934e-07, + "loss": 0.4722, + "step": 29212 + }, + { + "epoch": 0.8953352948387888, + "grad_norm": 1.8736021854179894, + "learning_rate": 2.8453679283773463e-07, + "loss": 0.5882, + "step": 29213 + }, + { + "epoch": 0.89536594336153, + "grad_norm": 2.00389006194049, + "learning_rate": 2.8437177544720526e-07, + "loss": 0.5301, + "step": 29214 + }, + { + "epoch": 0.8953965918842712, + "grad_norm": 1.673989531671404, + "learning_rate": 2.8420680452096616e-07, + "loss": 0.5947, + "step": 29215 + }, + { + "epoch": 0.8954272404070124, + "grad_norm": 0.8003917596887395, + "learning_rate": 2.8404188006064537e-07, + "loss": 0.3902, + "step": 29216 + }, + { + "epoch": 0.8954578889297535, + "grad_norm": 1.918525527712599, + "learning_rate": 2.838770020678666e-07, + "loss": 0.6257, + "step": 29217 + }, + { + "epoch": 0.8954885374524948, + "grad_norm": 1.8799803837274798, + "learning_rate": 2.837121705442536e-07, + "loss": 0.519, + "step": 29218 + }, + { + "epoch": 0.8955191859752359, + "grad_norm": 2.203779230514419, + "learning_rate": 2.835473854914311e-07, + "loss": 0.5882, + "step": 29219 + }, + { + "epoch": 0.8955498344979772, + "grad_norm": 2.116306933600119, + "learning_rate": 2.833826469110235e-07, + "loss": 0.61, + "step": 29220 + }, + { + "epoch": 0.8955804830207184, + "grad_norm": 1.9546660465373877, + "learning_rate": 2.832179548046537e-07, + "loss": 0.5491, + "step": 29221 + }, + { + "epoch": 0.8956111315434596, + "grad_norm": 1.8618129396627898, + "learning_rate": 2.830533091739435e-07, + "loss": 0.5051, + "step": 29222 + }, + { + "epoch": 0.8956417800662008, + "grad_norm": 2.1678956073648807, + "learning_rate": 2.828887100205163e-07, + "loss": 0.6605, + "step": 29223 + }, + { + "epoch": 0.895672428588942, + "grad_norm": 0.8360643549609592, + "learning_rate": 2.8272415734599435e-07, + "loss": 0.4122, + "step": 29224 + }, + { + "epoch": 0.8957030771116832, + "grad_norm": 1.8341346823604834, + "learning_rate": 2.825596511519979e-07, + "loss": 0.6539, + "step": 29225 + }, + { + "epoch": 
0.8957337256344244, + "grad_norm": 0.8056317579471671, + "learning_rate": 2.823951914401468e-07, + "loss": 0.3907, + "step": 29226 + }, + { + "epoch": 0.8957643741571656, + "grad_norm": 0.8093602503205974, + "learning_rate": 2.8223077821206425e-07, + "loss": 0.3989, + "step": 29227 + }, + { + "epoch": 0.8957950226799068, + "grad_norm": 1.8867237979490838, + "learning_rate": 2.820664114693694e-07, + "loss": 0.4711, + "step": 29228 + }, + { + "epoch": 0.895825671202648, + "grad_norm": 2.0252698072041206, + "learning_rate": 2.8190209121367996e-07, + "loss": 0.5703, + "step": 29229 + }, + { + "epoch": 0.8958563197253893, + "grad_norm": 1.7193856466041282, + "learning_rate": 2.8173781744661676e-07, + "loss": 0.5544, + "step": 29230 + }, + { + "epoch": 0.8958869682481304, + "grad_norm": 1.9889802632921436, + "learning_rate": 2.8157359016979855e-07, + "loss": 0.5118, + "step": 29231 + }, + { + "epoch": 0.8959176167708717, + "grad_norm": 1.668808547595713, + "learning_rate": 2.8140940938484296e-07, + "loss": 0.5449, + "step": 29232 + }, + { + "epoch": 0.8959482652936128, + "grad_norm": 1.7659792524536948, + "learning_rate": 2.812452750933675e-07, + "loss": 0.5703, + "step": 29233 + }, + { + "epoch": 0.8959789138163541, + "grad_norm": 1.8652856259433372, + "learning_rate": 2.810811872969893e-07, + "loss": 0.5712, + "step": 29234 + }, + { + "epoch": 0.8960095623390952, + "grad_norm": 1.7698872620171506, + "learning_rate": 2.809171459973264e-07, + "loss": 0.5625, + "step": 29235 + }, + { + "epoch": 0.8960402108618365, + "grad_norm": 0.7701898951136432, + "learning_rate": 2.8075315119599487e-07, + "loss": 0.399, + "step": 29236 + }, + { + "epoch": 0.8960708593845776, + "grad_norm": 1.8610504476007268, + "learning_rate": 2.805892028946078e-07, + "loss": 0.5003, + "step": 29237 + }, + { + "epoch": 0.8961015079073189, + "grad_norm": 1.7912553333740155, + "learning_rate": 2.804253010947849e-07, + "loss": 0.5741, + "step": 29238 + }, + { + "epoch": 0.8961321564300601, + "grad_norm": 0.8219311097434789, + "learning_rate": 2.8026144579813786e-07, + "loss": 0.4081, + "step": 29239 + }, + { + "epoch": 0.8961628049528013, + "grad_norm": 2.154717285618533, + "learning_rate": 2.80097637006283e-07, + "loss": 0.621, + "step": 29240 + }, + { + "epoch": 0.8961934534755425, + "grad_norm": 0.798380169657634, + "learning_rate": 2.799338747208336e-07, + "loss": 0.4094, + "step": 29241 + }, + { + "epoch": 0.8962241019982837, + "grad_norm": 1.9715887504809895, + "learning_rate": 2.797701589434032e-07, + "loss": 0.6094, + "step": 29242 + }, + { + "epoch": 0.8962547505210249, + "grad_norm": 1.8808314280050809, + "learning_rate": 2.796064896756057e-07, + "loss": 0.5207, + "step": 29243 + }, + { + "epoch": 0.8962853990437661, + "grad_norm": 1.861445098391788, + "learning_rate": 2.7944286691905244e-07, + "loss": 0.5221, + "step": 29244 + }, + { + "epoch": 0.8963160475665073, + "grad_norm": 2.075949667075406, + "learning_rate": 2.7927929067535664e-07, + "loss": 0.5887, + "step": 29245 + }, + { + "epoch": 0.8963466960892486, + "grad_norm": 0.8273002025690672, + "learning_rate": 2.7911576094613035e-07, + "loss": 0.382, + "step": 29246 + }, + { + "epoch": 0.8963773446119897, + "grad_norm": 1.8375159514839357, + "learning_rate": 2.789522777329839e-07, + "loss": 0.5662, + "step": 29247 + }, + { + "epoch": 0.8964079931347309, + "grad_norm": 1.9381639586596504, + "learning_rate": 2.7878884103752944e-07, + "loss": 0.5428, + "step": 29248 + }, + { + "epoch": 0.8964386416574721, + "grad_norm": 2.0911167386797667, + "learning_rate": 
2.786254508613756e-07, + "loss": 0.653, + "step": 29249 + }, + { + "epoch": 0.8964692901802133, + "grad_norm": 1.7610778312218256, + "learning_rate": 2.7846210720613276e-07, + "loss": 0.5155, + "step": 29250 + }, + { + "epoch": 0.8964999387029545, + "grad_norm": 1.8639204885877811, + "learning_rate": 2.7829881007341184e-07, + "loss": 0.4531, + "step": 29251 + }, + { + "epoch": 0.8965305872256957, + "grad_norm": 2.027183662316618, + "learning_rate": 2.781355594648205e-07, + "loss": 0.657, + "step": 29252 + }, + { + "epoch": 0.8965612357484369, + "grad_norm": 2.05777035754172, + "learning_rate": 2.779723553819674e-07, + "loss": 0.6193, + "step": 29253 + }, + { + "epoch": 0.8965918842711781, + "grad_norm": 1.8689330573246337, + "learning_rate": 2.778091978264613e-07, + "loss": 0.5933, + "step": 29254 + }, + { + "epoch": 0.8966225327939193, + "grad_norm": 1.9019726884111532, + "learning_rate": 2.776460867999098e-07, + "loss": 0.5388, + "step": 29255 + }, + { + "epoch": 0.8966531813166605, + "grad_norm": 1.9551571003733557, + "learning_rate": 2.774830223039182e-07, + "loss": 0.571, + "step": 29256 + }, + { + "epoch": 0.8966838298394018, + "grad_norm": 2.046712632145146, + "learning_rate": 2.7732000434009586e-07, + "loss": 0.6907, + "step": 29257 + }, + { + "epoch": 0.8967144783621429, + "grad_norm": 1.9069053068752349, + "learning_rate": 2.77157032910047e-07, + "loss": 0.5643, + "step": 29258 + }, + { + "epoch": 0.8967451268848842, + "grad_norm": 1.8828504547401932, + "learning_rate": 2.769941080153793e-07, + "loss": 0.5292, + "step": 29259 + }, + { + "epoch": 0.8967757754076253, + "grad_norm": 0.7998760470834524, + "learning_rate": 2.768312296576964e-07, + "loss": 0.3956, + "step": 29260 + }, + { + "epoch": 0.8968064239303666, + "grad_norm": 2.00523741690335, + "learning_rate": 2.7666839783860424e-07, + "loss": 0.4946, + "step": 29261 + }, + { + "epoch": 0.8968370724531077, + "grad_norm": 2.1723409452615687, + "learning_rate": 2.765056125597071e-07, + "loss": 0.5327, + "step": 29262 + }, + { + "epoch": 0.896867720975849, + "grad_norm": 0.7689508956570946, + "learning_rate": 2.7634287382260816e-07, + "loss": 0.3874, + "step": 29263 + }, + { + "epoch": 0.8968983694985901, + "grad_norm": 1.7798843198535963, + "learning_rate": 2.7618018162891116e-07, + "loss": 0.5583, + "step": 29264 + }, + { + "epoch": 0.8969290180213314, + "grad_norm": 1.5834024994371259, + "learning_rate": 2.760175359802203e-07, + "loss": 0.5471, + "step": 29265 + }, + { + "epoch": 0.8969596665440726, + "grad_norm": 2.0531621845932397, + "learning_rate": 2.7585493687813656e-07, + "loss": 0.6031, + "step": 29266 + }, + { + "epoch": 0.8969903150668138, + "grad_norm": 1.8270546921954653, + "learning_rate": 2.7569238432426313e-07, + "loss": 0.5521, + "step": 29267 + }, + { + "epoch": 0.897020963589555, + "grad_norm": 1.8646996140411491, + "learning_rate": 2.755298783202021e-07, + "loss": 0.6155, + "step": 29268 + }, + { + "epoch": 0.8970516121122962, + "grad_norm": 1.9487507606936878, + "learning_rate": 2.7536741886755313e-07, + "loss": 0.5795, + "step": 29269 + }, + { + "epoch": 0.8970822606350374, + "grad_norm": 1.8966039176431155, + "learning_rate": 2.75205005967919e-07, + "loss": 0.4895, + "step": 29270 + }, + { + "epoch": 0.8971129091577786, + "grad_norm": 1.8612731625383419, + "learning_rate": 2.750426396228978e-07, + "loss": 0.5648, + "step": 29271 + }, + { + "epoch": 0.8971435576805198, + "grad_norm": 1.8823334470509954, + "learning_rate": 2.748803198340899e-07, + "loss": 0.585, + "step": 29272 + }, + { + "epoch": 
0.897174206203261, + "grad_norm": 1.9203068679094375, + "learning_rate": 2.747180466030963e-07, + "loss": 0.6031, + "step": 29273 + }, + { + "epoch": 0.8972048547260022, + "grad_norm": 1.9291794146556083, + "learning_rate": 2.745558199315146e-07, + "loss": 0.4811, + "step": 29274 + }, + { + "epoch": 0.8972355032487435, + "grad_norm": 0.792496063387553, + "learning_rate": 2.7439363982094293e-07, + "loss": 0.3765, + "step": 29275 + }, + { + "epoch": 0.8972661517714846, + "grad_norm": 1.9669486477386566, + "learning_rate": 2.7423150627298056e-07, + "loss": 0.5775, + "step": 29276 + }, + { + "epoch": 0.8972968002942259, + "grad_norm": 2.003381533554582, + "learning_rate": 2.740694192892235e-07, + "loss": 0.5233, + "step": 29277 + }, + { + "epoch": 0.897327448816967, + "grad_norm": 1.9926537104690332, + "learning_rate": 2.7390737887126986e-07, + "loss": 0.5596, + "step": 29278 + }, + { + "epoch": 0.8973580973397082, + "grad_norm": 1.9491787297288925, + "learning_rate": 2.737453850207167e-07, + "loss": 0.4122, + "step": 29279 + }, + { + "epoch": 0.8973887458624494, + "grad_norm": 1.7982491644774736, + "learning_rate": 2.7358343773915887e-07, + "loss": 0.5073, + "step": 29280 + }, + { + "epoch": 0.8974193943851906, + "grad_norm": 1.9756738469514707, + "learning_rate": 2.7342153702819284e-07, + "loss": 0.6733, + "step": 29281 + }, + { + "epoch": 0.8974500429079318, + "grad_norm": 1.7489643877741574, + "learning_rate": 2.732596828894141e-07, + "loss": 0.4999, + "step": 29282 + }, + { + "epoch": 0.897480691430673, + "grad_norm": 2.034881915033056, + "learning_rate": 2.730978753244162e-07, + "loss": 0.5787, + "step": 29283 + }, + { + "epoch": 0.8975113399534143, + "grad_norm": 0.7634298626035657, + "learning_rate": 2.7293611433479527e-07, + "loss": 0.3879, + "step": 29284 + }, + { + "epoch": 0.8975419884761554, + "grad_norm": 1.737450889628991, + "learning_rate": 2.7277439992214385e-07, + "loss": 0.5381, + "step": 29285 + }, + { + "epoch": 0.8975726369988967, + "grad_norm": 1.9267205355970907, + "learning_rate": 2.726127320880556e-07, + "loss": 0.5849, + "step": 29286 + }, + { + "epoch": 0.8976032855216378, + "grad_norm": 1.6995518293704426, + "learning_rate": 2.7245111083412436e-07, + "loss": 0.6391, + "step": 29287 + }, + { + "epoch": 0.8976339340443791, + "grad_norm": 1.7584414666834478, + "learning_rate": 2.7228953616194155e-07, + "loss": 0.5935, + "step": 29288 + }, + { + "epoch": 0.8976645825671202, + "grad_norm": 2.537039823902325, + "learning_rate": 2.721280080730992e-07, + "loss": 0.63, + "step": 29289 + }, + { + "epoch": 0.8976952310898615, + "grad_norm": 2.0701506950930852, + "learning_rate": 2.719665265691901e-07, + "loss": 0.4835, + "step": 29290 + }, + { + "epoch": 0.8977258796126026, + "grad_norm": 1.9477853396115545, + "learning_rate": 2.7180509165180337e-07, + "loss": 0.6417, + "step": 29291 + }, + { + "epoch": 0.8977565281353439, + "grad_norm": 1.8131152526203869, + "learning_rate": 2.716437033225322e-07, + "loss": 0.525, + "step": 29292 + }, + { + "epoch": 0.897787176658085, + "grad_norm": 1.6856296247814015, + "learning_rate": 2.7148236158296427e-07, + "loss": 0.448, + "step": 29293 + }, + { + "epoch": 0.8978178251808263, + "grad_norm": 1.8701655018144763, + "learning_rate": 2.7132106643469103e-07, + "loss": 0.558, + "step": 29294 + }, + { + "epoch": 0.8978484737035675, + "grad_norm": 1.9031542420568397, + "learning_rate": 2.711598178793012e-07, + "loss": 0.6732, + "step": 29295 + }, + { + "epoch": 0.8978791222263087, + "grad_norm": 2.1492021011795033, + "learning_rate": 
2.709986159183836e-07, + "loss": 0.6344, + "step": 29296 + }, + { + "epoch": 0.8979097707490499, + "grad_norm": 1.6678756952701714, + "learning_rate": 2.7083746055352635e-07, + "loss": 0.5725, + "step": 29297 + }, + { + "epoch": 0.8979404192717911, + "grad_norm": 1.9628412083601219, + "learning_rate": 2.7067635178631814e-07, + "loss": 0.574, + "step": 29298 + }, + { + "epoch": 0.8979710677945323, + "grad_norm": 1.873675405093256, + "learning_rate": 2.7051528961834493e-07, + "loss": 0.5696, + "step": 29299 + }, + { + "epoch": 0.8980017163172735, + "grad_norm": 0.8024288846762553, + "learning_rate": 2.703542740511961e-07, + "loss": 0.3838, + "step": 29300 + }, + { + "epoch": 0.8980323648400147, + "grad_norm": 1.7355224433481549, + "learning_rate": 2.7019330508645526e-07, + "loss": 0.4907, + "step": 29301 + }, + { + "epoch": 0.898063013362756, + "grad_norm": 1.8566307859569602, + "learning_rate": 2.700323827257106e-07, + "loss": 0.6571, + "step": 29302 + }, + { + "epoch": 0.8980936618854971, + "grad_norm": 2.3232313117382724, + "learning_rate": 2.6987150697054764e-07, + "loss": 0.6111, + "step": 29303 + }, + { + "epoch": 0.8981243104082384, + "grad_norm": 0.7648395926318021, + "learning_rate": 2.6971067782255e-07, + "loss": 0.3634, + "step": 29304 + }, + { + "epoch": 0.8981549589309795, + "grad_norm": 1.9924849663467847, + "learning_rate": 2.695498952833037e-07, + "loss": 0.5269, + "step": 29305 + }, + { + "epoch": 0.8981856074537208, + "grad_norm": 0.8123740995418868, + "learning_rate": 2.693891593543929e-07, + "loss": 0.3767, + "step": 29306 + }, + { + "epoch": 0.8982162559764619, + "grad_norm": 1.8188088965928029, + "learning_rate": 2.6922847003740036e-07, + "loss": 0.5303, + "step": 29307 + }, + { + "epoch": 0.8982469044992032, + "grad_norm": 1.682590903986641, + "learning_rate": 2.6906782733391036e-07, + "loss": 0.415, + "step": 29308 + }, + { + "epoch": 0.8982775530219443, + "grad_norm": 0.8300489150216056, + "learning_rate": 2.689072312455066e-07, + "loss": 0.3821, + "step": 29309 + }, + { + "epoch": 0.8983082015446855, + "grad_norm": 1.8602072525758628, + "learning_rate": 2.687466817737694e-07, + "loss": 0.5352, + "step": 29310 + }, + { + "epoch": 0.8983388500674268, + "grad_norm": 2.0386544483819016, + "learning_rate": 2.6858617892028203e-07, + "loss": 0.5607, + "step": 29311 + }, + { + "epoch": 0.8983694985901679, + "grad_norm": 1.8246708811032046, + "learning_rate": 2.6842572268662436e-07, + "loss": 0.491, + "step": 29312 + }, + { + "epoch": 0.8984001471129092, + "grad_norm": 1.8372061541033682, + "learning_rate": 2.6826531307438066e-07, + "loss": 0.5483, + "step": 29313 + }, + { + "epoch": 0.8984307956356503, + "grad_norm": 1.8309730176317962, + "learning_rate": 2.6810495008512907e-07, + "loss": 0.5778, + "step": 29314 + }, + { + "epoch": 0.8984614441583916, + "grad_norm": 0.8777603287102849, + "learning_rate": 2.679446337204494e-07, + "loss": 0.3877, + "step": 29315 + }, + { + "epoch": 0.8984920926811327, + "grad_norm": 1.968805237942405, + "learning_rate": 2.6778436398192165e-07, + "loss": 0.6061, + "step": 29316 + }, + { + "epoch": 0.898522741203874, + "grad_norm": 1.8762072269043844, + "learning_rate": 2.6762414087112663e-07, + "loss": 0.6021, + "step": 29317 + }, + { + "epoch": 0.8985533897266151, + "grad_norm": 1.7182700929357089, + "learning_rate": 2.6746396438964095e-07, + "loss": 0.5208, + "step": 29318 + }, + { + "epoch": 0.8985840382493564, + "grad_norm": 1.7533900403627338, + "learning_rate": 2.673038345390433e-07, + "loss": 0.5517, + "step": 29319 + }, + { + "epoch": 
0.8986146867720975, + "grad_norm": 2.046476171668499, + "learning_rate": 2.671437513209124e-07, + "loss": 0.5254, + "step": 29320 + }, + { + "epoch": 0.8986453352948388, + "grad_norm": 1.768850135918499, + "learning_rate": 2.669837147368254e-07, + "loss": 0.5731, + "step": 29321 + }, + { + "epoch": 0.89867598381758, + "grad_norm": 1.9428013058419196, + "learning_rate": 2.6682372478835925e-07, + "loss": 0.5828, + "step": 29322 + }, + { + "epoch": 0.8987066323403212, + "grad_norm": 2.4515208256745464, + "learning_rate": 2.666637814770884e-07, + "loss": 0.5708, + "step": 29323 + }, + { + "epoch": 0.8987372808630624, + "grad_norm": 1.8378581144843997, + "learning_rate": 2.6650388480459143e-07, + "loss": 0.6084, + "step": 29324 + }, + { + "epoch": 0.8987679293858036, + "grad_norm": 0.7878784442635957, + "learning_rate": 2.663440347724433e-07, + "loss": 0.3772, + "step": 29325 + }, + { + "epoch": 0.8987985779085448, + "grad_norm": 1.7460663948556883, + "learning_rate": 2.661842313822177e-07, + "loss": 0.5443, + "step": 29326 + }, + { + "epoch": 0.898829226431286, + "grad_norm": 1.8990045374810351, + "learning_rate": 2.660244746354901e-07, + "loss": 0.6426, + "step": 29327 + }, + { + "epoch": 0.8988598749540272, + "grad_norm": 1.9711059818719308, + "learning_rate": 2.6586476453383526e-07, + "loss": 0.5567, + "step": 29328 + }, + { + "epoch": 0.8988905234767685, + "grad_norm": 2.160673210121862, + "learning_rate": 2.657051010788253e-07, + "loss": 0.5737, + "step": 29329 + }, + { + "epoch": 0.8989211719995096, + "grad_norm": 1.6644767954771131, + "learning_rate": 2.6554548427203455e-07, + "loss": 0.6, + "step": 29330 + }, + { + "epoch": 0.8989518205222509, + "grad_norm": 1.8479124204798558, + "learning_rate": 2.65385914115035e-07, + "loss": 0.5889, + "step": 29331 + }, + { + "epoch": 0.898982469044992, + "grad_norm": 0.8140317126315945, + "learning_rate": 2.652263906094005e-07, + "loss": 0.4019, + "step": 29332 + }, + { + "epoch": 0.8990131175677333, + "grad_norm": 2.2741951711784805, + "learning_rate": 2.650669137567019e-07, + "loss": 0.5483, + "step": 29333 + }, + { + "epoch": 0.8990437660904744, + "grad_norm": 0.7557883786851174, + "learning_rate": 2.6490748355850916e-07, + "loss": 0.3746, + "step": 29334 + }, + { + "epoch": 0.8990744146132157, + "grad_norm": 1.974357547962374, + "learning_rate": 2.6474810001639594e-07, + "loss": 0.4755, + "step": 29335 + }, + { + "epoch": 0.8991050631359568, + "grad_norm": 1.837583193974551, + "learning_rate": 2.645887631319311e-07, + "loss": 0.5602, + "step": 29336 + }, + { + "epoch": 0.8991357116586981, + "grad_norm": 1.7507385888559666, + "learning_rate": 2.6442947290668374e-07, + "loss": 0.5224, + "step": 29337 + }, + { + "epoch": 0.8991663601814393, + "grad_norm": 1.824950924997863, + "learning_rate": 2.6427022934222503e-07, + "loss": 0.5685, + "step": 29338 + }, + { + "epoch": 0.8991970087041805, + "grad_norm": 1.9881659815192378, + "learning_rate": 2.64111032440123e-07, + "loss": 0.5144, + "step": 29339 + }, + { + "epoch": 0.8992276572269217, + "grad_norm": 1.7488096480028716, + "learning_rate": 2.6395188220194767e-07, + "loss": 0.5555, + "step": 29340 + }, + { + "epoch": 0.8992583057496628, + "grad_norm": 1.717946652286305, + "learning_rate": 2.6379277862926546e-07, + "loss": 0.5722, + "step": 29341 + }, + { + "epoch": 0.8992889542724041, + "grad_norm": 1.8212245143480394, + "learning_rate": 2.6363372172364453e-07, + "loss": 0.5888, + "step": 29342 + }, + { + "epoch": 0.8993196027951452, + "grad_norm": 2.052400527400419, + "learning_rate": 
2.6347471148665373e-07, + "loss": 0.5815, + "step": 29343 + }, + { + "epoch": 0.8993502513178865, + "grad_norm": 1.763367645974218, + "learning_rate": 2.633157479198578e-07, + "loss": 0.5327, + "step": 29344 + }, + { + "epoch": 0.8993808998406276, + "grad_norm": 0.7888614869862418, + "learning_rate": 2.631568310248234e-07, + "loss": 0.3913, + "step": 29345 + }, + { + "epoch": 0.8994115483633689, + "grad_norm": 1.9705873492127277, + "learning_rate": 2.629979608031169e-07, + "loss": 0.6135, + "step": 29346 + }, + { + "epoch": 0.89944219688611, + "grad_norm": 0.7630558614162967, + "learning_rate": 2.6283913725630326e-07, + "loss": 0.3778, + "step": 29347 + }, + { + "epoch": 0.8994728454088513, + "grad_norm": 2.3946372260914908, + "learning_rate": 2.626803603859479e-07, + "loss": 0.5668, + "step": 29348 + }, + { + "epoch": 0.8995034939315925, + "grad_norm": 1.8172657186923074, + "learning_rate": 2.625216301936151e-07, + "loss": 0.5813, + "step": 29349 + }, + { + "epoch": 0.8995341424543337, + "grad_norm": 0.821854306545918, + "learning_rate": 2.623629466808686e-07, + "loss": 0.4156, + "step": 29350 + }, + { + "epoch": 0.8995647909770749, + "grad_norm": 1.8632299858019898, + "learning_rate": 2.622043098492727e-07, + "loss": 0.4839, + "step": 29351 + }, + { + "epoch": 0.8995954394998161, + "grad_norm": 2.139910118683589, + "learning_rate": 2.620457197003901e-07, + "loss": 0.5439, + "step": 29352 + }, + { + "epoch": 0.8996260880225573, + "grad_norm": 1.721402435809283, + "learning_rate": 2.618871762357816e-07, + "loss": 0.5182, + "step": 29353 + }, + { + "epoch": 0.8996567365452985, + "grad_norm": 2.1044016581994742, + "learning_rate": 2.6172867945701284e-07, + "loss": 0.5617, + "step": 29354 + }, + { + "epoch": 0.8996873850680397, + "grad_norm": 0.8188717152654078, + "learning_rate": 2.615702293656436e-07, + "loss": 0.3729, + "step": 29355 + }, + { + "epoch": 0.899718033590781, + "grad_norm": 2.016287781275154, + "learning_rate": 2.6141182596323423e-07, + "loss": 0.5254, + "step": 29356 + }, + { + "epoch": 0.8997486821135221, + "grad_norm": 0.7875961325613866, + "learning_rate": 2.612534692513469e-07, + "loss": 0.3938, + "step": 29357 + }, + { + "epoch": 0.8997793306362634, + "grad_norm": 0.7997941650222773, + "learning_rate": 2.6109515923154137e-07, + "loss": 0.4051, + "step": 29358 + }, + { + "epoch": 0.8998099791590045, + "grad_norm": 1.7831829053322286, + "learning_rate": 2.6093689590537877e-07, + "loss": 0.592, + "step": 29359 + }, + { + "epoch": 0.8998406276817458, + "grad_norm": 1.8694840207352492, + "learning_rate": 2.6077867927441656e-07, + "loss": 0.5628, + "step": 29360 + }, + { + "epoch": 0.8998712762044869, + "grad_norm": 1.8831997882168001, + "learning_rate": 2.6062050934021476e-07, + "loss": 0.6631, + "step": 29361 + }, + { + "epoch": 0.8999019247272282, + "grad_norm": 1.9508618062023444, + "learning_rate": 2.604623861043326e-07, + "loss": 0.6335, + "step": 29362 + }, + { + "epoch": 0.8999325732499693, + "grad_norm": 1.9294040405434345, + "learning_rate": 2.6030430956832664e-07, + "loss": 0.6309, + "step": 29363 + }, + { + "epoch": 0.8999632217727106, + "grad_norm": 2.0679107499345473, + "learning_rate": 2.6014627973375395e-07, + "loss": 0.6035, + "step": 29364 + }, + { + "epoch": 0.8999938702954517, + "grad_norm": 0.7951516752034937, + "learning_rate": 2.5998829660217383e-07, + "loss": 0.3958, + "step": 29365 + }, + { + "epoch": 0.900024518818193, + "grad_norm": 1.9276448152771783, + "learning_rate": 2.5983036017514174e-07, + "loss": 0.5886, + "step": 29366 + }, + { + "epoch": 
0.9000551673409342, + "grad_norm": 1.8012554251208501, + "learning_rate": 2.596724704542142e-07, + "loss": 0.4521, + "step": 29367 + }, + { + "epoch": 0.9000858158636754, + "grad_norm": 1.9825147341736093, + "learning_rate": 2.59514627440946e-07, + "loss": 0.5854, + "step": 29368 + }, + { + "epoch": 0.9001164643864166, + "grad_norm": 1.9446766477648025, + "learning_rate": 2.5935683113689324e-07, + "loss": 0.5665, + "step": 29369 + }, + { + "epoch": 0.9001471129091578, + "grad_norm": 1.965927979952297, + "learning_rate": 2.5919908154361076e-07, + "loss": 0.5916, + "step": 29370 + }, + { + "epoch": 0.900177761431899, + "grad_norm": 1.6811436851939405, + "learning_rate": 2.5904137866265277e-07, + "loss": 0.4474, + "step": 29371 + }, + { + "epoch": 0.9002084099546401, + "grad_norm": 1.9016487325609823, + "learning_rate": 2.5888372249557256e-07, + "loss": 0.5261, + "step": 29372 + }, + { + "epoch": 0.9002390584773814, + "grad_norm": 2.0781550725894595, + "learning_rate": 2.5872611304392503e-07, + "loss": 0.5881, + "step": 29373 + }, + { + "epoch": 0.9002697070001225, + "grad_norm": 2.034869316094638, + "learning_rate": 2.585685503092611e-07, + "loss": 0.5525, + "step": 29374 + }, + { + "epoch": 0.9003003555228638, + "grad_norm": 1.7596261519306777, + "learning_rate": 2.5841103429313506e-07, + "loss": 0.677, + "step": 29375 + }, + { + "epoch": 0.900331004045605, + "grad_norm": 1.8220812186389388, + "learning_rate": 2.5825356499709853e-07, + "loss": 0.5664, + "step": 29376 + }, + { + "epoch": 0.9003616525683462, + "grad_norm": 2.153720003096516, + "learning_rate": 2.580961424227024e-07, + "loss": 0.5621, + "step": 29377 + }, + { + "epoch": 0.9003923010910874, + "grad_norm": 1.6025251943495533, + "learning_rate": 2.5793876657149886e-07, + "loss": 0.4647, + "step": 29378 + }, + { + "epoch": 0.9004229496138286, + "grad_norm": 1.7066621383945344, + "learning_rate": 2.5778143744503714e-07, + "loss": 0.5864, + "step": 29379 + }, + { + "epoch": 0.9004535981365698, + "grad_norm": 0.7927054019160985, + "learning_rate": 2.5762415504486827e-07, + "loss": 0.4117, + "step": 29380 + }, + { + "epoch": 0.900484246659311, + "grad_norm": 2.071334451455699, + "learning_rate": 2.5746691937254265e-07, + "loss": 0.5881, + "step": 29381 + }, + { + "epoch": 0.9005148951820522, + "grad_norm": 1.7647916858528423, + "learning_rate": 2.573097304296085e-07, + "loss": 0.5176, + "step": 29382 + }, + { + "epoch": 0.9005455437047934, + "grad_norm": 1.803438795526948, + "learning_rate": 2.571525882176146e-07, + "loss": 0.5356, + "step": 29383 + }, + { + "epoch": 0.9005761922275346, + "grad_norm": 1.7245548044178796, + "learning_rate": 2.5699549273811075e-07, + "loss": 0.5674, + "step": 29384 + }, + { + "epoch": 0.9006068407502759, + "grad_norm": 2.0705041420913464, + "learning_rate": 2.56838443992643e-07, + "loss": 0.6318, + "step": 29385 + }, + { + "epoch": 0.900637489273017, + "grad_norm": 2.092674537510572, + "learning_rate": 2.566814419827601e-07, + "loss": 0.6483, + "step": 29386 + }, + { + "epoch": 0.9006681377957583, + "grad_norm": 2.0621977434215943, + "learning_rate": 2.5652448671000916e-07, + "loss": 0.5428, + "step": 29387 + }, + { + "epoch": 0.9006987863184994, + "grad_norm": 1.8116227308457469, + "learning_rate": 2.5636757817593506e-07, + "loss": 0.4958, + "step": 29388 + }, + { + "epoch": 0.9007294348412407, + "grad_norm": 1.9074803282691604, + "learning_rate": 2.5621071638208597e-07, + "loss": 0.4925, + "step": 29389 + }, + { + "epoch": 0.9007600833639818, + "grad_norm": 2.0061463512388675, + "learning_rate": 
2.560539013300051e-07, + "loss": 0.6087, + "step": 29390 + }, + { + "epoch": 0.9007907318867231, + "grad_norm": 1.9445750102424375, + "learning_rate": 2.5589713302123955e-07, + "loss": 0.5615, + "step": 29391 + }, + { + "epoch": 0.9008213804094642, + "grad_norm": 1.9174992446623618, + "learning_rate": 2.557404114573342e-07, + "loss": 0.4917, + "step": 29392 + }, + { + "epoch": 0.9008520289322055, + "grad_norm": 1.9314902552030213, + "learning_rate": 2.555837366398312e-07, + "loss": 0.6029, + "step": 29393 + }, + { + "epoch": 0.9008826774549467, + "grad_norm": 1.806848155555166, + "learning_rate": 2.554271085702759e-07, + "loss": 0.578, + "step": 29394 + }, + { + "epoch": 0.9009133259776879, + "grad_norm": 1.8918266479797128, + "learning_rate": 2.5527052725021204e-07, + "loss": 0.4177, + "step": 29395 + }, + { + "epoch": 0.9009439745004291, + "grad_norm": 2.010895033529152, + "learning_rate": 2.5511399268118076e-07, + "loss": 0.5247, + "step": 29396 + }, + { + "epoch": 0.9009746230231703, + "grad_norm": 1.7905001641923357, + "learning_rate": 2.5495750486472625e-07, + "loss": 0.5693, + "step": 29397 + }, + { + "epoch": 0.9010052715459115, + "grad_norm": 1.7813964860216478, + "learning_rate": 2.5480106380238846e-07, + "loss": 0.6053, + "step": 29398 + }, + { + "epoch": 0.9010359200686527, + "grad_norm": 1.7986376048163764, + "learning_rate": 2.5464466949571e-07, + "loss": 0.5774, + "step": 29399 + }, + { + "epoch": 0.9010665685913939, + "grad_norm": 0.7997195234637732, + "learning_rate": 2.544883219462324e-07, + "loss": 0.3985, + "step": 29400 + }, + { + "epoch": 0.9010972171141352, + "grad_norm": 1.8738077494682301, + "learning_rate": 2.5433202115549503e-07, + "loss": 0.5548, + "step": 29401 + }, + { + "epoch": 0.9011278656368763, + "grad_norm": 1.8106464642648479, + "learning_rate": 2.541757671250389e-07, + "loss": 0.5404, + "step": 29402 + }, + { + "epoch": 0.9011585141596175, + "grad_norm": 0.794015348073414, + "learning_rate": 2.5401955985640323e-07, + "loss": 0.3825, + "step": 29403 + }, + { + "epoch": 0.9011891626823587, + "grad_norm": 1.907340292992794, + "learning_rate": 2.5386339935112694e-07, + "loss": 0.5408, + "step": 29404 + }, + { + "epoch": 0.9012198112050999, + "grad_norm": 1.792771529545859, + "learning_rate": 2.537072856107486e-07, + "loss": 0.5539, + "step": 29405 + }, + { + "epoch": 0.9012504597278411, + "grad_norm": 1.7477001476210643, + "learning_rate": 2.535512186368072e-07, + "loss": 0.5328, + "step": 29406 + }, + { + "epoch": 0.9012811082505823, + "grad_norm": 1.7617717599627931, + "learning_rate": 2.533951984308397e-07, + "loss": 0.5602, + "step": 29407 + }, + { + "epoch": 0.9013117567733235, + "grad_norm": 0.7879095708391005, + "learning_rate": 2.532392249943849e-07, + "loss": 0.3916, + "step": 29408 + }, + { + "epoch": 0.9013424052960647, + "grad_norm": 1.8484681729398935, + "learning_rate": 2.5308329832897715e-07, + "loss": 0.5616, + "step": 29409 + }, + { + "epoch": 0.901373053818806, + "grad_norm": 1.8061554172900018, + "learning_rate": 2.5292741843615466e-07, + "loss": 0.4948, + "step": 29410 + }, + { + "epoch": 0.9014037023415471, + "grad_norm": 0.8399638944021208, + "learning_rate": 2.527715853174534e-07, + "loss": 0.3995, + "step": 29411 + }, + { + "epoch": 0.9014343508642884, + "grad_norm": 1.8860845377713058, + "learning_rate": 2.526157989744077e-07, + "loss": 0.4736, + "step": 29412 + }, + { + "epoch": 0.9014649993870295, + "grad_norm": 1.862761563260096, + "learning_rate": 2.5246005940855303e-07, + "loss": 0.5574, + "step": 29413 + }, + { + "epoch": 
0.9014956479097708, + "grad_norm": 1.977199363846384, + "learning_rate": 2.523043666214248e-07, + "loss": 0.5839, + "step": 29414 + }, + { + "epoch": 0.9015262964325119, + "grad_norm": 1.9609410065897774, + "learning_rate": 2.521487206145562e-07, + "loss": 0.6975, + "step": 29415 + }, + { + "epoch": 0.9015569449552532, + "grad_norm": 1.7983322882964656, + "learning_rate": 2.5199312138948053e-07, + "loss": 0.4803, + "step": 29416 + }, + { + "epoch": 0.9015875934779943, + "grad_norm": 2.1252316046912854, + "learning_rate": 2.518375689477326e-07, + "loss": 0.553, + "step": 29417 + }, + { + "epoch": 0.9016182420007356, + "grad_norm": 1.9745249234692124, + "learning_rate": 2.516820632908429e-07, + "loss": 0.6307, + "step": 29418 + }, + { + "epoch": 0.9016488905234767, + "grad_norm": 2.0013549010016813, + "learning_rate": 2.515266044203457e-07, + "loss": 0.5272, + "step": 29419 + }, + { + "epoch": 0.901679539046218, + "grad_norm": 2.1778345451434555, + "learning_rate": 2.5137119233776984e-07, + "loss": 0.555, + "step": 29420 + }, + { + "epoch": 0.9017101875689592, + "grad_norm": 1.8781103962663663, + "learning_rate": 2.5121582704465076e-07, + "loss": 0.606, + "step": 29421 + }, + { + "epoch": 0.9017408360917004, + "grad_norm": 1.9472075164985834, + "learning_rate": 2.510605085425166e-07, + "loss": 0.5341, + "step": 29422 + }, + { + "epoch": 0.9017714846144416, + "grad_norm": 0.8118054184815704, + "learning_rate": 2.509052368328979e-07, + "loss": 0.3736, + "step": 29423 + }, + { + "epoch": 0.9018021331371828, + "grad_norm": 1.8734659587719824, + "learning_rate": 2.5075001191732507e-07, + "loss": 0.5994, + "step": 29424 + }, + { + "epoch": 0.901832781659924, + "grad_norm": 1.7172823605665353, + "learning_rate": 2.5059483379732797e-07, + "loss": 0.5948, + "step": 29425 + }, + { + "epoch": 0.9018634301826652, + "grad_norm": 1.9974334527987159, + "learning_rate": 2.5043970247443484e-07, + "loss": 0.5505, + "step": 29426 + }, + { + "epoch": 0.9018940787054064, + "grad_norm": 1.9740646599988825, + "learning_rate": 2.5028461795017446e-07, + "loss": 0.5591, + "step": 29427 + }, + { + "epoch": 0.9019247272281476, + "grad_norm": 0.8160560431418603, + "learning_rate": 2.5012958022607446e-07, + "loss": 0.4086, + "step": 29428 + }, + { + "epoch": 0.9019553757508888, + "grad_norm": 1.917393451737605, + "learning_rate": 2.4997458930366425e-07, + "loss": 0.5937, + "step": 29429 + }, + { + "epoch": 0.9019860242736301, + "grad_norm": 1.8824578420171616, + "learning_rate": 2.498196451844698e-07, + "loss": 0.5255, + "step": 29430 + }, + { + "epoch": 0.9020166727963712, + "grad_norm": 1.9169970527952531, + "learning_rate": 2.4966474787001596e-07, + "loss": 0.61, + "step": 29431 + }, + { + "epoch": 0.9020473213191125, + "grad_norm": 1.8863135627219374, + "learning_rate": 2.4950989736183264e-07, + "loss": 0.5763, + "step": 29432 + }, + { + "epoch": 0.9020779698418536, + "grad_norm": 0.7538837436968295, + "learning_rate": 2.493550936614436e-07, + "loss": 0.4004, + "step": 29433 + }, + { + "epoch": 0.9021086183645948, + "grad_norm": 1.9092182533262825, + "learning_rate": 2.4920033677037327e-07, + "loss": 0.5961, + "step": 29434 + }, + { + "epoch": 0.902139266887336, + "grad_norm": 2.020810364652536, + "learning_rate": 2.490456266901481e-07, + "loss": 0.5431, + "step": 29435 + }, + { + "epoch": 0.9021699154100772, + "grad_norm": 1.6698285019548913, + "learning_rate": 2.4889096342229246e-07, + "loss": 0.5006, + "step": 29436 + }, + { + "epoch": 0.9022005639328184, + "grad_norm": 2.1326999036269485, + "learning_rate": 
2.4873634696832904e-07, + "loss": 0.5988, + "step": 29437 + }, + { + "epoch": 0.9022312124555596, + "grad_norm": 1.8417496244321665, + "learning_rate": 2.485817773297816e-07, + "loss": 0.5661, + "step": 29438 + }, + { + "epoch": 0.9022618609783009, + "grad_norm": 1.8968550098155506, + "learning_rate": 2.484272545081745e-07, + "loss": 0.5403, + "step": 29439 + }, + { + "epoch": 0.902292509501042, + "grad_norm": 1.7763696019699526, + "learning_rate": 2.4827277850502926e-07, + "loss": 0.4974, + "step": 29440 + }, + { + "epoch": 0.9023231580237833, + "grad_norm": 1.8361257338309793, + "learning_rate": 2.481183493218686e-07, + "loss": 0.478, + "step": 29441 + }, + { + "epoch": 0.9023538065465244, + "grad_norm": 0.8042319736745163, + "learning_rate": 2.4796396696021295e-07, + "loss": 0.4004, + "step": 29442 + }, + { + "epoch": 0.9023844550692657, + "grad_norm": 1.8751747116930264, + "learning_rate": 2.478096314215844e-07, + "loss": 0.5469, + "step": 29443 + }, + { + "epoch": 0.9024151035920068, + "grad_norm": 1.9113253444323393, + "learning_rate": 2.4765534270750404e-07, + "loss": 0.5072, + "step": 29444 + }, + { + "epoch": 0.9024457521147481, + "grad_norm": 2.0758465194791595, + "learning_rate": 2.4750110081949054e-07, + "loss": 0.6721, + "step": 29445 + }, + { + "epoch": 0.9024764006374892, + "grad_norm": 1.6742589868509319, + "learning_rate": 2.473469057590644e-07, + "loss": 0.4391, + "step": 29446 + }, + { + "epoch": 0.9025070491602305, + "grad_norm": 2.1770662777498724, + "learning_rate": 2.471927575277461e-07, + "loss": 0.6304, + "step": 29447 + }, + { + "epoch": 0.9025376976829717, + "grad_norm": 0.8422081332340827, + "learning_rate": 2.470386561270538e-07, + "loss": 0.4152, + "step": 29448 + }, + { + "epoch": 0.9025683462057129, + "grad_norm": 1.6746994673491928, + "learning_rate": 2.468846015585058e-07, + "loss": 0.5519, + "step": 29449 + }, + { + "epoch": 0.9025989947284541, + "grad_norm": 2.0206674401710614, + "learning_rate": 2.4673059382361806e-07, + "loss": 0.5898, + "step": 29450 + }, + { + "epoch": 0.9026296432511953, + "grad_norm": 1.7158748448013528, + "learning_rate": 2.465766329239122e-07, + "loss": 0.5373, + "step": 29451 + }, + { + "epoch": 0.9026602917739365, + "grad_norm": 0.8176785385661951, + "learning_rate": 2.464227188609025e-07, + "loss": 0.4048, + "step": 29452 + }, + { + "epoch": 0.9026909402966777, + "grad_norm": 1.9442107117819698, + "learning_rate": 2.462688516361056e-07, + "loss": 0.5772, + "step": 29453 + }, + { + "epoch": 0.9027215888194189, + "grad_norm": 2.080158757138292, + "learning_rate": 2.4611503125103744e-07, + "loss": 0.4934, + "step": 29454 + }, + { + "epoch": 0.9027522373421601, + "grad_norm": 1.6291977109813902, + "learning_rate": 2.4596125770721456e-07, + "loss": 0.4695, + "step": 29455 + }, + { + "epoch": 0.9027828858649013, + "grad_norm": 2.0163140868559593, + "learning_rate": 2.458075310061525e-07, + "loss": 0.5429, + "step": 29456 + }, + { + "epoch": 0.9028135343876426, + "grad_norm": 1.818212276675911, + "learning_rate": 2.45653851149365e-07, + "loss": 0.5704, + "step": 29457 + }, + { + "epoch": 0.9028441829103837, + "grad_norm": 2.207387352266369, + "learning_rate": 2.4550021813836587e-07, + "loss": 0.5997, + "step": 29458 + }, + { + "epoch": 0.902874831433125, + "grad_norm": 1.6281146939152613, + "learning_rate": 2.4534663197467056e-07, + "loss": 0.5735, + "step": 29459 + }, + { + "epoch": 0.9029054799558661, + "grad_norm": 1.9725324374677429, + "learning_rate": 2.451930926597912e-07, + "loss": 0.5906, + "step": 29460 + }, + { + "epoch": 
0.9029361284786074, + "grad_norm": 1.6979282839001644, + "learning_rate": 2.450396001952399e-07, + "loss": 0.5724, + "step": 29461 + }, + { + "epoch": 0.9029667770013485, + "grad_norm": 1.7249236409294697, + "learning_rate": 2.4488615458253096e-07, + "loss": 0.5683, + "step": 29462 + }, + { + "epoch": 0.9029974255240898, + "grad_norm": 2.07276738835748, + "learning_rate": 2.4473275582317545e-07, + "loss": 0.4651, + "step": 29463 + }, + { + "epoch": 0.9030280740468309, + "grad_norm": 1.9613465314108285, + "learning_rate": 2.445794039186844e-07, + "loss": 0.5882, + "step": 29464 + }, + { + "epoch": 0.9030587225695721, + "grad_norm": 0.8136020698937046, + "learning_rate": 2.4442609887056935e-07, + "loss": 0.4105, + "step": 29465 + }, + { + "epoch": 0.9030893710923134, + "grad_norm": 1.8475252406031069, + "learning_rate": 2.442728406803402e-07, + "loss": 0.5718, + "step": 29466 + }, + { + "epoch": 0.9031200196150545, + "grad_norm": 1.9662172829950688, + "learning_rate": 2.4411962934950853e-07, + "loss": 0.5516, + "step": 29467 + }, + { + "epoch": 0.9031506681377958, + "grad_norm": 1.6875612636722273, + "learning_rate": 2.4396646487958195e-07, + "loss": 0.5711, + "step": 29468 + }, + { + "epoch": 0.9031813166605369, + "grad_norm": 1.201593389967192, + "learning_rate": 2.438133472720711e-07, + "loss": 0.3967, + "step": 29469 + }, + { + "epoch": 0.9032119651832782, + "grad_norm": 1.9661142732964016, + "learning_rate": 2.4366027652848513e-07, + "loss": 0.5929, + "step": 29470 + }, + { + "epoch": 0.9032426137060193, + "grad_norm": 1.8214341153360438, + "learning_rate": 2.435072526503307e-07, + "loss": 0.5576, + "step": 29471 + }, + { + "epoch": 0.9032732622287606, + "grad_norm": 1.8813825548986, + "learning_rate": 2.433542756391155e-07, + "loss": 0.5236, + "step": 29472 + }, + { + "epoch": 0.9033039107515017, + "grad_norm": 1.9661849247954888, + "learning_rate": 2.432013454963489e-07, + "loss": 0.5525, + "step": 29473 + }, + { + "epoch": 0.903334559274243, + "grad_norm": 1.8097574387598903, + "learning_rate": 2.4304846222353573e-07, + "loss": 0.6318, + "step": 29474 + }, + { + "epoch": 0.9033652077969841, + "grad_norm": 2.0577437631252202, + "learning_rate": 2.428956258221843e-07, + "loss": 0.6743, + "step": 29475 + }, + { + "epoch": 0.9033958563197254, + "grad_norm": 1.9907759450873566, + "learning_rate": 2.4274283629379833e-07, + "loss": 0.5463, + "step": 29476 + }, + { + "epoch": 0.9034265048424666, + "grad_norm": 1.9711975061861815, + "learning_rate": 2.4259009363988397e-07, + "loss": 0.4881, + "step": 29477 + }, + { + "epoch": 0.9034571533652078, + "grad_norm": 0.7958039164707195, + "learning_rate": 2.424373978619482e-07, + "loss": 0.3874, + "step": 29478 + }, + { + "epoch": 0.903487801887949, + "grad_norm": 1.9473212825871435, + "learning_rate": 2.4228474896149266e-07, + "loss": 0.5297, + "step": 29479 + }, + { + "epoch": 0.9035184504106902, + "grad_norm": 1.699238830618576, + "learning_rate": 2.421321469400234e-07, + "loss": 0.566, + "step": 29480 + }, + { + "epoch": 0.9035490989334314, + "grad_norm": 1.8722191453541133, + "learning_rate": 2.419795917990436e-07, + "loss": 0.612, + "step": 29481 + }, + { + "epoch": 0.9035797474561726, + "grad_norm": 2.1456604959659193, + "learning_rate": 2.4182708354005656e-07, + "loss": 0.6225, + "step": 29482 + }, + { + "epoch": 0.9036103959789138, + "grad_norm": 1.9317552542372027, + "learning_rate": 2.4167462216456326e-07, + "loss": 0.6313, + "step": 29483 + }, + { + "epoch": 0.903641044501655, + "grad_norm": 1.7684947266335724, + "learning_rate": 
2.4152220767406863e-07, + "loss": 0.6548, + "step": 29484 + }, + { + "epoch": 0.9036716930243962, + "grad_norm": 0.7605769032644377, + "learning_rate": 2.413698400700726e-07, + "loss": 0.3913, + "step": 29485 + }, + { + "epoch": 0.9037023415471375, + "grad_norm": 1.9735958347673617, + "learning_rate": 2.4121751935407776e-07, + "loss": 0.5751, + "step": 29486 + }, + { + "epoch": 0.9037329900698786, + "grad_norm": 1.9778329804457462, + "learning_rate": 2.4106524552758414e-07, + "loss": 0.6775, + "step": 29487 + }, + { + "epoch": 0.9037636385926199, + "grad_norm": 1.8412232381758866, + "learning_rate": 2.409130185920916e-07, + "loss": 0.5474, + "step": 29488 + }, + { + "epoch": 0.903794287115361, + "grad_norm": 0.7758693577762805, + "learning_rate": 2.407608385491017e-07, + "loss": 0.4043, + "step": 29489 + }, + { + "epoch": 0.9038249356381023, + "grad_norm": 1.832335523321538, + "learning_rate": 2.4060870540011216e-07, + "loss": 0.5154, + "step": 29490 + }, + { + "epoch": 0.9038555841608434, + "grad_norm": 1.9435708635257127, + "learning_rate": 2.404566191466229e-07, + "loss": 0.5685, + "step": 29491 + }, + { + "epoch": 0.9038862326835847, + "grad_norm": 1.921153707109997, + "learning_rate": 2.4030457979013265e-07, + "loss": 0.5513, + "step": 29492 + }, + { + "epoch": 0.9039168812063259, + "grad_norm": 1.742086214875164, + "learning_rate": 2.401525873321392e-07, + "loss": 0.4814, + "step": 29493 + }, + { + "epoch": 0.9039475297290671, + "grad_norm": 1.851663810751358, + "learning_rate": 2.400006417741402e-07, + "loss": 0.4825, + "step": 29494 + }, + { + "epoch": 0.9039781782518083, + "grad_norm": 1.8601145580906453, + "learning_rate": 2.398487431176327e-07, + "loss": 0.5949, + "step": 29495 + }, + { + "epoch": 0.9040088267745494, + "grad_norm": 1.6975320992932015, + "learning_rate": 2.396968913641129e-07, + "loss": 0.584, + "step": 29496 + }, + { + "epoch": 0.9040394752972907, + "grad_norm": 1.782429001363504, + "learning_rate": 2.3954508651507837e-07, + "loss": 0.6105, + "step": 29497 + }, + { + "epoch": 0.9040701238200318, + "grad_norm": 2.0458841275196984, + "learning_rate": 2.3939332857202404e-07, + "loss": 0.6518, + "step": 29498 + }, + { + "epoch": 0.9041007723427731, + "grad_norm": 2.1911065947095127, + "learning_rate": 2.392416175364448e-07, + "loss": 0.6117, + "step": 29499 + }, + { + "epoch": 0.9041314208655142, + "grad_norm": 1.952338590924274, + "learning_rate": 2.390899534098368e-07, + "loss": 0.574, + "step": 29500 + }, + { + "epoch": 0.9041620693882555, + "grad_norm": 2.1218995005799757, + "learning_rate": 2.3893833619369255e-07, + "loss": 0.5729, + "step": 29501 + }, + { + "epoch": 0.9041927179109966, + "grad_norm": 1.9202135973056416, + "learning_rate": 2.387867658895077e-07, + "loss": 0.5543, + "step": 29502 + }, + { + "epoch": 0.9042233664337379, + "grad_norm": 1.8440802302016486, + "learning_rate": 2.386352424987753e-07, + "loss": 0.5526, + "step": 29503 + }, + { + "epoch": 0.9042540149564791, + "grad_norm": 1.9993345522828774, + "learning_rate": 2.3848376602298716e-07, + "loss": 0.6141, + "step": 29504 + }, + { + "epoch": 0.9042846634792203, + "grad_norm": 1.9828285795850877, + "learning_rate": 2.3833233646363806e-07, + "loss": 0.6286, + "step": 29505 + }, + { + "epoch": 0.9043153120019615, + "grad_norm": 0.8282248330474796, + "learning_rate": 2.3818095382221795e-07, + "loss": 0.3732, + "step": 29506 + }, + { + "epoch": 0.9043459605247027, + "grad_norm": 1.9482144556456567, + "learning_rate": 2.3802961810021896e-07, + "loss": 0.5442, + "step": 29507 + }, + { + "epoch": 
0.9043766090474439, + "grad_norm": 1.780710681180047, + "learning_rate": 2.3787832929913324e-07, + "loss": 0.5164, + "step": 29508 + }, + { + "epoch": 0.9044072575701851, + "grad_norm": 1.8621492458587594, + "learning_rate": 2.377270874204507e-07, + "loss": 0.535, + "step": 29509 + }, + { + "epoch": 0.9044379060929263, + "grad_norm": 2.0007043851554904, + "learning_rate": 2.3757589246566127e-07, + "loss": 0.6485, + "step": 29510 + }, + { + "epoch": 0.9044685546156676, + "grad_norm": 2.0356713746252506, + "learning_rate": 2.374247444362554e-07, + "loss": 0.654, + "step": 29511 + }, + { + "epoch": 0.9044992031384087, + "grad_norm": 0.8116422430553808, + "learning_rate": 2.3727364333372194e-07, + "loss": 0.3924, + "step": 29512 + }, + { + "epoch": 0.90452985166115, + "grad_norm": 1.885093935017345, + "learning_rate": 2.3712258915954966e-07, + "loss": 0.4683, + "step": 29513 + }, + { + "epoch": 0.9045605001838911, + "grad_norm": 1.8978456850830852, + "learning_rate": 2.369715819152274e-07, + "loss": 0.5437, + "step": 29514 + }, + { + "epoch": 0.9045911487066324, + "grad_norm": 1.9840338154807478, + "learning_rate": 2.3682062160224284e-07, + "loss": 0.7028, + "step": 29515 + }, + { + "epoch": 0.9046217972293735, + "grad_norm": 1.8777241542666374, + "learning_rate": 2.366697082220837e-07, + "loss": 0.4991, + "step": 29516 + }, + { + "epoch": 0.9046524457521148, + "grad_norm": 1.7516969989224045, + "learning_rate": 2.3651884177623596e-07, + "loss": 0.5069, + "step": 29517 + }, + { + "epoch": 0.9046830942748559, + "grad_norm": 1.845781739718388, + "learning_rate": 2.3636802226618737e-07, + "loss": 0.5056, + "step": 29518 + }, + { + "epoch": 0.9047137427975972, + "grad_norm": 1.7367623123851994, + "learning_rate": 2.3621724969342342e-07, + "loss": 0.5589, + "step": 29519 + }, + { + "epoch": 0.9047443913203383, + "grad_norm": 1.8355157037925152, + "learning_rate": 2.3606652405942954e-07, + "loss": 0.5237, + "step": 29520 + }, + { + "epoch": 0.9047750398430796, + "grad_norm": 0.8022998045358689, + "learning_rate": 2.3591584536569123e-07, + "loss": 0.3994, + "step": 29521 + }, + { + "epoch": 0.9048056883658208, + "grad_norm": 0.8156995692598492, + "learning_rate": 2.3576521361369342e-07, + "loss": 0.41, + "step": 29522 + }, + { + "epoch": 0.904836336888562, + "grad_norm": 1.9368861028354314, + "learning_rate": 2.3561462880491935e-07, + "loss": 0.4907, + "step": 29523 + }, + { + "epoch": 0.9048669854113032, + "grad_norm": 0.8009440194434957, + "learning_rate": 2.3546409094085342e-07, + "loss": 0.4171, + "step": 29524 + }, + { + "epoch": 0.9048976339340444, + "grad_norm": 0.7814456713758997, + "learning_rate": 2.3531360002297944e-07, + "loss": 0.3847, + "step": 29525 + }, + { + "epoch": 0.9049282824567856, + "grad_norm": 2.0360996091905874, + "learning_rate": 2.3516315605277895e-07, + "loss": 0.5676, + "step": 29526 + }, + { + "epoch": 0.9049589309795267, + "grad_norm": 1.6740919486700439, + "learning_rate": 2.3501275903173582e-07, + "loss": 0.4981, + "step": 29527 + }, + { + "epoch": 0.904989579502268, + "grad_norm": 1.9735673923583195, + "learning_rate": 2.3486240896132996e-07, + "loss": 0.5826, + "step": 29528 + }, + { + "epoch": 0.9050202280250091, + "grad_norm": 0.7861530512545438, + "learning_rate": 2.3471210584304514e-07, + "loss": 0.3956, + "step": 29529 + }, + { + "epoch": 0.9050508765477504, + "grad_norm": 1.7682131529199474, + "learning_rate": 2.3456184967836138e-07, + "loss": 0.4547, + "step": 29530 + }, + { + "epoch": 0.9050815250704916, + "grad_norm": 1.847498407751274, + "learning_rate": 
2.3441164046875797e-07, + "loss": 0.5748, + "step": 29531 + }, + { + "epoch": 0.9051121735932328, + "grad_norm": 1.925393771731317, + "learning_rate": 2.34261478215716e-07, + "loss": 0.5609, + "step": 29532 + }, + { + "epoch": 0.905142822115974, + "grad_norm": 0.8842771463438754, + "learning_rate": 2.341113629207159e-07, + "loss": 0.3964, + "step": 29533 + }, + { + "epoch": 0.9051734706387152, + "grad_norm": 1.895421357959315, + "learning_rate": 2.339612945852354e-07, + "loss": 0.5703, + "step": 29534 + }, + { + "epoch": 0.9052041191614564, + "grad_norm": 1.7977972475430273, + "learning_rate": 2.3381127321075338e-07, + "loss": 0.594, + "step": 29535 + }, + { + "epoch": 0.9052347676841976, + "grad_norm": 2.1695012073260562, + "learning_rate": 2.3366129879874965e-07, + "loss": 0.5893, + "step": 29536 + }, + { + "epoch": 0.9052654162069388, + "grad_norm": 1.8898681185164912, + "learning_rate": 2.3351137135069922e-07, + "loss": 0.5794, + "step": 29537 + }, + { + "epoch": 0.90529606472968, + "grad_norm": 1.8192502510805875, + "learning_rate": 2.3336149086808203e-07, + "loss": 0.5623, + "step": 29538 + }, + { + "epoch": 0.9053267132524212, + "grad_norm": 1.9793067806750653, + "learning_rate": 2.3321165735237294e-07, + "loss": 0.5441, + "step": 29539 + }, + { + "epoch": 0.9053573617751625, + "grad_norm": 1.8244210863240913, + "learning_rate": 2.330618708050486e-07, + "loss": 0.6479, + "step": 29540 + }, + { + "epoch": 0.9053880102979036, + "grad_norm": 1.7158800466254744, + "learning_rate": 2.329121312275867e-07, + "loss": 0.4929, + "step": 29541 + }, + { + "epoch": 0.9054186588206449, + "grad_norm": 1.991738060286121, + "learning_rate": 2.3276243862145998e-07, + "loss": 0.6331, + "step": 29542 + }, + { + "epoch": 0.905449307343386, + "grad_norm": 0.8285402323810862, + "learning_rate": 2.32612792988145e-07, + "loss": 0.3807, + "step": 29543 + }, + { + "epoch": 0.9054799558661273, + "grad_norm": 1.9076014505768668, + "learning_rate": 2.324631943291167e-07, + "loss": 0.5672, + "step": 29544 + }, + { + "epoch": 0.9055106043888684, + "grad_norm": 0.836543033107424, + "learning_rate": 2.3231364264584721e-07, + "loss": 0.4222, + "step": 29545 + }, + { + "epoch": 0.9055412529116097, + "grad_norm": 1.7288434590992463, + "learning_rate": 2.3216413793981207e-07, + "loss": 0.4667, + "step": 29546 + }, + { + "epoch": 0.9055719014343508, + "grad_norm": 1.7292097116545353, + "learning_rate": 2.3201468021248285e-07, + "loss": 0.6092, + "step": 29547 + }, + { + "epoch": 0.9056025499570921, + "grad_norm": 1.7789265115110935, + "learning_rate": 2.3186526946533395e-07, + "loss": 0.6232, + "step": 29548 + }, + { + "epoch": 0.9056331984798333, + "grad_norm": 0.7825880743657075, + "learning_rate": 2.3171590569983636e-07, + "loss": 0.3932, + "step": 29549 + }, + { + "epoch": 0.9056638470025745, + "grad_norm": 2.1290324152245432, + "learning_rate": 2.315665889174612e-07, + "loss": 0.492, + "step": 29550 + }, + { + "epoch": 0.9056944955253157, + "grad_norm": 1.7373629979082401, + "learning_rate": 2.3141731911968057e-07, + "loss": 0.6239, + "step": 29551 + }, + { + "epoch": 0.9057251440480569, + "grad_norm": 1.7976614127190562, + "learning_rate": 2.312680963079661e-07, + "loss": 0.5938, + "step": 29552 + }, + { + "epoch": 0.9057557925707981, + "grad_norm": 0.7828700745842545, + "learning_rate": 2.311189204837866e-07, + "loss": 0.3859, + "step": 29553 + }, + { + "epoch": 0.9057864410935393, + "grad_norm": 1.9619329457332744, + "learning_rate": 2.30969791648612e-07, + "loss": 0.463, + "step": 29554 + }, + { + "epoch": 
0.9058170896162805, + "grad_norm": 2.090321698504476, + "learning_rate": 2.308207098039128e-07, + "loss": 0.5283, + "step": 29555 + }, + { + "epoch": 0.9058477381390218, + "grad_norm": 2.0413650610409255, + "learning_rate": 2.3067167495115783e-07, + "loss": 0.5041, + "step": 29556 + }, + { + "epoch": 0.9058783866617629, + "grad_norm": 1.9291783872362211, + "learning_rate": 2.3052268709181536e-07, + "loss": 0.5238, + "step": 29557 + }, + { + "epoch": 0.905909035184504, + "grad_norm": 1.9784804038661836, + "learning_rate": 2.3037374622735143e-07, + "loss": 0.6061, + "step": 29558 + }, + { + "epoch": 0.9059396837072453, + "grad_norm": 2.5340407495836548, + "learning_rate": 2.3022485235923708e-07, + "loss": 0.5834, + "step": 29559 + }, + { + "epoch": 0.9059703322299865, + "grad_norm": 0.7979483952763403, + "learning_rate": 2.3007600548893727e-07, + "loss": 0.3942, + "step": 29560 + }, + { + "epoch": 0.9060009807527277, + "grad_norm": 1.8697796850851613, + "learning_rate": 2.299272056179186e-07, + "loss": 0.5323, + "step": 29561 + }, + { + "epoch": 0.9060316292754689, + "grad_norm": 1.855315593746743, + "learning_rate": 2.2977845274764764e-07, + "loss": 0.5949, + "step": 29562 + }, + { + "epoch": 0.9060622777982101, + "grad_norm": 1.7548875676594622, + "learning_rate": 2.296297468795905e-07, + "loss": 0.522, + "step": 29563 + }, + { + "epoch": 0.9060929263209513, + "grad_norm": 2.103972639802503, + "learning_rate": 2.2948108801521207e-07, + "loss": 0.6001, + "step": 29564 + }, + { + "epoch": 0.9061235748436925, + "grad_norm": 1.6686320565298305, + "learning_rate": 2.293324761559762e-07, + "loss": 0.5281, + "step": 29565 + }, + { + "epoch": 0.9061542233664337, + "grad_norm": 1.8712687609668126, + "learning_rate": 2.2918391130334838e-07, + "loss": 0.5802, + "step": 29566 + }, + { + "epoch": 0.906184871889175, + "grad_norm": 0.7991883872580141, + "learning_rate": 2.29035393458793e-07, + "loss": 0.386, + "step": 29567 + }, + { + "epoch": 0.9062155204119161, + "grad_norm": 0.7697306878030268, + "learning_rate": 2.2888692262377276e-07, + "loss": 0.3799, + "step": 29568 + }, + { + "epoch": 0.9062461689346574, + "grad_norm": 2.0838491681246945, + "learning_rate": 2.2873849879974874e-07, + "loss": 0.6465, + "step": 29569 + }, + { + "epoch": 0.9062768174573985, + "grad_norm": 1.7021274863797589, + "learning_rate": 2.285901219881864e-07, + "loss": 0.6003, + "step": 29570 + }, + { + "epoch": 0.9063074659801398, + "grad_norm": 1.9065421986822464, + "learning_rate": 2.284417921905463e-07, + "loss": 0.5958, + "step": 29571 + }, + { + "epoch": 0.9063381145028809, + "grad_norm": 2.202675403679364, + "learning_rate": 2.2829350940828943e-07, + "loss": 0.5143, + "step": 29572 + }, + { + "epoch": 0.9063687630256222, + "grad_norm": 2.060823939313047, + "learning_rate": 2.2814527364287796e-07, + "loss": 0.651, + "step": 29573 + }, + { + "epoch": 0.9063994115483633, + "grad_norm": 1.8429939041116405, + "learning_rate": 2.2799708489577187e-07, + "loss": 0.4929, + "step": 29574 + }, + { + "epoch": 0.9064300600711046, + "grad_norm": 1.9581968316850797, + "learning_rate": 2.2784894316843165e-07, + "loss": 0.5228, + "step": 29575 + }, + { + "epoch": 0.9064607085938458, + "grad_norm": 1.8525762677618143, + "learning_rate": 2.2770084846231666e-07, + "loss": 0.5271, + "step": 29576 + }, + { + "epoch": 0.906491357116587, + "grad_norm": 1.6915501542812297, + "learning_rate": 2.275528007788863e-07, + "loss": 0.4368, + "step": 29577 + }, + { + "epoch": 0.9065220056393282, + "grad_norm": 2.042675236116029, + "learning_rate": 
2.2740480011959942e-07, + "loss": 0.5907, + "step": 29578 + }, + { + "epoch": 0.9065526541620694, + "grad_norm": 1.8603608821217748, + "learning_rate": 2.2725684648591427e-07, + "loss": 0.5651, + "step": 29579 + }, + { + "epoch": 0.9065833026848106, + "grad_norm": 1.8686138978540143, + "learning_rate": 2.271089398792875e-07, + "loss": 0.6317, + "step": 29580 + }, + { + "epoch": 0.9066139512075518, + "grad_norm": 0.7857433063011086, + "learning_rate": 2.2696108030117902e-07, + "loss": 0.379, + "step": 29581 + }, + { + "epoch": 0.906644599730293, + "grad_norm": 1.7857403586860505, + "learning_rate": 2.2681326775304323e-07, + "loss": 0.5314, + "step": 29582 + }, + { + "epoch": 0.9066752482530342, + "grad_norm": 2.0898090456271565, + "learning_rate": 2.2666550223633844e-07, + "loss": 0.582, + "step": 29583 + }, + { + "epoch": 0.9067058967757754, + "grad_norm": 1.664774928922017, + "learning_rate": 2.2651778375251897e-07, + "loss": 0.5491, + "step": 29584 + }, + { + "epoch": 0.9067365452985167, + "grad_norm": 2.1269440344464425, + "learning_rate": 2.263701123030415e-07, + "loss": 0.6582, + "step": 29585 + }, + { + "epoch": 0.9067671938212578, + "grad_norm": 1.9362287090029398, + "learning_rate": 2.2622248788936098e-07, + "loss": 0.547, + "step": 29586 + }, + { + "epoch": 0.9067978423439991, + "grad_norm": 2.5289401863364818, + "learning_rate": 2.2607491051293119e-07, + "loss": 0.5223, + "step": 29587 + }, + { + "epoch": 0.9068284908667402, + "grad_norm": 1.8114882017612313, + "learning_rate": 2.2592738017520655e-07, + "loss": 0.4911, + "step": 29588 + }, + { + "epoch": 0.9068591393894814, + "grad_norm": 2.0953288587330894, + "learning_rate": 2.25779896877642e-07, + "loss": 0.5193, + "step": 29589 + }, + { + "epoch": 0.9068897879122226, + "grad_norm": 0.7914356177794579, + "learning_rate": 2.256324606216892e-07, + "loss": 0.3814, + "step": 29590 + }, + { + "epoch": 0.9069204364349638, + "grad_norm": 0.7745092793965092, + "learning_rate": 2.2548507140880081e-07, + "loss": 0.4081, + "step": 29591 + }, + { + "epoch": 0.906951084957705, + "grad_norm": 2.1039462546733763, + "learning_rate": 2.253377292404296e-07, + "loss": 0.5496, + "step": 29592 + }, + { + "epoch": 0.9069817334804462, + "grad_norm": 2.059893222780893, + "learning_rate": 2.2519043411802777e-07, + "loss": 0.5177, + "step": 29593 + }, + { + "epoch": 0.9070123820031875, + "grad_norm": 1.937094801450269, + "learning_rate": 2.2504318604304687e-07, + "loss": 0.6126, + "step": 29594 + }, + { + "epoch": 0.9070430305259286, + "grad_norm": 1.9688867912968178, + "learning_rate": 2.2489598501693632e-07, + "loss": 0.5881, + "step": 29595 + }, + { + "epoch": 0.9070736790486699, + "grad_norm": 1.8389831092881004, + "learning_rate": 2.2474883104114719e-07, + "loss": 0.6696, + "step": 29596 + }, + { + "epoch": 0.907104327571411, + "grad_norm": 2.161116497579345, + "learning_rate": 2.2460172411713054e-07, + "loss": 0.5097, + "step": 29597 + }, + { + "epoch": 0.9071349760941523, + "grad_norm": 1.707736254560601, + "learning_rate": 2.244546642463352e-07, + "loss": 0.5046, + "step": 29598 + }, + { + "epoch": 0.9071656246168934, + "grad_norm": 1.9175773773731073, + "learning_rate": 2.2430765143020783e-07, + "loss": 0.5801, + "step": 29599 + }, + { + "epoch": 0.9071962731396347, + "grad_norm": 1.6738489542752775, + "learning_rate": 2.241606856702011e-07, + "loss": 0.5022, + "step": 29600 + }, + { + "epoch": 0.9072269216623758, + "grad_norm": 2.1045868223694275, + "learning_rate": 2.2401376696776e-07, + "loss": 0.6283, + "step": 29601 + }, + { + "epoch": 
0.9072575701851171, + "grad_norm": 0.7195310877894786, + "learning_rate": 2.2386689532433447e-07, + "loss": 0.3706, + "step": 29602 + }, + { + "epoch": 0.9072882187078583, + "grad_norm": 1.68175939725784, + "learning_rate": 2.237200707413695e-07, + "loss": 0.5457, + "step": 29603 + }, + { + "epoch": 0.9073188672305995, + "grad_norm": 1.8086070440157727, + "learning_rate": 2.2357329322031273e-07, + "loss": 0.6444, + "step": 29604 + }, + { + "epoch": 0.9073495157533407, + "grad_norm": 1.7688714607844067, + "learning_rate": 2.2342656276261087e-07, + "loss": 0.5213, + "step": 29605 + }, + { + "epoch": 0.9073801642760819, + "grad_norm": 1.935450489174442, + "learning_rate": 2.2327987936970885e-07, + "loss": 0.6073, + "step": 29606 + }, + { + "epoch": 0.9074108127988231, + "grad_norm": 0.8408036281978026, + "learning_rate": 2.2313324304305217e-07, + "loss": 0.414, + "step": 29607 + }, + { + "epoch": 0.9074414613215643, + "grad_norm": 1.8714145607477723, + "learning_rate": 2.2298665378408635e-07, + "loss": 0.6845, + "step": 29608 + }, + { + "epoch": 0.9074721098443055, + "grad_norm": 1.9308552183411534, + "learning_rate": 2.2284011159425466e-07, + "loss": 0.5279, + "step": 29609 + }, + { + "epoch": 0.9075027583670467, + "grad_norm": 1.9576026131700652, + "learning_rate": 2.226936164750021e-07, + "loss": 0.6093, + "step": 29610 + }, + { + "epoch": 0.9075334068897879, + "grad_norm": 1.7817601446257287, + "learning_rate": 2.225471684277719e-07, + "loss": 0.6017, + "step": 29611 + }, + { + "epoch": 0.9075640554125292, + "grad_norm": 2.001604589988426, + "learning_rate": 2.224007674540063e-07, + "loss": 0.5683, + "step": 29612 + }, + { + "epoch": 0.9075947039352703, + "grad_norm": 1.80483811130117, + "learning_rate": 2.222544135551491e-07, + "loss": 0.6274, + "step": 29613 + }, + { + "epoch": 0.9076253524580116, + "grad_norm": 1.8953372135938893, + "learning_rate": 2.2210810673264084e-07, + "loss": 0.5529, + "step": 29614 + }, + { + "epoch": 0.9076560009807527, + "grad_norm": 0.8267386909765997, + "learning_rate": 2.2196184698792368e-07, + "loss": 0.421, + "step": 29615 + }, + { + "epoch": 0.907686649503494, + "grad_norm": 1.8853415693470015, + "learning_rate": 2.218156343224398e-07, + "loss": 0.5755, + "step": 29616 + }, + { + "epoch": 0.9077172980262351, + "grad_norm": 1.796898392647778, + "learning_rate": 2.216694687376286e-07, + "loss": 0.5619, + "step": 29617 + }, + { + "epoch": 0.9077479465489764, + "grad_norm": 1.9442153795262116, + "learning_rate": 2.215233502349301e-07, + "loss": 0.5806, + "step": 29618 + }, + { + "epoch": 0.9077785950717175, + "grad_norm": 2.183827042801568, + "learning_rate": 2.2137727881578586e-07, + "loss": 0.5392, + "step": 29619 + }, + { + "epoch": 0.9078092435944587, + "grad_norm": 2.24748608607461, + "learning_rate": 2.2123125448163307e-07, + "loss": 0.5338, + "step": 29620 + }, + { + "epoch": 0.9078398921172, + "grad_norm": 2.0105210783358034, + "learning_rate": 2.2108527723391172e-07, + "loss": 0.5193, + "step": 29621 + }, + { + "epoch": 0.9078705406399411, + "grad_norm": 0.8373541377779637, + "learning_rate": 2.2093934707406007e-07, + "loss": 0.4067, + "step": 29622 + }, + { + "epoch": 0.9079011891626824, + "grad_norm": 2.0784903037764155, + "learning_rate": 2.2079346400351532e-07, + "loss": 0.5036, + "step": 29623 + }, + { + "epoch": 0.9079318376854235, + "grad_norm": 2.1399687466526864, + "learning_rate": 2.2064762802371632e-07, + "loss": 0.5719, + "step": 29624 + }, + { + "epoch": 0.9079624862081648, + "grad_norm": 0.8073074029206349, + "learning_rate": 
2.2050183913609802e-07, + "loss": 0.3942, + "step": 29625 + }, + { + "epoch": 0.9079931347309059, + "grad_norm": 1.8324539551840697, + "learning_rate": 2.2035609734209818e-07, + "loss": 0.5818, + "step": 29626 + }, + { + "epoch": 0.9080237832536472, + "grad_norm": 0.8425577581086234, + "learning_rate": 2.202104026431534e-07, + "loss": 0.3866, + "step": 29627 + }, + { + "epoch": 0.9080544317763883, + "grad_norm": 1.8331272026253307, + "learning_rate": 2.2006475504069757e-07, + "loss": 0.5024, + "step": 29628 + }, + { + "epoch": 0.9080850802991296, + "grad_norm": 1.9609231392616753, + "learning_rate": 2.199191545361673e-07, + "loss": 0.6037, + "step": 29629 + }, + { + "epoch": 0.9081157288218707, + "grad_norm": 1.7329494531428475, + "learning_rate": 2.1977360113099643e-07, + "loss": 0.537, + "step": 29630 + }, + { + "epoch": 0.908146377344612, + "grad_norm": 1.722763106033029, + "learning_rate": 2.196280948266194e-07, + "loss": 0.516, + "step": 29631 + }, + { + "epoch": 0.9081770258673532, + "grad_norm": 1.8697838079131532, + "learning_rate": 2.194826356244695e-07, + "loss": 0.6357, + "step": 29632 + }, + { + "epoch": 0.9082076743900944, + "grad_norm": 1.9507382403412927, + "learning_rate": 2.1933722352598109e-07, + "loss": 0.5881, + "step": 29633 + }, + { + "epoch": 0.9082383229128356, + "grad_norm": 2.035914273603673, + "learning_rate": 2.191918585325853e-07, + "loss": 0.6646, + "step": 29634 + }, + { + "epoch": 0.9082689714355768, + "grad_norm": 1.8409436616979438, + "learning_rate": 2.190465406457165e-07, + "loss": 0.5314, + "step": 29635 + }, + { + "epoch": 0.908299619958318, + "grad_norm": 1.9184295482774945, + "learning_rate": 2.1890126986680416e-07, + "loss": 0.5459, + "step": 29636 + }, + { + "epoch": 0.9083302684810592, + "grad_norm": 1.7997753330685684, + "learning_rate": 2.1875604619728153e-07, + "loss": 0.5694, + "step": 29637 + }, + { + "epoch": 0.9083609170038004, + "grad_norm": 1.8890517095773711, + "learning_rate": 2.1861086963857914e-07, + "loss": 0.52, + "step": 29638 + }, + { + "epoch": 0.9083915655265417, + "grad_norm": 0.814540954454496, + "learning_rate": 2.1846574019212695e-07, + "loss": 0.3967, + "step": 29639 + }, + { + "epoch": 0.9084222140492828, + "grad_norm": 1.728849869108706, + "learning_rate": 2.1832065785935496e-07, + "loss": 0.5438, + "step": 29640 + }, + { + "epoch": 0.9084528625720241, + "grad_norm": 1.9271338608179733, + "learning_rate": 2.1817562264169312e-07, + "loss": 0.6212, + "step": 29641 + }, + { + "epoch": 0.9084835110947652, + "grad_norm": 1.9225278169585442, + "learning_rate": 2.1803063454057028e-07, + "loss": 0.603, + "step": 29642 + }, + { + "epoch": 0.9085141596175065, + "grad_norm": 1.823296872209789, + "learning_rate": 2.1788569355741583e-07, + "loss": 0.5309, + "step": 29643 + }, + { + "epoch": 0.9085448081402476, + "grad_norm": 1.8042322181624533, + "learning_rate": 2.1774079969365646e-07, + "loss": 0.5511, + "step": 29644 + }, + { + "epoch": 0.9085754566629889, + "grad_norm": 1.7523405677111625, + "learning_rate": 2.1759595295072044e-07, + "loss": 0.5145, + "step": 29645 + }, + { + "epoch": 0.90860610518573, + "grad_norm": 1.9050009178203684, + "learning_rate": 2.1745115333003607e-07, + "loss": 0.5351, + "step": 29646 + }, + { + "epoch": 0.9086367537084713, + "grad_norm": 1.821171658277009, + "learning_rate": 2.1730640083302834e-07, + "loss": 0.6127, + "step": 29647 + }, + { + "epoch": 0.9086674022312125, + "grad_norm": 1.95888198987617, + "learning_rate": 2.1716169546112442e-07, + "loss": 0.5055, + "step": 29648 + }, + { + "epoch": 
0.9086980507539537, + "grad_norm": 1.9850594268722888, + "learning_rate": 2.170170372157504e-07, + "loss": 0.4724, + "step": 29649 + }, + { + "epoch": 0.9087286992766949, + "grad_norm": 0.7652970464377942, + "learning_rate": 2.168724260983307e-07, + "loss": 0.3847, + "step": 29650 + }, + { + "epoch": 0.908759347799436, + "grad_norm": 1.6493200989157122, + "learning_rate": 2.1672786211029085e-07, + "loss": 0.4847, + "step": 29651 + }, + { + "epoch": 0.9087899963221773, + "grad_norm": 2.1127710408785987, + "learning_rate": 2.1658334525305634e-07, + "loss": 0.6182, + "step": 29652 + }, + { + "epoch": 0.9088206448449184, + "grad_norm": 2.0445491548755412, + "learning_rate": 2.1643887552804888e-07, + "loss": 0.5575, + "step": 29653 + }, + { + "epoch": 0.9088512933676597, + "grad_norm": 1.7041232790082985, + "learning_rate": 2.1629445293669394e-07, + "loss": 0.614, + "step": 29654 + }, + { + "epoch": 0.9088819418904008, + "grad_norm": 1.8759923667326004, + "learning_rate": 2.1615007748041205e-07, + "loss": 0.5378, + "step": 29655 + }, + { + "epoch": 0.9089125904131421, + "grad_norm": 1.9520313517526122, + "learning_rate": 2.1600574916062934e-07, + "loss": 0.612, + "step": 29656 + }, + { + "epoch": 0.9089432389358832, + "grad_norm": 0.7757120275847026, + "learning_rate": 2.1586146797876574e-07, + "loss": 0.4015, + "step": 29657 + }, + { + "epoch": 0.9089738874586245, + "grad_norm": 1.9639827376041505, + "learning_rate": 2.1571723393624232e-07, + "loss": 0.5639, + "step": 29658 + }, + { + "epoch": 0.9090045359813657, + "grad_norm": 1.8465652485117319, + "learning_rate": 2.1557304703448134e-07, + "loss": 0.4925, + "step": 29659 + }, + { + "epoch": 0.9090351845041069, + "grad_norm": 1.954659775118195, + "learning_rate": 2.1542890727490385e-07, + "loss": 0.6949, + "step": 29660 + }, + { + "epoch": 0.9090658330268481, + "grad_norm": 2.073610571273492, + "learning_rate": 2.1528481465892869e-07, + "loss": 0.4773, + "step": 29661 + }, + { + "epoch": 0.9090964815495893, + "grad_norm": 0.8074427949224627, + "learning_rate": 2.1514076918797698e-07, + "loss": 0.3812, + "step": 29662 + }, + { + "epoch": 0.9091271300723305, + "grad_norm": 1.7461496985854423, + "learning_rate": 2.149967708634676e-07, + "loss": 0.5745, + "step": 29663 + }, + { + "epoch": 0.9091577785950717, + "grad_norm": 2.152882874530331, + "learning_rate": 2.1485281968681937e-07, + "loss": 0.6494, + "step": 29664 + }, + { + "epoch": 0.9091884271178129, + "grad_norm": 1.7520978925038773, + "learning_rate": 2.1470891565945062e-07, + "loss": 0.5498, + "step": 29665 + }, + { + "epoch": 0.9092190756405542, + "grad_norm": 1.717569665576465, + "learning_rate": 2.1456505878277855e-07, + "loss": 0.5506, + "step": 29666 + }, + { + "epoch": 0.9092497241632953, + "grad_norm": 1.854739544906703, + "learning_rate": 2.1442124905822204e-07, + "loss": 0.6096, + "step": 29667 + }, + { + "epoch": 0.9092803726860366, + "grad_norm": 2.140648880127492, + "learning_rate": 2.142774864871977e-07, + "loss": 0.6138, + "step": 29668 + }, + { + "epoch": 0.9093110212087777, + "grad_norm": 1.8964807857842167, + "learning_rate": 2.141337710711211e-07, + "loss": 0.5337, + "step": 29669 + }, + { + "epoch": 0.909341669731519, + "grad_norm": 1.91861456603135, + "learning_rate": 2.1399010281140941e-07, + "loss": 0.5613, + "step": 29670 + }, + { + "epoch": 0.9093723182542601, + "grad_norm": 1.835585437865799, + "learning_rate": 2.138464817094782e-07, + "loss": 0.5563, + "step": 29671 + }, + { + "epoch": 0.9094029667770014, + "grad_norm": 1.6269265445294676, + "learning_rate": 
2.137029077667413e-07, + "loss": 0.5603, + "step": 29672 + }, + { + "epoch": 0.9094336152997425, + "grad_norm": 1.8297227115954018, + "learning_rate": 2.1355938098461427e-07, + "loss": 0.5399, + "step": 29673 + }, + { + "epoch": 0.9094642638224838, + "grad_norm": 1.8015988420763898, + "learning_rate": 2.1341590136451152e-07, + "loss": 0.5898, + "step": 29674 + }, + { + "epoch": 0.909494912345225, + "grad_norm": 1.9553149613196914, + "learning_rate": 2.1327246890784693e-07, + "loss": 0.5497, + "step": 29675 + }, + { + "epoch": 0.9095255608679662, + "grad_norm": 1.5843601083964227, + "learning_rate": 2.131290836160338e-07, + "loss": 0.5212, + "step": 29676 + }, + { + "epoch": 0.9095562093907074, + "grad_norm": 1.821082248261238, + "learning_rate": 2.129857454904838e-07, + "loss": 0.6106, + "step": 29677 + }, + { + "epoch": 0.9095868579134486, + "grad_norm": 1.9258347852912694, + "learning_rate": 2.1284245453261021e-07, + "loss": 0.6451, + "step": 29678 + }, + { + "epoch": 0.9096175064361898, + "grad_norm": 1.9060985753601098, + "learning_rate": 2.1269921074382528e-07, + "loss": 0.6199, + "step": 29679 + }, + { + "epoch": 0.909648154958931, + "grad_norm": 1.9004702293785922, + "learning_rate": 2.1255601412553895e-07, + "loss": 0.6021, + "step": 29680 + }, + { + "epoch": 0.9096788034816722, + "grad_norm": 1.9108984180100026, + "learning_rate": 2.1241286467916345e-07, + "loss": 0.5512, + "step": 29681 + }, + { + "epoch": 0.9097094520044133, + "grad_norm": 1.9352901015230835, + "learning_rate": 2.1226976240610875e-07, + "loss": 0.496, + "step": 29682 + }, + { + "epoch": 0.9097401005271546, + "grad_norm": 0.7709087533943932, + "learning_rate": 2.1212670730778594e-07, + "loss": 0.3942, + "step": 29683 + }, + { + "epoch": 0.9097707490498957, + "grad_norm": 1.8629948759601602, + "learning_rate": 2.1198369938560338e-07, + "loss": 0.6267, + "step": 29684 + }, + { + "epoch": 0.909801397572637, + "grad_norm": 1.8321467171726735, + "learning_rate": 2.1184073864096987e-07, + "loss": 0.6195, + "step": 29685 + }, + { + "epoch": 0.9098320460953782, + "grad_norm": 1.9710058864612074, + "learning_rate": 2.1169782507529545e-07, + "loss": 0.6332, + "step": 29686 + }, + { + "epoch": 0.9098626946181194, + "grad_norm": 1.8736425965056456, + "learning_rate": 2.1155495868998787e-07, + "loss": 0.5384, + "step": 29687 + }, + { + "epoch": 0.9098933431408606, + "grad_norm": 2.0233838059477147, + "learning_rate": 2.114121394864538e-07, + "loss": 0.5131, + "step": 29688 + }, + { + "epoch": 0.9099239916636018, + "grad_norm": 1.7872455507944498, + "learning_rate": 2.1126936746610094e-07, + "loss": 0.579, + "step": 29689 + }, + { + "epoch": 0.909954640186343, + "grad_norm": 1.9457506572004433, + "learning_rate": 2.1112664263033654e-07, + "loss": 0.6062, + "step": 29690 + }, + { + "epoch": 0.9099852887090842, + "grad_norm": 2.1522468992483375, + "learning_rate": 2.1098396498056616e-07, + "loss": 0.5401, + "step": 29691 + }, + { + "epoch": 0.9100159372318254, + "grad_norm": 1.9644561009884518, + "learning_rate": 2.1084133451819644e-07, + "loss": 0.7272, + "step": 29692 + }, + { + "epoch": 0.9100465857545667, + "grad_norm": 1.8798294145366168, + "learning_rate": 2.1069875124463235e-07, + "loss": 0.5482, + "step": 29693 + }, + { + "epoch": 0.9100772342773078, + "grad_norm": 1.9707608401977217, + "learning_rate": 2.1055621516127945e-07, + "loss": 0.5441, + "step": 29694 + }, + { + "epoch": 0.9101078828000491, + "grad_norm": 1.8669633609985514, + "learning_rate": 2.1041372626954103e-07, + "loss": 0.632, + "step": 29695 + }, + { + 
"epoch": 0.9101385313227902, + "grad_norm": 1.8859338308166613, + "learning_rate": 2.1027128457082102e-07, + "loss": 0.5328, + "step": 29696 + }, + { + "epoch": 0.9101691798455315, + "grad_norm": 2.2425193589906014, + "learning_rate": 2.1012889006652492e-07, + "loss": 0.5459, + "step": 29697 + }, + { + "epoch": 0.9101998283682726, + "grad_norm": 2.150882028442587, + "learning_rate": 2.0998654275805385e-07, + "loss": 0.5793, + "step": 29698 + }, + { + "epoch": 0.9102304768910139, + "grad_norm": 1.7164534860932017, + "learning_rate": 2.0984424264681057e-07, + "loss": 0.5182, + "step": 29699 + }, + { + "epoch": 0.910261125413755, + "grad_norm": 1.7592682773496704, + "learning_rate": 2.0970198973419786e-07, + "loss": 0.5541, + "step": 29700 + }, + { + "epoch": 0.9102917739364963, + "grad_norm": 1.7077437671873053, + "learning_rate": 2.095597840216168e-07, + "loss": 0.5359, + "step": 29701 + }, + { + "epoch": 0.9103224224592374, + "grad_norm": 2.016109004608056, + "learning_rate": 2.0941762551046906e-07, + "loss": 0.5209, + "step": 29702 + }, + { + "epoch": 0.9103530709819787, + "grad_norm": 1.8703776006672888, + "learning_rate": 2.092755142021552e-07, + "loss": 0.6526, + "step": 29703 + }, + { + "epoch": 0.9103837195047199, + "grad_norm": 1.8702423697312673, + "learning_rate": 2.0913345009807518e-07, + "loss": 0.5521, + "step": 29704 + }, + { + "epoch": 0.9104143680274611, + "grad_norm": 1.8378991481396267, + "learning_rate": 2.089914331996301e-07, + "loss": 0.6246, + "step": 29705 + }, + { + "epoch": 0.9104450165502023, + "grad_norm": 1.887384961869677, + "learning_rate": 2.088494635082178e-07, + "loss": 0.4739, + "step": 29706 + }, + { + "epoch": 0.9104756650729435, + "grad_norm": 1.9722775579415346, + "learning_rate": 2.087075410252365e-07, + "loss": 0.5489, + "step": 29707 + }, + { + "epoch": 0.9105063135956847, + "grad_norm": 0.8257560805056063, + "learning_rate": 2.0856566575208682e-07, + "loss": 0.3896, + "step": 29708 + }, + { + "epoch": 0.9105369621184259, + "grad_norm": 1.901393908561219, + "learning_rate": 2.084238376901654e-07, + "loss": 0.5538, + "step": 29709 + }, + { + "epoch": 0.9105676106411671, + "grad_norm": 2.0088486115684403, + "learning_rate": 2.0828205684087e-07, + "loss": 0.5392, + "step": 29710 + }, + { + "epoch": 0.9105982591639084, + "grad_norm": 1.8283329997410498, + "learning_rate": 2.081403232055973e-07, + "loss": 0.553, + "step": 29711 + }, + { + "epoch": 0.9106289076866495, + "grad_norm": 1.7869486894229902, + "learning_rate": 2.0799863678574396e-07, + "loss": 0.5535, + "step": 29712 + }, + { + "epoch": 0.9106595562093907, + "grad_norm": 1.913556650720194, + "learning_rate": 2.078569975827066e-07, + "loss": 0.5625, + "step": 29713 + }, + { + "epoch": 0.9106902047321319, + "grad_norm": 1.8893015592553424, + "learning_rate": 2.0771540559787973e-07, + "loss": 0.5632, + "step": 29714 + }, + { + "epoch": 0.9107208532548731, + "grad_norm": 2.0315097029440152, + "learning_rate": 2.0757386083265885e-07, + "loss": 0.5911, + "step": 29715 + }, + { + "epoch": 0.9107515017776143, + "grad_norm": 0.8339545238753595, + "learning_rate": 2.0743236328844007e-07, + "loss": 0.3852, + "step": 29716 + }, + { + "epoch": 0.9107821503003555, + "grad_norm": 1.8303772684310624, + "learning_rate": 2.0729091296661618e-07, + "loss": 0.4981, + "step": 29717 + }, + { + "epoch": 0.9108127988230967, + "grad_norm": 1.8362179779940764, + "learning_rate": 2.0714950986857995e-07, + "loss": 0.5812, + "step": 29718 + }, + { + "epoch": 0.9108434473458379, + "grad_norm": 0.7755825501458815, + 
"learning_rate": 2.0700815399572749e-07, + "loss": 0.4092, + "step": 29719 + }, + { + "epoch": 0.9108740958685791, + "grad_norm": 2.1070783916218288, + "learning_rate": 2.0686684534944878e-07, + "loss": 0.6467, + "step": 29720 + }, + { + "epoch": 0.9109047443913203, + "grad_norm": 1.838067416709029, + "learning_rate": 2.0672558393113884e-07, + "loss": 0.5205, + "step": 29721 + }, + { + "epoch": 0.9109353929140616, + "grad_norm": 2.0469300343781947, + "learning_rate": 2.0658436974218653e-07, + "loss": 0.5729, + "step": 29722 + }, + { + "epoch": 0.9109660414368027, + "grad_norm": 2.113165877479592, + "learning_rate": 2.0644320278398578e-07, + "loss": 0.5435, + "step": 29723 + }, + { + "epoch": 0.910996689959544, + "grad_norm": 1.6641150807938199, + "learning_rate": 2.0630208305792655e-07, + "loss": 0.4968, + "step": 29724 + }, + { + "epoch": 0.9110273384822851, + "grad_norm": 1.8298893166430195, + "learning_rate": 2.061610105653994e-07, + "loss": 0.562, + "step": 29725 + }, + { + "epoch": 0.9110579870050264, + "grad_norm": 2.0735315338819436, + "learning_rate": 2.0601998530779376e-07, + "loss": 0.5839, + "step": 29726 + }, + { + "epoch": 0.9110886355277675, + "grad_norm": 1.8111725938537926, + "learning_rate": 2.0587900728650078e-07, + "loss": 0.433, + "step": 29727 + }, + { + "epoch": 0.9111192840505088, + "grad_norm": 0.909149235110463, + "learning_rate": 2.0573807650290823e-07, + "loss": 0.4036, + "step": 29728 + }, + { + "epoch": 0.9111499325732499, + "grad_norm": 1.7666109257664193, + "learning_rate": 2.0559719295840552e-07, + "loss": 0.6515, + "step": 29729 + }, + { + "epoch": 0.9111805810959912, + "grad_norm": 1.915062137593043, + "learning_rate": 2.0545635665437936e-07, + "loss": 0.5819, + "step": 29730 + }, + { + "epoch": 0.9112112296187324, + "grad_norm": 2.246781140357417, + "learning_rate": 2.053155675922186e-07, + "loss": 0.5771, + "step": 29731 + }, + { + "epoch": 0.9112418781414736, + "grad_norm": 1.7735667750372373, + "learning_rate": 2.0517482577331105e-07, + "loss": 0.6173, + "step": 29732 + }, + { + "epoch": 0.9112725266642148, + "grad_norm": 1.8712015266219422, + "learning_rate": 2.0503413119904224e-07, + "loss": 0.5587, + "step": 29733 + }, + { + "epoch": 0.911303175186956, + "grad_norm": 1.8532138037624193, + "learning_rate": 2.0489348387079888e-07, + "loss": 0.5153, + "step": 29734 + }, + { + "epoch": 0.9113338237096972, + "grad_norm": 0.8058880490308359, + "learning_rate": 2.047528837899676e-07, + "loss": 0.4083, + "step": 29735 + }, + { + "epoch": 0.9113644722324384, + "grad_norm": 2.210130321913654, + "learning_rate": 2.046123309579323e-07, + "loss": 0.5781, + "step": 29736 + }, + { + "epoch": 0.9113951207551796, + "grad_norm": 0.7648680779274829, + "learning_rate": 2.0447182537607856e-07, + "loss": 0.3763, + "step": 29737 + }, + { + "epoch": 0.9114257692779208, + "grad_norm": 1.8857461246558889, + "learning_rate": 2.0433136704579194e-07, + "loss": 0.5054, + "step": 29738 + }, + { + "epoch": 0.911456417800662, + "grad_norm": 2.013026235585857, + "learning_rate": 2.0419095596845462e-07, + "loss": 0.5758, + "step": 29739 + }, + { + "epoch": 0.9114870663234033, + "grad_norm": 2.025255327174587, + "learning_rate": 2.0405059214545108e-07, + "loss": 0.5514, + "step": 29740 + }, + { + "epoch": 0.9115177148461444, + "grad_norm": 1.8230637449481857, + "learning_rate": 2.0391027557816412e-07, + "loss": 0.5764, + "step": 29741 + }, + { + "epoch": 0.9115483633688857, + "grad_norm": 1.7930975316784756, + "learning_rate": 2.0377000626797595e-07, + "loss": 0.5635, + "step": 29742 
+ }, + { + "epoch": 0.9115790118916268, + "grad_norm": 1.9554193450446438, + "learning_rate": 2.036297842162699e-07, + "loss": 0.5991, + "step": 29743 + }, + { + "epoch": 0.911609660414368, + "grad_norm": 1.9511231531259083, + "learning_rate": 2.0348960942442596e-07, + "loss": 0.5362, + "step": 29744 + }, + { + "epoch": 0.9116403089371092, + "grad_norm": 1.868082197170135, + "learning_rate": 2.033494818938264e-07, + "loss": 0.5652, + "step": 29745 + }, + { + "epoch": 0.9116709574598504, + "grad_norm": 2.10442787743176, + "learning_rate": 2.0320940162585234e-07, + "loss": 0.5132, + "step": 29746 + }, + { + "epoch": 0.9117016059825916, + "grad_norm": 2.127582307013866, + "learning_rate": 2.030693686218821e-07, + "loss": 0.544, + "step": 29747 + }, + { + "epoch": 0.9117322545053328, + "grad_norm": 2.181648540493091, + "learning_rate": 2.0292938288329733e-07, + "loss": 0.6084, + "step": 29748 + }, + { + "epoch": 0.9117629030280741, + "grad_norm": 1.909606872689317, + "learning_rate": 2.0278944441147751e-07, + "loss": 0.5347, + "step": 29749 + }, + { + "epoch": 0.9117935515508152, + "grad_norm": 1.8043339138179333, + "learning_rate": 2.0264955320779934e-07, + "loss": 0.5375, + "step": 29750 + }, + { + "epoch": 0.9118242000735565, + "grad_norm": 1.9604874503223229, + "learning_rate": 2.0250970927364387e-07, + "loss": 0.5073, + "step": 29751 + }, + { + "epoch": 0.9118548485962976, + "grad_norm": 1.9423337326012, + "learning_rate": 2.0236991261038674e-07, + "loss": 0.5174, + "step": 29752 + }, + { + "epoch": 0.9118854971190389, + "grad_norm": 1.7701606685805502, + "learning_rate": 2.022301632194068e-07, + "loss": 0.49, + "step": 29753 + }, + { + "epoch": 0.91191614564178, + "grad_norm": 0.8033370325221514, + "learning_rate": 2.0209046110208074e-07, + "loss": 0.408, + "step": 29754 + }, + { + "epoch": 0.9119467941645213, + "grad_norm": 1.9739664594780144, + "learning_rate": 2.019508062597847e-07, + "loss": 0.4982, + "step": 29755 + }, + { + "epoch": 0.9119774426872624, + "grad_norm": 1.9743219078196463, + "learning_rate": 2.0181119869389477e-07, + "loss": 0.5627, + "step": 29756 + }, + { + "epoch": 0.9120080912100037, + "grad_norm": 1.8918012788538072, + "learning_rate": 2.0167163840578762e-07, + "loss": 0.5366, + "step": 29757 + }, + { + "epoch": 0.9120387397327449, + "grad_norm": 1.9099765021513888, + "learning_rate": 2.0153212539683664e-07, + "loss": 0.5845, + "step": 29758 + }, + { + "epoch": 0.9120693882554861, + "grad_norm": 1.8131091011586693, + "learning_rate": 2.0139265966841738e-07, + "loss": 0.6197, + "step": 29759 + }, + { + "epoch": 0.9121000367782273, + "grad_norm": 1.729021626176158, + "learning_rate": 2.0125324122190483e-07, + "loss": 0.5338, + "step": 29760 + }, + { + "epoch": 0.9121306853009685, + "grad_norm": 1.9546492406502696, + "learning_rate": 2.0111387005867123e-07, + "loss": 0.541, + "step": 29761 + }, + { + "epoch": 0.9121613338237097, + "grad_norm": 2.186004672035276, + "learning_rate": 2.0097454618009104e-07, + "loss": 0.5545, + "step": 29762 + }, + { + "epoch": 0.9121919823464509, + "grad_norm": 1.8384899395649175, + "learning_rate": 2.008352695875354e-07, + "loss": 0.6322, + "step": 29763 + }, + { + "epoch": 0.9122226308691921, + "grad_norm": 1.8212084991127482, + "learning_rate": 2.0069604028237932e-07, + "loss": 0.5451, + "step": 29764 + }, + { + "epoch": 0.9122532793919333, + "grad_norm": 1.7645413605420928, + "learning_rate": 2.005568582659928e-07, + "loss": 0.5656, + "step": 29765 + }, + { + "epoch": 0.9122839279146745, + "grad_norm": 2.0227947968119993, + 
"learning_rate": 2.0041772353974699e-07, + "loss": 0.5436, + "step": 29766 + }, + { + "epoch": 0.9123145764374158, + "grad_norm": 0.8325429958812915, + "learning_rate": 2.0027863610501297e-07, + "loss": 0.392, + "step": 29767 + }, + { + "epoch": 0.9123452249601569, + "grad_norm": 1.915560374256313, + "learning_rate": 2.0013959596316247e-07, + "loss": 0.5198, + "step": 29768 + }, + { + "epoch": 0.9123758734828982, + "grad_norm": 1.9493501624178233, + "learning_rate": 2.0000060311556434e-07, + "loss": 0.5302, + "step": 29769 + }, + { + "epoch": 0.9124065220056393, + "grad_norm": 0.7792174596407336, + "learning_rate": 1.998616575635881e-07, + "loss": 0.3878, + "step": 29770 + }, + { + "epoch": 0.9124371705283806, + "grad_norm": 2.0547526861170997, + "learning_rate": 1.9972275930860374e-07, + "loss": 0.5409, + "step": 29771 + }, + { + "epoch": 0.9124678190511217, + "grad_norm": 1.6240109494525612, + "learning_rate": 1.9958390835197849e-07, + "loss": 0.5515, + "step": 29772 + }, + { + "epoch": 0.912498467573863, + "grad_norm": 1.9001729828973304, + "learning_rate": 1.994451046950824e-07, + "loss": 0.525, + "step": 29773 + }, + { + "epoch": 0.9125291160966041, + "grad_norm": 1.7424331371716175, + "learning_rate": 1.9930634833928097e-07, + "loss": 0.5048, + "step": 29774 + }, + { + "epoch": 0.9125597646193453, + "grad_norm": 2.1320727175239806, + "learning_rate": 1.9916763928594206e-07, + "loss": 0.6796, + "step": 29775 + }, + { + "epoch": 0.9125904131420866, + "grad_norm": 1.8834746087663297, + "learning_rate": 1.99028977536434e-07, + "loss": 0.6557, + "step": 29776 + }, + { + "epoch": 0.9126210616648277, + "grad_norm": 1.7820165488137425, + "learning_rate": 1.9889036309212073e-07, + "loss": 0.4734, + "step": 29777 + }, + { + "epoch": 0.912651710187569, + "grad_norm": 1.7977143580567192, + "learning_rate": 1.9875179595436944e-07, + "loss": 0.5309, + "step": 29778 + }, + { + "epoch": 0.9126823587103101, + "grad_norm": 1.8487919931039938, + "learning_rate": 1.9861327612454519e-07, + "loss": 0.5372, + "step": 29779 + }, + { + "epoch": 0.9127130072330514, + "grad_norm": 0.812941334588587, + "learning_rate": 1.9847480360401296e-07, + "loss": 0.3808, + "step": 29780 + }, + { + "epoch": 0.9127436557557925, + "grad_norm": 0.8218441269788579, + "learning_rate": 1.9833637839413722e-07, + "loss": 0.4126, + "step": 29781 + }, + { + "epoch": 0.9127743042785338, + "grad_norm": 2.0719323143990387, + "learning_rate": 1.981980004962808e-07, + "loss": 0.6144, + "step": 29782 + }, + { + "epoch": 0.9128049528012749, + "grad_norm": 2.0590232813196305, + "learning_rate": 1.9805966991180869e-07, + "loss": 0.5381, + "step": 29783 + }, + { + "epoch": 0.9128356013240162, + "grad_norm": 1.8240641679882563, + "learning_rate": 1.979213866420837e-07, + "loss": 0.4679, + "step": 29784 + }, + { + "epoch": 0.9128662498467573, + "grad_norm": 1.723262818076001, + "learning_rate": 1.9778315068846754e-07, + "loss": 0.5735, + "step": 29785 + }, + { + "epoch": 0.9128968983694986, + "grad_norm": 1.9869367100195723, + "learning_rate": 1.9764496205232243e-07, + "loss": 0.6786, + "step": 29786 + }, + { + "epoch": 0.9129275468922398, + "grad_norm": 0.8088707707998462, + "learning_rate": 1.975068207350106e-07, + "loss": 0.3946, + "step": 29787 + }, + { + "epoch": 0.912958195414981, + "grad_norm": 1.9283352241503933, + "learning_rate": 1.9736872673789266e-07, + "loss": 0.4868, + "step": 29788 + }, + { + "epoch": 0.9129888439377222, + "grad_norm": 1.83302899690976, + "learning_rate": 1.9723068006232916e-07, + "loss": 0.6837, + "step": 29789 
+ }, + { + "epoch": 0.9130194924604634, + "grad_norm": 1.7970675721686629, + "learning_rate": 1.9709268070968069e-07, + "loss": 0.5632, + "step": 29790 + }, + { + "epoch": 0.9130501409832046, + "grad_norm": 0.7954142264585664, + "learning_rate": 1.9695472868130783e-07, + "loss": 0.3836, + "step": 29791 + }, + { + "epoch": 0.9130807895059458, + "grad_norm": 1.9359522282975246, + "learning_rate": 1.9681682397856838e-07, + "loss": 0.6127, + "step": 29792 + }, + { + "epoch": 0.913111438028687, + "grad_norm": 2.2590878801107834, + "learning_rate": 1.9667896660282127e-07, + "loss": 0.5834, + "step": 29793 + }, + { + "epoch": 0.9131420865514283, + "grad_norm": 2.351317183283431, + "learning_rate": 1.9654115655542594e-07, + "loss": 0.5481, + "step": 29794 + }, + { + "epoch": 0.9131727350741694, + "grad_norm": 1.8136005574468739, + "learning_rate": 1.9640339383773966e-07, + "loss": 0.5881, + "step": 29795 + }, + { + "epoch": 0.9132033835969107, + "grad_norm": 1.9042667226020809, + "learning_rate": 1.962656784511191e-07, + "loss": 0.5428, + "step": 29796 + }, + { + "epoch": 0.9132340321196518, + "grad_norm": 2.088770671860707, + "learning_rate": 1.9612801039692208e-07, + "loss": 0.5983, + "step": 29797 + }, + { + "epoch": 0.9132646806423931, + "grad_norm": 1.8793711831875297, + "learning_rate": 1.959903896765053e-07, + "loss": 0.596, + "step": 29798 + }, + { + "epoch": 0.9132953291651342, + "grad_norm": 1.9697076609240003, + "learning_rate": 1.9585281629122377e-07, + "loss": 0.6658, + "step": 29799 + }, + { + "epoch": 0.9133259776878755, + "grad_norm": 2.035563548197452, + "learning_rate": 1.957152902424342e-07, + "loss": 0.6685, + "step": 29800 + }, + { + "epoch": 0.9133566262106166, + "grad_norm": 2.0321567617238276, + "learning_rate": 1.9557781153149047e-07, + "loss": 0.6296, + "step": 29801 + }, + { + "epoch": 0.9133872747333579, + "grad_norm": 1.6577251639746828, + "learning_rate": 1.9544038015974876e-07, + "loss": 0.547, + "step": 29802 + }, + { + "epoch": 0.913417923256099, + "grad_norm": 1.9972269495068276, + "learning_rate": 1.953029961285624e-07, + "loss": 0.5047, + "step": 29803 + }, + { + "epoch": 0.9134485717788403, + "grad_norm": 2.1006773836782613, + "learning_rate": 1.9516565943928311e-07, + "loss": 0.5763, + "step": 29804 + }, + { + "epoch": 0.9134792203015815, + "grad_norm": 0.8308279808088622, + "learning_rate": 1.9502837009326758e-07, + "loss": 0.4001, + "step": 29805 + }, + { + "epoch": 0.9135098688243226, + "grad_norm": 0.7929519512724081, + "learning_rate": 1.9489112809186695e-07, + "loss": 0.3913, + "step": 29806 + }, + { + "epoch": 0.9135405173470639, + "grad_norm": 2.102046152418683, + "learning_rate": 1.947539334364329e-07, + "loss": 0.5259, + "step": 29807 + }, + { + "epoch": 0.913571165869805, + "grad_norm": 0.7826532360218452, + "learning_rate": 1.946167861283177e-07, + "loss": 0.399, + "step": 29808 + }, + { + "epoch": 0.9136018143925463, + "grad_norm": 0.8377151583169092, + "learning_rate": 1.9447968616887302e-07, + "loss": 0.4065, + "step": 29809 + }, + { + "epoch": 0.9136324629152874, + "grad_norm": 1.9881223925058247, + "learning_rate": 1.9434263355945004e-07, + "loss": 0.5627, + "step": 29810 + }, + { + "epoch": 0.9136631114380287, + "grad_norm": 2.1628398145495242, + "learning_rate": 1.9420562830139766e-07, + "loss": 0.5606, + "step": 29811 + }, + { + "epoch": 0.9136937599607698, + "grad_norm": 2.103519598432719, + "learning_rate": 1.9406867039606759e-07, + "loss": 0.6246, + "step": 29812 + }, + { + "epoch": 0.9137244084835111, + "grad_norm": 1.82608147729515, + 
"learning_rate": 1.939317598448087e-07, + "loss": 0.5385, + "step": 29813 + }, + { + "epoch": 0.9137550570062523, + "grad_norm": 1.7680381776354255, + "learning_rate": 1.9379489664897e-07, + "loss": 0.4757, + "step": 29814 + }, + { + "epoch": 0.9137857055289935, + "grad_norm": 1.6851213601562043, + "learning_rate": 1.9365808080989868e-07, + "loss": 0.5394, + "step": 29815 + }, + { + "epoch": 0.9138163540517347, + "grad_norm": 1.8670631199755037, + "learning_rate": 1.9352131232894477e-07, + "loss": 0.5622, + "step": 29816 + }, + { + "epoch": 0.9138470025744759, + "grad_norm": 0.8751007802666039, + "learning_rate": 1.9338459120745555e-07, + "loss": 0.427, + "step": 29817 + }, + { + "epoch": 0.9138776510972171, + "grad_norm": 1.755041932577439, + "learning_rate": 1.9324791744677772e-07, + "loss": 0.4708, + "step": 29818 + }, + { + "epoch": 0.9139082996199583, + "grad_norm": 2.001544816412576, + "learning_rate": 1.9311129104825744e-07, + "loss": 0.633, + "step": 29819 + }, + { + "epoch": 0.9139389481426995, + "grad_norm": 1.7204736663974673, + "learning_rate": 1.9297471201324136e-07, + "loss": 0.5115, + "step": 29820 + }, + { + "epoch": 0.9139695966654408, + "grad_norm": 1.8092536726844792, + "learning_rate": 1.9283818034307623e-07, + "loss": 0.5641, + "step": 29821 + }, + { + "epoch": 0.9140002451881819, + "grad_norm": 0.7843256799944103, + "learning_rate": 1.9270169603910593e-07, + "loss": 0.4001, + "step": 29822 + }, + { + "epoch": 0.9140308937109232, + "grad_norm": 1.9165975855449944, + "learning_rate": 1.9256525910267555e-07, + "loss": 0.4965, + "step": 29823 + }, + { + "epoch": 0.9140615422336643, + "grad_norm": 2.066624618962266, + "learning_rate": 1.9242886953513062e-07, + "loss": 0.555, + "step": 29824 + }, + { + "epoch": 0.9140921907564056, + "grad_norm": 0.8260440683147674, + "learning_rate": 1.9229252733781402e-07, + "loss": 0.3847, + "step": 29825 + }, + { + "epoch": 0.9141228392791467, + "grad_norm": 2.317418273391752, + "learning_rate": 1.9215623251206849e-07, + "loss": 0.5948, + "step": 29826 + }, + { + "epoch": 0.914153487801888, + "grad_norm": 0.7601698782303822, + "learning_rate": 1.92019985059238e-07, + "loss": 0.3934, + "step": 29827 + }, + { + "epoch": 0.9141841363246291, + "grad_norm": 1.7701121672153355, + "learning_rate": 1.9188378498066485e-07, + "loss": 0.5211, + "step": 29828 + }, + { + "epoch": 0.9142147848473704, + "grad_norm": 1.620564714308119, + "learning_rate": 1.9174763227769122e-07, + "loss": 0.5372, + "step": 29829 + }, + { + "epoch": 0.9142454333701115, + "grad_norm": 1.8376358072555965, + "learning_rate": 1.9161152695165775e-07, + "loss": 0.5301, + "step": 29830 + }, + { + "epoch": 0.9142760818928528, + "grad_norm": 1.834457572055738, + "learning_rate": 1.9147546900390667e-07, + "loss": 0.6397, + "step": 29831 + }, + { + "epoch": 0.914306730415594, + "grad_norm": 2.002561637182817, + "learning_rate": 1.9133945843577805e-07, + "loss": 0.5887, + "step": 29832 + }, + { + "epoch": 0.9143373789383352, + "grad_norm": 1.873545338203715, + "learning_rate": 1.9120349524861247e-07, + "loss": 0.5404, + "step": 29833 + }, + { + "epoch": 0.9143680274610764, + "grad_norm": 1.9313336282749736, + "learning_rate": 1.9106757944374831e-07, + "loss": 0.5583, + "step": 29834 + }, + { + "epoch": 0.9143986759838176, + "grad_norm": 1.7981164045321143, + "learning_rate": 1.9093171102252672e-07, + "loss": 0.5724, + "step": 29835 + }, + { + "epoch": 0.9144293245065588, + "grad_norm": 2.2944023907103865, + "learning_rate": 1.907958899862844e-07, + "loss": 0.5136, + "step": 29836 + 
}, + { + "epoch": 0.9144599730292999, + "grad_norm": 1.9038416729546161, + "learning_rate": 1.9066011633636196e-07, + "loss": 0.5592, + "step": 29837 + }, + { + "epoch": 0.9144906215520412, + "grad_norm": 1.9802487108386877, + "learning_rate": 1.9052439007409495e-07, + "loss": 0.5413, + "step": 29838 + }, + { + "epoch": 0.9145212700747823, + "grad_norm": 2.2843399975553784, + "learning_rate": 1.9038871120082125e-07, + "loss": 0.5018, + "step": 29839 + }, + { + "epoch": 0.9145519185975236, + "grad_norm": 2.102622555683787, + "learning_rate": 1.9025307971787921e-07, + "loss": 0.5591, + "step": 29840 + }, + { + "epoch": 0.9145825671202648, + "grad_norm": 1.9682382605658804, + "learning_rate": 1.9011749562660388e-07, + "loss": 0.6196, + "step": 29841 + }, + { + "epoch": 0.914613215643006, + "grad_norm": 0.8131069313309174, + "learning_rate": 1.8998195892833137e-07, + "loss": 0.4002, + "step": 29842 + }, + { + "epoch": 0.9146438641657472, + "grad_norm": 0.7782865208099427, + "learning_rate": 1.898464696243979e-07, + "loss": 0.3971, + "step": 29843 + }, + { + "epoch": 0.9146745126884884, + "grad_norm": 1.787427554632268, + "learning_rate": 1.8971102771613736e-07, + "loss": 0.4623, + "step": 29844 + }, + { + "epoch": 0.9147051612112296, + "grad_norm": 1.824743145200961, + "learning_rate": 1.8957563320488427e-07, + "loss": 0.5031, + "step": 29845 + }, + { + "epoch": 0.9147358097339708, + "grad_norm": 2.0084720262422397, + "learning_rate": 1.8944028609197419e-07, + "loss": 0.5417, + "step": 29846 + }, + { + "epoch": 0.914766458256712, + "grad_norm": 0.7728143895472296, + "learning_rate": 1.893049863787394e-07, + "loss": 0.4008, + "step": 29847 + }, + { + "epoch": 0.9147971067794533, + "grad_norm": 0.7958647392681752, + "learning_rate": 1.8916973406651385e-07, + "loss": 0.3721, + "step": 29848 + }, + { + "epoch": 0.9148277553021944, + "grad_norm": 1.9010737069301877, + "learning_rate": 1.8903452915662924e-07, + "loss": 0.5354, + "step": 29849 + }, + { + "epoch": 0.9148584038249357, + "grad_norm": 1.8352376364610106, + "learning_rate": 1.888993716504184e-07, + "loss": 0.5719, + "step": 29850 + }, + { + "epoch": 0.9148890523476768, + "grad_norm": 1.7654695012098736, + "learning_rate": 1.8876426154921357e-07, + "loss": 0.4857, + "step": 29851 + }, + { + "epoch": 0.9149197008704181, + "grad_norm": 0.8133822629484254, + "learning_rate": 1.8862919885434537e-07, + "loss": 0.4073, + "step": 29852 + }, + { + "epoch": 0.9149503493931592, + "grad_norm": 1.9488651936680543, + "learning_rate": 1.8849418356714388e-07, + "loss": 0.6207, + "step": 29853 + }, + { + "epoch": 0.9149809979159005, + "grad_norm": 0.7907619829001993, + "learning_rate": 1.8835921568894133e-07, + "loss": 0.3826, + "step": 29854 + }, + { + "epoch": 0.9150116464386416, + "grad_norm": 1.8069057454026776, + "learning_rate": 1.882242952210661e-07, + "loss": 0.5074, + "step": 29855 + }, + { + "epoch": 0.9150422949613829, + "grad_norm": 0.7556608075004828, + "learning_rate": 1.880894221648477e-07, + "loss": 0.3813, + "step": 29856 + }, + { + "epoch": 0.915072943484124, + "grad_norm": 1.858192694137059, + "learning_rate": 1.8795459652161618e-07, + "loss": 0.6511, + "step": 29857 + }, + { + "epoch": 0.9151035920068653, + "grad_norm": 2.0355058893089866, + "learning_rate": 1.878198182926988e-07, + "loss": 0.5255, + "step": 29858 + }, + { + "epoch": 0.9151342405296065, + "grad_norm": 1.7955447216761946, + "learning_rate": 1.8768508747942393e-07, + "loss": 0.6339, + "step": 29859 + }, + { + "epoch": 0.9151648890523477, + "grad_norm": 1.850423834490208, 
+ "learning_rate": 1.8755040408311941e-07, + "loss": 0.5086, + "step": 29860 + }, + { + "epoch": 0.9151955375750889, + "grad_norm": 0.7665965391927742, + "learning_rate": 1.874157681051114e-07, + "loss": 0.3917, + "step": 29861 + }, + { + "epoch": 0.9152261860978301, + "grad_norm": 2.07787818488601, + "learning_rate": 1.872811795467283e-07, + "loss": 0.592, + "step": 29862 + }, + { + "epoch": 0.9152568346205713, + "grad_norm": 2.0819718146861437, + "learning_rate": 1.8714663840929403e-07, + "loss": 0.6191, + "step": 29863 + }, + { + "epoch": 0.9152874831433125, + "grad_norm": 1.8248911732984692, + "learning_rate": 1.8701214469413588e-07, + "loss": 0.5991, + "step": 29864 + }, + { + "epoch": 0.9153181316660537, + "grad_norm": 1.9288650997973105, + "learning_rate": 1.8687769840257886e-07, + "loss": 0.6116, + "step": 29865 + }, + { + "epoch": 0.915348780188795, + "grad_norm": 2.007894687365859, + "learning_rate": 1.8674329953594693e-07, + "loss": 0.6744, + "step": 29866 + }, + { + "epoch": 0.9153794287115361, + "grad_norm": 1.888686699688477, + "learning_rate": 1.8660894809556464e-07, + "loss": 0.474, + "step": 29867 + }, + { + "epoch": 0.9154100772342773, + "grad_norm": 1.8232478375231507, + "learning_rate": 1.864746440827564e-07, + "loss": 0.6213, + "step": 29868 + }, + { + "epoch": 0.9154407257570185, + "grad_norm": 1.651192664806235, + "learning_rate": 1.8634038749884453e-07, + "loss": 0.5705, + "step": 29869 + }, + { + "epoch": 0.9154713742797597, + "grad_norm": 1.8955125029414923, + "learning_rate": 1.8620617834515299e-07, + "loss": 0.5601, + "step": 29870 + }, + { + "epoch": 0.9155020228025009, + "grad_norm": 1.9138241582202362, + "learning_rate": 1.8607201662300346e-07, + "loss": 0.541, + "step": 29871 + }, + { + "epoch": 0.9155326713252421, + "grad_norm": 0.849926327426312, + "learning_rate": 1.8593790233371766e-07, + "loss": 0.376, + "step": 29872 + }, + { + "epoch": 0.9155633198479833, + "grad_norm": 1.8951644740577365, + "learning_rate": 1.8580383547861792e-07, + "loss": 0.541, + "step": 29873 + }, + { + "epoch": 0.9155939683707245, + "grad_norm": 1.9242527376944571, + "learning_rate": 1.856698160590248e-07, + "loss": 0.5529, + "step": 29874 + }, + { + "epoch": 0.9156246168934657, + "grad_norm": 1.9089923785835765, + "learning_rate": 1.8553584407625834e-07, + "loss": 0.5474, + "step": 29875 + }, + { + "epoch": 0.9156552654162069, + "grad_norm": 1.8088991732278865, + "learning_rate": 1.8540191953163978e-07, + "loss": 0.6027, + "step": 29876 + }, + { + "epoch": 0.9156859139389482, + "grad_norm": 2.2285335699964572, + "learning_rate": 1.852680424264869e-07, + "loss": 0.6314, + "step": 29877 + }, + { + "epoch": 0.9157165624616893, + "grad_norm": 1.924940047709909, + "learning_rate": 1.8513421276212086e-07, + "loss": 0.5485, + "step": 29878 + }, + { + "epoch": 0.9157472109844306, + "grad_norm": 0.7908868024844797, + "learning_rate": 1.8500043053985894e-07, + "loss": 0.3861, + "step": 29879 + }, + { + "epoch": 0.9157778595071717, + "grad_norm": 1.9863357099960703, + "learning_rate": 1.8486669576101957e-07, + "loss": 0.6354, + "step": 29880 + }, + { + "epoch": 0.915808508029913, + "grad_norm": 1.906898444353477, + "learning_rate": 1.847330084269211e-07, + "loss": 0.5801, + "step": 29881 + }, + { + "epoch": 0.9158391565526541, + "grad_norm": 2.1082116295351296, + "learning_rate": 1.8459936853888028e-07, + "loss": 0.6394, + "step": 29882 + }, + { + "epoch": 0.9158698050753954, + "grad_norm": 2.236746010260709, + "learning_rate": 1.8446577609821325e-07, + "loss": 0.5585, + "step": 29883 + 
}, + { + "epoch": 0.9159004535981365, + "grad_norm": 1.7726134875365898, + "learning_rate": 1.843322311062379e-07, + "loss": 0.5309, + "step": 29884 + }, + { + "epoch": 0.9159311021208778, + "grad_norm": 2.0222924823151427, + "learning_rate": 1.8419873356426866e-07, + "loss": 0.6339, + "step": 29885 + }, + { + "epoch": 0.915961750643619, + "grad_norm": 1.6976302086934025, + "learning_rate": 1.8406528347362172e-07, + "loss": 0.4488, + "step": 29886 + }, + { + "epoch": 0.9159923991663602, + "grad_norm": 0.8448031019743651, + "learning_rate": 1.839318808356122e-07, + "loss": 0.3857, + "step": 29887 + }, + { + "epoch": 0.9160230476891014, + "grad_norm": 1.8923785641575195, + "learning_rate": 1.8379852565155343e-07, + "loss": 0.598, + "step": 29888 + }, + { + "epoch": 0.9160536962118426, + "grad_norm": 1.899553582131931, + "learning_rate": 1.836652179227605e-07, + "loss": 0.5924, + "step": 29889 + }, + { + "epoch": 0.9160843447345838, + "grad_norm": 2.108186321372636, + "learning_rate": 1.8353195765054566e-07, + "loss": 0.4471, + "step": 29890 + }, + { + "epoch": 0.916114993257325, + "grad_norm": 1.823651875884009, + "learning_rate": 1.8339874483622344e-07, + "loss": 0.6146, + "step": 29891 + }, + { + "epoch": 0.9161456417800662, + "grad_norm": 1.9840593231250891, + "learning_rate": 1.8326557948110611e-07, + "loss": 0.5067, + "step": 29892 + }, + { + "epoch": 0.9161762903028075, + "grad_norm": 1.9338520720822872, + "learning_rate": 1.831324615865049e-07, + "loss": 0.5455, + "step": 29893 + }, + { + "epoch": 0.9162069388255486, + "grad_norm": 1.830993745345836, + "learning_rate": 1.82999391153732e-07, + "loss": 0.6246, + "step": 29894 + }, + { + "epoch": 0.9162375873482899, + "grad_norm": 1.7195738110086303, + "learning_rate": 1.828663681840992e-07, + "loss": 0.4778, + "step": 29895 + }, + { + "epoch": 0.916268235871031, + "grad_norm": 1.9176348927173066, + "learning_rate": 1.8273339267891598e-07, + "loss": 0.5474, + "step": 29896 + }, + { + "epoch": 0.9162988843937723, + "grad_norm": 2.0003291183148333, + "learning_rate": 1.8260046463949298e-07, + "loss": 0.583, + "step": 29897 + }, + { + "epoch": 0.9163295329165134, + "grad_norm": 0.8013937719027161, + "learning_rate": 1.8246758406714082e-07, + "loss": 0.4073, + "step": 29898 + }, + { + "epoch": 0.9163601814392546, + "grad_norm": 0.8107480959432858, + "learning_rate": 1.8233475096316788e-07, + "loss": 0.4125, + "step": 29899 + }, + { + "epoch": 0.9163908299619958, + "grad_norm": 1.9889088520102665, + "learning_rate": 1.822019653288837e-07, + "loss": 0.646, + "step": 29900 + }, + { + "epoch": 0.916421478484737, + "grad_norm": 1.7370180482678679, + "learning_rate": 1.8206922716559493e-07, + "loss": 0.5184, + "step": 29901 + }, + { + "epoch": 0.9164521270074782, + "grad_norm": 1.90587649566498, + "learning_rate": 1.819365364746123e-07, + "loss": 0.5314, + "step": 29902 + }, + { + "epoch": 0.9164827755302194, + "grad_norm": 1.8348284480387997, + "learning_rate": 1.8180389325724135e-07, + "loss": 0.5348, + "step": 29903 + }, + { + "epoch": 0.9165134240529607, + "grad_norm": 0.807386427507119, + "learning_rate": 1.8167129751478886e-07, + "loss": 0.3991, + "step": 29904 + }, + { + "epoch": 0.9165440725757018, + "grad_norm": 2.107661199648564, + "learning_rate": 1.8153874924856207e-07, + "loss": 0.6146, + "step": 29905 + }, + { + "epoch": 0.9165747210984431, + "grad_norm": 1.9299936138053255, + "learning_rate": 1.814062484598672e-07, + "loss": 0.6056, + "step": 29906 + }, + { + "epoch": 0.9166053696211842, + "grad_norm": 2.1095606508557667, + 
"learning_rate": 1.812737951500093e-07, + "loss": 0.4446, + "step": 29907 + }, + { + "epoch": 0.9166360181439255, + "grad_norm": 1.9708110128186203, + "learning_rate": 1.8114138932029347e-07, + "loss": 0.6052, + "step": 29908 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.8300224173695621, + "learning_rate": 1.8100903097202415e-07, + "loss": 0.4005, + "step": 29909 + }, + { + "epoch": 0.9166973151894079, + "grad_norm": 1.8884469988384611, + "learning_rate": 1.8087672010650704e-07, + "loss": 0.5981, + "step": 29910 + }, + { + "epoch": 0.916727963712149, + "grad_norm": 1.7940027490770662, + "learning_rate": 1.8074445672504382e-07, + "loss": 0.5059, + "step": 29911 + }, + { + "epoch": 0.9167586122348903, + "grad_norm": 1.8056559156681649, + "learning_rate": 1.8061224082893791e-07, + "loss": 0.5462, + "step": 29912 + }, + { + "epoch": 0.9167892607576315, + "grad_norm": 1.7772004115917632, + "learning_rate": 1.8048007241949384e-07, + "loss": 0.5906, + "step": 29913 + }, + { + "epoch": 0.9168199092803727, + "grad_norm": 1.8295383446854867, + "learning_rate": 1.8034795149801276e-07, + "loss": 0.5076, + "step": 29914 + }, + { + "epoch": 0.9168505578031139, + "grad_norm": 0.8267780262616873, + "learning_rate": 1.802158780657959e-07, + "loss": 0.3966, + "step": 29915 + }, + { + "epoch": 0.9168812063258551, + "grad_norm": 0.7751601835117532, + "learning_rate": 1.800838521241449e-07, + "loss": 0.3884, + "step": 29916 + }, + { + "epoch": 0.9169118548485963, + "grad_norm": 1.9398736662491098, + "learning_rate": 1.7995187367436106e-07, + "loss": 0.5389, + "step": 29917 + }, + { + "epoch": 0.9169425033713375, + "grad_norm": 2.0729146313603475, + "learning_rate": 1.798199427177455e-07, + "loss": 0.5934, + "step": 29918 + }, + { + "epoch": 0.9169731518940787, + "grad_norm": 1.844961182876262, + "learning_rate": 1.7968805925559663e-07, + "loss": 0.504, + "step": 29919 + }, + { + "epoch": 0.91700380041682, + "grad_norm": 1.8933853842971278, + "learning_rate": 1.7955622328921451e-07, + "loss": 0.6435, + "step": 29920 + }, + { + "epoch": 0.9170344489395611, + "grad_norm": 2.0372937841157013, + "learning_rate": 1.794244348198987e-07, + "loss": 0.4687, + "step": 29921 + }, + { + "epoch": 0.9170650974623024, + "grad_norm": 2.217030217755701, + "learning_rate": 1.7929269384894755e-07, + "loss": 0.6346, + "step": 29922 + }, + { + "epoch": 0.9170957459850435, + "grad_norm": 1.9848104381679677, + "learning_rate": 1.7916100037765837e-07, + "loss": 0.5637, + "step": 29923 + }, + { + "epoch": 0.9171263945077848, + "grad_norm": 1.9326413561024731, + "learning_rate": 1.7902935440732962e-07, + "loss": 0.5398, + "step": 29924 + }, + { + "epoch": 0.9171570430305259, + "grad_norm": 2.092131969534347, + "learning_rate": 1.7889775593925795e-07, + "loss": 0.6468, + "step": 29925 + }, + { + "epoch": 0.9171876915532672, + "grad_norm": 2.1842064275426134, + "learning_rate": 1.7876620497474018e-07, + "loss": 0.5483, + "step": 29926 + }, + { + "epoch": 0.9172183400760083, + "grad_norm": 2.048325304120231, + "learning_rate": 1.7863470151507246e-07, + "loss": 0.6276, + "step": 29927 + }, + { + "epoch": 0.9172489885987496, + "grad_norm": 1.8491008169295664, + "learning_rate": 1.7850324556155096e-07, + "loss": 0.5962, + "step": 29928 + }, + { + "epoch": 0.9172796371214907, + "grad_norm": 1.71399178748151, + "learning_rate": 1.7837183711547078e-07, + "loss": 0.4805, + "step": 29929 + }, + { + "epoch": 0.9173102856442319, + "grad_norm": 1.8128748598794622, + "learning_rate": 1.7824047617812644e-07, + "loss": 0.5275, + "step": 
29930 + }, + { + "epoch": 0.9173409341669732, + "grad_norm": 2.0879555280411033, + "learning_rate": 1.7810916275081136e-07, + "loss": 0.6215, + "step": 29931 + }, + { + "epoch": 0.9173715826897143, + "grad_norm": 1.8377251849188252, + "learning_rate": 1.779778968348217e-07, + "loss": 0.4862, + "step": 29932 + }, + { + "epoch": 0.9174022312124556, + "grad_norm": 0.8655734865652366, + "learning_rate": 1.7784667843144977e-07, + "loss": 0.4097, + "step": 29933 + }, + { + "epoch": 0.9174328797351967, + "grad_norm": 1.9358243153256087, + "learning_rate": 1.777155075419873e-07, + "loss": 0.5935, + "step": 29934 + }, + { + "epoch": 0.917463528257938, + "grad_norm": 1.6950051289924655, + "learning_rate": 1.7758438416772827e-07, + "loss": 0.5932, + "step": 29935 + }, + { + "epoch": 0.9174941767806791, + "grad_norm": 1.9913793312890131, + "learning_rate": 1.7745330830996387e-07, + "loss": 0.683, + "step": 29936 + }, + { + "epoch": 0.9175248253034204, + "grad_norm": 0.7749855411992175, + "learning_rate": 1.773222799699864e-07, + "loss": 0.4053, + "step": 29937 + }, + { + "epoch": 0.9175554738261615, + "grad_norm": 1.846442631808184, + "learning_rate": 1.7719129914908594e-07, + "loss": 0.546, + "step": 29938 + }, + { + "epoch": 0.9175861223489028, + "grad_norm": 1.8215289907328782, + "learning_rate": 1.770603658485537e-07, + "loss": 0.4972, + "step": 29939 + }, + { + "epoch": 0.917616770871644, + "grad_norm": 1.8333935948157287, + "learning_rate": 1.7692948006968024e-07, + "loss": 0.5806, + "step": 29940 + }, + { + "epoch": 0.9176474193943852, + "grad_norm": 1.7937221426365295, + "learning_rate": 1.767986418137546e-07, + "loss": 0.5165, + "step": 29941 + }, + { + "epoch": 0.9176780679171264, + "grad_norm": 2.3140110462526646, + "learning_rate": 1.7666785108206462e-07, + "loss": 0.5789, + "step": 29942 + }, + { + "epoch": 0.9177087164398676, + "grad_norm": 1.9758785222561035, + "learning_rate": 1.7653710787590206e-07, + "loss": 0.5709, + "step": 29943 + }, + { + "epoch": 0.9177393649626088, + "grad_norm": 1.9769737533079852, + "learning_rate": 1.7640641219655252e-07, + "loss": 0.5616, + "step": 29944 + }, + { + "epoch": 0.91777001348535, + "grad_norm": 1.7740059621650013, + "learning_rate": 1.7627576404530554e-07, + "loss": 0.5565, + "step": 29945 + }, + { + "epoch": 0.9178006620080912, + "grad_norm": 0.7893420944573207, + "learning_rate": 1.7614516342344678e-07, + "loss": 0.3893, + "step": 29946 + }, + { + "epoch": 0.9178313105308324, + "grad_norm": 1.8774525592759355, + "learning_rate": 1.7601461033226407e-07, + "loss": 0.5285, + "step": 29947 + }, + { + "epoch": 0.9178619590535736, + "grad_norm": 1.8007394754912385, + "learning_rate": 1.7588410477304475e-07, + "loss": 0.5143, + "step": 29948 + }, + { + "epoch": 0.9178926075763149, + "grad_norm": 1.8775920229868, + "learning_rate": 1.7575364674707275e-07, + "loss": 0.6116, + "step": 29949 + }, + { + "epoch": 0.917923256099056, + "grad_norm": 1.8894885115640385, + "learning_rate": 1.7562323625563427e-07, + "loss": 0.5855, + "step": 29950 + }, + { + "epoch": 0.9179539046217973, + "grad_norm": 1.5737939202098676, + "learning_rate": 1.7549287330001498e-07, + "loss": 0.5055, + "step": 29951 + }, + { + "epoch": 0.9179845531445384, + "grad_norm": 1.818232268891024, + "learning_rate": 1.753625578814988e-07, + "loss": 0.5918, + "step": 29952 + }, + { + "epoch": 0.9180152016672797, + "grad_norm": 1.9404723050635013, + "learning_rate": 1.7523229000136866e-07, + "loss": 0.6725, + "step": 29953 + }, + { + "epoch": 0.9180458501900208, + "grad_norm": 
1.7233724940010537, + "learning_rate": 1.751020696609107e-07, + "loss": 0.4583, + "step": 29954 + }, + { + "epoch": 0.9180764987127621, + "grad_norm": 1.863811890994834, + "learning_rate": 1.749718968614056e-07, + "loss": 0.5948, + "step": 29955 + }, + { + "epoch": 0.9181071472355032, + "grad_norm": 2.2111717874956662, + "learning_rate": 1.7484177160413785e-07, + "loss": 0.6044, + "step": 29956 + }, + { + "epoch": 0.9181377957582445, + "grad_norm": 1.990187132663756, + "learning_rate": 1.7471169389038812e-07, + "loss": 0.4932, + "step": 29957 + }, + { + "epoch": 0.9181684442809857, + "grad_norm": 2.0807868686614617, + "learning_rate": 1.7458166372143815e-07, + "loss": 0.5695, + "step": 29958 + }, + { + "epoch": 0.9181990928037269, + "grad_norm": 1.9496194832721905, + "learning_rate": 1.744516810985708e-07, + "loss": 0.4997, + "step": 29959 + }, + { + "epoch": 0.9182297413264681, + "grad_norm": 1.8827040561658797, + "learning_rate": 1.7432174602306507e-07, + "loss": 0.6205, + "step": 29960 + }, + { + "epoch": 0.9182603898492092, + "grad_norm": 2.043493990415983, + "learning_rate": 1.7419185849620158e-07, + "loss": 0.4697, + "step": 29961 + }, + { + "epoch": 0.9182910383719505, + "grad_norm": 1.8430209553145536, + "learning_rate": 1.7406201851926097e-07, + "loss": 0.5791, + "step": 29962 + }, + { + "epoch": 0.9183216868946916, + "grad_norm": 1.8439695568247747, + "learning_rate": 1.7393222609352167e-07, + "loss": 0.6119, + "step": 29963 + }, + { + "epoch": 0.9183523354174329, + "grad_norm": 2.0391861881525966, + "learning_rate": 1.7380248122026322e-07, + "loss": 0.5131, + "step": 29964 + }, + { + "epoch": 0.918382983940174, + "grad_norm": 0.7514299420856326, + "learning_rate": 1.7367278390076404e-07, + "loss": 0.4011, + "step": 29965 + }, + { + "epoch": 0.9184136324629153, + "grad_norm": 1.9902693566785263, + "learning_rate": 1.735431341363014e-07, + "loss": 0.524, + "step": 29966 + }, + { + "epoch": 0.9184442809856564, + "grad_norm": 1.8278229364048026, + "learning_rate": 1.7341353192815325e-07, + "loss": 0.6025, + "step": 29967 + }, + { + "epoch": 0.9184749295083977, + "grad_norm": 1.7602220349404867, + "learning_rate": 1.7328397727759628e-07, + "loss": 0.5727, + "step": 29968 + }, + { + "epoch": 0.9185055780311389, + "grad_norm": 1.878385036113042, + "learning_rate": 1.7315447018590724e-07, + "loss": 0.5141, + "step": 29969 + }, + { + "epoch": 0.9185362265538801, + "grad_norm": 1.6681184692713042, + "learning_rate": 1.7302501065436295e-07, + "loss": 0.5333, + "step": 29970 + }, + { + "epoch": 0.9185668750766213, + "grad_norm": 2.0029761099041115, + "learning_rate": 1.7289559868423733e-07, + "loss": 0.5987, + "step": 29971 + }, + { + "epoch": 0.9185975235993625, + "grad_norm": 2.0291957021479203, + "learning_rate": 1.727662342768066e-07, + "loss": 0.6379, + "step": 29972 + }, + { + "epoch": 0.9186281721221037, + "grad_norm": 1.8144333895990579, + "learning_rate": 1.7263691743334587e-07, + "loss": 0.5901, + "step": 29973 + }, + { + "epoch": 0.9186588206448449, + "grad_norm": 2.0147866759982356, + "learning_rate": 1.7250764815512854e-07, + "loss": 0.5411, + "step": 29974 + }, + { + "epoch": 0.9186894691675861, + "grad_norm": 1.907783103987325, + "learning_rate": 1.7237842644342862e-07, + "loss": 0.5112, + "step": 29975 + }, + { + "epoch": 0.9187201176903274, + "grad_norm": 0.7845244213070537, + "learning_rate": 1.7224925229951838e-07, + "loss": 0.3726, + "step": 29976 + }, + { + "epoch": 0.9187507662130685, + "grad_norm": 1.9951646365500937, + "learning_rate": 1.721201257246724e-07, + 
"loss": 0.5935, + "step": 29977 + }, + { + "epoch": 0.9187814147358098, + "grad_norm": 1.8123838686003342, + "learning_rate": 1.7199104672016187e-07, + "loss": 0.6007, + "step": 29978 + }, + { + "epoch": 0.9188120632585509, + "grad_norm": 1.9338891218348713, + "learning_rate": 1.7186201528725855e-07, + "loss": 0.6056, + "step": 29979 + }, + { + "epoch": 0.9188427117812922, + "grad_norm": 1.7977638271421321, + "learning_rate": 1.7173303142723418e-07, + "loss": 0.5525, + "step": 29980 + }, + { + "epoch": 0.9188733603040333, + "grad_norm": 1.9817832150045476, + "learning_rate": 1.7160409514136e-07, + "loss": 0.5536, + "step": 29981 + }, + { + "epoch": 0.9189040088267746, + "grad_norm": 2.2972773564162394, + "learning_rate": 1.7147520643090554e-07, + "loss": 0.5752, + "step": 29982 + }, + { + "epoch": 0.9189346573495157, + "grad_norm": 0.8472819587060303, + "learning_rate": 1.7134636529714144e-07, + "loss": 0.3866, + "step": 29983 + }, + { + "epoch": 0.918965305872257, + "grad_norm": 1.8355002083022338, + "learning_rate": 1.712175717413378e-07, + "loss": 0.6101, + "step": 29984 + }, + { + "epoch": 0.9189959543949981, + "grad_norm": 0.8364051142968102, + "learning_rate": 1.7108882576476194e-07, + "loss": 0.416, + "step": 29985 + }, + { + "epoch": 0.9190266029177394, + "grad_norm": 1.9282604783846453, + "learning_rate": 1.709601273686845e-07, + "loss": 0.4454, + "step": 29986 + }, + { + "epoch": 0.9190572514404806, + "grad_norm": 1.7838835195682203, + "learning_rate": 1.7083147655437172e-07, + "loss": 0.6121, + "step": 29987 + }, + { + "epoch": 0.9190878999632218, + "grad_norm": 2.0617376999236785, + "learning_rate": 1.70702873323092e-07, + "loss": 0.5478, + "step": 29988 + }, + { + "epoch": 0.919118548485963, + "grad_norm": 1.8803356601796306, + "learning_rate": 1.7057431767611264e-07, + "loss": 0.5794, + "step": 29989 + }, + { + "epoch": 0.9191491970087042, + "grad_norm": 1.750155631247757, + "learning_rate": 1.7044580961469992e-07, + "loss": 0.5001, + "step": 29990 + }, + { + "epoch": 0.9191798455314454, + "grad_norm": 1.9742923969065593, + "learning_rate": 1.7031734914012056e-07, + "loss": 0.6258, + "step": 29991 + }, + { + "epoch": 0.9192104940541865, + "grad_norm": 2.985873264447969, + "learning_rate": 1.701889362536402e-07, + "loss": 0.6129, + "step": 29992 + }, + { + "epoch": 0.9192411425769278, + "grad_norm": 1.9193595020080418, + "learning_rate": 1.7006057095652395e-07, + "loss": 0.5121, + "step": 29993 + }, + { + "epoch": 0.919271791099669, + "grad_norm": 2.0662767623499882, + "learning_rate": 1.6993225325003638e-07, + "loss": 0.6058, + "step": 29994 + }, + { + "epoch": 0.9193024396224102, + "grad_norm": 1.9695479952479418, + "learning_rate": 1.6980398313544255e-07, + "loss": 0.5593, + "step": 29995 + }, + { + "epoch": 0.9193330881451514, + "grad_norm": 1.9154218238028915, + "learning_rate": 1.6967576061400592e-07, + "loss": 0.5517, + "step": 29996 + }, + { + "epoch": 0.9193637366678926, + "grad_norm": 1.9705838665661684, + "learning_rate": 1.6954758568698992e-07, + "loss": 0.5479, + "step": 29997 + }, + { + "epoch": 0.9193943851906338, + "grad_norm": 1.7880833254528465, + "learning_rate": 1.6941945835565686e-07, + "loss": 0.5893, + "step": 29998 + }, + { + "epoch": 0.919425033713375, + "grad_norm": 1.9401330922787172, + "learning_rate": 1.692913786212702e-07, + "loss": 0.561, + "step": 29999 + }, + { + "epoch": 0.9194556822361162, + "grad_norm": 2.156114348416436, + "learning_rate": 1.6916334648509225e-07, + "loss": 0.5146, + "step": 30000 + }, + { + "epoch": 0.9194863307588574, + 
"grad_norm": 1.8593846583369962, + "learning_rate": 1.690353619483831e-07, + "loss": 0.575, + "step": 30001 + }, + { + "epoch": 0.9195169792815986, + "grad_norm": 2.0996933031119602, + "learning_rate": 1.6890742501240453e-07, + "loss": 0.5913, + "step": 30002 + }, + { + "epoch": 0.9195476278043399, + "grad_norm": 1.694657846574413, + "learning_rate": 1.6877953567841777e-07, + "loss": 0.6016, + "step": 30003 + }, + { + "epoch": 0.919578276327081, + "grad_norm": 1.8258604952613309, + "learning_rate": 1.6865169394768176e-07, + "loss": 0.5323, + "step": 30004 + }, + { + "epoch": 0.9196089248498223, + "grad_norm": 2.079178727595316, + "learning_rate": 1.6852389982145722e-07, + "loss": 0.5442, + "step": 30005 + }, + { + "epoch": 0.9196395733725634, + "grad_norm": 1.9575972966063369, + "learning_rate": 1.6839615330100313e-07, + "loss": 0.5509, + "step": 30006 + }, + { + "epoch": 0.9196702218953047, + "grad_norm": 1.9779586541460523, + "learning_rate": 1.6826845438757733e-07, + "loss": 0.5084, + "step": 30007 + }, + { + "epoch": 0.9197008704180458, + "grad_norm": 1.7556303597396277, + "learning_rate": 1.6814080308243885e-07, + "loss": 0.4872, + "step": 30008 + }, + { + "epoch": 0.9197315189407871, + "grad_norm": 1.9571966150492626, + "learning_rate": 1.6801319938684502e-07, + "loss": 0.5561, + "step": 30009 + }, + { + "epoch": 0.9197621674635282, + "grad_norm": 2.1326907043990815, + "learning_rate": 1.678856433020537e-07, + "loss": 0.5448, + "step": 30010 + }, + { + "epoch": 0.9197928159862695, + "grad_norm": 1.7314794802256452, + "learning_rate": 1.6775813482932225e-07, + "loss": 0.4127, + "step": 30011 + }, + { + "epoch": 0.9198234645090106, + "grad_norm": 0.7651345764067198, + "learning_rate": 1.6763067396990517e-07, + "loss": 0.3699, + "step": 30012 + }, + { + "epoch": 0.9198541130317519, + "grad_norm": 2.010087459226924, + "learning_rate": 1.6750326072505984e-07, + "loss": 0.5307, + "step": 30013 + }, + { + "epoch": 0.9198847615544931, + "grad_norm": 1.941459578289682, + "learning_rate": 1.673758950960419e-07, + "loss": 0.6269, + "step": 30014 + }, + { + "epoch": 0.9199154100772343, + "grad_norm": 2.0445004459297236, + "learning_rate": 1.672485770841048e-07, + "loss": 0.6449, + "step": 30015 + }, + { + "epoch": 0.9199460585999755, + "grad_norm": 1.8442165576867562, + "learning_rate": 1.6712130669050476e-07, + "loss": 0.4744, + "step": 30016 + }, + { + "epoch": 0.9199767071227167, + "grad_norm": 1.8770841254594863, + "learning_rate": 1.6699408391649407e-07, + "loss": 0.5731, + "step": 30017 + }, + { + "epoch": 0.9200073556454579, + "grad_norm": 2.161507999885705, + "learning_rate": 1.6686690876332845e-07, + "loss": 0.6679, + "step": 30018 + }, + { + "epoch": 0.9200380041681991, + "grad_norm": 1.979714829442546, + "learning_rate": 1.6673978123225963e-07, + "loss": 0.6157, + "step": 30019 + }, + { + "epoch": 0.9200686526909403, + "grad_norm": 1.969544874008329, + "learning_rate": 1.6661270132454e-07, + "loss": 0.6503, + "step": 30020 + }, + { + "epoch": 0.9200993012136816, + "grad_norm": 2.315181038944726, + "learning_rate": 1.6648566904142183e-07, + "loss": 0.5086, + "step": 30021 + }, + { + "epoch": 0.9201299497364227, + "grad_norm": 1.862732801075558, + "learning_rate": 1.6635868438415748e-07, + "loss": 0.5873, + "step": 30022 + }, + { + "epoch": 0.9201605982591639, + "grad_norm": 1.9592712163190356, + "learning_rate": 1.662317473539976e-07, + "loss": 0.5564, + "step": 30023 + }, + { + "epoch": 0.9201912467819051, + "grad_norm": 1.846617112904417, + "learning_rate": 1.6610485795219288e-07, + 
"loss": 0.5228, + "step": 30024 + }, + { + "epoch": 0.9202218953046463, + "grad_norm": 1.929330686062303, + "learning_rate": 1.6597801617999454e-07, + "loss": 0.4819, + "step": 30025 + }, + { + "epoch": 0.9202525438273875, + "grad_norm": 1.6309367861396058, + "learning_rate": 1.6585122203865046e-07, + "loss": 0.5455, + "step": 30026 + }, + { + "epoch": 0.9202831923501287, + "grad_norm": 2.0626355228025615, + "learning_rate": 1.657244755294124e-07, + "loss": 0.5766, + "step": 30027 + }, + { + "epoch": 0.9203138408728699, + "grad_norm": 1.6474711071406505, + "learning_rate": 1.655977766535266e-07, + "loss": 0.5161, + "step": 30028 + }, + { + "epoch": 0.9203444893956111, + "grad_norm": 1.8122156387563908, + "learning_rate": 1.654711254122443e-07, + "loss": 0.5868, + "step": 30029 + }, + { + "epoch": 0.9203751379183523, + "grad_norm": 1.9105757294218313, + "learning_rate": 1.6534452180681115e-07, + "loss": 0.4359, + "step": 30030 + }, + { + "epoch": 0.9204057864410935, + "grad_norm": 0.7849252684243418, + "learning_rate": 1.652179658384756e-07, + "loss": 0.4101, + "step": 30031 + }, + { + "epoch": 0.9204364349638348, + "grad_norm": 2.148934657393557, + "learning_rate": 1.6509145750848444e-07, + "loss": 0.6533, + "step": 30032 + }, + { + "epoch": 0.9204670834865759, + "grad_norm": 2.1238638612309324, + "learning_rate": 1.649649968180844e-07, + "loss": 0.5641, + "step": 30033 + }, + { + "epoch": 0.9204977320093172, + "grad_norm": 1.9636049030160343, + "learning_rate": 1.6483858376852123e-07, + "loss": 0.6043, + "step": 30034 + }, + { + "epoch": 0.9205283805320583, + "grad_norm": 1.711662720907363, + "learning_rate": 1.6471221836104e-07, + "loss": 0.5187, + "step": 30035 + }, + { + "epoch": 0.9205590290547996, + "grad_norm": 1.7044924691327157, + "learning_rate": 1.6458590059688696e-07, + "loss": 0.5082, + "step": 30036 + }, + { + "epoch": 0.9205896775775407, + "grad_norm": 1.6683873638788085, + "learning_rate": 1.6445963047730663e-07, + "loss": 0.4625, + "step": 30037 + }, + { + "epoch": 0.920620326100282, + "grad_norm": 1.8557013714118882, + "learning_rate": 1.6433340800354302e-07, + "loss": 0.5393, + "step": 30038 + }, + { + "epoch": 0.9206509746230231, + "grad_norm": 1.9692370880580405, + "learning_rate": 1.6420723317683796e-07, + "loss": 0.6118, + "step": 30039 + }, + { + "epoch": 0.9206816231457644, + "grad_norm": 2.0267312123107577, + "learning_rate": 1.6408110599843763e-07, + "loss": 0.5806, + "step": 30040 + }, + { + "epoch": 0.9207122716685056, + "grad_norm": 2.234350277733038, + "learning_rate": 1.6395502646958385e-07, + "loss": 0.5704, + "step": 30041 + }, + { + "epoch": 0.9207429201912468, + "grad_norm": 2.0806959725003895, + "learning_rate": 1.638289945915178e-07, + "loss": 0.5637, + "step": 30042 + }, + { + "epoch": 0.920773568713988, + "grad_norm": 1.9711605523613058, + "learning_rate": 1.6370301036548186e-07, + "loss": 0.5822, + "step": 30043 + }, + { + "epoch": 0.9208042172367292, + "grad_norm": 2.129342451558588, + "learning_rate": 1.6357707379271782e-07, + "loss": 0.53, + "step": 30044 + }, + { + "epoch": 0.9208348657594704, + "grad_norm": 1.8634334841924873, + "learning_rate": 1.6345118487446687e-07, + "loss": 0.5385, + "step": 30045 + }, + { + "epoch": 0.9208655142822116, + "grad_norm": 1.7419973348208744, + "learning_rate": 1.6332534361196806e-07, + "loss": 0.6126, + "step": 30046 + }, + { + "epoch": 0.9208961628049528, + "grad_norm": 1.704001457584428, + "learning_rate": 1.6319955000646258e-07, + "loss": 0.4746, + "step": 30047 + }, + { + "epoch": 0.920926811327694, + 
"grad_norm": 0.7816022029876883, + "learning_rate": 1.630738040591895e-07, + "loss": 0.3982, + "step": 30048 + }, + { + "epoch": 0.9209574598504352, + "grad_norm": 2.0381573809104077, + "learning_rate": 1.6294810577138832e-07, + "loss": 0.5772, + "step": 30049 + }, + { + "epoch": 0.9209881083731765, + "grad_norm": 2.004991158067527, + "learning_rate": 1.6282245514429583e-07, + "loss": 0.5569, + "step": 30050 + }, + { + "epoch": 0.9210187568959176, + "grad_norm": 0.7973406183430272, + "learning_rate": 1.6269685217915222e-07, + "loss": 0.4062, + "step": 30051 + }, + { + "epoch": 0.9210494054186589, + "grad_norm": 1.8659150356846101, + "learning_rate": 1.625712968771942e-07, + "loss": 0.4984, + "step": 30052 + }, + { + "epoch": 0.9210800539414, + "grad_norm": 1.7765940909920486, + "learning_rate": 1.624457892396586e-07, + "loss": 0.5173, + "step": 30053 + }, + { + "epoch": 0.9211107024641412, + "grad_norm": 1.724575730563262, + "learning_rate": 1.6232032926778218e-07, + "loss": 0.5281, + "step": 30054 + }, + { + "epoch": 0.9211413509868824, + "grad_norm": 1.7899828009267333, + "learning_rate": 1.6219491696280122e-07, + "loss": 0.4934, + "step": 30055 + }, + { + "epoch": 0.9211719995096236, + "grad_norm": 1.8779048260828888, + "learning_rate": 1.6206955232595245e-07, + "loss": 0.523, + "step": 30056 + }, + { + "epoch": 0.9212026480323648, + "grad_norm": 0.8790113191026394, + "learning_rate": 1.6194423535846936e-07, + "loss": 0.3993, + "step": 30057 + }, + { + "epoch": 0.921233296555106, + "grad_norm": 2.0053704017049285, + "learning_rate": 1.6181896606158764e-07, + "loss": 0.4938, + "step": 30058 + }, + { + "epoch": 0.9212639450778473, + "grad_norm": 0.8227523844057286, + "learning_rate": 1.616937444365424e-07, + "loss": 0.3925, + "step": 30059 + }, + { + "epoch": 0.9212945936005884, + "grad_norm": 1.7683950236346433, + "learning_rate": 1.6156857048456654e-07, + "loss": 0.6005, + "step": 30060 + }, + { + "epoch": 0.9213252421233297, + "grad_norm": 1.9728951289658556, + "learning_rate": 1.614434442068924e-07, + "loss": 0.5877, + "step": 30061 + }, + { + "epoch": 0.9213558906460708, + "grad_norm": 1.8701459477693347, + "learning_rate": 1.6131836560475457e-07, + "loss": 0.4984, + "step": 30062 + }, + { + "epoch": 0.9213865391688121, + "grad_norm": 2.1013865404307106, + "learning_rate": 1.611933346793848e-07, + "loss": 0.6011, + "step": 30063 + }, + { + "epoch": 0.9214171876915532, + "grad_norm": 1.6824615916629904, + "learning_rate": 1.6106835143201605e-07, + "loss": 0.5507, + "step": 30064 + }, + { + "epoch": 0.9214478362142945, + "grad_norm": 1.697617023240348, + "learning_rate": 1.6094341586387785e-07, + "loss": 0.5692, + "step": 30065 + }, + { + "epoch": 0.9214784847370356, + "grad_norm": 0.803677674046571, + "learning_rate": 1.6081852797620257e-07, + "loss": 0.3841, + "step": 30066 + }, + { + "epoch": 0.9215091332597769, + "grad_norm": 1.8997716783471328, + "learning_rate": 1.6069368777022088e-07, + "loss": 0.5525, + "step": 30067 + }, + { + "epoch": 0.921539781782518, + "grad_norm": 2.0394819021631307, + "learning_rate": 1.6056889524716234e-07, + "loss": 0.577, + "step": 30068 + }, + { + "epoch": 0.9215704303052593, + "grad_norm": 1.786986005406287, + "learning_rate": 1.60444150408256e-07, + "loss": 0.5549, + "step": 30069 + }, + { + "epoch": 0.9216010788280005, + "grad_norm": 1.7970970760003244, + "learning_rate": 1.6031945325473253e-07, + "loss": 0.5747, + "step": 30070 + }, + { + "epoch": 0.9216317273507417, + "grad_norm": 1.7202712563450582, + "learning_rate": 1.6019480378781927e-07, + 
"loss": 0.5286, + "step": 30071 + }, + { + "epoch": 0.9216623758734829, + "grad_norm": 1.8769157938048997, + "learning_rate": 1.600702020087458e-07, + "loss": 0.579, + "step": 30072 + }, + { + "epoch": 0.9216930243962241, + "grad_norm": 1.8876107000271272, + "learning_rate": 1.5994564791873835e-07, + "loss": 0.6286, + "step": 30073 + }, + { + "epoch": 0.9217236729189653, + "grad_norm": 1.7829164403432887, + "learning_rate": 1.5982114151902428e-07, + "loss": 0.5656, + "step": 30074 + }, + { + "epoch": 0.9217543214417065, + "grad_norm": 1.9120772750447348, + "learning_rate": 1.5969668281083207e-07, + "loss": 0.5472, + "step": 30075 + }, + { + "epoch": 0.9217849699644477, + "grad_norm": 1.7921603192966507, + "learning_rate": 1.595722717953857e-07, + "loss": 0.6409, + "step": 30076 + }, + { + "epoch": 0.921815618487189, + "grad_norm": 1.7487789463988437, + "learning_rate": 1.5944790847391255e-07, + "loss": 0.5291, + "step": 30077 + }, + { + "epoch": 0.9218462670099301, + "grad_norm": 1.8855362513730693, + "learning_rate": 1.5932359284763832e-07, + "loss": 0.5728, + "step": 30078 + }, + { + "epoch": 0.9218769155326714, + "grad_norm": 1.6678820152434681, + "learning_rate": 1.59199324917787e-07, + "loss": 0.4966, + "step": 30079 + }, + { + "epoch": 0.9219075640554125, + "grad_norm": 1.9318158907920318, + "learning_rate": 1.5907510468558264e-07, + "loss": 0.5851, + "step": 30080 + }, + { + "epoch": 0.9219382125781538, + "grad_norm": 1.9225751394894546, + "learning_rate": 1.5895093215225089e-07, + "loss": 0.5585, + "step": 30081 + }, + { + "epoch": 0.9219688611008949, + "grad_norm": 1.849049511489584, + "learning_rate": 1.5882680731901356e-07, + "loss": 0.5223, + "step": 30082 + }, + { + "epoch": 0.9219995096236362, + "grad_norm": 2.1919309498010775, + "learning_rate": 1.587027301870947e-07, + "loss": 0.6075, + "step": 30083 + }, + { + "epoch": 0.9220301581463773, + "grad_norm": 0.823093088490152, + "learning_rate": 1.5857870075771608e-07, + "loss": 0.3891, + "step": 30084 + }, + { + "epoch": 0.9220608066691185, + "grad_norm": 0.8055653224025559, + "learning_rate": 1.5845471903210063e-07, + "loss": 0.3859, + "step": 30085 + }, + { + "epoch": 0.9220914551918598, + "grad_norm": 0.8141301082443593, + "learning_rate": 1.583307850114696e-07, + "loss": 0.3948, + "step": 30086 + }, + { + "epoch": 0.9221221037146009, + "grad_norm": 4.2793180443249375, + "learning_rate": 1.5820689869704364e-07, + "loss": 0.5639, + "step": 30087 + }, + { + "epoch": 0.9221527522373422, + "grad_norm": 1.9918385751086995, + "learning_rate": 1.5808306009004458e-07, + "loss": 0.5376, + "step": 30088 + }, + { + "epoch": 0.9221834007600833, + "grad_norm": 1.8812482625853963, + "learning_rate": 1.5795926919169201e-07, + "loss": 0.6199, + "step": 30089 + }, + { + "epoch": 0.9222140492828246, + "grad_norm": 1.854982902743777, + "learning_rate": 1.5783552600320495e-07, + "loss": 0.6074, + "step": 30090 + }, + { + "epoch": 0.9222446978055657, + "grad_norm": 1.9318227015661167, + "learning_rate": 1.5771183052580353e-07, + "loss": 0.6342, + "step": 30091 + }, + { + "epoch": 0.922275346328307, + "grad_norm": 1.9033360811447548, + "learning_rate": 1.575881827607073e-07, + "loss": 0.5885, + "step": 30092 + }, + { + "epoch": 0.9223059948510481, + "grad_norm": 1.7919160340755327, + "learning_rate": 1.5746458270913258e-07, + "loss": 0.5448, + "step": 30093 + }, + { + "epoch": 0.9223366433737894, + "grad_norm": 2.1177785862873173, + "learning_rate": 1.5734103037229942e-07, + "loss": 0.6564, + "step": 30094 + }, + { + "epoch": 0.9223672918965306, 
+ "grad_norm": 1.8827045492074608, + "learning_rate": 1.5721752575142357e-07, + "loss": 0.5818, + "step": 30095 + }, + { + "epoch": 0.9223979404192718, + "grad_norm": 2.0551106371247476, + "learning_rate": 1.5709406884772182e-07, + "loss": 0.5168, + "step": 30096 + }, + { + "epoch": 0.922428588942013, + "grad_norm": 1.8905468721905765, + "learning_rate": 1.5697065966241266e-07, + "loss": 0.5165, + "step": 30097 + }, + { + "epoch": 0.9224592374647542, + "grad_norm": 2.0117169821277274, + "learning_rate": 1.5684729819671008e-07, + "loss": 0.5275, + "step": 30098 + }, + { + "epoch": 0.9224898859874954, + "grad_norm": 1.6093235328049154, + "learning_rate": 1.5672398445182978e-07, + "loss": 0.4846, + "step": 30099 + }, + { + "epoch": 0.9225205345102366, + "grad_norm": 0.8107951988203376, + "learning_rate": 1.5660071842898806e-07, + "loss": 0.397, + "step": 30100 + }, + { + "epoch": 0.9225511830329778, + "grad_norm": 2.029522805756595, + "learning_rate": 1.5647750012939833e-07, + "loss": 0.5279, + "step": 30101 + }, + { + "epoch": 0.922581831555719, + "grad_norm": 1.5877579511616549, + "learning_rate": 1.5635432955427464e-07, + "loss": 0.458, + "step": 30102 + }, + { + "epoch": 0.9226124800784602, + "grad_norm": 1.8972530179758766, + "learning_rate": 1.5623120670483215e-07, + "loss": 0.6016, + "step": 30103 + }, + { + "epoch": 0.9226431286012015, + "grad_norm": 2.044835222057019, + "learning_rate": 1.5610813158228156e-07, + "loss": 0.6241, + "step": 30104 + }, + { + "epoch": 0.9226737771239426, + "grad_norm": 0.8086664561428432, + "learning_rate": 1.5598510418783796e-07, + "loss": 0.393, + "step": 30105 + }, + { + "epoch": 0.9227044256466839, + "grad_norm": 1.9463871333850724, + "learning_rate": 1.558621245227121e-07, + "loss": 0.5674, + "step": 30106 + }, + { + "epoch": 0.922735074169425, + "grad_norm": 2.0939090888464373, + "learning_rate": 1.557391925881163e-07, + "loss": 0.5982, + "step": 30107 + }, + { + "epoch": 0.9227657226921663, + "grad_norm": 1.8175392753348563, + "learning_rate": 1.5561630838526186e-07, + "loss": 0.6246, + "step": 30108 + }, + { + "epoch": 0.9227963712149074, + "grad_norm": 2.104317487353734, + "learning_rate": 1.554934719153589e-07, + "loss": 0.7009, + "step": 30109 + }, + { + "epoch": 0.9228270197376487, + "grad_norm": 2.0034278381306705, + "learning_rate": 1.553706831796181e-07, + "loss": 0.6504, + "step": 30110 + }, + { + "epoch": 0.9228576682603898, + "grad_norm": 1.8023647728406444, + "learning_rate": 1.5524794217925077e-07, + "loss": 0.5317, + "step": 30111 + }, + { + "epoch": 0.9228883167831311, + "grad_norm": 0.8139807872267302, + "learning_rate": 1.5512524891546366e-07, + "loss": 0.3972, + "step": 30112 + }, + { + "epoch": 0.9229189653058723, + "grad_norm": 1.7840419614609537, + "learning_rate": 1.550026033894675e-07, + "loss": 0.6119, + "step": 30113 + }, + { + "epoch": 0.9229496138286135, + "grad_norm": 1.7966565715202565, + "learning_rate": 1.548800056024713e-07, + "loss": 0.5003, + "step": 30114 + }, + { + "epoch": 0.9229802623513547, + "grad_norm": 1.906185639590162, + "learning_rate": 1.5475745555568077e-07, + "loss": 0.4824, + "step": 30115 + }, + { + "epoch": 0.9230109108740958, + "grad_norm": 1.9067665503503692, + "learning_rate": 1.5463495325030554e-07, + "loss": 0.587, + "step": 30116 + }, + { + "epoch": 0.9230415593968371, + "grad_norm": 2.079167665646108, + "learning_rate": 1.5451249868755126e-07, + "loss": 0.6266, + "step": 30117 + }, + { + "epoch": 0.9230722079195782, + "grad_norm": 1.6625177265386037, + "learning_rate": 1.543900918686253e-07, 
+ "loss": 0.5607, + "step": 30118 + }, + { + "epoch": 0.9231028564423195, + "grad_norm": 2.1581239878301606, + "learning_rate": 1.5426773279473395e-07, + "loss": 0.5146, + "step": 30119 + }, + { + "epoch": 0.9231335049650606, + "grad_norm": 2.0926545754316255, + "learning_rate": 1.5414542146708234e-07, + "loss": 0.5674, + "step": 30120 + }, + { + "epoch": 0.9231641534878019, + "grad_norm": 1.7560449110990752, + "learning_rate": 1.5402315788687506e-07, + "loss": 0.5913, + "step": 30121 + }, + { + "epoch": 0.923194802010543, + "grad_norm": 2.028102952158552, + "learning_rate": 1.5390094205531893e-07, + "loss": 0.5499, + "step": 30122 + }, + { + "epoch": 0.9232254505332843, + "grad_norm": 1.9496569897310654, + "learning_rate": 1.5377877397361518e-07, + "loss": 0.5964, + "step": 30123 + }, + { + "epoch": 0.9232560990560255, + "grad_norm": 2.076696473832834, + "learning_rate": 1.536566536429701e-07, + "loss": 0.5628, + "step": 30124 + }, + { + "epoch": 0.9232867475787667, + "grad_norm": 1.8609044413074927, + "learning_rate": 1.5353458106458551e-07, + "loss": 0.5123, + "step": 30125 + }, + { + "epoch": 0.9233173961015079, + "grad_norm": 2.168948358435581, + "learning_rate": 1.5341255623966488e-07, + "loss": 0.5777, + "step": 30126 + }, + { + "epoch": 0.9233480446242491, + "grad_norm": 1.9525324442159229, + "learning_rate": 1.5329057916941114e-07, + "loss": 0.5419, + "step": 30127 + }, + { + "epoch": 0.9233786931469903, + "grad_norm": 1.7231696837608799, + "learning_rate": 1.5316864985502445e-07, + "loss": 0.592, + "step": 30128 + }, + { + "epoch": 0.9234093416697315, + "grad_norm": 2.1890650957182154, + "learning_rate": 1.5304676829770716e-07, + "loss": 0.6128, + "step": 30129 + }, + { + "epoch": 0.9234399901924727, + "grad_norm": 2.072484186567036, + "learning_rate": 1.5292493449866053e-07, + "loss": 0.6195, + "step": 30130 + }, + { + "epoch": 0.923470638715214, + "grad_norm": 0.7816452377874767, + "learning_rate": 1.5280314845908474e-07, + "loss": 0.3946, + "step": 30131 + }, + { + "epoch": 0.9235012872379551, + "grad_norm": 1.816546993625056, + "learning_rate": 1.5268141018017933e-07, + "loss": 0.5642, + "step": 30132 + }, + { + "epoch": 0.9235319357606964, + "grad_norm": 2.0529956622166146, + "learning_rate": 1.5255971966314508e-07, + "loss": 0.5904, + "step": 30133 + }, + { + "epoch": 0.9235625842834375, + "grad_norm": 0.828286370442792, + "learning_rate": 1.5243807690917932e-07, + "loss": 0.4092, + "step": 30134 + }, + { + "epoch": 0.9235932328061788, + "grad_norm": 1.7250871554940077, + "learning_rate": 1.5231648191948224e-07, + "loss": 0.5071, + "step": 30135 + }, + { + "epoch": 0.9236238813289199, + "grad_norm": 1.8501128547664256, + "learning_rate": 1.5219493469525004e-07, + "loss": 0.6195, + "step": 30136 + }, + { + "epoch": 0.9236545298516612, + "grad_norm": 0.7466819577064897, + "learning_rate": 1.5207343523768237e-07, + "loss": 0.4014, + "step": 30137 + }, + { + "epoch": 0.9236851783744023, + "grad_norm": 1.889683029164006, + "learning_rate": 1.5195198354797547e-07, + "loss": 0.6052, + "step": 30138 + }, + { + "epoch": 0.9237158268971436, + "grad_norm": 0.7960577879430237, + "learning_rate": 1.5183057962732617e-07, + "loss": 0.3923, + "step": 30139 + }, + { + "epoch": 0.9237464754198847, + "grad_norm": 1.9288619084155239, + "learning_rate": 1.5170922347693017e-07, + "loss": 0.4997, + "step": 30140 + }, + { + "epoch": 0.923777123942626, + "grad_norm": 1.875906870334187, + "learning_rate": 1.5158791509798432e-07, + "loss": 0.5556, + "step": 30141 + }, + { + "epoch": 
0.9238077724653672, + "grad_norm": 1.9077038312599983, + "learning_rate": 1.5146665449168262e-07, + "loss": 0.5479, + "step": 30142 + }, + { + "epoch": 0.9238384209881084, + "grad_norm": 2.0613679815857098, + "learning_rate": 1.5134544165922083e-07, + "loss": 0.4802, + "step": 30143 + }, + { + "epoch": 0.9238690695108496, + "grad_norm": 1.969083046941238, + "learning_rate": 1.5122427660179295e-07, + "loss": 0.6367, + "step": 30144 + }, + { + "epoch": 0.9238997180335908, + "grad_norm": 0.7728356749465064, + "learning_rate": 1.5110315932059304e-07, + "loss": 0.3792, + "step": 30145 + }, + { + "epoch": 0.923930366556332, + "grad_norm": 0.7589528953416846, + "learning_rate": 1.5098208981681462e-07, + "loss": 0.3686, + "step": 30146 + }, + { + "epoch": 0.9239610150790731, + "grad_norm": 0.7502341403060475, + "learning_rate": 1.5086106809164947e-07, + "loss": 0.3637, + "step": 30147 + }, + { + "epoch": 0.9239916636018144, + "grad_norm": 1.9448035248794522, + "learning_rate": 1.5074009414629165e-07, + "loss": 0.6318, + "step": 30148 + }, + { + "epoch": 0.9240223121245555, + "grad_norm": 0.809916829097725, + "learning_rate": 1.5061916798193242e-07, + "loss": 0.395, + "step": 30149 + }, + { + "epoch": 0.9240529606472968, + "grad_norm": 2.005212361313888, + "learning_rate": 1.5049828959976308e-07, + "loss": 0.5315, + "step": 30150 + }, + { + "epoch": 0.924083609170038, + "grad_norm": 1.9303711667823589, + "learning_rate": 1.5037745900097435e-07, + "loss": 0.5821, + "step": 30151 + }, + { + "epoch": 0.9241142576927792, + "grad_norm": 1.8570418202554857, + "learning_rate": 1.5025667618675855e-07, + "loss": 0.5941, + "step": 30152 + }, + { + "epoch": 0.9241449062155204, + "grad_norm": 1.6577252771992483, + "learning_rate": 1.5013594115830367e-07, + "loss": 0.4957, + "step": 30153 + }, + { + "epoch": 0.9241755547382616, + "grad_norm": 1.9522900175307811, + "learning_rate": 1.5001525391679982e-07, + "loss": 0.5467, + "step": 30154 + }, + { + "epoch": 0.9242062032610028, + "grad_norm": 2.0203510675224607, + "learning_rate": 1.4989461446343723e-07, + "loss": 0.5697, + "step": 30155 + }, + { + "epoch": 0.924236851783744, + "grad_norm": 1.9865433125300378, + "learning_rate": 1.4977402279940434e-07, + "loss": 0.6144, + "step": 30156 + }, + { + "epoch": 0.9242675003064852, + "grad_norm": 0.813427335235647, + "learning_rate": 1.496534789258891e-07, + "loss": 0.4178, + "step": 30157 + }, + { + "epoch": 0.9242981488292265, + "grad_norm": 2.112096466115649, + "learning_rate": 1.495329828440778e-07, + "loss": 0.604, + "step": 30158 + }, + { + "epoch": 0.9243287973519676, + "grad_norm": 0.7494659582510629, + "learning_rate": 1.4941253455516002e-07, + "loss": 0.4061, + "step": 30159 + }, + { + "epoch": 0.9243594458747089, + "grad_norm": 1.9930114708706106, + "learning_rate": 1.4929213406032205e-07, + "loss": 0.5752, + "step": 30160 + }, + { + "epoch": 0.92439009439745, + "grad_norm": 1.9187386147556982, + "learning_rate": 1.4917178136074906e-07, + "loss": 0.514, + "step": 30161 + }, + { + "epoch": 0.9244207429201913, + "grad_norm": 1.9759344721057268, + "learning_rate": 1.4905147645762785e-07, + "loss": 0.5339, + "step": 30162 + }, + { + "epoch": 0.9244513914429324, + "grad_norm": 1.8790990815168644, + "learning_rate": 1.489312193521436e-07, + "loss": 0.5899, + "step": 30163 + }, + { + "epoch": 0.9244820399656737, + "grad_norm": 1.9669894552494334, + "learning_rate": 1.488110100454815e-07, + "loss": 0.6025, + "step": 30164 + }, + { + "epoch": 0.9245126884884148, + "grad_norm": 1.8766491594052568, + "learning_rate": 
1.4869084853882497e-07, + "loss": 0.5155, + "step": 30165 + }, + { + "epoch": 0.9245433370111561, + "grad_norm": 1.853859217775322, + "learning_rate": 1.4857073483335927e-07, + "loss": 0.5803, + "step": 30166 + }, + { + "epoch": 0.9245739855338972, + "grad_norm": 0.798773937536044, + "learning_rate": 1.4845066893026783e-07, + "loss": 0.403, + "step": 30167 + }, + { + "epoch": 0.9246046340566385, + "grad_norm": 1.718464066865541, + "learning_rate": 1.4833065083073305e-07, + "loss": 0.5356, + "step": 30168 + }, + { + "epoch": 0.9246352825793797, + "grad_norm": 2.189376851230975, + "learning_rate": 1.4821068053593734e-07, + "loss": 0.6016, + "step": 30169 + }, + { + "epoch": 0.9246659311021209, + "grad_norm": 2.1692409815650513, + "learning_rate": 1.4809075804706363e-07, + "loss": 0.5355, + "step": 30170 + }, + { + "epoch": 0.9246965796248621, + "grad_norm": 2.1327098999680745, + "learning_rate": 1.4797088336529264e-07, + "loss": 0.5964, + "step": 30171 + }, + { + "epoch": 0.9247272281476033, + "grad_norm": 1.6881643053418685, + "learning_rate": 1.478510564918062e-07, + "loss": 0.5684, + "step": 30172 + }, + { + "epoch": 0.9247578766703445, + "grad_norm": 1.7800173528736154, + "learning_rate": 1.4773127742778503e-07, + "loss": 0.5591, + "step": 30173 + }, + { + "epoch": 0.9247885251930857, + "grad_norm": 1.7460982254033355, + "learning_rate": 1.476115461744082e-07, + "loss": 0.5697, + "step": 30174 + }, + { + "epoch": 0.9248191737158269, + "grad_norm": 2.2093966036636874, + "learning_rate": 1.4749186273285755e-07, + "loss": 0.6592, + "step": 30175 + }, + { + "epoch": 0.9248498222385682, + "grad_norm": 0.7440157444590293, + "learning_rate": 1.4737222710431098e-07, + "loss": 0.3953, + "step": 30176 + }, + { + "epoch": 0.9248804707613093, + "grad_norm": 1.9662428123348334, + "learning_rate": 1.472526392899465e-07, + "loss": 0.5057, + "step": 30177 + }, + { + "epoch": 0.9249111192840505, + "grad_norm": 2.064102536748657, + "learning_rate": 1.471330992909442e-07, + "loss": 0.6365, + "step": 30178 + }, + { + "epoch": 0.9249417678067917, + "grad_norm": 0.8353309828048852, + "learning_rate": 1.4701360710848102e-07, + "loss": 0.4085, + "step": 30179 + }, + { + "epoch": 0.9249724163295329, + "grad_norm": 1.8647089526934308, + "learning_rate": 1.4689416274373426e-07, + "loss": 0.576, + "step": 30180 + }, + { + "epoch": 0.9250030648522741, + "grad_norm": 1.8088253216750603, + "learning_rate": 1.4677476619788078e-07, + "loss": 0.4856, + "step": 30181 + }, + { + "epoch": 0.9250337133750153, + "grad_norm": 1.954385962302232, + "learning_rate": 1.4665541747209743e-07, + "loss": 0.6225, + "step": 30182 + }, + { + "epoch": 0.9250643618977565, + "grad_norm": 2.04842766517975, + "learning_rate": 1.465361165675605e-07, + "loss": 0.558, + "step": 30183 + }, + { + "epoch": 0.9250950104204977, + "grad_norm": 1.8526110598074648, + "learning_rate": 1.464168634854446e-07, + "loss": 0.5009, + "step": 30184 + }, + { + "epoch": 0.925125658943239, + "grad_norm": 1.8899733716511031, + "learning_rate": 1.4629765822692487e-07, + "loss": 0.5767, + "step": 30185 + }, + { + "epoch": 0.9251563074659801, + "grad_norm": 1.9499083526958216, + "learning_rate": 1.4617850079317707e-07, + "loss": 0.6195, + "step": 30186 + }, + { + "epoch": 0.9251869559887214, + "grad_norm": 0.8085355715905059, + "learning_rate": 1.4605939118537415e-07, + "loss": 0.399, + "step": 30187 + }, + { + "epoch": 0.9252176045114625, + "grad_norm": 1.931720083292043, + "learning_rate": 1.4594032940468905e-07, + "loss": 0.6155, + "step": 30188 + }, + { + "epoch": 
0.9252482530342038, + "grad_norm": 1.7458851074935169, + "learning_rate": 1.4582131545229693e-07, + "loss": 0.5632, + "step": 30189 + }, + { + "epoch": 0.9252789015569449, + "grad_norm": 2.111131942845891, + "learning_rate": 1.4570234932936912e-07, + "loss": 0.5993, + "step": 30190 + }, + { + "epoch": 0.9253095500796862, + "grad_norm": 1.8305973669911129, + "learning_rate": 1.455834310370785e-07, + "loss": 0.5309, + "step": 30191 + }, + { + "epoch": 0.9253401986024273, + "grad_norm": 2.0707809253801504, + "learning_rate": 1.4546456057659532e-07, + "loss": 0.6729, + "step": 30192 + }, + { + "epoch": 0.9253708471251686, + "grad_norm": 1.9974764961210545, + "learning_rate": 1.4534573794909246e-07, + "loss": 0.5771, + "step": 30193 + }, + { + "epoch": 0.9254014956479097, + "grad_norm": 0.7987826708638458, + "learning_rate": 1.452269631557407e-07, + "loss": 0.388, + "step": 30194 + }, + { + "epoch": 0.925432144170651, + "grad_norm": 2.1174615645774653, + "learning_rate": 1.451082361977091e-07, + "loss": 0.5074, + "step": 30195 + }, + { + "epoch": 0.9254627926933922, + "grad_norm": 1.9555763944709974, + "learning_rate": 1.4498955707616836e-07, + "loss": 0.541, + "step": 30196 + }, + { + "epoch": 0.9254934412161334, + "grad_norm": 2.054120319697463, + "learning_rate": 1.4487092579228812e-07, + "loss": 0.5059, + "step": 30197 + }, + { + "epoch": 0.9255240897388746, + "grad_norm": 1.8904587900028589, + "learning_rate": 1.4475234234723633e-07, + "loss": 0.6307, + "step": 30198 + }, + { + "epoch": 0.9255547382616158, + "grad_norm": 2.061805539864277, + "learning_rate": 1.4463380674218208e-07, + "loss": 0.5855, + "step": 30199 + }, + { + "epoch": 0.925585386784357, + "grad_norm": 1.8398171156481524, + "learning_rate": 1.4451531897829384e-07, + "loss": 0.5912, + "step": 30200 + }, + { + "epoch": 0.9256160353070982, + "grad_norm": 2.119812453388559, + "learning_rate": 1.443968790567374e-07, + "loss": 0.6049, + "step": 30201 + }, + { + "epoch": 0.9256466838298394, + "grad_norm": 1.744901977307004, + "learning_rate": 1.4427848697868175e-07, + "loss": 0.5163, + "step": 30202 + }, + { + "epoch": 0.9256773323525807, + "grad_norm": 0.789061339919895, + "learning_rate": 1.4416014274529211e-07, + "loss": 0.3918, + "step": 30203 + }, + { + "epoch": 0.9257079808753218, + "grad_norm": 0.7268667094357432, + "learning_rate": 1.4404184635773532e-07, + "loss": 0.3708, + "step": 30204 + }, + { + "epoch": 0.9257386293980631, + "grad_norm": 2.095796494024011, + "learning_rate": 1.4392359781717658e-07, + "loss": 0.4956, + "step": 30205 + }, + { + "epoch": 0.9257692779208042, + "grad_norm": 1.9281138351237261, + "learning_rate": 1.4380539712478102e-07, + "loss": 0.572, + "step": 30206 + }, + { + "epoch": 0.9257999264435455, + "grad_norm": 2.0649347539805634, + "learning_rate": 1.4368724428171333e-07, + "loss": 0.5977, + "step": 30207 + }, + { + "epoch": 0.9258305749662866, + "grad_norm": 2.110845772877831, + "learning_rate": 1.4356913928913807e-07, + "loss": 0.5197, + "step": 30208 + }, + { + "epoch": 0.9258612234890278, + "grad_norm": 1.692934573086164, + "learning_rate": 1.4345108214821823e-07, + "loss": 0.496, + "step": 30209 + }, + { + "epoch": 0.925891872011769, + "grad_norm": 2.0222273502128676, + "learning_rate": 1.4333307286011789e-07, + "loss": 0.5368, + "step": 30210 + }, + { + "epoch": 0.9259225205345102, + "grad_norm": 1.9917520237013344, + "learning_rate": 1.4321511142599943e-07, + "loss": 0.6138, + "step": 30211 + }, + { + "epoch": 0.9259531690572514, + "grad_norm": 1.9374240160473657, + "learning_rate": 
1.430971978470247e-07, + "loss": 0.5456, + "step": 30212 + }, + { + "epoch": 0.9259838175799926, + "grad_norm": 1.710775827805957, + "learning_rate": 1.4297933212435665e-07, + "loss": 0.5637, + "step": 30213 + }, + { + "epoch": 0.9260144661027339, + "grad_norm": 0.7707150950558563, + "learning_rate": 1.428615142591555e-07, + "loss": 0.399, + "step": 30214 + }, + { + "epoch": 0.926045114625475, + "grad_norm": 1.953173275205177, + "learning_rate": 1.4274374425258196e-07, + "loss": 0.596, + "step": 30215 + }, + { + "epoch": 0.9260757631482163, + "grad_norm": 1.7389013103779591, + "learning_rate": 1.4262602210579846e-07, + "loss": 0.6047, + "step": 30216 + }, + { + "epoch": 0.9261064116709574, + "grad_norm": 2.047848911132523, + "learning_rate": 1.4250834781996237e-07, + "loss": 0.5523, + "step": 30217 + }, + { + "epoch": 0.9261370601936987, + "grad_norm": 2.035735179716976, + "learning_rate": 1.4239072139623499e-07, + "loss": 0.628, + "step": 30218 + }, + { + "epoch": 0.9261677087164398, + "grad_norm": 1.7670225927652026, + "learning_rate": 1.4227314283577488e-07, + "loss": 0.4617, + "step": 30219 + }, + { + "epoch": 0.9261983572391811, + "grad_norm": 0.7952813644688762, + "learning_rate": 1.4215561213973994e-07, + "loss": 0.3856, + "step": 30220 + }, + { + "epoch": 0.9262290057619222, + "grad_norm": 1.9222972030619656, + "learning_rate": 1.4203812930928927e-07, + "loss": 0.5588, + "step": 30221 + }, + { + "epoch": 0.9262596542846635, + "grad_norm": 1.7278453319142173, + "learning_rate": 1.419206943455792e-07, + "loss": 0.5762, + "step": 30222 + }, + { + "epoch": 0.9262903028074047, + "grad_norm": 1.905456911126969, + "learning_rate": 1.4180330724976764e-07, + "loss": 0.5383, + "step": 30223 + }, + { + "epoch": 0.9263209513301459, + "grad_norm": 1.9015137239136093, + "learning_rate": 1.4168596802301148e-07, + "loss": 0.5036, + "step": 30224 + }, + { + "epoch": 0.9263515998528871, + "grad_norm": 7.516035816074913, + "learning_rate": 1.415686766664659e-07, + "loss": 0.57, + "step": 30225 + }, + { + "epoch": 0.9263822483756283, + "grad_norm": 2.089072428001156, + "learning_rate": 1.4145143318128773e-07, + "loss": 0.6117, + "step": 30226 + }, + { + "epoch": 0.9264128968983695, + "grad_norm": 1.6317089687209803, + "learning_rate": 1.4133423756863164e-07, + "loss": 0.5275, + "step": 30227 + }, + { + "epoch": 0.9264435454211107, + "grad_norm": 1.919935232866215, + "learning_rate": 1.4121708982965278e-07, + "loss": 0.5822, + "step": 30228 + }, + { + "epoch": 0.9264741939438519, + "grad_norm": 1.8622054372590795, + "learning_rate": 1.410999899655041e-07, + "loss": 0.5397, + "step": 30229 + }, + { + "epoch": 0.9265048424665931, + "grad_norm": 2.0198648795054064, + "learning_rate": 1.4098293797734142e-07, + "loss": 0.6299, + "step": 30230 + }, + { + "epoch": 0.9265354909893343, + "grad_norm": 2.1207213833074836, + "learning_rate": 1.4086593386631653e-07, + "loss": 0.633, + "step": 30231 + }, + { + "epoch": 0.9265661395120756, + "grad_norm": 1.8800846315742699, + "learning_rate": 1.4074897763358354e-07, + "loss": 0.6175, + "step": 30232 + }, + { + "epoch": 0.9265967880348167, + "grad_norm": 1.7479275333988102, + "learning_rate": 1.406320692802937e-07, + "loss": 0.5028, + "step": 30233 + }, + { + "epoch": 0.926627436557558, + "grad_norm": 0.8574100047520695, + "learning_rate": 1.4051520880759896e-07, + "loss": 0.4203, + "step": 30234 + }, + { + "epoch": 0.9266580850802991, + "grad_norm": 0.7609132584809805, + "learning_rate": 1.403983962166522e-07, + "loss": 0.4015, + "step": 30235 + }, + { + "epoch": 
0.9266887336030404, + "grad_norm": 1.6397918107373475, + "learning_rate": 1.4028163150860252e-07, + "loss": 0.5114, + "step": 30236 + }, + { + "epoch": 0.9267193821257815, + "grad_norm": 1.985334234367259, + "learning_rate": 1.401649146846018e-07, + "loss": 0.5858, + "step": 30237 + }, + { + "epoch": 0.9267500306485228, + "grad_norm": 1.9699014998873885, + "learning_rate": 1.4004824574579967e-07, + "loss": 0.6131, + "step": 30238 + }, + { + "epoch": 0.9267806791712639, + "grad_norm": 1.827700453023318, + "learning_rate": 1.399316246933452e-07, + "loss": 0.534, + "step": 30239 + }, + { + "epoch": 0.9268113276940051, + "grad_norm": 1.7192532858071636, + "learning_rate": 1.3981505152838803e-07, + "loss": 0.5659, + "step": 30240 + }, + { + "epoch": 0.9268419762167464, + "grad_norm": 1.7847621713731492, + "learning_rate": 1.3969852625207726e-07, + "loss": 0.5851, + "step": 30241 + }, + { + "epoch": 0.9268726247394875, + "grad_norm": 2.132941706448631, + "learning_rate": 1.395820488655597e-07, + "loss": 0.5663, + "step": 30242 + }, + { + "epoch": 0.9269032732622288, + "grad_norm": 2.097746000958309, + "learning_rate": 1.3946561936998448e-07, + "loss": 0.5447, + "step": 30243 + }, + { + "epoch": 0.9269339217849699, + "grad_norm": 1.8972839375654087, + "learning_rate": 1.3934923776649734e-07, + "loss": 0.6076, + "step": 30244 + }, + { + "epoch": 0.9269645703077112, + "grad_norm": 1.884653723348513, + "learning_rate": 1.3923290405624678e-07, + "loss": 0.5886, + "step": 30245 + }, + { + "epoch": 0.9269952188304523, + "grad_norm": 1.8964101094484496, + "learning_rate": 1.3911661824037803e-07, + "loss": 0.5935, + "step": 30246 + }, + { + "epoch": 0.9270258673531936, + "grad_norm": 1.8891159852524557, + "learning_rate": 1.3900038032003627e-07, + "loss": 0.5879, + "step": 30247 + }, + { + "epoch": 0.9270565158759347, + "grad_norm": 1.9827183335070564, + "learning_rate": 1.388841902963678e-07, + "loss": 0.5239, + "step": 30248 + }, + { + "epoch": 0.927087164398676, + "grad_norm": 1.8541385387979088, + "learning_rate": 1.3876804817051727e-07, + "loss": 0.5853, + "step": 30249 + }, + { + "epoch": 0.9271178129214172, + "grad_norm": 2.0240153001451007, + "learning_rate": 1.3865195394362875e-07, + "loss": 0.5888, + "step": 30250 + }, + { + "epoch": 0.9271484614441584, + "grad_norm": 2.0502118030769516, + "learning_rate": 1.3853590761684631e-07, + "loss": 0.53, + "step": 30251 + }, + { + "epoch": 0.9271791099668996, + "grad_norm": 1.870710701497551, + "learning_rate": 1.3841990919131354e-07, + "loss": 0.5475, + "step": 30252 + }, + { + "epoch": 0.9272097584896408, + "grad_norm": 1.8456837825347687, + "learning_rate": 1.3830395866817336e-07, + "loss": 0.6424, + "step": 30253 + }, + { + "epoch": 0.927240407012382, + "grad_norm": 2.0638114886597276, + "learning_rate": 1.3818805604856877e-07, + "loss": 0.5742, + "step": 30254 + }, + { + "epoch": 0.9272710555351232, + "grad_norm": 1.8064871809673229, + "learning_rate": 1.3807220133363996e-07, + "loss": 0.5001, + "step": 30255 + }, + { + "epoch": 0.9273017040578644, + "grad_norm": 1.844406140112449, + "learning_rate": 1.3795639452453047e-07, + "loss": 0.4863, + "step": 30256 + }, + { + "epoch": 0.9273323525806056, + "grad_norm": 1.8172951500478813, + "learning_rate": 1.3784063562238104e-07, + "loss": 0.5361, + "step": 30257 + }, + { + "epoch": 0.9273630011033468, + "grad_norm": 1.7593064849074052, + "learning_rate": 1.377249246283313e-07, + "loss": 0.5437, + "step": 30258 + }, + { + "epoch": 0.9273936496260881, + "grad_norm": 2.158646540755094, + "learning_rate": 
1.376092615435215e-07, + "loss": 0.5552, + "step": 30259 + }, + { + "epoch": 0.9274242981488292, + "grad_norm": 1.6991700888371106, + "learning_rate": 1.374936463690929e-07, + "loss": 0.5769, + "step": 30260 + }, + { + "epoch": 0.9274549466715705, + "grad_norm": 1.9287205170217883, + "learning_rate": 1.373780791061824e-07, + "loss": 0.5348, + "step": 30261 + }, + { + "epoch": 0.9274855951943116, + "grad_norm": 1.99548948790754, + "learning_rate": 1.3726255975593018e-07, + "loss": 0.5921, + "step": 30262 + }, + { + "epoch": 0.9275162437170529, + "grad_norm": 0.7737556343960671, + "learning_rate": 1.371470883194742e-07, + "loss": 0.3876, + "step": 30263 + }, + { + "epoch": 0.927546892239794, + "grad_norm": 1.981429480573051, + "learning_rate": 1.370316647979525e-07, + "loss": 0.5548, + "step": 30264 + }, + { + "epoch": 0.9275775407625353, + "grad_norm": 1.8652047927735753, + "learning_rate": 1.3691628919250245e-07, + "loss": 0.5377, + "step": 30265 + }, + { + "epoch": 0.9276081892852764, + "grad_norm": 2.1738010446464204, + "learning_rate": 1.3680096150425925e-07, + "loss": 0.5782, + "step": 30266 + }, + { + "epoch": 0.9276388378080177, + "grad_norm": 1.803454509162057, + "learning_rate": 1.366856817343609e-07, + "loss": 0.5648, + "step": 30267 + }, + { + "epoch": 0.9276694863307589, + "grad_norm": 1.9730597647183632, + "learning_rate": 1.3657044988394376e-07, + "loss": 0.596, + "step": 30268 + }, + { + "epoch": 0.9277001348535001, + "grad_norm": 1.7692823724090838, + "learning_rate": 1.3645526595414126e-07, + "loss": 0.5857, + "step": 30269 + }, + { + "epoch": 0.9277307833762413, + "grad_norm": 1.9976671132314021, + "learning_rate": 1.363401299460898e-07, + "loss": 0.6018, + "step": 30270 + }, + { + "epoch": 0.9277614318989824, + "grad_norm": 2.125456849906902, + "learning_rate": 1.362250418609229e-07, + "loss": 0.5563, + "step": 30271 + }, + { + "epoch": 0.9277920804217237, + "grad_norm": 0.7685281706654071, + "learning_rate": 1.361100016997763e-07, + "loss": 0.3746, + "step": 30272 + }, + { + "epoch": 0.9278227289444648, + "grad_norm": 2.1073970704169436, + "learning_rate": 1.3599500946378185e-07, + "loss": 0.6062, + "step": 30273 + }, + { + "epoch": 0.9278533774672061, + "grad_norm": 1.7991381100919097, + "learning_rate": 1.3588006515407203e-07, + "loss": 0.5131, + "step": 30274 + }, + { + "epoch": 0.9278840259899472, + "grad_norm": 2.0947461856566005, + "learning_rate": 1.3576516877178204e-07, + "loss": 0.5679, + "step": 30275 + }, + { + "epoch": 0.9279146745126885, + "grad_norm": 2.1796274018600865, + "learning_rate": 1.3565032031804205e-07, + "loss": 0.5412, + "step": 30276 + }, + { + "epoch": 0.9279453230354296, + "grad_norm": 1.8850612351526441, + "learning_rate": 1.3553551979398339e-07, + "loss": 0.5868, + "step": 30277 + }, + { + "epoch": 0.9279759715581709, + "grad_norm": 2.2466644852578996, + "learning_rate": 1.3542076720073792e-07, + "loss": 0.6016, + "step": 30278 + }, + { + "epoch": 0.9280066200809121, + "grad_norm": 1.9586065758324935, + "learning_rate": 1.3530606253943645e-07, + "loss": 0.5506, + "step": 30279 + }, + { + "epoch": 0.9280372686036533, + "grad_norm": 2.041862041404441, + "learning_rate": 1.3519140581120914e-07, + "loss": 0.5988, + "step": 30280 + }, + { + "epoch": 0.9280679171263945, + "grad_norm": 1.7144928330548517, + "learning_rate": 1.3507679701718568e-07, + "loss": 0.4985, + "step": 30281 + }, + { + "epoch": 0.9280985656491357, + "grad_norm": 1.9168304000834508, + "learning_rate": 1.3496223615849513e-07, + "loss": 0.438, + "step": 30282 + }, + { + 
"epoch": 0.9281292141718769, + "grad_norm": 1.6785248199754992, + "learning_rate": 1.348477232362666e-07, + "loss": 0.4653, + "step": 30283 + }, + { + "epoch": 0.9281598626946181, + "grad_norm": 1.96992975359013, + "learning_rate": 1.3473325825162864e-07, + "loss": 0.555, + "step": 30284 + }, + { + "epoch": 0.9281905112173593, + "grad_norm": 0.7596629493858744, + "learning_rate": 1.3461884120570756e-07, + "loss": 0.3729, + "step": 30285 + }, + { + "epoch": 0.9282211597401006, + "grad_norm": 2.0926485672701807, + "learning_rate": 1.3450447209963303e-07, + "loss": 0.6235, + "step": 30286 + }, + { + "epoch": 0.9282518082628417, + "grad_norm": 2.017563701909049, + "learning_rate": 1.3439015093453078e-07, + "loss": 0.5825, + "step": 30287 + }, + { + "epoch": 0.928282456785583, + "grad_norm": 1.9146592070821977, + "learning_rate": 1.342758777115266e-07, + "loss": 0.4776, + "step": 30288 + }, + { + "epoch": 0.9283131053083241, + "grad_norm": 1.7139619831766688, + "learning_rate": 1.341616524317474e-07, + "loss": 0.5812, + "step": 30289 + }, + { + "epoch": 0.9283437538310654, + "grad_norm": 1.8205189315931603, + "learning_rate": 1.3404747509631833e-07, + "loss": 0.5289, + "step": 30290 + }, + { + "epoch": 0.9283744023538065, + "grad_norm": 1.88364162861242, + "learning_rate": 1.3393334570636517e-07, + "loss": 0.5297, + "step": 30291 + }, + { + "epoch": 0.9284050508765478, + "grad_norm": 2.01916903677195, + "learning_rate": 1.3381926426301095e-07, + "loss": 0.6011, + "step": 30292 + }, + { + "epoch": 0.9284356993992889, + "grad_norm": 1.9109176965435817, + "learning_rate": 1.3370523076738083e-07, + "loss": 0.5542, + "step": 30293 + }, + { + "epoch": 0.9284663479220302, + "grad_norm": 2.0759179056822896, + "learning_rate": 1.335912452205984e-07, + "loss": 0.6207, + "step": 30294 + }, + { + "epoch": 0.9284969964447713, + "grad_norm": 1.7075359092577462, + "learning_rate": 1.3347730762378664e-07, + "loss": 0.5741, + "step": 30295 + }, + { + "epoch": 0.9285276449675126, + "grad_norm": 2.06954045854017, + "learning_rate": 1.333634179780674e-07, + "loss": 0.5406, + "step": 30296 + }, + { + "epoch": 0.9285582934902538, + "grad_norm": 0.7525169690112795, + "learning_rate": 1.3324957628456424e-07, + "loss": 0.383, + "step": 30297 + }, + { + "epoch": 0.928588942012995, + "grad_norm": 1.7660900777942217, + "learning_rate": 1.3313578254439796e-07, + "loss": 0.5496, + "step": 30298 + }, + { + "epoch": 0.9286195905357362, + "grad_norm": 1.9973543558894133, + "learning_rate": 1.330220367586904e-07, + "loss": 0.5577, + "step": 30299 + }, + { + "epoch": 0.9286502390584774, + "grad_norm": 1.937670048176482, + "learning_rate": 1.329083389285618e-07, + "loss": 0.5816, + "step": 30300 + }, + { + "epoch": 0.9286808875812186, + "grad_norm": 2.108573569673201, + "learning_rate": 1.3279468905513237e-07, + "loss": 0.5985, + "step": 30301 + }, + { + "epoch": 0.9287115361039597, + "grad_norm": 1.9309690198751375, + "learning_rate": 1.326810871395229e-07, + "loss": 0.579, + "step": 30302 + }, + { + "epoch": 0.928742184626701, + "grad_norm": 1.9708199324562663, + "learning_rate": 1.325675331828513e-07, + "loss": 0.6363, + "step": 30303 + }, + { + "epoch": 0.9287728331494421, + "grad_norm": 1.7998165428884934, + "learning_rate": 1.3245402718623678e-07, + "loss": 0.517, + "step": 30304 + }, + { + "epoch": 0.9288034816721834, + "grad_norm": 0.8132432123264824, + "learning_rate": 1.323405691507995e-07, + "loss": 0.4101, + "step": 30305 + }, + { + "epoch": 0.9288341301949246, + "grad_norm": 1.8587952292258143, + "learning_rate": 
1.3222715907765471e-07, + "loss": 0.5225, + "step": 30306 + }, + { + "epoch": 0.9288647787176658, + "grad_norm": 1.7817167939600589, + "learning_rate": 1.321137969679226e-07, + "loss": 0.5077, + "step": 30307 + }, + { + "epoch": 0.928895427240407, + "grad_norm": 1.9409385002880803, + "learning_rate": 1.3200048282271727e-07, + "loss": 0.6007, + "step": 30308 + }, + { + "epoch": 0.9289260757631482, + "grad_norm": 1.746778715105681, + "learning_rate": 1.318872166431573e-07, + "loss": 0.5884, + "step": 30309 + }, + { + "epoch": 0.9289567242858894, + "grad_norm": 2.217168067641722, + "learning_rate": 1.3177399843035898e-07, + "loss": 0.5461, + "step": 30310 + }, + { + "epoch": 0.9289873728086306, + "grad_norm": 1.805618423791435, + "learning_rate": 1.316608281854359e-07, + "loss": 0.5401, + "step": 30311 + }, + { + "epoch": 0.9290180213313718, + "grad_norm": 0.7603653629725331, + "learning_rate": 1.3154770590950438e-07, + "loss": 0.3885, + "step": 30312 + }, + { + "epoch": 0.929048669854113, + "grad_norm": 1.8202932296359118, + "learning_rate": 1.3143463160367964e-07, + "loss": 0.4853, + "step": 30313 + }, + { + "epoch": 0.9290793183768542, + "grad_norm": 0.8188486802721628, + "learning_rate": 1.3132160526907467e-07, + "loss": 0.3959, + "step": 30314 + }, + { + "epoch": 0.9291099668995955, + "grad_norm": 0.8762924744445023, + "learning_rate": 1.3120862690680358e-07, + "loss": 0.4037, + "step": 30315 + }, + { + "epoch": 0.9291406154223366, + "grad_norm": 1.7973031839419003, + "learning_rate": 1.3109569651797992e-07, + "loss": 0.5601, + "step": 30316 + }, + { + "epoch": 0.9291712639450779, + "grad_norm": 1.9725139760497072, + "learning_rate": 1.3098281410371616e-07, + "loss": 0.5186, + "step": 30317 + }, + { + "epoch": 0.929201912467819, + "grad_norm": 1.8292274233130392, + "learning_rate": 1.3086997966512472e-07, + "loss": 0.6376, + "step": 30318 + }, + { + "epoch": 0.9292325609905603, + "grad_norm": 1.7282537232964204, + "learning_rate": 1.3075719320331636e-07, + "loss": 0.549, + "step": 30319 + }, + { + "epoch": 0.9292632095133014, + "grad_norm": 1.9697086714163485, + "learning_rate": 1.3064445471940358e-07, + "loss": 0.4925, + "step": 30320 + }, + { + "epoch": 0.9292938580360427, + "grad_norm": 2.014056854200755, + "learning_rate": 1.305317642144971e-07, + "loss": 0.5631, + "step": 30321 + }, + { + "epoch": 0.9293245065587838, + "grad_norm": 1.6651052869124021, + "learning_rate": 1.3041912168970715e-07, + "loss": 0.4902, + "step": 30322 + }, + { + "epoch": 0.9293551550815251, + "grad_norm": 1.940008516163006, + "learning_rate": 1.303065271461429e-07, + "loss": 0.4768, + "step": 30323 + }, + { + "epoch": 0.9293858036042663, + "grad_norm": 2.29177549180772, + "learning_rate": 1.3019398058491507e-07, + "loss": 0.563, + "step": 30324 + }, + { + "epoch": 0.9294164521270075, + "grad_norm": 2.352689476981958, + "learning_rate": 1.3008148200713166e-07, + "loss": 0.5944, + "step": 30325 + }, + { + "epoch": 0.9294471006497487, + "grad_norm": 1.5806774003797164, + "learning_rate": 1.2996903141390127e-07, + "loss": 0.5224, + "step": 30326 + }, + { + "epoch": 0.9294777491724899, + "grad_norm": 1.7306643093017688, + "learning_rate": 1.2985662880633243e-07, + "loss": 0.5324, + "step": 30327 + }, + { + "epoch": 0.9295083976952311, + "grad_norm": 1.9274391932317068, + "learning_rate": 1.2974427418553205e-07, + "loss": 0.5257, + "step": 30328 + }, + { + "epoch": 0.9295390462179723, + "grad_norm": 1.778849041692575, + "learning_rate": 1.296319675526081e-07, + "loss": 0.6328, + "step": 30329 + }, + { + "epoch": 
0.9295696947407135, + "grad_norm": 0.7971298690615559, + "learning_rate": 1.2951970890866527e-07, + "loss": 0.3686, + "step": 30330 + }, + { + "epoch": 0.9296003432634548, + "grad_norm": 1.8803297214697077, + "learning_rate": 1.294074982548116e-07, + "loss": 0.4911, + "step": 30331 + }, + { + "epoch": 0.9296309917861959, + "grad_norm": 1.838789156057047, + "learning_rate": 1.2929533559215223e-07, + "loss": 0.5233, + "step": 30332 + }, + { + "epoch": 0.929661640308937, + "grad_norm": 1.8600318124030248, + "learning_rate": 1.2918322092179192e-07, + "loss": 0.5994, + "step": 30333 + }, + { + "epoch": 0.9296922888316783, + "grad_norm": 1.8094204978819133, + "learning_rate": 1.290711542448353e-07, + "loss": 0.4603, + "step": 30334 + }, + { + "epoch": 0.9297229373544195, + "grad_norm": 2.0305582506496367, + "learning_rate": 1.289591355623876e-07, + "loss": 0.5222, + "step": 30335 + }, + { + "epoch": 0.9297535858771607, + "grad_norm": 1.735317767591369, + "learning_rate": 1.2884716487555128e-07, + "loss": 0.5609, + "step": 30336 + }, + { + "epoch": 0.9297842343999019, + "grad_norm": 1.903383455446117, + "learning_rate": 1.2873524218542988e-07, + "loss": 0.5928, + "step": 30337 + }, + { + "epoch": 0.9298148829226431, + "grad_norm": 1.7441644203309326, + "learning_rate": 1.2862336749312753e-07, + "loss": 0.6008, + "step": 30338 + }, + { + "epoch": 0.9298455314453843, + "grad_norm": 1.864636570685893, + "learning_rate": 1.2851154079974448e-07, + "loss": 0.6545, + "step": 30339 + }, + { + "epoch": 0.9298761799681255, + "grad_norm": 0.7888717497059411, + "learning_rate": 1.2839976210638482e-07, + "loss": 0.3874, + "step": 30340 + }, + { + "epoch": 0.9299068284908667, + "grad_norm": 1.7032984978063992, + "learning_rate": 1.282880314141477e-07, + "loss": 0.5025, + "step": 30341 + }, + { + "epoch": 0.929937477013608, + "grad_norm": 2.3669894718266855, + "learning_rate": 1.2817634872413555e-07, + "loss": 0.6379, + "step": 30342 + }, + { + "epoch": 0.9299681255363491, + "grad_norm": 0.7921816192285469, + "learning_rate": 1.2806471403744857e-07, + "loss": 0.3998, + "step": 30343 + }, + { + "epoch": 0.9299987740590904, + "grad_norm": 2.2883156043180803, + "learning_rate": 1.2795312735518595e-07, + "loss": 0.6101, + "step": 30344 + }, + { + "epoch": 0.9300294225818315, + "grad_norm": 1.8701791952368778, + "learning_rate": 1.2784158867844788e-07, + "loss": 0.4578, + "step": 30345 + }, + { + "epoch": 0.9300600711045728, + "grad_norm": 2.009275669451542, + "learning_rate": 1.2773009800833346e-07, + "loss": 0.5795, + "step": 30346 + }, + { + "epoch": 0.9300907196273139, + "grad_norm": 1.6919749558342725, + "learning_rate": 1.2761865534594077e-07, + "loss": 0.4688, + "step": 30347 + }, + { + "epoch": 0.9301213681500552, + "grad_norm": 1.8502767267640452, + "learning_rate": 1.2750726069236774e-07, + "loss": 0.5235, + "step": 30348 + }, + { + "epoch": 0.9301520166727963, + "grad_norm": 0.8554420125514794, + "learning_rate": 1.27395914048713e-07, + "loss": 0.4008, + "step": 30349 + }, + { + "epoch": 0.9301826651955376, + "grad_norm": 0.8097881074189925, + "learning_rate": 1.2728461541607285e-07, + "loss": 0.4063, + "step": 30350 + }, + { + "epoch": 0.9302133137182788, + "grad_norm": 1.9066976135070317, + "learning_rate": 1.2717336479554422e-07, + "loss": 0.5463, + "step": 30351 + }, + { + "epoch": 0.93024396224102, + "grad_norm": 0.7946698991559457, + "learning_rate": 1.2706216218822343e-07, + "loss": 0.3941, + "step": 30352 + }, + { + "epoch": 0.9302746107637612, + "grad_norm": 1.885693627118648, + "learning_rate": 
1.269510075952052e-07, + "loss": 0.5677, + "step": 30353 + }, + { + "epoch": 0.9303052592865024, + "grad_norm": 0.7702294222247479, + "learning_rate": 1.2683990101758636e-07, + "loss": 0.3801, + "step": 30354 + }, + { + "epoch": 0.9303359078092436, + "grad_norm": 2.1218944780761553, + "learning_rate": 1.2672884245646e-07, + "loss": 0.5635, + "step": 30355 + }, + { + "epoch": 0.9303665563319848, + "grad_norm": 2.055326487135701, + "learning_rate": 1.266178319129213e-07, + "loss": 0.6795, + "step": 30356 + }, + { + "epoch": 0.930397204854726, + "grad_norm": 2.0145400173394794, + "learning_rate": 1.26506869388065e-07, + "loss": 0.5519, + "step": 30357 + }, + { + "epoch": 0.9304278533774673, + "grad_norm": 1.1080094655503636, + "learning_rate": 1.263959548829824e-07, + "loss": 0.4101, + "step": 30358 + }, + { + "epoch": 0.9304585019002084, + "grad_norm": 1.8062553908752002, + "learning_rate": 1.2628508839876818e-07, + "loss": 0.5497, + "step": 30359 + }, + { + "epoch": 0.9304891504229497, + "grad_norm": 2.3618217539218276, + "learning_rate": 1.2617426993651372e-07, + "loss": 0.5737, + "step": 30360 + }, + { + "epoch": 0.9305197989456908, + "grad_norm": 2.1072074172371917, + "learning_rate": 1.260634994973109e-07, + "loss": 0.5458, + "step": 30361 + }, + { + "epoch": 0.9305504474684321, + "grad_norm": 1.963972650026863, + "learning_rate": 1.259527770822516e-07, + "loss": 0.5268, + "step": 30362 + }, + { + "epoch": 0.9305810959911732, + "grad_norm": 2.133390256540111, + "learning_rate": 1.258421026924267e-07, + "loss": 0.4506, + "step": 30363 + }, + { + "epoch": 0.9306117445139144, + "grad_norm": 1.773781271688466, + "learning_rate": 1.2573147632892635e-07, + "loss": 0.5178, + "step": 30364 + }, + { + "epoch": 0.9306423930366556, + "grad_norm": 0.7808316506242561, + "learning_rate": 1.2562089799284195e-07, + "loss": 0.3963, + "step": 30365 + }, + { + "epoch": 0.9306730415593968, + "grad_norm": 1.8120647837545865, + "learning_rate": 1.2551036768526093e-07, + "loss": 0.5278, + "step": 30366 + }, + { + "epoch": 0.930703690082138, + "grad_norm": 2.0039174132167985, + "learning_rate": 1.2539988540727354e-07, + "loss": 0.5651, + "step": 30367 + }, + { + "epoch": 0.9307343386048792, + "grad_norm": 1.806709796871289, + "learning_rate": 1.2528945115996892e-07, + "loss": 0.5357, + "step": 30368 + }, + { + "epoch": 0.9307649871276205, + "grad_norm": 2.036605817410067, + "learning_rate": 1.2517906494443399e-07, + "loss": 0.5291, + "step": 30369 + }, + { + "epoch": 0.9307956356503616, + "grad_norm": 1.9059528269652735, + "learning_rate": 1.2506872676175786e-07, + "loss": 0.4704, + "step": 30370 + }, + { + "epoch": 0.9308262841731029, + "grad_norm": 1.7795213589235017, + "learning_rate": 1.2495843661302577e-07, + "loss": 0.5858, + "step": 30371 + }, + { + "epoch": 0.930856932695844, + "grad_norm": 2.0141727278059425, + "learning_rate": 1.248481944993263e-07, + "loss": 0.5786, + "step": 30372 + }, + { + "epoch": 0.9308875812185853, + "grad_norm": 0.7713854756426611, + "learning_rate": 1.2473800042174465e-07, + "loss": 0.3918, + "step": 30373 + }, + { + "epoch": 0.9309182297413264, + "grad_norm": 1.7749595802328872, + "learning_rate": 1.246278543813667e-07, + "loss": 0.4911, + "step": 30374 + }, + { + "epoch": 0.9309488782640677, + "grad_norm": 1.8055870493557233, + "learning_rate": 1.245177563792782e-07, + "loss": 0.6139, + "step": 30375 + }, + { + "epoch": 0.9309795267868088, + "grad_norm": 1.9241578128506638, + "learning_rate": 1.2440770641656385e-07, + "loss": 0.554, + "step": 30376 + }, + { + "epoch": 
0.9310101753095501, + "grad_norm": 0.7869500064025791, + "learning_rate": 1.2429770449430723e-07, + "loss": 0.3947, + "step": 30377 + }, + { + "epoch": 0.9310408238322913, + "grad_norm": 1.8626436067072227, + "learning_rate": 1.2418775061359302e-07, + "loss": 0.5571, + "step": 30378 + }, + { + "epoch": 0.9310714723550325, + "grad_norm": 2.1387551996341827, + "learning_rate": 1.2407784477550477e-07, + "loss": 0.5515, + "step": 30379 + }, + { + "epoch": 0.9311021208777737, + "grad_norm": 1.8961987612276587, + "learning_rate": 1.23967986981125e-07, + "loss": 0.5422, + "step": 30380 + }, + { + "epoch": 0.9311327694005149, + "grad_norm": 1.840442851886647, + "learning_rate": 1.238581772315367e-07, + "loss": 0.5969, + "step": 30381 + }, + { + "epoch": 0.9311634179232561, + "grad_norm": 1.9270931518111545, + "learning_rate": 1.2374841552782014e-07, + "loss": 0.5459, + "step": 30382 + }, + { + "epoch": 0.9311940664459973, + "grad_norm": 0.7725641192014657, + "learning_rate": 1.2363870187105886e-07, + "loss": 0.3687, + "step": 30383 + }, + { + "epoch": 0.9312247149687385, + "grad_norm": 0.7861092697117893, + "learning_rate": 1.235290362623337e-07, + "loss": 0.4033, + "step": 30384 + }, + { + "epoch": 0.9312553634914797, + "grad_norm": 2.0325804028718544, + "learning_rate": 1.234194187027238e-07, + "loss": 0.6183, + "step": 30385 + }, + { + "epoch": 0.9312860120142209, + "grad_norm": 0.8260853676368017, + "learning_rate": 1.233098491933099e-07, + "loss": 0.3944, + "step": 30386 + }, + { + "epoch": 0.9313166605369622, + "grad_norm": 1.7556416980093001, + "learning_rate": 1.2320032773517233e-07, + "loss": 0.4932, + "step": 30387 + }, + { + "epoch": 0.9313473090597033, + "grad_norm": 0.8034493301113103, + "learning_rate": 1.2309085432938962e-07, + "loss": 0.3789, + "step": 30388 + }, + { + "epoch": 0.9313779575824446, + "grad_norm": 1.8131834097182373, + "learning_rate": 1.2298142897704035e-07, + "loss": 0.4995, + "step": 30389 + }, + { + "epoch": 0.9314086061051857, + "grad_norm": 1.8999732567136707, + "learning_rate": 1.2287205167920313e-07, + "loss": 0.4966, + "step": 30390 + }, + { + "epoch": 0.931439254627927, + "grad_norm": 0.8544499602128677, + "learning_rate": 1.2276272243695542e-07, + "loss": 0.4256, + "step": 30391 + }, + { + "epoch": 0.9314699031506681, + "grad_norm": 2.030452108902028, + "learning_rate": 1.2265344125137467e-07, + "loss": 0.5855, + "step": 30392 + }, + { + "epoch": 0.9315005516734094, + "grad_norm": 1.658330472472909, + "learning_rate": 1.2254420812353674e-07, + "loss": 0.5246, + "step": 30393 + }, + { + "epoch": 0.9315312001961505, + "grad_norm": 0.8244028975135712, + "learning_rate": 1.224350230545196e-07, + "loss": 0.3928, + "step": 30394 + }, + { + "epoch": 0.9315618487188917, + "grad_norm": 1.9407904369483944, + "learning_rate": 1.2232588604539796e-07, + "loss": 0.5453, + "step": 30395 + }, + { + "epoch": 0.931592497241633, + "grad_norm": 1.9674487534544298, + "learning_rate": 1.222167970972471e-07, + "loss": 0.6041, + "step": 30396 + }, + { + "epoch": 0.9316231457643741, + "grad_norm": 0.7755284253392458, + "learning_rate": 1.2210775621114224e-07, + "loss": 0.3751, + "step": 30397 + }, + { + "epoch": 0.9316537942871154, + "grad_norm": 2.004701457257427, + "learning_rate": 1.2199876338815753e-07, + "loss": 0.4881, + "step": 30398 + }, + { + "epoch": 0.9316844428098565, + "grad_norm": 1.9478214120555393, + "learning_rate": 1.2188981862936765e-07, + "loss": 0.5307, + "step": 30399 + }, + { + "epoch": 0.9317150913325978, + "grad_norm": 1.7603160771577961, + 
"learning_rate": 1.2178092193584457e-07, + "loss": 0.6011, + "step": 30400 + }, + { + "epoch": 0.9317457398553389, + "grad_norm": 1.9408482601794996, + "learning_rate": 1.2167207330866294e-07, + "loss": 0.6058, + "step": 30401 + }, + { + "epoch": 0.9317763883780802, + "grad_norm": 0.7801108780469107, + "learning_rate": 1.2156327274889467e-07, + "loss": 0.3769, + "step": 30402 + }, + { + "epoch": 0.9318070369008213, + "grad_norm": 2.0310058910111115, + "learning_rate": 1.2145452025761173e-07, + "loss": 0.5781, + "step": 30403 + }, + { + "epoch": 0.9318376854235626, + "grad_norm": 1.894704543268682, + "learning_rate": 1.213458158358849e-07, + "loss": 0.5057, + "step": 30404 + }, + { + "epoch": 0.9318683339463038, + "grad_norm": 1.9581418461856281, + "learning_rate": 1.2123715948478608e-07, + "loss": 0.528, + "step": 30405 + }, + { + "epoch": 0.931898982469045, + "grad_norm": 1.7545730375509372, + "learning_rate": 1.2112855120538613e-07, + "loss": 0.5068, + "step": 30406 + }, + { + "epoch": 0.9319296309917862, + "grad_norm": 1.9509897160441698, + "learning_rate": 1.2101999099875529e-07, + "loss": 0.538, + "step": 30407 + }, + { + "epoch": 0.9319602795145274, + "grad_norm": 1.9867845341709518, + "learning_rate": 1.209114788659621e-07, + "loss": 0.5805, + "step": 30408 + }, + { + "epoch": 0.9319909280372686, + "grad_norm": 2.053658414261732, + "learning_rate": 1.2080301480807687e-07, + "loss": 0.5975, + "step": 30409 + }, + { + "epoch": 0.9320215765600098, + "grad_norm": 0.8130059424366901, + "learning_rate": 1.2069459882616818e-07, + "loss": 0.3829, + "step": 30410 + }, + { + "epoch": 0.932052225082751, + "grad_norm": 1.9623457828923727, + "learning_rate": 1.2058623092130406e-07, + "loss": 0.5877, + "step": 30411 + }, + { + "epoch": 0.9320828736054922, + "grad_norm": 2.0435556604148393, + "learning_rate": 1.2047791109455087e-07, + "loss": 0.5447, + "step": 30412 + }, + { + "epoch": 0.9321135221282334, + "grad_norm": 1.924039178650051, + "learning_rate": 1.2036963934697887e-07, + "loss": 0.5372, + "step": 30413 + }, + { + "epoch": 0.9321441706509747, + "grad_norm": 1.9354599265710761, + "learning_rate": 1.2026141567965332e-07, + "loss": 0.5797, + "step": 30414 + }, + { + "epoch": 0.9321748191737158, + "grad_norm": 1.9911155983310158, + "learning_rate": 1.201532400936395e-07, + "loss": 0.6276, + "step": 30415 + }, + { + "epoch": 0.9322054676964571, + "grad_norm": 2.0294742354006017, + "learning_rate": 1.2004511259000485e-07, + "loss": 0.5451, + "step": 30416 + }, + { + "epoch": 0.9322361162191982, + "grad_norm": 2.6004022328154, + "learning_rate": 1.1993703316981408e-07, + "loss": 0.6545, + "step": 30417 + }, + { + "epoch": 0.9322667647419395, + "grad_norm": 1.9045503662533372, + "learning_rate": 1.1982900183413304e-07, + "loss": 0.5295, + "step": 30418 + }, + { + "epoch": 0.9322974132646806, + "grad_norm": 1.6634543687514454, + "learning_rate": 1.1972101858402475e-07, + "loss": 0.5302, + "step": 30419 + }, + { + "epoch": 0.9323280617874219, + "grad_norm": 1.965424279978089, + "learning_rate": 1.1961308342055444e-07, + "loss": 0.6634, + "step": 30420 + }, + { + "epoch": 0.932358710310163, + "grad_norm": 1.8251510990912796, + "learning_rate": 1.195051963447852e-07, + "loss": 0.6131, + "step": 30421 + }, + { + "epoch": 0.9323893588329043, + "grad_norm": 1.7280209164343692, + "learning_rate": 1.1939735735778e-07, + "loss": 0.5295, + "step": 30422 + }, + { + "epoch": 0.9324200073556455, + "grad_norm": 1.983268412732296, + "learning_rate": 1.192895664606003e-07, + "loss": 0.5082, + "step": 30423 + }, 
+ { + "epoch": 0.9324506558783867, + "grad_norm": 1.828564196802525, + "learning_rate": 1.1918182365431075e-07, + "loss": 0.5778, + "step": 30424 + }, + { + "epoch": 0.9324813044011279, + "grad_norm": 0.8476933504289318, + "learning_rate": 1.190741289399705e-07, + "loss": 0.4105, + "step": 30425 + }, + { + "epoch": 0.932511952923869, + "grad_norm": 1.8912771092054503, + "learning_rate": 1.1896648231864205e-07, + "loss": 0.5584, + "step": 30426 + }, + { + "epoch": 0.9325426014466103, + "grad_norm": 0.8026325811025863, + "learning_rate": 1.1885888379138565e-07, + "loss": 0.3927, + "step": 30427 + }, + { + "epoch": 0.9325732499693514, + "grad_norm": 1.9701776620773388, + "learning_rate": 1.1875133335926159e-07, + "loss": 0.5597, + "step": 30428 + }, + { + "epoch": 0.9326038984920927, + "grad_norm": 2.2421682902620472, + "learning_rate": 1.186438310233301e-07, + "loss": 0.4702, + "step": 30429 + }, + { + "epoch": 0.9326345470148338, + "grad_norm": 2.0718324544481614, + "learning_rate": 1.1853637678464925e-07, + "loss": 0.5365, + "step": 30430 + }, + { + "epoch": 0.9326651955375751, + "grad_norm": 0.8106780858682594, + "learning_rate": 1.1842897064427816e-07, + "loss": 0.375, + "step": 30431 + }, + { + "epoch": 0.9326958440603162, + "grad_norm": 1.9916027678653492, + "learning_rate": 1.1832161260327602e-07, + "loss": 0.5422, + "step": 30432 + }, + { + "epoch": 0.9327264925830575, + "grad_norm": 1.8223853365430738, + "learning_rate": 1.1821430266269973e-07, + "loss": 0.6164, + "step": 30433 + }, + { + "epoch": 0.9327571411057987, + "grad_norm": 0.792150705958518, + "learning_rate": 1.1810704082360735e-07, + "loss": 0.3873, + "step": 30434 + }, + { + "epoch": 0.9327877896285399, + "grad_norm": 1.9176068546477976, + "learning_rate": 1.1799982708705526e-07, + "loss": 0.5172, + "step": 30435 + }, + { + "epoch": 0.9328184381512811, + "grad_norm": 1.6743624888984374, + "learning_rate": 1.1789266145409984e-07, + "loss": 0.525, + "step": 30436 + }, + { + "epoch": 0.9328490866740223, + "grad_norm": 1.7939122462758905, + "learning_rate": 1.1778554392579745e-07, + "loss": 0.5212, + "step": 30437 + }, + { + "epoch": 0.9328797351967635, + "grad_norm": 1.8558245770887571, + "learning_rate": 1.176784745032028e-07, + "loss": 0.6154, + "step": 30438 + }, + { + "epoch": 0.9329103837195047, + "grad_norm": 1.8147955584391204, + "learning_rate": 1.1757145318737172e-07, + "loss": 0.5325, + "step": 30439 + }, + { + "epoch": 0.9329410322422459, + "grad_norm": 1.7960735393124785, + "learning_rate": 1.174644799793584e-07, + "loss": 0.5675, + "step": 30440 + }, + { + "epoch": 0.9329716807649872, + "grad_norm": 0.7838336073208652, + "learning_rate": 1.1735755488021638e-07, + "loss": 0.3916, + "step": 30441 + }, + { + "epoch": 0.9330023292877283, + "grad_norm": 1.9834552752255916, + "learning_rate": 1.1725067789099986e-07, + "loss": 0.596, + "step": 30442 + }, + { + "epoch": 0.9330329778104696, + "grad_norm": 2.0199762180019785, + "learning_rate": 1.1714384901276188e-07, + "loss": 0.5696, + "step": 30443 + }, + { + "epoch": 0.9330636263332107, + "grad_norm": 1.7446180991772455, + "learning_rate": 1.1703706824655492e-07, + "loss": 0.4852, + "step": 30444 + }, + { + "epoch": 0.933094274855952, + "grad_norm": 2.2515594734864064, + "learning_rate": 1.1693033559343037e-07, + "loss": 0.6169, + "step": 30445 + }, + { + "epoch": 0.9331249233786931, + "grad_norm": 1.7815121922183055, + "learning_rate": 1.1682365105444182e-07, + "loss": 0.5209, + "step": 30446 + }, + { + "epoch": 0.9331555719014344, + "grad_norm": 2.0601871743811295, 
+ "learning_rate": 1.1671701463063845e-07, + "loss": 0.5851, + "step": 30447 + }, + { + "epoch": 0.9331862204241755, + "grad_norm": 2.151754594954003, + "learning_rate": 1.1661042632307218e-07, + "loss": 0.5882, + "step": 30448 + }, + { + "epoch": 0.9332168689469168, + "grad_norm": 0.8055100764575162, + "learning_rate": 1.1650388613279218e-07, + "loss": 0.386, + "step": 30449 + }, + { + "epoch": 0.933247517469658, + "grad_norm": 1.8914335298777267, + "learning_rate": 1.1639739406084927e-07, + "loss": 0.5935, + "step": 30450 + }, + { + "epoch": 0.9332781659923992, + "grad_norm": 1.8045129256784886, + "learning_rate": 1.1629095010829262e-07, + "loss": 0.5259, + "step": 30451 + }, + { + "epoch": 0.9333088145151404, + "grad_norm": 1.5888409235577556, + "learning_rate": 1.1618455427617026e-07, + "loss": 0.4865, + "step": 30452 + }, + { + "epoch": 0.9333394630378816, + "grad_norm": 1.7564968193307984, + "learning_rate": 1.1607820656553138e-07, + "loss": 0.5041, + "step": 30453 + }, + { + "epoch": 0.9333701115606228, + "grad_norm": 1.921660624112481, + "learning_rate": 1.1597190697742344e-07, + "loss": 0.5296, + "step": 30454 + }, + { + "epoch": 0.933400760083364, + "grad_norm": 2.08823929116713, + "learning_rate": 1.1586565551289397e-07, + "loss": 0.6034, + "step": 30455 + }, + { + "epoch": 0.9334314086061052, + "grad_norm": 0.8243634847006549, + "learning_rate": 1.1575945217299045e-07, + "loss": 0.3933, + "step": 30456 + }, + { + "epoch": 0.9334620571288463, + "grad_norm": 2.153475520132104, + "learning_rate": 1.156532969587576e-07, + "loss": 0.6143, + "step": 30457 + }, + { + "epoch": 0.9334927056515876, + "grad_norm": 1.9329113255993065, + "learning_rate": 1.1554718987124292e-07, + "loss": 0.6142, + "step": 30458 + }, + { + "epoch": 0.9335233541743287, + "grad_norm": 0.7916393853716633, + "learning_rate": 1.1544113091149222e-07, + "loss": 0.3925, + "step": 30459 + }, + { + "epoch": 0.93355400269707, + "grad_norm": 1.897302417989542, + "learning_rate": 1.1533512008054915e-07, + "loss": 0.5994, + "step": 30460 + }, + { + "epoch": 0.9335846512198112, + "grad_norm": 0.7645305503514448, + "learning_rate": 1.1522915737945895e-07, + "loss": 0.3986, + "step": 30461 + }, + { + "epoch": 0.9336152997425524, + "grad_norm": 1.9491138286116685, + "learning_rate": 1.1512324280926635e-07, + "loss": 0.5474, + "step": 30462 + }, + { + "epoch": 0.9336459482652936, + "grad_norm": 1.7884979548406539, + "learning_rate": 1.1501737637101329e-07, + "loss": 0.5827, + "step": 30463 + }, + { + "epoch": 0.9336765967880348, + "grad_norm": 1.9881905307794887, + "learning_rate": 1.149115580657445e-07, + "loss": 0.5793, + "step": 30464 + }, + { + "epoch": 0.933707245310776, + "grad_norm": 0.8676383701852162, + "learning_rate": 1.1480578789450192e-07, + "loss": 0.401, + "step": 30465 + }, + { + "epoch": 0.9337378938335172, + "grad_norm": 1.8568185247125235, + "learning_rate": 1.147000658583275e-07, + "loss": 0.5606, + "step": 30466 + }, + { + "epoch": 0.9337685423562584, + "grad_norm": 1.9846198650889928, + "learning_rate": 1.1459439195826427e-07, + "loss": 0.6106, + "step": 30467 + }, + { + "epoch": 0.9337991908789997, + "grad_norm": 1.6862404840320644, + "learning_rate": 1.1448876619535143e-07, + "loss": 0.5388, + "step": 30468 + }, + { + "epoch": 0.9338298394017408, + "grad_norm": 1.9229047479687615, + "learning_rate": 1.1438318857063091e-07, + "loss": 0.5683, + "step": 30469 + }, + { + "epoch": 0.9338604879244821, + "grad_norm": 1.8728124028945186, + "learning_rate": 1.1427765908514355e-07, + "loss": 0.5776, + "step": 
30470 + }, + { + "epoch": 0.9338911364472232, + "grad_norm": 1.6736387879023615, + "learning_rate": 1.1417217773992795e-07, + "loss": 0.6039, + "step": 30471 + }, + { + "epoch": 0.9339217849699645, + "grad_norm": 2.1401789124659936, + "learning_rate": 1.1406674453602385e-07, + "loss": 0.672, + "step": 30472 + }, + { + "epoch": 0.9339524334927056, + "grad_norm": 0.8171638097031042, + "learning_rate": 1.1396135947447096e-07, + "loss": 0.4044, + "step": 30473 + }, + { + "epoch": 0.9339830820154469, + "grad_norm": 1.7261476031356362, + "learning_rate": 1.1385602255630624e-07, + "loss": 0.5453, + "step": 30474 + }, + { + "epoch": 0.934013730538188, + "grad_norm": 1.7088493302962906, + "learning_rate": 1.137507337825683e-07, + "loss": 0.5717, + "step": 30475 + }, + { + "epoch": 0.9340443790609293, + "grad_norm": 1.9214173541115007, + "learning_rate": 1.1364549315429519e-07, + "loss": 0.5705, + "step": 30476 + }, + { + "epoch": 0.9340750275836704, + "grad_norm": 1.7442241966149028, + "learning_rate": 1.1354030067252275e-07, + "loss": 0.6075, + "step": 30477 + }, + { + "epoch": 0.9341056761064117, + "grad_norm": 1.9948620191223134, + "learning_rate": 1.134351563382885e-07, + "loss": 0.5253, + "step": 30478 + }, + { + "epoch": 0.9341363246291529, + "grad_norm": 1.928566099909711, + "learning_rate": 1.1333006015262659e-07, + "loss": 0.5842, + "step": 30479 + }, + { + "epoch": 0.9341669731518941, + "grad_norm": 1.715259781501888, + "learning_rate": 1.1322501211657566e-07, + "loss": 0.5058, + "step": 30480 + }, + { + "epoch": 0.9341976216746353, + "grad_norm": 1.7313839365774617, + "learning_rate": 1.131200122311682e-07, + "loss": 0.4581, + "step": 30481 + }, + { + "epoch": 0.9342282701973765, + "grad_norm": 1.7840706912502502, + "learning_rate": 1.1301506049743949e-07, + "loss": 0.6102, + "step": 30482 + }, + { + "epoch": 0.9342589187201177, + "grad_norm": 0.8239430824499689, + "learning_rate": 1.129101569164237e-07, + "loss": 0.4044, + "step": 30483 + }, + { + "epoch": 0.9342895672428589, + "grad_norm": 1.6702638291395286, + "learning_rate": 1.1280530148915503e-07, + "loss": 0.5709, + "step": 30484 + }, + { + "epoch": 0.9343202157656001, + "grad_norm": 1.916496144719198, + "learning_rate": 1.1270049421666596e-07, + "loss": 0.5829, + "step": 30485 + }, + { + "epoch": 0.9343508642883414, + "grad_norm": 1.809920303387526, + "learning_rate": 1.12595735099989e-07, + "loss": 0.5721, + "step": 30486 + }, + { + "epoch": 0.9343815128110825, + "grad_norm": 2.4332434369866163, + "learning_rate": 1.1249102414015722e-07, + "loss": 0.6565, + "step": 30487 + }, + { + "epoch": 0.9344121613338237, + "grad_norm": 0.7815360060665272, + "learning_rate": 1.1238636133820202e-07, + "loss": 0.3878, + "step": 30488 + }, + { + "epoch": 0.9344428098565649, + "grad_norm": 2.120890495820727, + "learning_rate": 1.122817466951548e-07, + "loss": 0.5308, + "step": 30489 + }, + { + "epoch": 0.9344734583793061, + "grad_norm": 1.8621088315699386, + "learning_rate": 1.1217718021204471e-07, + "loss": 0.6553, + "step": 30490 + }, + { + "epoch": 0.9345041069020473, + "grad_norm": 0.7676511578914174, + "learning_rate": 1.1207266188990485e-07, + "loss": 0.395, + "step": 30491 + }, + { + "epoch": 0.9345347554247885, + "grad_norm": 1.7080666339349193, + "learning_rate": 1.1196819172976325e-07, + "loss": 0.5426, + "step": 30492 + }, + { + "epoch": 0.9345654039475297, + "grad_norm": 1.7691167426034413, + "learning_rate": 1.1186376973264967e-07, + "loss": 0.5768, + "step": 30493 + }, + { + "epoch": 0.9345960524702709, + "grad_norm": 
1.9506004855009214, + "learning_rate": 1.1175939589959328e-07, + "loss": 0.5516, + "step": 30494 + }, + { + "epoch": 0.9346267009930121, + "grad_norm": 1.91025860843002, + "learning_rate": 1.1165507023162214e-07, + "loss": 0.5858, + "step": 30495 + }, + { + "epoch": 0.9346573495157533, + "grad_norm": 1.861166779865468, + "learning_rate": 1.1155079272976432e-07, + "loss": 0.5411, + "step": 30496 + }, + { + "epoch": 0.9346879980384946, + "grad_norm": 0.7907238022546068, + "learning_rate": 1.1144656339504678e-07, + "loss": 0.4144, + "step": 30497 + }, + { + "epoch": 0.9347186465612357, + "grad_norm": 1.7278415521686903, + "learning_rate": 1.1134238222849758e-07, + "loss": 0.4977, + "step": 30498 + }, + { + "epoch": 0.934749295083977, + "grad_norm": 1.7841302751115702, + "learning_rate": 1.1123824923114257e-07, + "loss": 0.4826, + "step": 30499 + }, + { + "epoch": 0.9347799436067181, + "grad_norm": 2.097282628894226, + "learning_rate": 1.1113416440400815e-07, + "loss": 0.5794, + "step": 30500 + }, + { + "epoch": 0.9348105921294594, + "grad_norm": 2.025998696395405, + "learning_rate": 1.1103012774811961e-07, + "loss": 0.5831, + "step": 30501 + }, + { + "epoch": 0.9348412406522005, + "grad_norm": 1.8085276569632203, + "learning_rate": 1.1092613926450169e-07, + "loss": 0.5072, + "step": 30502 + }, + { + "epoch": 0.9348718891749418, + "grad_norm": 2.002773128538355, + "learning_rate": 1.1082219895418022e-07, + "loss": 0.6127, + "step": 30503 + }, + { + "epoch": 0.934902537697683, + "grad_norm": 1.927231298687507, + "learning_rate": 1.1071830681817775e-07, + "loss": 0.5202, + "step": 30504 + }, + { + "epoch": 0.9349331862204242, + "grad_norm": 0.8023890467203006, + "learning_rate": 1.1061446285751897e-07, + "loss": 0.3996, + "step": 30505 + }, + { + "epoch": 0.9349638347431654, + "grad_norm": 0.8251473014551244, + "learning_rate": 1.1051066707322699e-07, + "loss": 0.4043, + "step": 30506 + }, + { + "epoch": 0.9349944832659066, + "grad_norm": 1.9665079770509555, + "learning_rate": 1.1040691946632487e-07, + "loss": 0.5933, + "step": 30507 + }, + { + "epoch": 0.9350251317886478, + "grad_norm": 2.0096097608708487, + "learning_rate": 1.1030322003783456e-07, + "loss": 0.564, + "step": 30508 + }, + { + "epoch": 0.935055780311389, + "grad_norm": 0.8023744946280961, + "learning_rate": 1.1019956878877635e-07, + "loss": 0.4, + "step": 30509 + }, + { + "epoch": 0.9350864288341302, + "grad_norm": 0.8299355783780531, + "learning_rate": 1.1009596572017445e-07, + "loss": 0.4128, + "step": 30510 + }, + { + "epoch": 0.9351170773568714, + "grad_norm": 1.774452247474805, + "learning_rate": 1.0999241083304746e-07, + "loss": 0.5695, + "step": 30511 + }, + { + "epoch": 0.9351477258796126, + "grad_norm": 1.9062286684503253, + "learning_rate": 1.0988890412841624e-07, + "loss": 0.5669, + "step": 30512 + }, + { + "epoch": 0.9351783744023539, + "grad_norm": 1.8924636200867986, + "learning_rate": 1.0978544560730109e-07, + "loss": 0.51, + "step": 30513 + }, + { + "epoch": 0.935209022925095, + "grad_norm": 1.984456538639923, + "learning_rate": 1.0968203527072119e-07, + "loss": 0.6329, + "step": 30514 + }, + { + "epoch": 0.9352396714478363, + "grad_norm": 1.7754809171708947, + "learning_rate": 1.0957867311969516e-07, + "loss": 0.6169, + "step": 30515 + }, + { + "epoch": 0.9352703199705774, + "grad_norm": 1.9092263510348326, + "learning_rate": 1.0947535915524166e-07, + "loss": 0.5288, + "step": 30516 + }, + { + "epoch": 0.9353009684933187, + "grad_norm": 2.0655840064364024, + "learning_rate": 1.0937209337837872e-07, + "loss": 
0.6135, + "step": 30517 + }, + { + "epoch": 0.9353316170160598, + "grad_norm": 2.00877940844232, + "learning_rate": 1.0926887579012446e-07, + "loss": 0.5689, + "step": 30518 + }, + { + "epoch": 0.935362265538801, + "grad_norm": 1.8422150395283556, + "learning_rate": 1.0916570639149526e-07, + "loss": 0.6019, + "step": 30519 + }, + { + "epoch": 0.9353929140615422, + "grad_norm": 0.8188807102770522, + "learning_rate": 1.0906258518350698e-07, + "loss": 0.3989, + "step": 30520 + }, + { + "epoch": 0.9354235625842834, + "grad_norm": 1.7910059573694739, + "learning_rate": 1.0895951216717715e-07, + "loss": 0.5024, + "step": 30521 + }, + { + "epoch": 0.9354542111070246, + "grad_norm": 1.907596709551668, + "learning_rate": 1.0885648734352105e-07, + "loss": 0.5583, + "step": 30522 + }, + { + "epoch": 0.9354848596297658, + "grad_norm": 1.963945820477966, + "learning_rate": 1.0875351071355234e-07, + "loss": 0.599, + "step": 30523 + }, + { + "epoch": 0.9355155081525071, + "grad_norm": 1.8660593191991914, + "learning_rate": 1.086505822782874e-07, + "loss": 0.5233, + "step": 30524 + }, + { + "epoch": 0.9355461566752482, + "grad_norm": 1.8660103987344807, + "learning_rate": 1.0854770203873932e-07, + "loss": 0.5896, + "step": 30525 + }, + { + "epoch": 0.9355768051979895, + "grad_norm": 2.0702313816568196, + "learning_rate": 1.0844486999592285e-07, + "loss": 0.657, + "step": 30526 + }, + { + "epoch": 0.9356074537207306, + "grad_norm": 1.882848725314382, + "learning_rate": 1.0834208615084996e-07, + "loss": 0.5718, + "step": 30527 + }, + { + "epoch": 0.9356381022434719, + "grad_norm": 1.7867950177397067, + "learning_rate": 1.0823935050453427e-07, + "loss": 0.5202, + "step": 30528 + }, + { + "epoch": 0.935668750766213, + "grad_norm": 1.8713470004483628, + "learning_rate": 1.081366630579883e-07, + "loss": 0.6394, + "step": 30529 + }, + { + "epoch": 0.9356993992889543, + "grad_norm": 2.1037861939754925, + "learning_rate": 1.080340238122235e-07, + "loss": 0.6959, + "step": 30530 + }, + { + "epoch": 0.9357300478116954, + "grad_norm": 2.1485413159908413, + "learning_rate": 1.0793143276825068e-07, + "loss": 0.5675, + "step": 30531 + }, + { + "epoch": 0.9357606963344367, + "grad_norm": 1.7140117242183706, + "learning_rate": 1.0782888992708129e-07, + "loss": 0.5958, + "step": 30532 + }, + { + "epoch": 0.9357913448571779, + "grad_norm": 0.753430231137135, + "learning_rate": 1.0772639528972562e-07, + "loss": 0.3969, + "step": 30533 + }, + { + "epoch": 0.9358219933799191, + "grad_norm": 0.7750324798177829, + "learning_rate": 1.076239488571934e-07, + "loss": 0.3839, + "step": 30534 + }, + { + "epoch": 0.9358526419026603, + "grad_norm": 2.076242701760184, + "learning_rate": 1.0752155063049441e-07, + "loss": 0.5561, + "step": 30535 + }, + { + "epoch": 0.9358832904254015, + "grad_norm": 1.6076214176442176, + "learning_rate": 1.0741920061063671e-07, + "loss": 0.5194, + "step": 30536 + }, + { + "epoch": 0.9359139389481427, + "grad_norm": 2.3155563945113844, + "learning_rate": 1.0731689879863061e-07, + "loss": 0.5242, + "step": 30537 + }, + { + "epoch": 0.9359445874708839, + "grad_norm": 0.7895870526622443, + "learning_rate": 1.0721464519548198e-07, + "loss": 0.38, + "step": 30538 + }, + { + "epoch": 0.9359752359936251, + "grad_norm": 1.7180601332337302, + "learning_rate": 1.0711243980219943e-07, + "loss": 0.539, + "step": 30539 + }, + { + "epoch": 0.9360058845163663, + "grad_norm": 1.92840083603284, + "learning_rate": 1.0701028261979052e-07, + "loss": 0.6091, + "step": 30540 + }, + { + "epoch": 0.9360365330391075, + "grad_norm": 
2.1236150581699134, + "learning_rate": 1.0690817364926109e-07, + "loss": 0.6128, + "step": 30541 + }, + { + "epoch": 0.9360671815618488, + "grad_norm": 2.0510172862341616, + "learning_rate": 1.0680611289161647e-07, + "loss": 0.5381, + "step": 30542 + }, + { + "epoch": 0.9360978300845899, + "grad_norm": 1.924865841829892, + "learning_rate": 1.0670410034786416e-07, + "loss": 0.5834, + "step": 30543 + }, + { + "epoch": 0.9361284786073312, + "grad_norm": 2.1039866723439102, + "learning_rate": 1.0660213601900782e-07, + "loss": 0.5998, + "step": 30544 + }, + { + "epoch": 0.9361591271300723, + "grad_norm": 1.8768932353473011, + "learning_rate": 1.065002199060533e-07, + "loss": 0.5902, + "step": 30545 + }, + { + "epoch": 0.9361897756528136, + "grad_norm": 1.7811917447115695, + "learning_rate": 1.0639835201000315e-07, + "loss": 0.5669, + "step": 30546 + }, + { + "epoch": 0.9362204241755547, + "grad_norm": 1.824428575976078, + "learning_rate": 1.0629653233186265e-07, + "loss": 0.55, + "step": 30547 + }, + { + "epoch": 0.936251072698296, + "grad_norm": 1.9584300484798602, + "learning_rate": 1.0619476087263491e-07, + "loss": 0.5487, + "step": 30548 + }, + { + "epoch": 0.9362817212210371, + "grad_norm": 1.7680411679240695, + "learning_rate": 1.060930376333219e-07, + "loss": 0.545, + "step": 30549 + }, + { + "epoch": 0.9363123697437783, + "grad_norm": 1.8667340051082892, + "learning_rate": 1.0599136261492615e-07, + "loss": 0.5978, + "step": 30550 + }, + { + "epoch": 0.9363430182665196, + "grad_norm": 2.107423193270652, + "learning_rate": 1.0588973581845019e-07, + "loss": 0.5274, + "step": 30551 + }, + { + "epoch": 0.9363736667892607, + "grad_norm": 0.7939237417298767, + "learning_rate": 1.0578815724489489e-07, + "loss": 0.3857, + "step": 30552 + }, + { + "epoch": 0.936404315312002, + "grad_norm": 1.910849249875178, + "learning_rate": 1.056866268952611e-07, + "loss": 0.6158, + "step": 30553 + }, + { + "epoch": 0.9364349638347431, + "grad_norm": 2.0740262969994303, + "learning_rate": 1.055851447705486e-07, + "loss": 0.555, + "step": 30554 + }, + { + "epoch": 0.9364656123574844, + "grad_norm": 1.92704754348519, + "learning_rate": 1.0548371087175824e-07, + "loss": 0.6005, + "step": 30555 + }, + { + "epoch": 0.9364962608802255, + "grad_norm": 1.6686538002819695, + "learning_rate": 1.0538232519988978e-07, + "loss": 0.5173, + "step": 30556 + }, + { + "epoch": 0.9365269094029668, + "grad_norm": 1.8930405877899517, + "learning_rate": 1.0528098775594131e-07, + "loss": 0.5355, + "step": 30557 + }, + { + "epoch": 0.9365575579257079, + "grad_norm": 1.883179713467252, + "learning_rate": 1.0517969854091092e-07, + "loss": 0.5867, + "step": 30558 + }, + { + "epoch": 0.9365882064484492, + "grad_norm": 2.0286365049315083, + "learning_rate": 1.0507845755579838e-07, + "loss": 0.6139, + "step": 30559 + }, + { + "epoch": 0.9366188549711904, + "grad_norm": 1.852637981635783, + "learning_rate": 1.0497726480159954e-07, + "loss": 0.5256, + "step": 30560 + }, + { + "epoch": 0.9366495034939316, + "grad_norm": 0.7902089144076336, + "learning_rate": 1.0487612027931249e-07, + "loss": 0.379, + "step": 30561 + }, + { + "epoch": 0.9366801520166728, + "grad_norm": 2.0194029812072776, + "learning_rate": 1.0477502398993367e-07, + "loss": 0.5839, + "step": 30562 + }, + { + "epoch": 0.936710800539414, + "grad_norm": 1.82117124807287, + "learning_rate": 1.0467397593445838e-07, + "loss": 0.5269, + "step": 30563 + }, + { + "epoch": 0.9367414490621552, + "grad_norm": 0.8123426450307518, + "learning_rate": 1.0457297611388362e-07, + "loss": 0.3793, 
+ "step": 30564 + }, + { + "epoch": 0.9367720975848964, + "grad_norm": 1.8413345232698133, + "learning_rate": 1.0447202452920357e-07, + "loss": 0.5486, + "step": 30565 + }, + { + "epoch": 0.9368027461076376, + "grad_norm": 2.3241240481028376, + "learning_rate": 1.0437112118141302e-07, + "loss": 0.4973, + "step": 30566 + }, + { + "epoch": 0.9368333946303788, + "grad_norm": 1.938470283323702, + "learning_rate": 1.042702660715067e-07, + "loss": 0.5185, + "step": 30567 + }, + { + "epoch": 0.93686404315312, + "grad_norm": 1.7818044847556291, + "learning_rate": 1.0416945920047771e-07, + "loss": 0.499, + "step": 30568 + }, + { + "epoch": 0.9368946916758613, + "grad_norm": 1.7873118153343954, + "learning_rate": 1.0406870056932028e-07, + "loss": 0.5651, + "step": 30569 + }, + { + "epoch": 0.9369253401986024, + "grad_norm": 1.9366720575270175, + "learning_rate": 1.0396799017902637e-07, + "loss": 0.566, + "step": 30570 + }, + { + "epoch": 0.9369559887213437, + "grad_norm": 0.8709506809062358, + "learning_rate": 1.0386732803058852e-07, + "loss": 0.4089, + "step": 30571 + }, + { + "epoch": 0.9369866372440848, + "grad_norm": 1.9316292554607568, + "learning_rate": 1.0376671412499817e-07, + "loss": 0.6397, + "step": 30572 + }, + { + "epoch": 0.9370172857668261, + "grad_norm": 1.9080995127963492, + "learning_rate": 1.0366614846324786e-07, + "loss": 0.5914, + "step": 30573 + }, + { + "epoch": 0.9370479342895672, + "grad_norm": 0.8420920203689021, + "learning_rate": 1.0356563104632733e-07, + "loss": 0.4189, + "step": 30574 + }, + { + "epoch": 0.9370785828123085, + "grad_norm": 0.7820772460735179, + "learning_rate": 1.0346516187522804e-07, + "loss": 0.3883, + "step": 30575 + }, + { + "epoch": 0.9371092313350496, + "grad_norm": 1.6676047682018855, + "learning_rate": 1.0336474095093862e-07, + "loss": 0.5523, + "step": 30576 + }, + { + "epoch": 0.9371398798577909, + "grad_norm": 1.9528756764549389, + "learning_rate": 1.0326436827444941e-07, + "loss": 0.5844, + "step": 30577 + }, + { + "epoch": 0.937170528380532, + "grad_norm": 2.0257658389041087, + "learning_rate": 1.0316404384674961e-07, + "loss": 0.5328, + "step": 30578 + }, + { + "epoch": 0.9372011769032733, + "grad_norm": 1.7791797371947633, + "learning_rate": 1.030637676688273e-07, + "loss": 0.5879, + "step": 30579 + }, + { + "epoch": 0.9372318254260145, + "grad_norm": 1.9489620897029263, + "learning_rate": 1.0296353974167006e-07, + "loss": 0.587, + "step": 30580 + }, + { + "epoch": 0.9372624739487556, + "grad_norm": 0.8128570483215016, + "learning_rate": 1.0286336006626707e-07, + "loss": 0.4004, + "step": 30581 + }, + { + "epoch": 0.9372931224714969, + "grad_norm": 0.7938253594664986, + "learning_rate": 1.0276322864360366e-07, + "loss": 0.3968, + "step": 30582 + }, + { + "epoch": 0.937323770994238, + "grad_norm": 1.9570600472206803, + "learning_rate": 1.0266314547466683e-07, + "loss": 0.5908, + "step": 30583 + }, + { + "epoch": 0.9373544195169793, + "grad_norm": 2.297376168267138, + "learning_rate": 1.025631105604441e-07, + "loss": 0.4538, + "step": 30584 + }, + { + "epoch": 0.9373850680397204, + "grad_norm": 2.0220414188529094, + "learning_rate": 1.0246312390191915e-07, + "loss": 0.5282, + "step": 30585 + }, + { + "epoch": 0.9374157165624617, + "grad_norm": 1.9536291672297075, + "learning_rate": 1.0236318550007896e-07, + "loss": 0.5461, + "step": 30586 + }, + { + "epoch": 0.9374463650852028, + "grad_norm": 1.457569321845854, + "learning_rate": 1.0226329535590607e-07, + "loss": 0.3629, + "step": 30587 + }, + { + "epoch": 0.9374770136079441, + "grad_norm": 
1.7800263444841402, + "learning_rate": 1.0216345347038748e-07, + "loss": 0.5572, + "step": 30588 + }, + { + "epoch": 0.9375076621306853, + "grad_norm": 1.9180964105197251, + "learning_rate": 1.0206365984450516e-07, + "loss": 0.6069, + "step": 30589 + }, + { + "epoch": 0.9375383106534265, + "grad_norm": 1.891807212152802, + "learning_rate": 1.0196391447924282e-07, + "loss": 0.5929, + "step": 30590 + }, + { + "epoch": 0.9375689591761677, + "grad_norm": 2.105648100434335, + "learning_rate": 1.0186421737558294e-07, + "loss": 0.771, + "step": 30591 + }, + { + "epoch": 0.9375996076989089, + "grad_norm": 0.7760342769839029, + "learning_rate": 1.0176456853450812e-07, + "loss": 0.4041, + "step": 30592 + }, + { + "epoch": 0.9376302562216501, + "grad_norm": 0.7936099481259686, + "learning_rate": 1.0166496795700031e-07, + "loss": 0.4263, + "step": 30593 + }, + { + "epoch": 0.9376609047443913, + "grad_norm": 1.8757882685638074, + "learning_rate": 1.0156541564404099e-07, + "loss": 0.5381, + "step": 30594 + }, + { + "epoch": 0.9376915532671325, + "grad_norm": 2.0674174296901953, + "learning_rate": 1.0146591159661157e-07, + "loss": 0.7136, + "step": 30595 + }, + { + "epoch": 0.9377222017898738, + "grad_norm": 2.0295210815290727, + "learning_rate": 1.0136645581569071e-07, + "loss": 0.613, + "step": 30596 + }, + { + "epoch": 0.9377528503126149, + "grad_norm": 1.6498582928300598, + "learning_rate": 1.0126704830226042e-07, + "loss": 0.5229, + "step": 30597 + }, + { + "epoch": 0.9377834988353562, + "grad_norm": 1.785297855364811, + "learning_rate": 1.011676890572988e-07, + "loss": 0.5758, + "step": 30598 + }, + { + "epoch": 0.9378141473580973, + "grad_norm": 0.8024078027532198, + "learning_rate": 1.0106837808178561e-07, + "loss": 0.4024, + "step": 30599 + }, + { + "epoch": 0.9378447958808386, + "grad_norm": 1.778972934328205, + "learning_rate": 1.0096911537669951e-07, + "loss": 0.5006, + "step": 30600 + }, + { + "epoch": 0.9378754444035797, + "grad_norm": 1.8922069416885303, + "learning_rate": 1.0086990094301751e-07, + "loss": 0.5269, + "step": 30601 + }, + { + "epoch": 0.937906092926321, + "grad_norm": 2.209310963060305, + "learning_rate": 1.0077073478171773e-07, + "loss": 0.6126, + "step": 30602 + }, + { + "epoch": 0.9379367414490621, + "grad_norm": 1.7852310291674895, + "learning_rate": 1.0067161689377825e-07, + "loss": 0.5475, + "step": 30603 + }, + { + "epoch": 0.9379673899718034, + "grad_norm": 0.8981225539427515, + "learning_rate": 1.005725472801744e-07, + "loss": 0.3996, + "step": 30604 + }, + { + "epoch": 0.9379980384945446, + "grad_norm": 1.7546301694036175, + "learning_rate": 1.004735259418832e-07, + "loss": 0.5225, + "step": 30605 + }, + { + "epoch": 0.9380286870172858, + "grad_norm": 1.8810153207980207, + "learning_rate": 1.0037455287987885e-07, + "loss": 0.6153, + "step": 30606 + }, + { + "epoch": 0.938059335540027, + "grad_norm": 1.7474668619576783, + "learning_rate": 1.0027562809513836e-07, + "loss": 0.5477, + "step": 30607 + }, + { + "epoch": 0.9380899840627682, + "grad_norm": 1.8913743638796416, + "learning_rate": 1.0017675158863649e-07, + "loss": 0.5475, + "step": 30608 + }, + { + "epoch": 0.9381206325855094, + "grad_norm": 1.871672040564825, + "learning_rate": 1.0007792336134581e-07, + "loss": 0.5725, + "step": 30609 + }, + { + "epoch": 0.9381512811082506, + "grad_norm": 2.153683475404787, + "learning_rate": 9.997914341424109e-08, + "loss": 0.632, + "step": 30610 + }, + { + "epoch": 0.9381819296309918, + "grad_norm": 1.76329568769421, + "learning_rate": 9.9880411748296e-08, + "loss": 
0.5396, + "step": 30611 + }, + { + "epoch": 0.938212578153733, + "grad_norm": 1.8379108559944595, + "learning_rate": 9.978172836448252e-08, + "loss": 0.5728, + "step": 30612 + }, + { + "epoch": 0.9382432266764742, + "grad_norm": 1.782825314482853, + "learning_rate": 9.968309326377379e-08, + "loss": 0.6024, + "step": 30613 + }, + { + "epoch": 0.9382738751992153, + "grad_norm": 1.9290808563525779, + "learning_rate": 9.958450644714068e-08, + "loss": 0.6035, + "step": 30614 + }, + { + "epoch": 0.9383045237219566, + "grad_norm": 1.7296323434404823, + "learning_rate": 9.948596791555632e-08, + "loss": 0.5193, + "step": 30615 + }, + { + "epoch": 0.9383351722446978, + "grad_norm": 1.8377432370336395, + "learning_rate": 9.938747766999046e-08, + "loss": 0.6803, + "step": 30616 + }, + { + "epoch": 0.938365820767439, + "grad_norm": 1.884912500851155, + "learning_rate": 9.928903571141235e-08, + "loss": 0.4512, + "step": 30617 + }, + { + "epoch": 0.9383964692901802, + "grad_norm": 1.809667842412141, + "learning_rate": 9.919064204079454e-08, + "loss": 0.5157, + "step": 30618 + }, + { + "epoch": 0.9384271178129214, + "grad_norm": 1.9444375960759686, + "learning_rate": 9.909229665910514e-08, + "loss": 0.5213, + "step": 30619 + }, + { + "epoch": 0.9384577663356626, + "grad_norm": 1.7810499348080462, + "learning_rate": 9.899399956731281e-08, + "loss": 0.5274, + "step": 30620 + }, + { + "epoch": 0.9384884148584038, + "grad_norm": 1.844418978567794, + "learning_rate": 9.88957507663868e-08, + "loss": 0.5862, + "step": 30621 + }, + { + "epoch": 0.938519063381145, + "grad_norm": 2.127548441149026, + "learning_rate": 9.879755025729521e-08, + "loss": 0.6728, + "step": 30622 + }, + { + "epoch": 0.9385497119038863, + "grad_norm": 1.724528708183913, + "learning_rate": 9.869939804100504e-08, + "loss": 0.5082, + "step": 30623 + }, + { + "epoch": 0.9385803604266274, + "grad_norm": 1.7469318507792855, + "learning_rate": 9.860129411848385e-08, + "loss": 0.4667, + "step": 30624 + }, + { + "epoch": 0.9386110089493687, + "grad_norm": 1.8346566483372817, + "learning_rate": 9.850323849069809e-08, + "loss": 0.6063, + "step": 30625 + }, + { + "epoch": 0.9386416574721098, + "grad_norm": 1.9054391159917858, + "learning_rate": 9.840523115861423e-08, + "loss": 0.5287, + "step": 30626 + }, + { + "epoch": 0.9386723059948511, + "grad_norm": 1.9840570792051686, + "learning_rate": 9.830727212319813e-08, + "loss": 0.5472, + "step": 30627 + }, + { + "epoch": 0.9387029545175922, + "grad_norm": 1.758124785369086, + "learning_rate": 9.820936138541348e-08, + "loss": 0.5118, + "step": 30628 + }, + { + "epoch": 0.9387336030403335, + "grad_norm": 1.8603354087117743, + "learning_rate": 9.811149894622729e-08, + "loss": 0.6456, + "step": 30629 + }, + { + "epoch": 0.9387642515630746, + "grad_norm": 1.7457450128406384, + "learning_rate": 9.801368480660267e-08, + "loss": 0.5606, + "step": 30630 + }, + { + "epoch": 0.9387949000858159, + "grad_norm": 0.7858236239448939, + "learning_rate": 9.791591896750275e-08, + "loss": 0.3988, + "step": 30631 + }, + { + "epoch": 0.938825548608557, + "grad_norm": 0.7848838393183531, + "learning_rate": 9.781820142989173e-08, + "loss": 0.3971, + "step": 30632 + }, + { + "epoch": 0.9388561971312983, + "grad_norm": 0.7869370431418792, + "learning_rate": 9.772053219473221e-08, + "loss": 0.3964, + "step": 30633 + }, + { + "epoch": 0.9388868456540395, + "grad_norm": 2.612061549856875, + "learning_rate": 9.762291126298673e-08, + "loss": 0.5824, + "step": 30634 + }, + { + "epoch": 0.9389174941767807, + "grad_norm": 
1.8374649440458475, + "learning_rate": 9.752533863561676e-08, + "loss": 0.5043, + "step": 30635 + }, + { + "epoch": 0.9389481426995219, + "grad_norm": 1.7436376289261113, + "learning_rate": 9.742781431358428e-08, + "loss": 0.578, + "step": 30636 + }, + { + "epoch": 0.9389787912222631, + "grad_norm": 1.9756068257240436, + "learning_rate": 9.733033829785021e-08, + "loss": 0.5218, + "step": 30637 + }, + { + "epoch": 0.9390094397450043, + "grad_norm": 1.8066122405809593, + "learning_rate": 9.723291058937434e-08, + "loss": 0.4795, + "step": 30638 + }, + { + "epoch": 0.9390400882677455, + "grad_norm": 0.8029551891570746, + "learning_rate": 9.713553118911645e-08, + "loss": 0.3987, + "step": 30639 + }, + { + "epoch": 0.9390707367904867, + "grad_norm": 2.3495048598152457, + "learning_rate": 9.7038200098038e-08, + "loss": 0.5532, + "step": 30640 + }, + { + "epoch": 0.939101385313228, + "grad_norm": 1.7422064019539156, + "learning_rate": 9.694091731709542e-08, + "loss": 0.5799, + "step": 30641 + }, + { + "epoch": 0.9391320338359691, + "grad_norm": 2.2600708979916924, + "learning_rate": 9.68436828472491e-08, + "loss": 0.6347, + "step": 30642 + }, + { + "epoch": 0.9391626823587104, + "grad_norm": 1.9857760222087837, + "learning_rate": 9.674649668945657e-08, + "loss": 0.5319, + "step": 30643 + }, + { + "epoch": 0.9391933308814515, + "grad_norm": 1.9026790273417622, + "learning_rate": 9.664935884467486e-08, + "loss": 0.5417, + "step": 30644 + }, + { + "epoch": 0.9392239794041927, + "grad_norm": 1.9439204212563153, + "learning_rate": 9.655226931386208e-08, + "loss": 0.5578, + "step": 30645 + }, + { + "epoch": 0.9392546279269339, + "grad_norm": 1.8052891440712682, + "learning_rate": 9.645522809797414e-08, + "loss": 0.602, + "step": 30646 + }, + { + "epoch": 0.9392852764496751, + "grad_norm": 1.8929643474098166, + "learning_rate": 9.63582351979675e-08, + "loss": 0.5936, + "step": 30647 + }, + { + "epoch": 0.9393159249724163, + "grad_norm": 1.9656496533756054, + "learning_rate": 9.626129061479806e-08, + "loss": 0.5774, + "step": 30648 + }, + { + "epoch": 0.9393465734951575, + "grad_norm": 1.9443302557770474, + "learning_rate": 9.61643943494206e-08, + "loss": 0.6075, + "step": 30649 + }, + { + "epoch": 0.9393772220178987, + "grad_norm": 1.9090535557818418, + "learning_rate": 9.606754640278993e-08, + "loss": 0.4916, + "step": 30650 + }, + { + "epoch": 0.9394078705406399, + "grad_norm": 1.958270846139174, + "learning_rate": 9.597074677586027e-08, + "loss": 0.5497, + "step": 30651 + }, + { + "epoch": 0.9394385190633812, + "grad_norm": 2.1703502904270495, + "learning_rate": 9.587399546958586e-08, + "loss": 0.6079, + "step": 30652 + }, + { + "epoch": 0.9394691675861223, + "grad_norm": 1.9620098932631247, + "learning_rate": 9.577729248491985e-08, + "loss": 0.5613, + "step": 30653 + }, + { + "epoch": 0.9394998161088636, + "grad_norm": 0.8276033244589128, + "learning_rate": 9.568063782281478e-08, + "loss": 0.4121, + "step": 30654 + }, + { + "epoch": 0.9395304646316047, + "grad_norm": 2.071190513676695, + "learning_rate": 9.558403148422324e-08, + "loss": 0.562, + "step": 30655 + }, + { + "epoch": 0.939561113154346, + "grad_norm": 1.9821065800653286, + "learning_rate": 9.548747347009779e-08, + "loss": 0.6546, + "step": 30656 + }, + { + "epoch": 0.9395917616770871, + "grad_norm": 2.0452838174816512, + "learning_rate": 9.539096378138879e-08, + "loss": 0.5971, + "step": 30657 + }, + { + "epoch": 0.9396224101998284, + "grad_norm": 1.9540579317973081, + "learning_rate": 9.529450241904659e-08, + "loss": 0.5966, + "step": 
30658 + }, + { + "epoch": 0.9396530587225695, + "grad_norm": 0.8336380610679281, + "learning_rate": 9.51980893840232e-08, + "loss": 0.3925, + "step": 30659 + }, + { + "epoch": 0.9396837072453108, + "grad_norm": 2.20416912922291, + "learning_rate": 9.510172467726786e-08, + "loss": 0.5406, + "step": 30660 + }, + { + "epoch": 0.939714355768052, + "grad_norm": 2.0947012584986147, + "learning_rate": 9.500540829973093e-08, + "loss": 0.5242, + "step": 30661 + }, + { + "epoch": 0.9397450042907932, + "grad_norm": 2.06595474844286, + "learning_rate": 9.490914025235998e-08, + "loss": 0.6449, + "step": 30662 + }, + { + "epoch": 0.9397756528135344, + "grad_norm": 0.8427488086514479, + "learning_rate": 9.481292053610424e-08, + "loss": 0.407, + "step": 30663 + }, + { + "epoch": 0.9398063013362756, + "grad_norm": 0.7670647124941585, + "learning_rate": 9.471674915191242e-08, + "loss": 0.3879, + "step": 30664 + }, + { + "epoch": 0.9398369498590168, + "grad_norm": 0.8227094628594452, + "learning_rate": 9.462062610073098e-08, + "loss": 0.4004, + "step": 30665 + }, + { + "epoch": 0.939867598381758, + "grad_norm": 2.030127643268019, + "learning_rate": 9.452455138350746e-08, + "loss": 0.5763, + "step": 30666 + }, + { + "epoch": 0.9398982469044992, + "grad_norm": 0.781460732424952, + "learning_rate": 9.442852500118949e-08, + "loss": 0.3758, + "step": 30667 + }, + { + "epoch": 0.9399288954272405, + "grad_norm": 2.042524255323359, + "learning_rate": 9.433254695472183e-08, + "loss": 0.594, + "step": 30668 + }, + { + "epoch": 0.9399595439499816, + "grad_norm": 1.8819243582439757, + "learning_rate": 9.42366172450504e-08, + "loss": 0.6505, + "step": 30669 + }, + { + "epoch": 0.9399901924727229, + "grad_norm": 2.036585725258693, + "learning_rate": 9.414073587312167e-08, + "loss": 0.6391, + "step": 30670 + }, + { + "epoch": 0.940020840995464, + "grad_norm": 0.8225749166626458, + "learning_rate": 9.404490283987877e-08, + "loss": 0.4043, + "step": 30671 + }, + { + "epoch": 0.9400514895182053, + "grad_norm": 0.8310314286423981, + "learning_rate": 9.394911814626706e-08, + "loss": 0.3929, + "step": 30672 + }, + { + "epoch": 0.9400821380409464, + "grad_norm": 1.8975442055621308, + "learning_rate": 9.385338179322967e-08, + "loss": 0.6185, + "step": 30673 + }, + { + "epoch": 0.9401127865636877, + "grad_norm": 1.962887299774007, + "learning_rate": 9.375769378171029e-08, + "loss": 0.5302, + "step": 30674 + }, + { + "epoch": 0.9401434350864288, + "grad_norm": 2.227243444421822, + "learning_rate": 9.366205411265205e-08, + "loss": 0.6537, + "step": 30675 + }, + { + "epoch": 0.94017408360917, + "grad_norm": 1.8138353928998852, + "learning_rate": 9.356646278699643e-08, + "loss": 0.5496, + "step": 30676 + }, + { + "epoch": 0.9402047321319112, + "grad_norm": 0.8140861817132444, + "learning_rate": 9.347091980568601e-08, + "loss": 0.3808, + "step": 30677 + }, + { + "epoch": 0.9402353806546524, + "grad_norm": 2.077276653462024, + "learning_rate": 9.337542516966224e-08, + "loss": 0.6116, + "step": 30678 + }, + { + "epoch": 0.9402660291773937, + "grad_norm": 0.8058716206162084, + "learning_rate": 9.32799788798655e-08, + "loss": 0.3908, + "step": 30679 + }, + { + "epoch": 0.9402966777001348, + "grad_norm": 2.029480163460199, + "learning_rate": 9.318458093723614e-08, + "loss": 0.4638, + "step": 30680 + }, + { + "epoch": 0.9403273262228761, + "grad_norm": 2.001749770100484, + "learning_rate": 9.308923134271563e-08, + "loss": 0.5844, + "step": 30681 + }, + { + "epoch": 0.9403579747456172, + "grad_norm": 1.8331080904565253, + "learning_rate": 
9.299393009724156e-08, + "loss": 0.6256, + "step": 30682 + }, + { + "epoch": 0.9403886232683585, + "grad_norm": 1.8256412135063766, + "learning_rate": 9.289867720175427e-08, + "loss": 0.5688, + "step": 30683 + }, + { + "epoch": 0.9404192717910996, + "grad_norm": 1.8980194811692148, + "learning_rate": 9.280347265719192e-08, + "loss": 0.5669, + "step": 30684 + }, + { + "epoch": 0.9404499203138409, + "grad_norm": 1.775160081863164, + "learning_rate": 9.270831646449207e-08, + "loss": 0.4881, + "step": 30685 + }, + { + "epoch": 0.940480568836582, + "grad_norm": 2.2242716417608333, + "learning_rate": 9.261320862459288e-08, + "loss": 0.5256, + "step": 30686 + }, + { + "epoch": 0.9405112173593233, + "grad_norm": 1.918085261989034, + "learning_rate": 9.251814913843138e-08, + "loss": 0.5364, + "step": 30687 + }, + { + "epoch": 0.9405418658820645, + "grad_norm": 2.0365589033282454, + "learning_rate": 9.242313800694402e-08, + "loss": 0.5673, + "step": 30688 + }, + { + "epoch": 0.9405725144048057, + "grad_norm": 2.0027066559185442, + "learning_rate": 9.232817523106785e-08, + "loss": 0.6333, + "step": 30689 + }, + { + "epoch": 0.9406031629275469, + "grad_norm": 2.1144846423024815, + "learning_rate": 9.223326081173712e-08, + "loss": 0.6037, + "step": 30690 + }, + { + "epoch": 0.9406338114502881, + "grad_norm": 2.186095397796437, + "learning_rate": 9.213839474988829e-08, + "loss": 0.7227, + "step": 30691 + }, + { + "epoch": 0.9406644599730293, + "grad_norm": 1.888013700739318, + "learning_rate": 9.204357704645562e-08, + "loss": 0.5747, + "step": 30692 + }, + { + "epoch": 0.9406951084957705, + "grad_norm": 1.9219942722958958, + "learning_rate": 9.19488077023728e-08, + "loss": 0.5447, + "step": 30693 + }, + { + "epoch": 0.9407257570185117, + "grad_norm": 0.8138727535390005, + "learning_rate": 9.185408671857465e-08, + "loss": 0.3926, + "step": 30694 + }, + { + "epoch": 0.940756405541253, + "grad_norm": 1.777749682759295, + "learning_rate": 9.175941409599376e-08, + "loss": 0.5163, + "step": 30695 + }, + { + "epoch": 0.9407870540639941, + "grad_norm": 1.7736200759839142, + "learning_rate": 9.16647898355627e-08, + "loss": 0.4644, + "step": 30696 + }, + { + "epoch": 0.9408177025867354, + "grad_norm": 1.8803944096766008, + "learning_rate": 9.15702139382152e-08, + "loss": 0.5291, + "step": 30697 + }, + { + "epoch": 0.9408483511094765, + "grad_norm": 1.986441843874043, + "learning_rate": 9.147568640488158e-08, + "loss": 0.5684, + "step": 30698 + }, + { + "epoch": 0.9408789996322178, + "grad_norm": 2.0362388217878338, + "learning_rate": 9.13812072364939e-08, + "loss": 0.619, + "step": 30699 + }, + { + "epoch": 0.9409096481549589, + "grad_norm": 1.7570409063091086, + "learning_rate": 9.128677643398365e-08, + "loss": 0.5242, + "step": 30700 + }, + { + "epoch": 0.9409402966777002, + "grad_norm": 1.8583093700886455, + "learning_rate": 9.119239399828062e-08, + "loss": 0.5626, + "step": 30701 + }, + { + "epoch": 0.9409709452004413, + "grad_norm": 1.9475511821387326, + "learning_rate": 9.109805993031517e-08, + "loss": 0.5221, + "step": 30702 + }, + { + "epoch": 0.9410015937231826, + "grad_norm": 2.2011140959663877, + "learning_rate": 9.100377423101603e-08, + "loss": 0.5879, + "step": 30703 + }, + { + "epoch": 0.9410322422459237, + "grad_norm": 1.789742136288403, + "learning_rate": 9.090953690131243e-08, + "loss": 0.4915, + "step": 30704 + }, + { + "epoch": 0.941062890768665, + "grad_norm": 2.0162389447727005, + "learning_rate": 9.08153479421342e-08, + "loss": 0.6379, + "step": 30705 + }, + { + "epoch": 0.9410935392914062, 
+ "grad_norm": 1.7897544668463754, + "learning_rate": 9.072120735440726e-08, + "loss": 0.4898, + "step": 30706 + }, + { + "epoch": 0.9411241878141473, + "grad_norm": 1.8562802375152774, + "learning_rate": 9.062711513906087e-08, + "loss": 0.5761, + "step": 30707 + }, + { + "epoch": 0.9411548363368886, + "grad_norm": 1.7799505973303142, + "learning_rate": 9.053307129702204e-08, + "loss": 0.484, + "step": 30708 + }, + { + "epoch": 0.9411854848596297, + "grad_norm": 0.8196392229733329, + "learning_rate": 9.043907582921673e-08, + "loss": 0.3773, + "step": 30709 + }, + { + "epoch": 0.941216133382371, + "grad_norm": 1.9158249774959528, + "learning_rate": 9.034512873657086e-08, + "loss": 0.5748, + "step": 30710 + }, + { + "epoch": 0.9412467819051121, + "grad_norm": 1.7965449512233664, + "learning_rate": 9.025123002001146e-08, + "loss": 0.45, + "step": 30711 + }, + { + "epoch": 0.9412774304278534, + "grad_norm": 1.8113202434546467, + "learning_rate": 9.015737968046223e-08, + "loss": 0.5434, + "step": 30712 + }, + { + "epoch": 0.9413080789505945, + "grad_norm": 0.7978888493819439, + "learning_rate": 9.00635777188491e-08, + "loss": 0.3908, + "step": 30713 + }, + { + "epoch": 0.9413387274733358, + "grad_norm": 1.90360581723147, + "learning_rate": 8.996982413609523e-08, + "loss": 0.5689, + "step": 30714 + }, + { + "epoch": 0.941369375996077, + "grad_norm": 1.8585578720129208, + "learning_rate": 8.987611893312542e-08, + "loss": 0.4805, + "step": 30715 + }, + { + "epoch": 0.9414000245188182, + "grad_norm": 1.968484401310985, + "learning_rate": 8.978246211086228e-08, + "loss": 0.5932, + "step": 30716 + }, + { + "epoch": 0.9414306730415594, + "grad_norm": 1.949694843421069, + "learning_rate": 8.968885367022895e-08, + "loss": 0.6108, + "step": 30717 + }, + { + "epoch": 0.9414613215643006, + "grad_norm": 1.6840115768981356, + "learning_rate": 8.959529361214748e-08, + "loss": 0.5239, + "step": 30718 + }, + { + "epoch": 0.9414919700870418, + "grad_norm": 1.8465295313758863, + "learning_rate": 8.950178193754045e-08, + "loss": 0.6381, + "step": 30719 + }, + { + "epoch": 0.941522618609783, + "grad_norm": 1.806290973308188, + "learning_rate": 8.94083186473288e-08, + "loss": 0.6053, + "step": 30720 + }, + { + "epoch": 0.9415532671325242, + "grad_norm": 0.7866005928656588, + "learning_rate": 8.93149037424329e-08, + "loss": 0.4021, + "step": 30721 + }, + { + "epoch": 0.9415839156552654, + "grad_norm": 1.8208889384324585, + "learning_rate": 8.922153722377425e-08, + "loss": 0.6128, + "step": 30722 + }, + { + "epoch": 0.9416145641780066, + "grad_norm": 1.8725607611092752, + "learning_rate": 8.912821909227154e-08, + "loss": 0.5877, + "step": 30723 + }, + { + "epoch": 0.9416452127007479, + "grad_norm": 1.8001824890909521, + "learning_rate": 8.903494934884572e-08, + "loss": 0.5263, + "step": 30724 + }, + { + "epoch": 0.941675861223489, + "grad_norm": 1.7736836768246436, + "learning_rate": 8.894172799441436e-08, + "loss": 0.5848, + "step": 30725 + }, + { + "epoch": 0.9417065097462303, + "grad_norm": 1.7304559778596555, + "learning_rate": 8.884855502989732e-08, + "loss": 0.4503, + "step": 30726 + }, + { + "epoch": 0.9417371582689714, + "grad_norm": 2.092843781812343, + "learning_rate": 8.875543045621216e-08, + "loss": 0.5724, + "step": 30727 + }, + { + "epoch": 0.9417678067917127, + "grad_norm": 0.7815158190837229, + "learning_rate": 8.866235427427594e-08, + "loss": 0.3825, + "step": 30728 + }, + { + "epoch": 0.9417984553144538, + "grad_norm": 2.0982716170657794, + "learning_rate": 8.856932648500571e-08, + "loss": 0.5884, + 
"step": 30729 + }, + { + "epoch": 0.9418291038371951, + "grad_norm": 1.9365747490730316, + "learning_rate": 8.847634708931963e-08, + "loss": 0.638, + "step": 30730 + }, + { + "epoch": 0.9418597523599362, + "grad_norm": 0.7793467723707076, + "learning_rate": 8.838341608813194e-08, + "loss": 0.3815, + "step": 30731 + }, + { + "epoch": 0.9418904008826775, + "grad_norm": 1.7451944457039381, + "learning_rate": 8.829053348235917e-08, + "loss": 0.4851, + "step": 30732 + }, + { + "epoch": 0.9419210494054187, + "grad_norm": 1.9798191208023137, + "learning_rate": 8.819769927291666e-08, + "loss": 0.5195, + "step": 30733 + }, + { + "epoch": 0.9419516979281599, + "grad_norm": 2.0098955948502777, + "learning_rate": 8.810491346071926e-08, + "loss": 0.5612, + "step": 30734 + }, + { + "epoch": 0.9419823464509011, + "grad_norm": 2.002100303313886, + "learning_rate": 8.801217604668121e-08, + "loss": 0.5973, + "step": 30735 + }, + { + "epoch": 0.9420129949736423, + "grad_norm": 2.1188913901061417, + "learning_rate": 8.79194870317146e-08, + "loss": 0.5649, + "step": 30736 + }, + { + "epoch": 0.9420436434963835, + "grad_norm": 0.7613150177289116, + "learning_rate": 8.782684641673533e-08, + "loss": 0.3982, + "step": 30737 + }, + { + "epoch": 0.9420742920191246, + "grad_norm": 1.694139638039624, + "learning_rate": 8.773425420265491e-08, + "loss": 0.4834, + "step": 30738 + }, + { + "epoch": 0.9421049405418659, + "grad_norm": 1.9841335501653097, + "learning_rate": 8.764171039038538e-08, + "loss": 0.5894, + "step": 30739 + }, + { + "epoch": 0.942135589064607, + "grad_norm": 1.8815103973267513, + "learning_rate": 8.754921498083879e-08, + "loss": 0.5318, + "step": 30740 + }, + { + "epoch": 0.9421662375873483, + "grad_norm": 1.7816698619563525, + "learning_rate": 8.745676797492664e-08, + "loss": 0.6067, + "step": 30741 + }, + { + "epoch": 0.9421968861100894, + "grad_norm": 1.7584085416668709, + "learning_rate": 8.73643693735604e-08, + "loss": 0.6712, + "step": 30742 + }, + { + "epoch": 0.9422275346328307, + "grad_norm": 1.8841709941099993, + "learning_rate": 8.727201917764938e-08, + "loss": 0.583, + "step": 30743 + }, + { + "epoch": 0.9422581831555719, + "grad_norm": 1.9551959589352272, + "learning_rate": 8.717971738810448e-08, + "loss": 0.5955, + "step": 30744 + }, + { + "epoch": 0.9422888316783131, + "grad_norm": 1.628711820476706, + "learning_rate": 8.7087464005835e-08, + "loss": 0.5813, + "step": 30745 + }, + { + "epoch": 0.9423194802010543, + "grad_norm": 0.8267265194129803, + "learning_rate": 8.699525903175022e-08, + "loss": 0.4112, + "step": 30746 + }, + { + "epoch": 0.9423501287237955, + "grad_norm": 0.8594362257133937, + "learning_rate": 8.690310246675715e-08, + "loss": 0.4315, + "step": 30747 + }, + { + "epoch": 0.9423807772465367, + "grad_norm": 2.091632698680766, + "learning_rate": 8.68109943117651e-08, + "loss": 0.6343, + "step": 30748 + }, + { + "epoch": 0.9424114257692779, + "grad_norm": 1.9938757732711834, + "learning_rate": 8.671893456768166e-08, + "loss": 0.6009, + "step": 30749 + }, + { + "epoch": 0.9424420742920191, + "grad_norm": 1.8295454840196055, + "learning_rate": 8.662692323541388e-08, + "loss": 0.6028, + "step": 30750 + }, + { + "epoch": 0.9424727228147604, + "grad_norm": 1.9872237007372302, + "learning_rate": 8.653496031586772e-08, + "loss": 0.6037, + "step": 30751 + }, + { + "epoch": 0.9425033713375015, + "grad_norm": 2.408638359113397, + "learning_rate": 8.644304580994967e-08, + "loss": 0.5446, + "step": 30752 + }, + { + "epoch": 0.9425340198602428, + "grad_norm": 1.6968853801820776, + 
"learning_rate": 8.635117971856622e-08, + "loss": 0.4104, + "step": 30753 + }, + { + "epoch": 0.9425646683829839, + "grad_norm": 0.7768050244670885, + "learning_rate": 8.625936204262165e-08, + "loss": 0.3817, + "step": 30754 + }, + { + "epoch": 0.9425953169057252, + "grad_norm": 1.9650008514375246, + "learning_rate": 8.616759278301967e-08, + "loss": 0.588, + "step": 30755 + }, + { + "epoch": 0.9426259654284663, + "grad_norm": 1.971070081698132, + "learning_rate": 8.60758719406668e-08, + "loss": 0.5761, + "step": 30756 + }, + { + "epoch": 0.9426566139512076, + "grad_norm": 1.9855110659069954, + "learning_rate": 8.598419951646564e-08, + "loss": 0.6366, + "step": 30757 + }, + { + "epoch": 0.9426872624739487, + "grad_norm": 1.8704174424529054, + "learning_rate": 8.58925755113188e-08, + "loss": 0.5235, + "step": 30758 + }, + { + "epoch": 0.94271791099669, + "grad_norm": 2.073024849765034, + "learning_rate": 8.580099992613001e-08, + "loss": 0.5355, + "step": 30759 + }, + { + "epoch": 0.9427485595194312, + "grad_norm": 1.7753379950659292, + "learning_rate": 8.570947276180131e-08, + "loss": 0.5682, + "step": 30760 + }, + { + "epoch": 0.9427792080421724, + "grad_norm": 1.875087597578183, + "learning_rate": 8.561799401923477e-08, + "loss": 0.4821, + "step": 30761 + }, + { + "epoch": 0.9428098565649136, + "grad_norm": 1.7168649721755884, + "learning_rate": 8.552656369933132e-08, + "loss": 0.572, + "step": 30762 + }, + { + "epoch": 0.9428405050876548, + "grad_norm": 1.827145391768168, + "learning_rate": 8.543518180299192e-08, + "loss": 0.4725, + "step": 30763 + }, + { + "epoch": 0.942871153610396, + "grad_norm": 1.7566892619849301, + "learning_rate": 8.534384833111809e-08, + "loss": 0.4834, + "step": 30764 + }, + { + "epoch": 0.9429018021331372, + "grad_norm": 1.7634627313822113, + "learning_rate": 8.525256328460796e-08, + "loss": 0.6165, + "step": 30765 + }, + { + "epoch": 0.9429324506558784, + "grad_norm": 1.8057095417386722, + "learning_rate": 8.51613266643614e-08, + "loss": 0.5359, + "step": 30766 + }, + { + "epoch": 0.9429630991786196, + "grad_norm": 1.8324055753084325, + "learning_rate": 8.507013847127876e-08, + "loss": 0.5821, + "step": 30767 + }, + { + "epoch": 0.9429937477013608, + "grad_norm": 1.863722214291174, + "learning_rate": 8.497899870625715e-08, + "loss": 0.599, + "step": 30768 + }, + { + "epoch": 0.943024396224102, + "grad_norm": 2.2554254020187487, + "learning_rate": 8.488790737019525e-08, + "loss": 0.6207, + "step": 30769 + }, + { + "epoch": 0.9430550447468432, + "grad_norm": 0.7715938721857639, + "learning_rate": 8.47968644639896e-08, + "loss": 0.4018, + "step": 30770 + }, + { + "epoch": 0.9430856932695844, + "grad_norm": 1.6605104432595683, + "learning_rate": 8.470586998853891e-08, + "loss": 0.4885, + "step": 30771 + }, + { + "epoch": 0.9431163417923256, + "grad_norm": 1.9262221173325569, + "learning_rate": 8.461492394473859e-08, + "loss": 0.5678, + "step": 30772 + }, + { + "epoch": 0.9431469903150668, + "grad_norm": 1.9240488261766464, + "learning_rate": 8.452402633348511e-08, + "loss": 0.5087, + "step": 30773 + }, + { + "epoch": 0.943177638837808, + "grad_norm": 2.05330220000334, + "learning_rate": 8.44331771556739e-08, + "loss": 0.635, + "step": 30774 + }, + { + "epoch": 0.9432082873605492, + "grad_norm": 0.815572279076092, + "learning_rate": 8.434237641220088e-08, + "loss": 0.42, + "step": 30775 + }, + { + "epoch": 0.9432389358832904, + "grad_norm": 1.998645412013749, + "learning_rate": 8.425162410395981e-08, + "loss": 0.6922, + "step": 30776 + }, + { + "epoch": 
0.9432695844060316, + "grad_norm": 0.7970622570525705, + "learning_rate": 8.416092023184441e-08, + "loss": 0.384, + "step": 30777 + }, + { + "epoch": 0.9433002329287729, + "grad_norm": 0.8004424272080015, + "learning_rate": 8.407026479675063e-08, + "loss": 0.384, + "step": 30778 + }, + { + "epoch": 0.943330881451514, + "grad_norm": 1.7550736678138215, + "learning_rate": 8.39796577995694e-08, + "loss": 0.495, + "step": 30779 + }, + { + "epoch": 0.9433615299742553, + "grad_norm": 1.7821506010812906, + "learning_rate": 8.388909924119503e-08, + "loss": 0.6161, + "step": 30780 + }, + { + "epoch": 0.9433921784969964, + "grad_norm": 1.9599838688786975, + "learning_rate": 8.379858912251848e-08, + "loss": 0.6158, + "step": 30781 + }, + { + "epoch": 0.9434228270197377, + "grad_norm": 0.7392142884502415, + "learning_rate": 8.37081274444329e-08, + "loss": 0.3561, + "step": 30782 + }, + { + "epoch": 0.9434534755424788, + "grad_norm": 1.6861714458433728, + "learning_rate": 8.361771420782871e-08, + "loss": 0.4958, + "step": 30783 + }, + { + "epoch": 0.9434841240652201, + "grad_norm": 1.8174202388615803, + "learning_rate": 8.352734941359741e-08, + "loss": 0.467, + "step": 30784 + }, + { + "epoch": 0.9435147725879612, + "grad_norm": 1.7412818932012533, + "learning_rate": 8.343703306262829e-08, + "loss": 0.5107, + "step": 30785 + }, + { + "epoch": 0.9435454211107025, + "grad_norm": 1.9668698499628803, + "learning_rate": 8.334676515581286e-08, + "loss": 0.4977, + "step": 30786 + }, + { + "epoch": 0.9435760696334436, + "grad_norm": 2.044019583700591, + "learning_rate": 8.325654569403985e-08, + "loss": 0.6201, + "step": 30787 + }, + { + "epoch": 0.9436067181561849, + "grad_norm": 1.5719994005710314, + "learning_rate": 8.316637467819744e-08, + "loss": 0.5343, + "step": 30788 + }, + { + "epoch": 0.9436373666789261, + "grad_norm": 1.9218838298090044, + "learning_rate": 8.307625210917548e-08, + "loss": 0.6276, + "step": 30789 + }, + { + "epoch": 0.9436680152016673, + "grad_norm": 1.9814913524595452, + "learning_rate": 8.298617798786047e-08, + "loss": 0.6252, + "step": 30790 + }, + { + "epoch": 0.9436986637244085, + "grad_norm": 2.5149796611474144, + "learning_rate": 8.289615231514115e-08, + "loss": 0.5292, + "step": 30791 + }, + { + "epoch": 0.9437293122471497, + "grad_norm": 0.824652403603273, + "learning_rate": 8.280617509190403e-08, + "loss": 0.4092, + "step": 30792 + }, + { + "epoch": 0.9437599607698909, + "grad_norm": 2.0424388672714477, + "learning_rate": 8.271624631903564e-08, + "loss": 0.6023, + "step": 30793 + }, + { + "epoch": 0.9437906092926321, + "grad_norm": 1.7226600848798084, + "learning_rate": 8.262636599742301e-08, + "loss": 0.5068, + "step": 30794 + }, + { + "epoch": 0.9438212578153733, + "grad_norm": 1.9826069624497746, + "learning_rate": 8.253653412794994e-08, + "loss": 0.5893, + "step": 30795 + }, + { + "epoch": 0.9438519063381146, + "grad_norm": 2.1017130420668515, + "learning_rate": 8.244675071150287e-08, + "loss": 0.6416, + "step": 30796 + }, + { + "epoch": 0.9438825548608557, + "grad_norm": 1.998726253581635, + "learning_rate": 8.23570157489667e-08, + "loss": 0.5529, + "step": 30797 + }, + { + "epoch": 0.943913203383597, + "grad_norm": 1.842832084036919, + "learning_rate": 8.22673292412246e-08, + "loss": 0.5339, + "step": 30798 + }, + { + "epoch": 0.9439438519063381, + "grad_norm": 1.8228760420288226, + "learning_rate": 8.217769118916085e-08, + "loss": 0.5064, + "step": 30799 + }, + { + "epoch": 0.9439745004290793, + "grad_norm": 2.0793161504146433, + "learning_rate": 
8.208810159365865e-08, + "loss": 0.5988, + "step": 30800 + }, + { + "epoch": 0.9440051489518205, + "grad_norm": 2.0954201474839325, + "learning_rate": 8.199856045560062e-08, + "loss": 0.6383, + "step": 30801 + }, + { + "epoch": 0.9440357974745617, + "grad_norm": 2.002774922603127, + "learning_rate": 8.190906777586938e-08, + "loss": 0.507, + "step": 30802 + }, + { + "epoch": 0.9440664459973029, + "grad_norm": 1.6487595163882016, + "learning_rate": 8.18196235553459e-08, + "loss": 0.4289, + "step": 30803 + }, + { + "epoch": 0.9440970945200441, + "grad_norm": 1.9300279939012512, + "learning_rate": 8.173022779491224e-08, + "loss": 0.6348, + "step": 30804 + }, + { + "epoch": 0.9441277430427854, + "grad_norm": 1.9610089585237165, + "learning_rate": 8.164088049544938e-08, + "loss": 0.5967, + "step": 30805 + }, + { + "epoch": 0.9441583915655265, + "grad_norm": 0.7948180144489922, + "learning_rate": 8.155158165783661e-08, + "loss": 0.404, + "step": 30806 + }, + { + "epoch": 0.9441890400882678, + "grad_norm": 1.8753559056493632, + "learning_rate": 8.146233128295489e-08, + "loss": 0.6002, + "step": 30807 + }, + { + "epoch": 0.9442196886110089, + "grad_norm": 1.9251770901463738, + "learning_rate": 8.137312937168407e-08, + "loss": 0.5668, + "step": 30808 + }, + { + "epoch": 0.9442503371337502, + "grad_norm": 1.9696372955316814, + "learning_rate": 8.128397592490123e-08, + "loss": 0.6296, + "step": 30809 + }, + { + "epoch": 0.9442809856564913, + "grad_norm": 1.791628944884019, + "learning_rate": 8.119487094348677e-08, + "loss": 0.6144, + "step": 30810 + }, + { + "epoch": 0.9443116341792326, + "grad_norm": 1.7965476171657218, + "learning_rate": 8.110581442831666e-08, + "loss": 0.5417, + "step": 30811 + }, + { + "epoch": 0.9443422827019737, + "grad_norm": 1.9556793913989357, + "learning_rate": 8.10168063802702e-08, + "loss": 0.5427, + "step": 30812 + }, + { + "epoch": 0.944372931224715, + "grad_norm": 2.0292135454407045, + "learning_rate": 8.092784680022391e-08, + "loss": 0.6486, + "step": 30813 + }, + { + "epoch": 0.9444035797474561, + "grad_norm": 1.8425146268627919, + "learning_rate": 8.083893568905376e-08, + "loss": 0.5422, + "step": 30814 + }, + { + "epoch": 0.9444342282701974, + "grad_norm": 1.8960540611044658, + "learning_rate": 8.075007304763626e-08, + "loss": 0.5331, + "step": 30815 + }, + { + "epoch": 0.9444648767929386, + "grad_norm": 0.8240392099656495, + "learning_rate": 8.066125887684739e-08, + "loss": 0.4003, + "step": 30816 + }, + { + "epoch": 0.9444955253156798, + "grad_norm": 1.8684856212806866, + "learning_rate": 8.057249317756089e-08, + "loss": 0.5338, + "step": 30817 + }, + { + "epoch": 0.944526173838421, + "grad_norm": 1.9458426758952498, + "learning_rate": 8.048377595065271e-08, + "loss": 0.6441, + "step": 30818 + }, + { + "epoch": 0.9445568223611622, + "grad_norm": 1.8248774922345912, + "learning_rate": 8.039510719699717e-08, + "loss": 0.5194, + "step": 30819 + }, + { + "epoch": 0.9445874708839034, + "grad_norm": 2.0028923301562958, + "learning_rate": 8.03064869174669e-08, + "loss": 0.5804, + "step": 30820 + }, + { + "epoch": 0.9446181194066446, + "grad_norm": 1.7251705334258538, + "learning_rate": 8.021791511293564e-08, + "loss": 0.5665, + "step": 30821 + }, + { + "epoch": 0.9446487679293858, + "grad_norm": 2.2277666995713115, + "learning_rate": 8.012939178427547e-08, + "loss": 0.5319, + "step": 30822 + }, + { + "epoch": 0.944679416452127, + "grad_norm": 0.8180288162018265, + "learning_rate": 8.004091693236016e-08, + "loss": 0.3939, + "step": 30823 + }, + { + "epoch": 
0.9447100649748682, + "grad_norm": 1.99254953114895, + "learning_rate": 7.995249055806009e-08, + "loss": 0.5704, + "step": 30824 + }, + { + "epoch": 0.9447407134976095, + "grad_norm": 1.9562156207204784, + "learning_rate": 7.986411266224681e-08, + "loss": 0.533, + "step": 30825 + }, + { + "epoch": 0.9447713620203506, + "grad_norm": 1.9761398516571298, + "learning_rate": 7.977578324579127e-08, + "loss": 0.5402, + "step": 30826 + }, + { + "epoch": 0.9448020105430919, + "grad_norm": 0.8021618331709844, + "learning_rate": 7.968750230956445e-08, + "loss": 0.3915, + "step": 30827 + }, + { + "epoch": 0.944832659065833, + "grad_norm": 1.8071565614054725, + "learning_rate": 7.959926985443511e-08, + "loss": 0.548, + "step": 30828 + }, + { + "epoch": 0.9448633075885743, + "grad_norm": 1.8127152906420407, + "learning_rate": 7.95110858812731e-08, + "loss": 0.5984, + "step": 30829 + }, + { + "epoch": 0.9448939561113154, + "grad_norm": 1.773869321018094, + "learning_rate": 7.942295039094771e-08, + "loss": 0.4696, + "step": 30830 + }, + { + "epoch": 0.9449246046340566, + "grad_norm": 0.9778885200933358, + "learning_rate": 7.933486338432661e-08, + "loss": 0.3969, + "step": 30831 + }, + { + "epoch": 0.9449552531567978, + "grad_norm": 1.9671591698973536, + "learning_rate": 7.924682486227797e-08, + "loss": 0.5635, + "step": 30832 + }, + { + "epoch": 0.944985901679539, + "grad_norm": 2.005978830606242, + "learning_rate": 7.915883482566943e-08, + "loss": 0.5857, + "step": 30833 + }, + { + "epoch": 0.9450165502022803, + "grad_norm": 1.869661851669318, + "learning_rate": 7.90708932753681e-08, + "loss": 0.5877, + "step": 30834 + }, + { + "epoch": 0.9450471987250214, + "grad_norm": 2.011317367604866, + "learning_rate": 7.898300021224048e-08, + "loss": 0.5999, + "step": 30835 + }, + { + "epoch": 0.9450778472477627, + "grad_norm": 1.798276131309933, + "learning_rate": 7.889515563715256e-08, + "loss": 0.5278, + "step": 30836 + }, + { + "epoch": 0.9451084957705038, + "grad_norm": 1.7523887402957607, + "learning_rate": 7.880735955096918e-08, + "loss": 0.548, + "step": 30837 + }, + { + "epoch": 0.9451391442932451, + "grad_norm": 2.1832860761882453, + "learning_rate": 7.871961195455635e-08, + "loss": 0.5621, + "step": 30838 + }, + { + "epoch": 0.9451697928159862, + "grad_norm": 1.9517632154011808, + "learning_rate": 7.863191284877836e-08, + "loss": 0.564, + "step": 30839 + }, + { + "epoch": 0.9452004413387275, + "grad_norm": 1.8611410851602854, + "learning_rate": 7.854426223449951e-08, + "loss": 0.4537, + "step": 30840 + }, + { + "epoch": 0.9452310898614686, + "grad_norm": 1.703246882170462, + "learning_rate": 7.845666011258247e-08, + "loss": 0.5206, + "step": 30841 + }, + { + "epoch": 0.9452617383842099, + "grad_norm": 2.1633662548376376, + "learning_rate": 7.836910648389206e-08, + "loss": 0.5807, + "step": 30842 + }, + { + "epoch": 0.945292386906951, + "grad_norm": 1.7028050591150836, + "learning_rate": 7.828160134929041e-08, + "loss": 0.508, + "step": 30843 + }, + { + "epoch": 0.9453230354296923, + "grad_norm": 0.802692261814572, + "learning_rate": 7.819414470963848e-08, + "loss": 0.401, + "step": 30844 + }, + { + "epoch": 0.9453536839524335, + "grad_norm": 1.819694000763012, + "learning_rate": 7.810673656579947e-08, + "loss": 0.5897, + "step": 30845 + }, + { + "epoch": 0.9453843324751747, + "grad_norm": 1.61654079660305, + "learning_rate": 7.801937691863381e-08, + "loss": 0.4706, + "step": 30846 + }, + { + "epoch": 0.9454149809979159, + "grad_norm": 2.132931691724455, + "learning_rate": 7.793206576900247e-08, + 
"loss": 0.6449, + "step": 30847 + }, + { + "epoch": 0.9454456295206571, + "grad_norm": 1.7247931974701045, + "learning_rate": 7.784480311776588e-08, + "loss": 0.5032, + "step": 30848 + }, + { + "epoch": 0.9454762780433983, + "grad_norm": 1.8094041166245547, + "learning_rate": 7.775758896578445e-08, + "loss": 0.5988, + "step": 30849 + }, + { + "epoch": 0.9455069265661395, + "grad_norm": 2.001295469650313, + "learning_rate": 7.767042331391638e-08, + "loss": 0.5637, + "step": 30850 + }, + { + "epoch": 0.9455375750888807, + "grad_norm": 1.8903515848767187, + "learning_rate": 7.758330616302156e-08, + "loss": 0.5929, + "step": 30851 + }, + { + "epoch": 0.945568223611622, + "grad_norm": 1.9806717597714945, + "learning_rate": 7.749623751395707e-08, + "loss": 0.4918, + "step": 30852 + }, + { + "epoch": 0.9455988721343631, + "grad_norm": 2.449874528321828, + "learning_rate": 7.740921736758222e-08, + "loss": 0.5763, + "step": 30853 + }, + { + "epoch": 0.9456295206571044, + "grad_norm": 1.898199163436087, + "learning_rate": 7.732224572475355e-08, + "loss": 0.5782, + "step": 30854 + }, + { + "epoch": 0.9456601691798455, + "grad_norm": 2.1190686524561766, + "learning_rate": 7.72353225863287e-08, + "loss": 0.5446, + "step": 30855 + }, + { + "epoch": 0.9456908177025868, + "grad_norm": 1.7314309379943642, + "learning_rate": 7.714844795316312e-08, + "loss": 0.5439, + "step": 30856 + }, + { + "epoch": 0.9457214662253279, + "grad_norm": 2.1377385042231847, + "learning_rate": 7.706162182611387e-08, + "loss": 0.6245, + "step": 30857 + }, + { + "epoch": 0.9457521147480692, + "grad_norm": 1.9205258615927319, + "learning_rate": 7.697484420603584e-08, + "loss": 0.5718, + "step": 30858 + }, + { + "epoch": 0.9457827632708103, + "grad_norm": 1.942000348891351, + "learning_rate": 7.688811509378447e-08, + "loss": 0.605, + "step": 30859 + }, + { + "epoch": 0.9458134117935516, + "grad_norm": 1.8756442188346918, + "learning_rate": 7.680143449021404e-08, + "loss": 0.5064, + "step": 30860 + }, + { + "epoch": 0.9458440603162928, + "grad_norm": 2.0893059827228475, + "learning_rate": 7.67148023961789e-08, + "loss": 0.6355, + "step": 30861 + }, + { + "epoch": 0.9458747088390339, + "grad_norm": 0.8549054688848124, + "learning_rate": 7.662821881253279e-08, + "loss": 0.4082, + "step": 30862 + }, + { + "epoch": 0.9459053573617752, + "grad_norm": 1.9023922194427927, + "learning_rate": 7.654168374012782e-08, + "loss": 0.4823, + "step": 30863 + }, + { + "epoch": 0.9459360058845163, + "grad_norm": 0.8020266068423344, + "learning_rate": 7.645519717981775e-08, + "loss": 0.4021, + "step": 30864 + }, + { + "epoch": 0.9459666544072576, + "grad_norm": 2.104561788502694, + "learning_rate": 7.636875913245467e-08, + "loss": 0.4987, + "step": 30865 + }, + { + "epoch": 0.9459973029299987, + "grad_norm": 1.9363294493359229, + "learning_rate": 7.628236959888902e-08, + "loss": 0.4902, + "step": 30866 + }, + { + "epoch": 0.94602795145274, + "grad_norm": 1.9483104430250937, + "learning_rate": 7.619602857997344e-08, + "loss": 0.5002, + "step": 30867 + }, + { + "epoch": 0.9460585999754811, + "grad_norm": 1.9472387534993332, + "learning_rate": 7.610973607655836e-08, + "loss": 0.592, + "step": 30868 + }, + { + "epoch": 0.9460892484982224, + "grad_norm": 2.1202954223426853, + "learning_rate": 7.602349208949422e-08, + "loss": 0.5654, + "step": 30869 + }, + { + "epoch": 0.9461198970209636, + "grad_norm": 2.092687189843874, + "learning_rate": 7.593729661962979e-08, + "loss": 0.6368, + "step": 30870 + }, + { + "epoch": 0.9461505455437048, + "grad_norm": 
2.2408438986613795, + "learning_rate": 7.585114966781493e-08, + "loss": 0.5061, + "step": 30871 + }, + { + "epoch": 0.946181194066446, + "grad_norm": 1.912631861499024, + "learning_rate": 7.576505123489952e-08, + "loss": 0.5367, + "step": 30872 + }, + { + "epoch": 0.9462118425891872, + "grad_norm": 1.7664442219276146, + "learning_rate": 7.567900132173067e-08, + "loss": 0.615, + "step": 30873 + }, + { + "epoch": 0.9462424911119284, + "grad_norm": 0.7998272718691155, + "learning_rate": 7.559299992915602e-08, + "loss": 0.4057, + "step": 30874 + }, + { + "epoch": 0.9462731396346696, + "grad_norm": 1.918557105485891, + "learning_rate": 7.550704705802381e-08, + "loss": 0.5848, + "step": 30875 + }, + { + "epoch": 0.9463037881574108, + "grad_norm": 0.7975651894406763, + "learning_rate": 7.542114270918111e-08, + "loss": 0.3846, + "step": 30876 + }, + { + "epoch": 0.946334436680152, + "grad_norm": 2.0892091053791875, + "learning_rate": 7.533528688347336e-08, + "loss": 0.6, + "step": 30877 + }, + { + "epoch": 0.9463650852028932, + "grad_norm": 1.7695611015459554, + "learning_rate": 7.524947958174655e-08, + "loss": 0.4603, + "step": 30878 + }, + { + "epoch": 0.9463957337256345, + "grad_norm": 1.9625061567613178, + "learning_rate": 7.516372080484724e-08, + "loss": 0.5231, + "step": 30879 + }, + { + "epoch": 0.9464263822483756, + "grad_norm": 2.029628469145157, + "learning_rate": 7.50780105536203e-08, + "loss": 0.5206, + "step": 30880 + }, + { + "epoch": 0.9464570307711169, + "grad_norm": 2.115794533439602, + "learning_rate": 7.499234882890949e-08, + "loss": 0.5534, + "step": 30881 + }, + { + "epoch": 0.946487679293858, + "grad_norm": 2.035686627795528, + "learning_rate": 7.49067356315586e-08, + "loss": 0.5544, + "step": 30882 + }, + { + "epoch": 0.9465183278165993, + "grad_norm": 2.200659318516957, + "learning_rate": 7.482117096241248e-08, + "loss": 0.4961, + "step": 30883 + }, + { + "epoch": 0.9465489763393404, + "grad_norm": 0.795541496610307, + "learning_rate": 7.473565482231382e-08, + "loss": 0.3967, + "step": 30884 + }, + { + "epoch": 0.9465796248620817, + "grad_norm": 0.7844098140885174, + "learning_rate": 7.465018721210416e-08, + "loss": 0.3782, + "step": 30885 + }, + { + "epoch": 0.9466102733848228, + "grad_norm": 1.9514999110468432, + "learning_rate": 7.45647681326267e-08, + "loss": 0.5537, + "step": 30886 + }, + { + "epoch": 0.9466409219075641, + "grad_norm": 1.949747859210283, + "learning_rate": 7.447939758472245e-08, + "loss": 0.5755, + "step": 30887 + }, + { + "epoch": 0.9466715704303053, + "grad_norm": 0.8338501146864048, + "learning_rate": 7.43940755692335e-08, + "loss": 0.4278, + "step": 30888 + }, + { + "epoch": 0.9467022189530465, + "grad_norm": 2.0453881758003827, + "learning_rate": 7.430880208699975e-08, + "loss": 0.5433, + "step": 30889 + }, + { + "epoch": 0.9467328674757877, + "grad_norm": 1.8327238123232852, + "learning_rate": 7.422357713886163e-08, + "loss": 0.5173, + "step": 30890 + }, + { + "epoch": 0.9467635159985289, + "grad_norm": 1.908808249294877, + "learning_rate": 7.413840072565959e-08, + "loss": 0.5224, + "step": 30891 + }, + { + "epoch": 0.9467941645212701, + "grad_norm": 2.023593128690564, + "learning_rate": 7.405327284823128e-08, + "loss": 0.5612, + "step": 30892 + }, + { + "epoch": 0.9468248130440112, + "grad_norm": 1.6990567015523828, + "learning_rate": 7.396819350741657e-08, + "loss": 0.5758, + "step": 30893 + }, + { + "epoch": 0.9468554615667525, + "grad_norm": 1.8958958180863643, + "learning_rate": 7.388316270405427e-08, + "loss": 0.5524, + "step": 30894 + }, + 
{ + "epoch": 0.9468861100894936, + "grad_norm": 1.9351201124627895, + "learning_rate": 7.37981804389809e-08, + "loss": 0.5539, + "step": 30895 + }, + { + "epoch": 0.9469167586122349, + "grad_norm": 1.7969712397051942, + "learning_rate": 7.37132467130347e-08, + "loss": 0.4817, + "step": 30896 + }, + { + "epoch": 0.946947407134976, + "grad_norm": 1.9028657527503734, + "learning_rate": 7.362836152705221e-08, + "loss": 0.5545, + "step": 30897 + }, + { + "epoch": 0.9469780556577173, + "grad_norm": 1.9537539147507301, + "learning_rate": 7.354352488187e-08, + "loss": 0.6457, + "step": 30898 + }, + { + "epoch": 0.9470087041804585, + "grad_norm": 1.8052042655382121, + "learning_rate": 7.34587367783246e-08, + "loss": 0.5448, + "step": 30899 + }, + { + "epoch": 0.9470393527031997, + "grad_norm": 2.1045126931671216, + "learning_rate": 7.33739972172498e-08, + "loss": 0.6162, + "step": 30900 + }, + { + "epoch": 0.9470700012259409, + "grad_norm": 0.8186609017697417, + "learning_rate": 7.32893061994816e-08, + "loss": 0.4202, + "step": 30901 + }, + { + "epoch": 0.9471006497486821, + "grad_norm": 2.1110578648577043, + "learning_rate": 7.320466372585544e-08, + "loss": 0.5476, + "step": 30902 + }, + { + "epoch": 0.9471312982714233, + "grad_norm": 1.7739712113872788, + "learning_rate": 7.312006979720344e-08, + "loss": 0.501, + "step": 30903 + }, + { + "epoch": 0.9471619467941645, + "grad_norm": 1.9628043419843804, + "learning_rate": 7.30355244143599e-08, + "loss": 0.5782, + "step": 30904 + }, + { + "epoch": 0.9471925953169057, + "grad_norm": 1.8793095856322024, + "learning_rate": 7.295102757815864e-08, + "loss": 0.4839, + "step": 30905 + }, + { + "epoch": 0.947223243839647, + "grad_norm": 2.1435340067528763, + "learning_rate": 7.286657928943064e-08, + "loss": 0.5369, + "step": 30906 + }, + { + "epoch": 0.9472538923623881, + "grad_norm": 1.810430197743001, + "learning_rate": 7.278217954900968e-08, + "loss": 0.4933, + "step": 30907 + }, + { + "epoch": 0.9472845408851294, + "grad_norm": 1.8667223501835069, + "learning_rate": 7.269782835772621e-08, + "loss": 0.655, + "step": 30908 + }, + { + "epoch": 0.9473151894078705, + "grad_norm": 1.9225278234353838, + "learning_rate": 7.261352571641179e-08, + "loss": 0.5441, + "step": 30909 + }, + { + "epoch": 0.9473458379306118, + "grad_norm": 1.8338780617088204, + "learning_rate": 7.252927162589684e-08, + "loss": 0.5702, + "step": 30910 + }, + { + "epoch": 0.9473764864533529, + "grad_norm": 1.7464253255294468, + "learning_rate": 7.244506608701186e-08, + "loss": 0.5755, + "step": 30911 + }, + { + "epoch": 0.9474071349760942, + "grad_norm": 1.913654299449949, + "learning_rate": 7.236090910058668e-08, + "loss": 0.5375, + "step": 30912 + }, + { + "epoch": 0.9474377834988353, + "grad_norm": 1.8164081538219352, + "learning_rate": 7.227680066745013e-08, + "loss": 0.5463, + "step": 30913 + }, + { + "epoch": 0.9474684320215766, + "grad_norm": 1.7692368793441795, + "learning_rate": 7.219274078843097e-08, + "loss": 0.469, + "step": 30914 + }, + { + "epoch": 0.9474990805443178, + "grad_norm": 1.8463994012160099, + "learning_rate": 7.210872946435743e-08, + "loss": 0.5989, + "step": 30915 + }, + { + "epoch": 0.947529729067059, + "grad_norm": 2.081932412043895, + "learning_rate": 7.202476669605774e-08, + "loss": 0.5678, + "step": 30916 + }, + { + "epoch": 0.9475603775898002, + "grad_norm": 0.7807313399654812, + "learning_rate": 7.194085248435844e-08, + "loss": 0.3726, + "step": 30917 + }, + { + "epoch": 0.9475910261125414, + "grad_norm": 1.9534814834200707, + "learning_rate": 
7.18569868300878e-08, + "loss": 0.5716, + "step": 30918 + }, + { + "epoch": 0.9476216746352826, + "grad_norm": 1.8259929284685588, + "learning_rate": 7.177316973407011e-08, + "loss": 0.5196, + "step": 30919 + }, + { + "epoch": 0.9476523231580238, + "grad_norm": 2.058498664584029, + "learning_rate": 7.168940119713252e-08, + "loss": 0.541, + "step": 30920 + }, + { + "epoch": 0.947682971680765, + "grad_norm": 1.9381655706878858, + "learning_rate": 7.160568122010103e-08, + "loss": 0.5338, + "step": 30921 + }, + { + "epoch": 0.9477136202035062, + "grad_norm": 1.973056311953984, + "learning_rate": 7.152200980379887e-08, + "loss": 0.5296, + "step": 30922 + }, + { + "epoch": 0.9477442687262474, + "grad_norm": 2.000839327832495, + "learning_rate": 7.143838694905148e-08, + "loss": 0.5568, + "step": 30923 + }, + { + "epoch": 0.9477749172489885, + "grad_norm": 0.7845122345572013, + "learning_rate": 7.13548126566832e-08, + "loss": 0.3851, + "step": 30924 + }, + { + "epoch": 0.9478055657717298, + "grad_norm": 2.0222659270640793, + "learning_rate": 7.127128692751617e-08, + "loss": 0.5517, + "step": 30925 + }, + { + "epoch": 0.947836214294471, + "grad_norm": 1.9046933100695382, + "learning_rate": 7.118780976237471e-08, + "loss": 0.5652, + "step": 30926 + }, + { + "epoch": 0.9478668628172122, + "grad_norm": 1.713415068938289, + "learning_rate": 7.110438116208096e-08, + "loss": 0.5217, + "step": 30927 + }, + { + "epoch": 0.9478975113399534, + "grad_norm": 0.7864791235168408, + "learning_rate": 7.102100112745702e-08, + "loss": 0.4105, + "step": 30928 + }, + { + "epoch": 0.9479281598626946, + "grad_norm": 1.8239389730425977, + "learning_rate": 7.093766965932392e-08, + "loss": 0.5138, + "step": 30929 + }, + { + "epoch": 0.9479588083854358, + "grad_norm": 1.8932539615600172, + "learning_rate": 7.08543867585032e-08, + "loss": 0.5495, + "step": 30930 + }, + { + "epoch": 0.947989456908177, + "grad_norm": 1.7234908065934391, + "learning_rate": 7.077115242581534e-08, + "loss": 0.6329, + "step": 30931 + }, + { + "epoch": 0.9480201054309182, + "grad_norm": 0.7540940178570785, + "learning_rate": 7.068796666208078e-08, + "loss": 0.3976, + "step": 30932 + }, + { + "epoch": 0.9480507539536595, + "grad_norm": 1.6873359386112925, + "learning_rate": 7.060482946811831e-08, + "loss": 0.5074, + "step": 30933 + }, + { + "epoch": 0.9480814024764006, + "grad_norm": 1.7403046376111138, + "learning_rate": 7.052174084474784e-08, + "loss": 0.5719, + "step": 30934 + }, + { + "epoch": 0.9481120509991419, + "grad_norm": 2.0460753621078003, + "learning_rate": 7.043870079278869e-08, + "loss": 0.4413, + "step": 30935 + }, + { + "epoch": 0.948142699521883, + "grad_norm": 1.9525797078904414, + "learning_rate": 7.035570931305746e-08, + "loss": 0.6214, + "step": 30936 + }, + { + "epoch": 0.9481733480446243, + "grad_norm": 2.194833389933503, + "learning_rate": 7.027276640637293e-08, + "loss": 0.5751, + "step": 30937 + }, + { + "epoch": 0.9482039965673654, + "grad_norm": 1.807725351612749, + "learning_rate": 7.018987207355276e-08, + "loss": 0.5504, + "step": 30938 + }, + { + "epoch": 0.9482346450901067, + "grad_norm": 1.899799224883368, + "learning_rate": 7.010702631541245e-08, + "loss": 0.5599, + "step": 30939 + }, + { + "epoch": 0.9482652936128478, + "grad_norm": 1.8857053942932709, + "learning_rate": 7.002422913276907e-08, + "loss": 0.5829, + "step": 30940 + }, + { + "epoch": 0.9482959421355891, + "grad_norm": 2.169263282948279, + "learning_rate": 6.994148052643868e-08, + "loss": 0.5609, + "step": 30941 + }, + { + "epoch": 0.9483265906583302, 
+ "grad_norm": 1.90374712713827, + "learning_rate": 6.98587804972356e-08, + "loss": 0.6053, + "step": 30942 + }, + { + "epoch": 0.9483572391810715, + "grad_norm": 0.7756941045997442, + "learning_rate": 6.977612904597586e-08, + "loss": 0.3882, + "step": 30943 + }, + { + "epoch": 0.9483878877038127, + "grad_norm": 1.9798268754964674, + "learning_rate": 6.969352617347325e-08, + "loss": 0.6209, + "step": 30944 + }, + { + "epoch": 0.9484185362265539, + "grad_norm": 1.795736120277724, + "learning_rate": 6.961097188054211e-08, + "loss": 0.538, + "step": 30945 + }, + { + "epoch": 0.9484491847492951, + "grad_norm": 2.081140873779094, + "learning_rate": 6.952846616799569e-08, + "loss": 0.658, + "step": 30946 + }, + { + "epoch": 0.9484798332720363, + "grad_norm": 1.9175732212172247, + "learning_rate": 6.944600903664612e-08, + "loss": 0.586, + "step": 30947 + }, + { + "epoch": 0.9485104817947775, + "grad_norm": 1.9080041228013325, + "learning_rate": 6.936360048730718e-08, + "loss": 0.5328, + "step": 30948 + }, + { + "epoch": 0.9485411303175187, + "grad_norm": 0.8079766582255864, + "learning_rate": 6.928124052078933e-08, + "loss": 0.396, + "step": 30949 + }, + { + "epoch": 0.9485717788402599, + "grad_norm": 2.0843425687532497, + "learning_rate": 6.919892913790582e-08, + "loss": 0.5335, + "step": 30950 + }, + { + "epoch": 0.9486024273630012, + "grad_norm": 1.8435035984495656, + "learning_rate": 6.911666633946712e-08, + "loss": 0.5158, + "step": 30951 + }, + { + "epoch": 0.9486330758857423, + "grad_norm": 1.7399627944119658, + "learning_rate": 6.903445212628257e-08, + "loss": 0.5821, + "step": 30952 + }, + { + "epoch": 0.9486637244084836, + "grad_norm": 2.271402905568892, + "learning_rate": 6.895228649916374e-08, + "loss": 0.4477, + "step": 30953 + }, + { + "epoch": 0.9486943729312247, + "grad_norm": 1.8455645320389253, + "learning_rate": 6.887016945892e-08, + "loss": 0.6207, + "step": 30954 + }, + { + "epoch": 0.9487250214539659, + "grad_norm": 2.133341446383731, + "learning_rate": 6.878810100635958e-08, + "loss": 0.5697, + "step": 30955 + }, + { + "epoch": 0.9487556699767071, + "grad_norm": 1.8595570260570555, + "learning_rate": 6.870608114229183e-08, + "loss": 0.5306, + "step": 30956 + }, + { + "epoch": 0.9487863184994483, + "grad_norm": 2.2305397356077106, + "learning_rate": 6.8624109867525e-08, + "loss": 0.5896, + "step": 30957 + }, + { + "epoch": 0.9488169670221895, + "grad_norm": 2.083471876513204, + "learning_rate": 6.854218718286676e-08, + "loss": 0.5852, + "step": 30958 + }, + { + "epoch": 0.9488476155449307, + "grad_norm": 2.161757106052577, + "learning_rate": 6.846031308912371e-08, + "loss": 0.5445, + "step": 30959 + }, + { + "epoch": 0.948878264067672, + "grad_norm": 1.8471246173942282, + "learning_rate": 6.837848758710241e-08, + "loss": 0.5471, + "step": 30960 + }, + { + "epoch": 0.9489089125904131, + "grad_norm": 1.8305356322877953, + "learning_rate": 6.829671067761112e-08, + "loss": 0.5972, + "step": 30961 + }, + { + "epoch": 0.9489395611131544, + "grad_norm": 1.7260542462563702, + "learning_rate": 6.821498236145363e-08, + "loss": 0.4882, + "step": 30962 + }, + { + "epoch": 0.9489702096358955, + "grad_norm": 0.7910997959005236, + "learning_rate": 6.81333026394354e-08, + "loss": 0.3978, + "step": 30963 + }, + { + "epoch": 0.9490008581586368, + "grad_norm": 1.830249857125107, + "learning_rate": 6.805167151236137e-08, + "loss": 0.5393, + "step": 30964 + }, + { + "epoch": 0.9490315066813779, + "grad_norm": 0.8003328872114168, + "learning_rate": 6.797008898103697e-08, + "loss": 0.3977, + 
"step": 30965 + }, + { + "epoch": 0.9490621552041192, + "grad_norm": 1.8825450054191388, + "learning_rate": 6.788855504626435e-08, + "loss": 0.501, + "step": 30966 + }, + { + "epoch": 0.9490928037268603, + "grad_norm": 2.217163769265625, + "learning_rate": 6.780706970884788e-08, + "loss": 0.5751, + "step": 30967 + }, + { + "epoch": 0.9491234522496016, + "grad_norm": 1.7178583763258921, + "learning_rate": 6.772563296959079e-08, + "loss": 0.5185, + "step": 30968 + }, + { + "epoch": 0.9491541007723427, + "grad_norm": 1.8307502402391194, + "learning_rate": 6.764424482929465e-08, + "loss": 0.6054, + "step": 30969 + }, + { + "epoch": 0.949184749295084, + "grad_norm": 1.9632160129701366, + "learning_rate": 6.75629052887622e-08, + "loss": 0.5561, + "step": 30970 + }, + { + "epoch": 0.9492153978178252, + "grad_norm": 1.932199502874756, + "learning_rate": 6.748161434879386e-08, + "loss": 0.5892, + "step": 30971 + }, + { + "epoch": 0.9492460463405664, + "grad_norm": 0.8446718609640425, + "learning_rate": 6.740037201019179e-08, + "loss": 0.407, + "step": 30972 + }, + { + "epoch": 0.9492766948633076, + "grad_norm": 1.995512408006633, + "learning_rate": 6.731917827375589e-08, + "loss": 0.5611, + "step": 30973 + }, + { + "epoch": 0.9493073433860488, + "grad_norm": 1.8943066615268749, + "learning_rate": 6.723803314028554e-08, + "loss": 0.5823, + "step": 30974 + }, + { + "epoch": 0.94933799190879, + "grad_norm": 1.8314671316827118, + "learning_rate": 6.71569366105812e-08, + "loss": 0.6238, + "step": 30975 + }, + { + "epoch": 0.9493686404315312, + "grad_norm": 1.96539563577945, + "learning_rate": 6.707588868544168e-08, + "loss": 0.5716, + "step": 30976 + }, + { + "epoch": 0.9493992889542724, + "grad_norm": 2.0368358203910644, + "learning_rate": 6.699488936566634e-08, + "loss": 0.6115, + "step": 30977 + }, + { + "epoch": 0.9494299374770137, + "grad_norm": 1.9182855582734288, + "learning_rate": 6.691393865205176e-08, + "loss": 0.6033, + "step": 30978 + }, + { + "epoch": 0.9494605859997548, + "grad_norm": 1.9747721068842137, + "learning_rate": 6.683303654539619e-08, + "loss": 0.5834, + "step": 30979 + }, + { + "epoch": 0.9494912345224961, + "grad_norm": 2.0449132363171048, + "learning_rate": 6.675218304649733e-08, + "loss": 0.5256, + "step": 30980 + }, + { + "epoch": 0.9495218830452372, + "grad_norm": 1.7772363688634416, + "learning_rate": 6.667137815615176e-08, + "loss": 0.5108, + "step": 30981 + }, + { + "epoch": 0.9495525315679785, + "grad_norm": 1.7716774812246265, + "learning_rate": 6.659062187515498e-08, + "loss": 0.6101, + "step": 30982 + }, + { + "epoch": 0.9495831800907196, + "grad_norm": 2.286655431635491, + "learning_rate": 6.650991420430241e-08, + "loss": 0.6775, + "step": 30983 + }, + { + "epoch": 0.9496138286134609, + "grad_norm": 1.9133913370253293, + "learning_rate": 6.642925514439125e-08, + "loss": 0.5754, + "step": 30984 + }, + { + "epoch": 0.949644477136202, + "grad_norm": 1.7205987478948466, + "learning_rate": 6.634864469621361e-08, + "loss": 0.5068, + "step": 30985 + }, + { + "epoch": 0.9496751256589432, + "grad_norm": 2.0095703376451466, + "learning_rate": 6.626808286056607e-08, + "loss": 0.5826, + "step": 30986 + }, + { + "epoch": 0.9497057741816844, + "grad_norm": 1.912573108539849, + "learning_rate": 6.61875696382408e-08, + "loss": 0.6541, + "step": 30987 + }, + { + "epoch": 0.9497364227044256, + "grad_norm": 1.691961981139282, + "learning_rate": 6.610710503003214e-08, + "loss": 0.5933, + "step": 30988 + }, + { + "epoch": 0.9497670712271669, + "grad_norm": 1.7836960485076938, + 
"learning_rate": 6.602668903673226e-08, + "loss": 0.4789, + "step": 30989 + }, + { + "epoch": 0.949797719749908, + "grad_norm": 0.8421656071778608, + "learning_rate": 6.59463216591344e-08, + "loss": 0.384, + "step": 30990 + }, + { + "epoch": 0.9498283682726493, + "grad_norm": 1.816348830287691, + "learning_rate": 6.586600289802958e-08, + "loss": 0.495, + "step": 30991 + }, + { + "epoch": 0.9498590167953904, + "grad_norm": 1.7653982642977681, + "learning_rate": 6.578573275420941e-08, + "loss": 0.5205, + "step": 30992 + }, + { + "epoch": 0.9498896653181317, + "grad_norm": 1.8622976601196832, + "learning_rate": 6.570551122846491e-08, + "loss": 0.4979, + "step": 30993 + }, + { + "epoch": 0.9499203138408728, + "grad_norm": 1.747613933883945, + "learning_rate": 6.562533832158657e-08, + "loss": 0.4633, + "step": 30994 + }, + { + "epoch": 0.9499509623636141, + "grad_norm": 1.8703827935678294, + "learning_rate": 6.554521403436376e-08, + "loss": 0.6107, + "step": 30995 + }, + { + "epoch": 0.9499816108863552, + "grad_norm": 1.9972685889635684, + "learning_rate": 6.54651383675875e-08, + "loss": 0.6011, + "step": 30996 + }, + { + "epoch": 0.9500122594090965, + "grad_norm": 1.960110998492348, + "learning_rate": 6.538511132204495e-08, + "loss": 0.4632, + "step": 30997 + }, + { + "epoch": 0.9500429079318377, + "grad_norm": 1.7674775134321294, + "learning_rate": 6.530513289852603e-08, + "loss": 0.5624, + "step": 30998 + }, + { + "epoch": 0.9500735564545789, + "grad_norm": 2.071699323365764, + "learning_rate": 6.522520309781788e-08, + "loss": 0.6153, + "step": 30999 + }, + { + "epoch": 0.9501042049773201, + "grad_norm": 1.832615523657835, + "learning_rate": 6.514532192070876e-08, + "loss": 0.5555, + "step": 31000 + }, + { + "epoch": 0.9501348535000613, + "grad_norm": 0.802354482821107, + "learning_rate": 6.506548936798474e-08, + "loss": 0.3963, + "step": 31001 + }, + { + "epoch": 0.9501655020228025, + "grad_norm": 1.8405848757399657, + "learning_rate": 6.498570544043348e-08, + "loss": 0.6113, + "step": 31002 + }, + { + "epoch": 0.9501961505455437, + "grad_norm": 1.8835500770640923, + "learning_rate": 6.490597013884103e-08, + "loss": 0.6182, + "step": 31003 + }, + { + "epoch": 0.9502267990682849, + "grad_norm": 1.9299958584539838, + "learning_rate": 6.482628346399289e-08, + "loss": 0.5897, + "step": 31004 + }, + { + "epoch": 0.9502574475910261, + "grad_norm": 1.9433412287572038, + "learning_rate": 6.474664541667341e-08, + "loss": 0.5851, + "step": 31005 + }, + { + "epoch": 0.9502880961137673, + "grad_norm": 1.8288298172233342, + "learning_rate": 6.466705599766809e-08, + "loss": 0.6184, + "step": 31006 + }, + { + "epoch": 0.9503187446365086, + "grad_norm": 1.9470633715217385, + "learning_rate": 6.45875152077613e-08, + "loss": 0.6116, + "step": 31007 + }, + { + "epoch": 0.9503493931592497, + "grad_norm": 2.1242578420801723, + "learning_rate": 6.450802304773629e-08, + "loss": 0.6038, + "step": 31008 + }, + { + "epoch": 0.950380041681991, + "grad_norm": 1.835844175364146, + "learning_rate": 6.442857951837689e-08, + "loss": 0.5406, + "step": 31009 + }, + { + "epoch": 0.9504106902047321, + "grad_norm": 1.9308708320961614, + "learning_rate": 6.434918462046525e-08, + "loss": 0.6428, + "step": 31010 + }, + { + "epoch": 0.9504413387274734, + "grad_norm": 0.8320531466493298, + "learning_rate": 6.426983835478462e-08, + "loss": 0.4036, + "step": 31011 + }, + { + "epoch": 0.9504719872502145, + "grad_norm": 1.7218169715464302, + "learning_rate": 6.419054072211494e-08, + "loss": 0.5694, + "step": 31012 + }, + { + "epoch": 
0.9505026357729558, + "grad_norm": 2.0701249718474273, + "learning_rate": 6.411129172323949e-08, + "loss": 0.6513, + "step": 31013 + }, + { + "epoch": 0.950533284295697, + "grad_norm": 2.0037402225703005, + "learning_rate": 6.403209135893818e-08, + "loss": 0.4572, + "step": 31014 + }, + { + "epoch": 0.9505639328184382, + "grad_norm": 1.8786632811414756, + "learning_rate": 6.39529396299915e-08, + "loss": 0.5835, + "step": 31015 + }, + { + "epoch": 0.9505945813411794, + "grad_norm": 0.7747344251682287, + "learning_rate": 6.387383653717938e-08, + "loss": 0.3848, + "step": 31016 + }, + { + "epoch": 0.9506252298639205, + "grad_norm": 1.9596668787246232, + "learning_rate": 6.379478208128176e-08, + "loss": 0.5876, + "step": 31017 + }, + { + "epoch": 0.9506558783866618, + "grad_norm": 1.7352617511793205, + "learning_rate": 6.37157762630769e-08, + "loss": 0.5904, + "step": 31018 + }, + { + "epoch": 0.9506865269094029, + "grad_norm": 1.9504051482479245, + "learning_rate": 6.363681908334307e-08, + "loss": 0.575, + "step": 31019 + }, + { + "epoch": 0.9507171754321442, + "grad_norm": 2.0689224778241457, + "learning_rate": 6.355791054285908e-08, + "loss": 0.7093, + "step": 31020 + }, + { + "epoch": 0.9507478239548853, + "grad_norm": 1.8299219504960893, + "learning_rate": 6.347905064240211e-08, + "loss": 0.4924, + "step": 31021 + }, + { + "epoch": 0.9507784724776266, + "grad_norm": 1.868014880782125, + "learning_rate": 6.340023938274931e-08, + "loss": 0.4062, + "step": 31022 + }, + { + "epoch": 0.9508091210003677, + "grad_norm": 2.0076874136692746, + "learning_rate": 6.332147676467671e-08, + "loss": 0.5619, + "step": 31023 + }, + { + "epoch": 0.950839769523109, + "grad_norm": 2.007038905246689, + "learning_rate": 6.324276278896091e-08, + "loss": 0.6555, + "step": 31024 + }, + { + "epoch": 0.9508704180458502, + "grad_norm": 1.7560521496670727, + "learning_rate": 6.316409745637686e-08, + "loss": 0.5072, + "step": 31025 + }, + { + "epoch": 0.9509010665685914, + "grad_norm": 0.8050330118665351, + "learning_rate": 6.30854807677006e-08, + "loss": 0.3877, + "step": 31026 + }, + { + "epoch": 0.9509317150913326, + "grad_norm": 1.8094012513575801, + "learning_rate": 6.300691272370596e-08, + "loss": 0.5221, + "step": 31027 + }, + { + "epoch": 0.9509623636140738, + "grad_norm": 1.8761387263781946, + "learning_rate": 6.292839332516731e-08, + "loss": 0.5663, + "step": 31028 + }, + { + "epoch": 0.950993012136815, + "grad_norm": 1.8339260464808116, + "learning_rate": 6.284992257285904e-08, + "loss": 0.6023, + "step": 31029 + }, + { + "epoch": 0.9510236606595562, + "grad_norm": 2.0153179685599634, + "learning_rate": 6.277150046755276e-08, + "loss": 0.5613, + "step": 31030 + }, + { + "epoch": 0.9510543091822974, + "grad_norm": 1.8279836006186863, + "learning_rate": 6.269312701002284e-08, + "loss": 0.6227, + "step": 31031 + }, + { + "epoch": 0.9510849577050386, + "grad_norm": 2.008754418004958, + "learning_rate": 6.261480220104088e-08, + "loss": 0.5243, + "step": 31032 + }, + { + "epoch": 0.9511156062277798, + "grad_norm": 1.9206489658038177, + "learning_rate": 6.253652604137794e-08, + "loss": 0.5542, + "step": 31033 + }, + { + "epoch": 0.9511462547505211, + "grad_norm": 1.9763262766370053, + "learning_rate": 6.245829853180618e-08, + "loss": 0.6372, + "step": 31034 + }, + { + "epoch": 0.9511769032732622, + "grad_norm": 2.058583401506986, + "learning_rate": 6.23801196730961e-08, + "loss": 0.5356, + "step": 31035 + }, + { + "epoch": 0.9512075517960035, + "grad_norm": 1.852099565445736, + "learning_rate": 
6.230198946601818e-08, + "loss": 0.5698, + "step": 31036 + }, + { + "epoch": 0.9512382003187446, + "grad_norm": 1.8479511263721071, + "learning_rate": 6.222390791134236e-08, + "loss": 0.566, + "step": 31037 + }, + { + "epoch": 0.9512688488414859, + "grad_norm": 0.8400452890405624, + "learning_rate": 6.214587500983748e-08, + "loss": 0.4006, + "step": 31038 + }, + { + "epoch": 0.951299497364227, + "grad_norm": 2.0262002870689257, + "learning_rate": 6.206789076227238e-08, + "loss": 0.5795, + "step": 31039 + }, + { + "epoch": 0.9513301458869683, + "grad_norm": 2.1133823789338475, + "learning_rate": 6.198995516941642e-08, + "loss": 0.5189, + "step": 31040 + }, + { + "epoch": 0.9513607944097094, + "grad_norm": 1.9936648454900678, + "learning_rate": 6.191206823203622e-08, + "loss": 0.5532, + "step": 31041 + }, + { + "epoch": 0.9513914429324507, + "grad_norm": 2.087665539403235, + "learning_rate": 6.183422995090005e-08, + "loss": 0.517, + "step": 31042 + }, + { + "epoch": 0.9514220914551919, + "grad_norm": 1.902723614277879, + "learning_rate": 6.175644032677508e-08, + "loss": 0.5695, + "step": 31043 + }, + { + "epoch": 0.9514527399779331, + "grad_norm": 1.9063919504154494, + "learning_rate": 6.167869936042737e-08, + "loss": 0.5435, + "step": 31044 + }, + { + "epoch": 0.9514833885006743, + "grad_norm": 1.7402627452890154, + "learning_rate": 6.160100705262295e-08, + "loss": 0.586, + "step": 31045 + }, + { + "epoch": 0.9515140370234155, + "grad_norm": 2.067613167009254, + "learning_rate": 6.152336340412679e-08, + "loss": 0.5768, + "step": 31046 + }, + { + "epoch": 0.9515446855461567, + "grad_norm": 0.781311312447566, + "learning_rate": 6.144576841570494e-08, + "loss": 0.383, + "step": 31047 + }, + { + "epoch": 0.9515753340688978, + "grad_norm": 1.826525788939158, + "learning_rate": 6.136822208812121e-08, + "loss": 0.524, + "step": 31048 + }, + { + "epoch": 0.9516059825916391, + "grad_norm": 2.0481566262012842, + "learning_rate": 6.129072442214057e-08, + "loss": 0.6792, + "step": 31049 + }, + { + "epoch": 0.9516366311143802, + "grad_norm": 1.8390826793522845, + "learning_rate": 6.121327541852517e-08, + "loss": 0.5788, + "step": 31050 + }, + { + "epoch": 0.9516672796371215, + "grad_norm": 1.8940768673581785, + "learning_rate": 6.113587507803997e-08, + "loss": 0.5088, + "step": 31051 + }, + { + "epoch": 0.9516979281598626, + "grad_norm": 2.005456402174599, + "learning_rate": 6.105852340144602e-08, + "loss": 0.5993, + "step": 31052 + }, + { + "epoch": 0.9517285766826039, + "grad_norm": 2.0325946095112664, + "learning_rate": 6.098122038950605e-08, + "loss": 0.5115, + "step": 31053 + }, + { + "epoch": 0.9517592252053451, + "grad_norm": 2.0060673212206956, + "learning_rate": 6.090396604298276e-08, + "loss": 0.5338, + "step": 31054 + }, + { + "epoch": 0.9517898737280863, + "grad_norm": 1.9228095648519468, + "learning_rate": 6.082676036263558e-08, + "loss": 0.5379, + "step": 31055 + }, + { + "epoch": 0.9518205222508275, + "grad_norm": 1.8670215378640018, + "learning_rate": 6.074960334922609e-08, + "loss": 0.5503, + "step": 31056 + }, + { + "epoch": 0.9518511707735687, + "grad_norm": 1.8504635812718875, + "learning_rate": 6.06724950035148e-08, + "loss": 0.48, + "step": 31057 + }, + { + "epoch": 0.9518818192963099, + "grad_norm": 1.9712118115218251, + "learning_rate": 6.059543532626111e-08, + "loss": 0.5447, + "step": 31058 + }, + { + "epoch": 0.9519124678190511, + "grad_norm": 1.7833290845162533, + "learning_rate": 6.051842431822442e-08, + "loss": 0.5187, + "step": 31059 + }, + { + "epoch": 0.9519431163417923, 
+ "grad_norm": 1.9205089901049668, + "learning_rate": 6.044146198016299e-08, + "loss": 0.5707, + "step": 31060 + }, + { + "epoch": 0.9519737648645336, + "grad_norm": 1.7861346172075763, + "learning_rate": 6.036454831283623e-08, + "loss": 0.5363, + "step": 31061 + }, + { + "epoch": 0.9520044133872747, + "grad_norm": 0.7846767547662303, + "learning_rate": 6.02876833170013e-08, + "loss": 0.3997, + "step": 31062 + }, + { + "epoch": 0.952035061910016, + "grad_norm": 1.9006203954691379, + "learning_rate": 6.021086699341594e-08, + "loss": 0.534, + "step": 31063 + }, + { + "epoch": 0.9520657104327571, + "grad_norm": 2.2088006447274906, + "learning_rate": 6.01340993428362e-08, + "loss": 0.6445, + "step": 31064 + }, + { + "epoch": 0.9520963589554984, + "grad_norm": 2.0260558665701787, + "learning_rate": 6.005738036601982e-08, + "loss": 0.4961, + "step": 31065 + }, + { + "epoch": 0.9521270074782395, + "grad_norm": 2.074656902266161, + "learning_rate": 5.998071006372175e-08, + "loss": 0.6014, + "step": 31066 + }, + { + "epoch": 0.9521576560009808, + "grad_norm": 1.8119962248881556, + "learning_rate": 5.990408843669803e-08, + "loss": 0.6162, + "step": 31067 + }, + { + "epoch": 0.9521883045237219, + "grad_norm": 1.9460433207418049, + "learning_rate": 5.982751548570253e-08, + "loss": 0.6043, + "step": 31068 + }, + { + "epoch": 0.9522189530464632, + "grad_norm": 0.797124795091327, + "learning_rate": 5.975099121149074e-08, + "loss": 0.3884, + "step": 31069 + }, + { + "epoch": 0.9522496015692044, + "grad_norm": 1.692668660345055, + "learning_rate": 5.967451561481708e-08, + "loss": 0.5097, + "step": 31070 + }, + { + "epoch": 0.9522802500919456, + "grad_norm": 1.9625769135145283, + "learning_rate": 5.959808869643369e-08, + "loss": 0.5659, + "step": 31071 + }, + { + "epoch": 0.9523108986146868, + "grad_norm": 2.197697746181491, + "learning_rate": 5.952171045709443e-08, + "loss": 0.5457, + "step": 31072 + }, + { + "epoch": 0.952341547137428, + "grad_norm": 0.7965830724362266, + "learning_rate": 5.944538089755258e-08, + "loss": 0.3938, + "step": 31073 + }, + { + "epoch": 0.9523721956601692, + "grad_norm": 1.844336323519031, + "learning_rate": 5.936910001855867e-08, + "loss": 0.6779, + "step": 31074 + }, + { + "epoch": 0.9524028441829104, + "grad_norm": 1.9051926626011686, + "learning_rate": 5.929286782086541e-08, + "loss": 0.6533, + "step": 31075 + }, + { + "epoch": 0.9524334927056516, + "grad_norm": 1.9675252540873345, + "learning_rate": 5.921668430522387e-08, + "loss": 0.5146, + "step": 31076 + }, + { + "epoch": 0.9524641412283928, + "grad_norm": 1.8008685792023778, + "learning_rate": 5.914054947238457e-08, + "loss": 0.5672, + "step": 31077 + }, + { + "epoch": 0.952494789751134, + "grad_norm": 0.8196438923458761, + "learning_rate": 5.906446332309745e-08, + "loss": 0.3845, + "step": 31078 + }, + { + "epoch": 0.9525254382738751, + "grad_norm": 0.7848585809042018, + "learning_rate": 5.898842585811193e-08, + "loss": 0.4039, + "step": 31079 + }, + { + "epoch": 0.9525560867966164, + "grad_norm": 1.907456166382401, + "learning_rate": 5.8912437078177953e-08, + "loss": 0.5645, + "step": 31080 + }, + { + "epoch": 0.9525867353193576, + "grad_norm": 1.857613192702058, + "learning_rate": 5.883649698404437e-08, + "loss": 0.6156, + "step": 31081 + }, + { + "epoch": 0.9526173838420988, + "grad_norm": 2.0291254805430263, + "learning_rate": 5.876060557645835e-08, + "loss": 0.606, + "step": 31082 + }, + { + "epoch": 0.95264803236484, + "grad_norm": 2.0335478575819605, + "learning_rate": 5.8684762856168756e-08, + "loss": 0.5492, 
+ "step": 31083 + }, + { + "epoch": 0.9526786808875812, + "grad_norm": 2.169274481255191, + "learning_rate": 5.8608968823922754e-08, + "loss": 0.5384, + "step": 31084 + }, + { + "epoch": 0.9527093294103224, + "grad_norm": 1.8288805319527868, + "learning_rate": 5.8533223480466417e-08, + "loss": 0.602, + "step": 31085 + }, + { + "epoch": 0.9527399779330636, + "grad_norm": 2.059098085448526, + "learning_rate": 5.845752682654693e-08, + "loss": 0.5563, + "step": 31086 + }, + { + "epoch": 0.9527706264558048, + "grad_norm": 1.954932115656238, + "learning_rate": 5.838187886290925e-08, + "loss": 0.5967, + "step": 31087 + }, + { + "epoch": 0.952801274978546, + "grad_norm": 1.875466146158423, + "learning_rate": 5.8306279590299444e-08, + "loss": 0.5248, + "step": 31088 + }, + { + "epoch": 0.9528319235012872, + "grad_norm": 1.7371303706835346, + "learning_rate": 5.823072900946303e-08, + "loss": 0.5814, + "step": 31089 + }, + { + "epoch": 0.9528625720240285, + "grad_norm": 2.306863401285187, + "learning_rate": 5.815522712114274e-08, + "loss": 0.6961, + "step": 31090 + }, + { + "epoch": 0.9528932205467696, + "grad_norm": 2.0248128066905924, + "learning_rate": 5.8079773926083546e-08, + "loss": 0.609, + "step": 31091 + }, + { + "epoch": 0.9529238690695109, + "grad_norm": 1.7910235337263625, + "learning_rate": 5.80043694250293e-08, + "loss": 0.5233, + "step": 31092 + }, + { + "epoch": 0.952954517592252, + "grad_norm": 1.9611787440426234, + "learning_rate": 5.792901361872216e-08, + "loss": 0.4334, + "step": 31093 + }, + { + "epoch": 0.9529851661149933, + "grad_norm": 2.074080582795116, + "learning_rate": 5.7853706507904337e-08, + "loss": 0.5817, + "step": 31094 + }, + { + "epoch": 0.9530158146377344, + "grad_norm": 3.1595968395183167, + "learning_rate": 5.7778448093319115e-08, + "loss": 0.5071, + "step": 31095 + }, + { + "epoch": 0.9530464631604757, + "grad_norm": 2.586010802845303, + "learning_rate": 5.770323837570757e-08, + "loss": 0.5847, + "step": 31096 + }, + { + "epoch": 0.9530771116832168, + "grad_norm": 1.993772705859699, + "learning_rate": 5.762807735581022e-08, + "loss": 0.5403, + "step": 31097 + }, + { + "epoch": 0.9531077602059581, + "grad_norm": 1.8952997982920012, + "learning_rate": 5.755296503436758e-08, + "loss": 0.5854, + "step": 31098 + }, + { + "epoch": 0.9531384087286993, + "grad_norm": 1.869001861914134, + "learning_rate": 5.747790141212073e-08, + "loss": 0.4147, + "step": 31099 + }, + { + "epoch": 0.9531690572514405, + "grad_norm": 1.9184625044500627, + "learning_rate": 5.7402886489809075e-08, + "loss": 0.5306, + "step": 31100 + }, + { + "epoch": 0.9531997057741817, + "grad_norm": 1.7919086946565392, + "learning_rate": 5.7327920268170356e-08, + "loss": 0.556, + "step": 31101 + }, + { + "epoch": 0.9532303542969229, + "grad_norm": 1.7446791192566908, + "learning_rate": 5.725300274794454e-08, + "loss": 0.5174, + "step": 31102 + }, + { + "epoch": 0.9532610028196641, + "grad_norm": 1.9895491465998538, + "learning_rate": 5.717813392986993e-08, + "loss": 0.5224, + "step": 31103 + }, + { + "epoch": 0.9532916513424053, + "grad_norm": 2.2210647154682612, + "learning_rate": 5.71033138146837e-08, + "loss": 0.5798, + "step": 31104 + }, + { + "epoch": 0.9533222998651465, + "grad_norm": 0.7968411758965319, + "learning_rate": 5.702854240312361e-08, + "loss": 0.4125, + "step": 31105 + }, + { + "epoch": 0.9533529483878878, + "grad_norm": 2.090872550356026, + "learning_rate": 5.6953819695925175e-08, + "loss": 0.5497, + "step": 31106 + }, + { + "epoch": 0.9533835969106289, + "grad_norm": 
1.9758055570971245, + "learning_rate": 5.6879145693826133e-08, + "loss": 0.5633, + "step": 31107 + }, + { + "epoch": 0.9534142454333702, + "grad_norm": 2.1024542287455583, + "learning_rate": 5.6804520397561456e-08, + "loss": 0.5805, + "step": 31108 + }, + { + "epoch": 0.9534448939561113, + "grad_norm": 2.036131988160263, + "learning_rate": 5.672994380786667e-08, + "loss": 0.6148, + "step": 31109 + }, + { + "epoch": 0.9534755424788525, + "grad_norm": 1.7578371579045826, + "learning_rate": 5.665541592547619e-08, + "loss": 0.5406, + "step": 31110 + }, + { + "epoch": 0.9535061910015937, + "grad_norm": 1.85938482859866, + "learning_rate": 5.6580936751125526e-08, + "loss": 0.5264, + "step": 31111 + }, + { + "epoch": 0.9535368395243349, + "grad_norm": 2.238173830910778, + "learning_rate": 5.650650628554688e-08, + "loss": 0.5406, + "step": 31112 + }, + { + "epoch": 0.9535674880470761, + "grad_norm": 1.824168058503052, + "learning_rate": 5.643212452947466e-08, + "loss": 0.5605, + "step": 31113 + }, + { + "epoch": 0.9535981365698173, + "grad_norm": 1.7607520215265737, + "learning_rate": 5.635779148364162e-08, + "loss": 0.549, + "step": 31114 + }, + { + "epoch": 0.9536287850925586, + "grad_norm": 1.8301763351362053, + "learning_rate": 5.6283507148780505e-08, + "loss": 0.5386, + "step": 31115 + }, + { + "epoch": 0.9536594336152997, + "grad_norm": 0.8823648224475552, + "learning_rate": 5.6209271525622385e-08, + "loss": 0.386, + "step": 31116 + }, + { + "epoch": 0.953690082138041, + "grad_norm": 1.9338156687424983, + "learning_rate": 5.613508461489947e-08, + "loss": 0.6354, + "step": 31117 + }, + { + "epoch": 0.9537207306607821, + "grad_norm": 1.930339678849446, + "learning_rate": 5.6060946417342276e-08, + "loss": 0.5641, + "step": 31118 + }, + { + "epoch": 0.9537513791835234, + "grad_norm": 1.9303872257894779, + "learning_rate": 5.598685693368189e-08, + "loss": 0.5819, + "step": 31119 + }, + { + "epoch": 0.9537820277062645, + "grad_norm": 1.996183718324916, + "learning_rate": 5.591281616464772e-08, + "loss": 0.5197, + "step": 31120 + }, + { + "epoch": 0.9538126762290058, + "grad_norm": 1.8219130343138845, + "learning_rate": 5.5838824110969745e-08, + "loss": 0.601, + "step": 31121 + }, + { + "epoch": 0.9538433247517469, + "grad_norm": 2.1550266949234103, + "learning_rate": 5.5764880773376826e-08, + "loss": 0.5333, + "step": 31122 + }, + { + "epoch": 0.9538739732744882, + "grad_norm": 1.756523129563703, + "learning_rate": 5.5690986152597824e-08, + "loss": 0.5715, + "step": 31123 + }, + { + "epoch": 0.9539046217972293, + "grad_norm": 0.7869542990034883, + "learning_rate": 5.5617140249359934e-08, + "loss": 0.4165, + "step": 31124 + }, + { + "epoch": 0.9539352703199706, + "grad_norm": 2.0291384466525564, + "learning_rate": 5.554334306439202e-08, + "loss": 0.6312, + "step": 31125 + }, + { + "epoch": 0.9539659188427118, + "grad_norm": 2.028368018096644, + "learning_rate": 5.5469594598420164e-08, + "loss": 0.482, + "step": 31126 + }, + { + "epoch": 0.953996567365453, + "grad_norm": 1.8902693213128143, + "learning_rate": 5.5395894852172116e-08, + "loss": 0.5519, + "step": 31127 + }, + { + "epoch": 0.9540272158881942, + "grad_norm": 1.8672432636863898, + "learning_rate": 5.532224382637286e-08, + "loss": 0.6085, + "step": 31128 + }, + { + "epoch": 0.9540578644109354, + "grad_norm": 1.7656914898424498, + "learning_rate": 5.5248641521749024e-08, + "loss": 0.5816, + "step": 31129 + }, + { + "epoch": 0.9540885129336766, + "grad_norm": 1.8323443598523719, + "learning_rate": 5.5175087939025596e-08, + "loss": 0.602, + 
"step": 31130 + }, + { + "epoch": 0.9541191614564178, + "grad_norm": 0.7888386915907795, + "learning_rate": 5.510158307892699e-08, + "loss": 0.3906, + "step": 31131 + }, + { + "epoch": 0.954149809979159, + "grad_norm": 1.9135759409151498, + "learning_rate": 5.5028126942177626e-08, + "loss": 0.589, + "step": 31132 + }, + { + "epoch": 0.9541804585019003, + "grad_norm": 2.1369157321176155, + "learning_rate": 5.4954719529501376e-08, + "loss": 0.4514, + "step": 31133 + }, + { + "epoch": 0.9542111070246414, + "grad_norm": 2.0112440690830726, + "learning_rate": 5.488136084162155e-08, + "loss": 0.5852, + "step": 31134 + }, + { + "epoch": 0.9542417555473827, + "grad_norm": 1.741679598543246, + "learning_rate": 5.480805087926089e-08, + "loss": 0.6029, + "step": 31135 + }, + { + "epoch": 0.9542724040701238, + "grad_norm": 2.078365595412863, + "learning_rate": 5.473478964314216e-08, + "loss": 0.6343, + "step": 31136 + }, + { + "epoch": 0.9543030525928651, + "grad_norm": 2.0178327458164547, + "learning_rate": 5.4661577133986455e-08, + "loss": 0.4433, + "step": 31137 + }, + { + "epoch": 0.9543337011156062, + "grad_norm": 1.7550600978854487, + "learning_rate": 5.458841335251597e-08, + "loss": 0.5868, + "step": 31138 + }, + { + "epoch": 0.9543643496383475, + "grad_norm": 1.9755268517908857, + "learning_rate": 5.4515298299450126e-08, + "loss": 0.6165, + "step": 31139 + }, + { + "epoch": 0.9543949981610886, + "grad_norm": 2.065988198996507, + "learning_rate": 5.444223197551168e-08, + "loss": 0.7032, + "step": 31140 + }, + { + "epoch": 0.9544256466838298, + "grad_norm": 2.2909391910978387, + "learning_rate": 5.43692143814184e-08, + "loss": 0.6394, + "step": 31141 + }, + { + "epoch": 0.954456295206571, + "grad_norm": 1.5605945640567418, + "learning_rate": 5.429624551789136e-08, + "loss": 0.4382, + "step": 31142 + }, + { + "epoch": 0.9544869437293122, + "grad_norm": 1.7646054496587025, + "learning_rate": 5.422332538564834e-08, + "loss": 0.5624, + "step": 31143 + }, + { + "epoch": 0.9545175922520535, + "grad_norm": 2.0833544761436777, + "learning_rate": 5.4150453985408194e-08, + "loss": 0.6636, + "step": 31144 + }, + { + "epoch": 0.9545482407747946, + "grad_norm": 1.8563616001334484, + "learning_rate": 5.407763131788979e-08, + "loss": 0.6071, + "step": 31145 + }, + { + "epoch": 0.9545788892975359, + "grad_norm": 2.049121962196826, + "learning_rate": 5.400485738380923e-08, + "loss": 0.6287, + "step": 31146 + }, + { + "epoch": 0.954609537820277, + "grad_norm": 1.8329161990944156, + "learning_rate": 5.393213218388482e-08, + "loss": 0.5529, + "step": 31147 + }, + { + "epoch": 0.9546401863430183, + "grad_norm": 2.039588215249282, + "learning_rate": 5.3859455718832667e-08, + "loss": 0.4983, + "step": 31148 + }, + { + "epoch": 0.9546708348657594, + "grad_norm": 0.7864410900202188, + "learning_rate": 5.3786827989368296e-08, + "loss": 0.3918, + "step": 31149 + }, + { + "epoch": 0.9547014833885007, + "grad_norm": 1.9037858956113625, + "learning_rate": 5.3714248996207804e-08, + "loss": 0.4975, + "step": 31150 + }, + { + "epoch": 0.9547321319112418, + "grad_norm": 1.995018671333513, + "learning_rate": 5.364171874006674e-08, + "loss": 0.528, + "step": 31151 + }, + { + "epoch": 0.9547627804339831, + "grad_norm": 1.9500049773958852, + "learning_rate": 5.3569237221659523e-08, + "loss": 0.6036, + "step": 31152 + }, + { + "epoch": 0.9547934289567243, + "grad_norm": 0.8455106889877346, + "learning_rate": 5.3496804441700024e-08, + "loss": 0.396, + "step": 31153 + }, + { + "epoch": 0.9548240774794655, + "grad_norm": 
1.7270561671356162, + "learning_rate": 5.342442040090212e-08, + "loss": 0.6222, + "step": 31154 + }, + { + "epoch": 0.9548547260022067, + "grad_norm": 1.7475516095387695, + "learning_rate": 5.335208509997858e-08, + "loss": 0.5974, + "step": 31155 + }, + { + "epoch": 0.9548853745249479, + "grad_norm": 1.9652541049267507, + "learning_rate": 5.327979853964327e-08, + "loss": 0.5509, + "step": 31156 + }, + { + "epoch": 0.9549160230476891, + "grad_norm": 1.952734610135969, + "learning_rate": 5.320756072060784e-08, + "loss": 0.4975, + "step": 31157 + }, + { + "epoch": 0.9549466715704303, + "grad_norm": 1.7676611551255348, + "learning_rate": 5.31353716435834e-08, + "loss": 0.5133, + "step": 31158 + }, + { + "epoch": 0.9549773200931715, + "grad_norm": 1.9475678692489007, + "learning_rate": 5.3063231309282706e-08, + "loss": 0.5774, + "step": 31159 + }, + { + "epoch": 0.9550079686159128, + "grad_norm": 1.5879306341931745, + "learning_rate": 5.299113971841463e-08, + "loss": 0.6703, + "step": 31160 + }, + { + "epoch": 0.9550386171386539, + "grad_norm": 1.8337489225819221, + "learning_rate": 5.291909687169139e-08, + "loss": 0.4857, + "step": 31161 + }, + { + "epoch": 0.9550692656613952, + "grad_norm": 2.093396595334691, + "learning_rate": 5.2847102769821854e-08, + "loss": 0.561, + "step": 31162 + }, + { + "epoch": 0.9550999141841363, + "grad_norm": 1.7419453594852001, + "learning_rate": 5.2775157413515464e-08, + "loss": 0.5998, + "step": 31163 + }, + { + "epoch": 0.9551305627068776, + "grad_norm": 2.0423229727461316, + "learning_rate": 5.2703260803481645e-08, + "loss": 0.5963, + "step": 31164 + }, + { + "epoch": 0.9551612112296187, + "grad_norm": 1.9855000360704937, + "learning_rate": 5.263141294042817e-08, + "loss": 0.5818, + "step": 31165 + }, + { + "epoch": 0.95519185975236, + "grad_norm": 1.9449960597596117, + "learning_rate": 5.2559613825062806e-08, + "loss": 0.605, + "step": 31166 + }, + { + "epoch": 0.9552225082751011, + "grad_norm": 2.0115500608586667, + "learning_rate": 5.2487863458093867e-08, + "loss": 0.5611, + "step": 31167 + }, + { + "epoch": 0.9552531567978424, + "grad_norm": 1.7469019806808161, + "learning_rate": 5.2416161840228016e-08, + "loss": 0.5429, + "step": 31168 + }, + { + "epoch": 0.9552838053205835, + "grad_norm": 1.716740329641963, + "learning_rate": 5.234450897217136e-08, + "loss": 0.5289, + "step": 31169 + }, + { + "epoch": 0.9553144538433248, + "grad_norm": 0.7962620450843126, + "learning_rate": 5.227290485462999e-08, + "loss": 0.3923, + "step": 31170 + }, + { + "epoch": 0.955345102366066, + "grad_norm": 1.7894871927752893, + "learning_rate": 5.2201349488310015e-08, + "loss": 0.4912, + "step": 31171 + }, + { + "epoch": 0.9553757508888071, + "grad_norm": 1.7981717888734978, + "learning_rate": 5.212984287391587e-08, + "loss": 0.464, + "step": 31172 + }, + { + "epoch": 0.9554063994115484, + "grad_norm": 1.742494383723993, + "learning_rate": 5.205838501215254e-08, + "loss": 0.5444, + "step": 31173 + }, + { + "epoch": 0.9554370479342895, + "grad_norm": 2.021068948398119, + "learning_rate": 5.1986975903723926e-08, + "loss": 0.604, + "step": 31174 + }, + { + "epoch": 0.9554676964570308, + "grad_norm": 1.8549419065678394, + "learning_rate": 5.191561554933333e-08, + "loss": 0.6041, + "step": 31175 + }, + { + "epoch": 0.9554983449797719, + "grad_norm": 2.0266006345222385, + "learning_rate": 5.184430394968465e-08, + "loss": 0.6134, + "step": 31176 + }, + { + "epoch": 0.9555289935025132, + "grad_norm": 1.9020546725611756, + "learning_rate": 5.177304110547954e-08, + "loss": 0.5302, + 
"step": 31177 + }, + { + "epoch": 0.9555596420252543, + "grad_norm": 2.0138163605139, + "learning_rate": 5.170182701742133e-08, + "loss": 0.6187, + "step": 31178 + }, + { + "epoch": 0.9555902905479956, + "grad_norm": 2.0421660266840274, + "learning_rate": 5.163066168621056e-08, + "loss": 0.5531, + "step": 31179 + }, + { + "epoch": 0.9556209390707368, + "grad_norm": 1.896567721858918, + "learning_rate": 5.1559545112548904e-08, + "loss": 0.5287, + "step": 31180 + }, + { + "epoch": 0.955651587593478, + "grad_norm": 1.8537738397404249, + "learning_rate": 5.1488477297137465e-08, + "loss": 0.5986, + "step": 31181 + }, + { + "epoch": 0.9556822361162192, + "grad_norm": 2.205230090971057, + "learning_rate": 5.141745824067623e-08, + "loss": 0.6126, + "step": 31182 + }, + { + "epoch": 0.9557128846389604, + "grad_norm": 2.0902733432590206, + "learning_rate": 5.1346487943865206e-08, + "loss": 0.5087, + "step": 31183 + }, + { + "epoch": 0.9557435331617016, + "grad_norm": 1.8110539269389565, + "learning_rate": 5.127556640740272e-08, + "loss": 0.5073, + "step": 31184 + }, + { + "epoch": 0.9557741816844428, + "grad_norm": 1.8031996713098748, + "learning_rate": 5.1204693631988764e-08, + "loss": 0.5565, + "step": 31185 + }, + { + "epoch": 0.955804830207184, + "grad_norm": 1.851773840707076, + "learning_rate": 5.113386961832112e-08, + "loss": 0.5266, + "step": 31186 + }, + { + "epoch": 0.9558354787299252, + "grad_norm": 0.8036130694790324, + "learning_rate": 5.106309436709756e-08, + "loss": 0.4109, + "step": 31187 + }, + { + "epoch": 0.9558661272526664, + "grad_norm": 1.662013341334166, + "learning_rate": 5.099236787901529e-08, + "loss": 0.3846, + "step": 31188 + }, + { + "epoch": 0.9558967757754077, + "grad_norm": 0.7521450014307924, + "learning_rate": 5.092169015477211e-08, + "loss": 0.3805, + "step": 31189 + }, + { + "epoch": 0.9559274242981488, + "grad_norm": 2.0904236365224262, + "learning_rate": 5.0851061195063e-08, + "loss": 0.5048, + "step": 31190 + }, + { + "epoch": 0.9559580728208901, + "grad_norm": 1.768824547033017, + "learning_rate": 5.0780481000585194e-08, + "loss": 0.5792, + "step": 31191 + }, + { + "epoch": 0.9559887213436312, + "grad_norm": 1.8817968805834568, + "learning_rate": 5.070994957203368e-08, + "loss": 0.584, + "step": 31192 + }, + { + "epoch": 0.9560193698663725, + "grad_norm": 1.6328352998997389, + "learning_rate": 5.0639466910102905e-08, + "loss": 0.4565, + "step": 31193 + }, + { + "epoch": 0.9560500183891136, + "grad_norm": 1.8021649085419007, + "learning_rate": 5.0569033015488436e-08, + "loss": 0.4784, + "step": 31194 + }, + { + "epoch": 0.9560806669118549, + "grad_norm": 0.7984631909676558, + "learning_rate": 5.0498647888883036e-08, + "loss": 0.4053, + "step": 31195 + }, + { + "epoch": 0.956111315434596, + "grad_norm": 1.8456883134146882, + "learning_rate": 5.0428311530981155e-08, + "loss": 0.5395, + "step": 31196 + }, + { + "epoch": 0.9561419639573373, + "grad_norm": 2.0307675815884467, + "learning_rate": 5.0358023942476134e-08, + "loss": 0.5162, + "step": 31197 + }, + { + "epoch": 0.9561726124800785, + "grad_norm": 1.7931825149562055, + "learning_rate": 5.0287785124059074e-08, + "loss": 0.6973, + "step": 31198 + }, + { + "epoch": 0.9562032610028197, + "grad_norm": 1.9928890674352386, + "learning_rate": 5.021759507642277e-08, + "loss": 0.5353, + "step": 31199 + }, + { + "epoch": 0.9562339095255609, + "grad_norm": 1.8468163003979245, + "learning_rate": 5.014745380025998e-08, + "loss": 0.5, + "step": 31200 + }, + { + "epoch": 0.9562645580483021, + "grad_norm": 
2.0009599835039436, + "learning_rate": 5.007736129625962e-08, + "loss": 0.6395, + "step": 31201 + }, + { + "epoch": 0.9562952065710433, + "grad_norm": 1.8657781269513714, + "learning_rate": 5.00073175651139e-08, + "loss": 0.5611, + "step": 31202 + }, + { + "epoch": 0.9563258550937844, + "grad_norm": 0.8225018931550059, + "learning_rate": 4.993732260751283e-08, + "loss": 0.4115, + "step": 31203 + }, + { + "epoch": 0.9563565036165257, + "grad_norm": 2.0146875842803085, + "learning_rate": 4.986737642414585e-08, + "loss": 0.5, + "step": 31204 + }, + { + "epoch": 0.9563871521392668, + "grad_norm": 1.974603955532535, + "learning_rate": 4.979747901570242e-08, + "loss": 0.5732, + "step": 31205 + }, + { + "epoch": 0.9564178006620081, + "grad_norm": 1.9039566972072937, + "learning_rate": 4.9727630382870315e-08, + "loss": 0.5973, + "step": 31206 + }, + { + "epoch": 0.9564484491847492, + "grad_norm": 0.7873337024195984, + "learning_rate": 4.9657830526338993e-08, + "loss": 0.3991, + "step": 31207 + }, + { + "epoch": 0.9564790977074905, + "grad_norm": 1.7216571679328907, + "learning_rate": 4.958807944679567e-08, + "loss": 0.5189, + "step": 31208 + }, + { + "epoch": 0.9565097462302317, + "grad_norm": 1.9139705771960733, + "learning_rate": 4.9518377144927024e-08, + "loss": 0.5631, + "step": 31209 + }, + { + "epoch": 0.9565403947529729, + "grad_norm": 2.0346726273896425, + "learning_rate": 4.9448723621420834e-08, + "loss": 0.5602, + "step": 31210 + }, + { + "epoch": 0.9565710432757141, + "grad_norm": 1.9218210922262442, + "learning_rate": 4.9379118876963227e-08, + "loss": 0.5821, + "step": 31211 + }, + { + "epoch": 0.9566016917984553, + "grad_norm": 2.0610892445287776, + "learning_rate": 4.9309562912239207e-08, + "loss": 0.5694, + "step": 31212 + }, + { + "epoch": 0.9566323403211965, + "grad_norm": 0.7806337093462767, + "learning_rate": 4.924005572793544e-08, + "loss": 0.394, + "step": 31213 + }, + { + "epoch": 0.9566629888439377, + "grad_norm": 2.0982098070446606, + "learning_rate": 4.917059732473528e-08, + "loss": 0.5895, + "step": 31214 + }, + { + "epoch": 0.9566936373666789, + "grad_norm": 1.8018123749846684, + "learning_rate": 4.9101187703324835e-08, + "loss": 0.5672, + "step": 31215 + }, + { + "epoch": 0.9567242858894202, + "grad_norm": 1.7160104069353994, + "learning_rate": 4.90318268643869e-08, + "loss": 0.5481, + "step": 31216 + }, + { + "epoch": 0.9567549344121613, + "grad_norm": 1.9849213134970884, + "learning_rate": 4.896251480860481e-08, + "loss": 0.5773, + "step": 31217 + }, + { + "epoch": 0.9567855829349026, + "grad_norm": 1.7930529266312334, + "learning_rate": 4.889325153666247e-08, + "loss": 0.5473, + "step": 31218 + }, + { + "epoch": 0.9568162314576437, + "grad_norm": 1.8423418885820817, + "learning_rate": 4.882403704924099e-08, + "loss": 0.5426, + "step": 31219 + }, + { + "epoch": 0.956846879980385, + "grad_norm": 1.6682719283395955, + "learning_rate": 4.8754871347023725e-08, + "loss": 0.5217, + "step": 31220 + }, + { + "epoch": 0.9568775285031261, + "grad_norm": 1.9172721035780904, + "learning_rate": 4.868575443069068e-08, + "loss": 0.5111, + "step": 31221 + }, + { + "epoch": 0.9569081770258674, + "grad_norm": 1.6612877784180577, + "learning_rate": 4.8616686300924644e-08, + "loss": 0.4589, + "step": 31222 + }, + { + "epoch": 0.9569388255486085, + "grad_norm": 1.739604383138107, + "learning_rate": 4.854766695840507e-08, + "loss": 0.4854, + "step": 31223 + }, + { + "epoch": 0.9569694740713498, + "grad_norm": 1.7994347902102243, + "learning_rate": 4.847869640381142e-08, + "loss": 0.5958, + 
"step": 31224 + }, + { + "epoch": 0.957000122594091, + "grad_norm": 1.9498158040281375, + "learning_rate": 4.840977463782481e-08, + "loss": 0.6043, + "step": 31225 + }, + { + "epoch": 0.9570307711168322, + "grad_norm": 1.8342794277805243, + "learning_rate": 4.8340901661123596e-08, + "loss": 0.5962, + "step": 31226 + }, + { + "epoch": 0.9570614196395734, + "grad_norm": 1.9613206241185124, + "learning_rate": 4.827207747438667e-08, + "loss": 0.6194, + "step": 31227 + }, + { + "epoch": 0.9570920681623146, + "grad_norm": 1.9526814673553599, + "learning_rate": 4.820330207829127e-08, + "loss": 0.5095, + "step": 31228 + }, + { + "epoch": 0.9571227166850558, + "grad_norm": 0.8196547735706278, + "learning_rate": 4.81345754735163e-08, + "loss": 0.4081, + "step": 31229 + }, + { + "epoch": 0.957153365207797, + "grad_norm": 1.9020701369078117, + "learning_rate": 4.806589766073788e-08, + "loss": 0.499, + "step": 31230 + }, + { + "epoch": 0.9571840137305382, + "grad_norm": 1.7906083388956522, + "learning_rate": 4.7997268640633255e-08, + "loss": 0.6161, + "step": 31231 + }, + { + "epoch": 0.9572146622532794, + "grad_norm": 1.8071572573712134, + "learning_rate": 4.792868841387854e-08, + "loss": 0.5058, + "step": 31232 + }, + { + "epoch": 0.9572453107760206, + "grad_norm": 0.8087363480172078, + "learning_rate": 4.786015698114988e-08, + "loss": 0.4042, + "step": 31233 + }, + { + "epoch": 0.9572759592987617, + "grad_norm": 1.8647708195894022, + "learning_rate": 4.779167434312171e-08, + "loss": 0.4688, + "step": 31234 + }, + { + "epoch": 0.957306607821503, + "grad_norm": 2.0477646818026467, + "learning_rate": 4.7723240500469616e-08, + "loss": 0.6723, + "step": 31235 + }, + { + "epoch": 0.9573372563442442, + "grad_norm": 0.7987026532201917, + "learning_rate": 4.7654855453866944e-08, + "loss": 0.3927, + "step": 31236 + }, + { + "epoch": 0.9573679048669854, + "grad_norm": 2.0139496837293596, + "learning_rate": 4.758651920398871e-08, + "loss": 0.6185, + "step": 31237 + }, + { + "epoch": 0.9573985533897266, + "grad_norm": 2.2566219111913446, + "learning_rate": 4.7518231751507715e-08, + "loss": 0.6582, + "step": 31238 + }, + { + "epoch": 0.9574292019124678, + "grad_norm": 2.0243937076742022, + "learning_rate": 4.744999309709619e-08, + "loss": 0.616, + "step": 31239 + }, + { + "epoch": 0.957459850435209, + "grad_norm": 1.7280854678336641, + "learning_rate": 4.738180324142749e-08, + "loss": 0.4209, + "step": 31240 + }, + { + "epoch": 0.9574904989579502, + "grad_norm": 2.183943923912822, + "learning_rate": 4.7313662185172745e-08, + "loss": 0.6168, + "step": 31241 + }, + { + "epoch": 0.9575211474806914, + "grad_norm": 1.6868372332759005, + "learning_rate": 4.7245569929003644e-08, + "loss": 0.4532, + "step": 31242 + }, + { + "epoch": 0.9575517960034327, + "grad_norm": 1.98288775690004, + "learning_rate": 4.717752647359131e-08, + "loss": 0.5986, + "step": 31243 + }, + { + "epoch": 0.9575824445261738, + "grad_norm": 2.015273983141244, + "learning_rate": 4.710953181960576e-08, + "loss": 0.4651, + "step": 31244 + }, + { + "epoch": 0.9576130930489151, + "grad_norm": 0.7722838862717484, + "learning_rate": 4.704158596771813e-08, + "loss": 0.3939, + "step": 31245 + }, + { + "epoch": 0.9576437415716562, + "grad_norm": 1.883678084306338, + "learning_rate": 4.6973688918596214e-08, + "loss": 0.4779, + "step": 31246 + }, + { + "epoch": 0.9576743900943975, + "grad_norm": 2.3397817656115043, + "learning_rate": 4.6905840672910044e-08, + "loss": 0.5745, + "step": 31247 + }, + { + "epoch": 0.9577050386171386, + "grad_norm": 
2.0591991279478057, + "learning_rate": 4.6838041231327956e-08, + "loss": 0.6021, + "step": 31248 + }, + { + "epoch": 0.9577356871398799, + "grad_norm": 0.8355709335979418, + "learning_rate": 4.677029059451776e-08, + "loss": 0.4084, + "step": 31249 + }, + { + "epoch": 0.957766335662621, + "grad_norm": 1.9261013595526468, + "learning_rate": 4.670258876314781e-08, + "loss": 0.5974, + "step": 31250 + }, + { + "epoch": 0.9577969841853623, + "grad_norm": 2.2503430330186167, + "learning_rate": 4.663493573788369e-08, + "loss": 0.6312, + "step": 31251 + }, + { + "epoch": 0.9578276327081034, + "grad_norm": 1.9780896831778347, + "learning_rate": 4.6567331519393747e-08, + "loss": 0.5416, + "step": 31252 + }, + { + "epoch": 0.9578582812308447, + "grad_norm": 1.661702601810785, + "learning_rate": 4.6499776108343e-08, + "loss": 0.4631, + "step": 31253 + }, + { + "epoch": 0.9578889297535859, + "grad_norm": 2.1142703977675885, + "learning_rate": 4.643226950539703e-08, + "loss": 0.5723, + "step": 31254 + }, + { + "epoch": 0.9579195782763271, + "grad_norm": 2.13315509136335, + "learning_rate": 4.6364811711221426e-08, + "loss": 0.6074, + "step": 31255 + }, + { + "epoch": 0.9579502267990683, + "grad_norm": 1.9164947879639473, + "learning_rate": 4.6297402726481197e-08, + "loss": 0.5993, + "step": 31256 + }, + { + "epoch": 0.9579808753218095, + "grad_norm": 1.988814002898255, + "learning_rate": 4.623004255183971e-08, + "loss": 0.5468, + "step": 31257 + }, + { + "epoch": 0.9580115238445507, + "grad_norm": 1.9512710440502834, + "learning_rate": 4.616273118796144e-08, + "loss": 0.5861, + "step": 31258 + }, + { + "epoch": 0.9580421723672919, + "grad_norm": 1.501131347621706, + "learning_rate": 4.609546863550918e-08, + "loss": 0.4696, + "step": 31259 + }, + { + "epoch": 0.9580728208900331, + "grad_norm": 1.8852997171287702, + "learning_rate": 4.602825489514573e-08, + "loss": 0.5238, + "step": 31260 + }, + { + "epoch": 0.9581034694127744, + "grad_norm": 1.9314346346682545, + "learning_rate": 4.5961089967533346e-08, + "loss": 0.5625, + "step": 31261 + }, + { + "epoch": 0.9581341179355155, + "grad_norm": 1.8675671060666583, + "learning_rate": 4.589397385333427e-08, + "loss": 0.5321, + "step": 31262 + }, + { + "epoch": 0.9581647664582568, + "grad_norm": 1.9727471297119397, + "learning_rate": 4.582690655320854e-08, + "loss": 0.6101, + "step": 31263 + }, + { + "epoch": 0.9581954149809979, + "grad_norm": 0.8059993481207911, + "learning_rate": 4.575988806781895e-08, + "loss": 0.4127, + "step": 31264 + }, + { + "epoch": 0.9582260635037391, + "grad_norm": 1.8940162343295217, + "learning_rate": 4.569291839782386e-08, + "loss": 0.4186, + "step": 31265 + }, + { + "epoch": 0.9582567120264803, + "grad_norm": 2.438626499838043, + "learning_rate": 4.562599754388441e-08, + "loss": 0.5173, + "step": 31266 + }, + { + "epoch": 0.9582873605492215, + "grad_norm": 1.9151186749795213, + "learning_rate": 4.5559125506660084e-08, + "loss": 0.5815, + "step": 31267 + }, + { + "epoch": 0.9583180090719627, + "grad_norm": 1.835251098419266, + "learning_rate": 4.5492302286808676e-08, + "loss": 0.6763, + "step": 31268 + }, + { + "epoch": 0.9583486575947039, + "grad_norm": 1.7696354074445333, + "learning_rate": 4.542552788498966e-08, + "loss": 0.5779, + "step": 31269 + }, + { + "epoch": 0.9583793061174452, + "grad_norm": 1.7628099419920633, + "learning_rate": 4.535880230186085e-08, + "loss": 0.5102, + "step": 31270 + }, + { + "epoch": 0.9584099546401863, + "grad_norm": 1.8392857005838212, + "learning_rate": 4.5292125538078933e-08, + "loss": 0.6319, + 
"step": 31271 + }, + { + "epoch": 0.9584406031629276, + "grad_norm": 1.9127444681052868, + "learning_rate": 4.522549759430173e-08, + "loss": 0.5369, + "step": 31272 + }, + { + "epoch": 0.9584712516856687, + "grad_norm": 1.9588176733428695, + "learning_rate": 4.5158918471185365e-08, + "loss": 0.5315, + "step": 31273 + }, + { + "epoch": 0.95850190020841, + "grad_norm": 0.7595521078746742, + "learning_rate": 4.5092388169385436e-08, + "loss": 0.3879, + "step": 31274 + }, + { + "epoch": 0.9585325487311511, + "grad_norm": 1.8738704862223994, + "learning_rate": 4.502590668955864e-08, + "loss": 0.5121, + "step": 31275 + }, + { + "epoch": 0.9585631972538924, + "grad_norm": 1.7064790976911204, + "learning_rate": 4.495947403235889e-08, + "loss": 0.5248, + "step": 31276 + }, + { + "epoch": 0.9585938457766335, + "grad_norm": 1.9911674787758697, + "learning_rate": 4.489309019844124e-08, + "loss": 0.5387, + "step": 31277 + }, + { + "epoch": 0.9586244942993748, + "grad_norm": 2.124649892054935, + "learning_rate": 4.482675518846069e-08, + "loss": 0.6481, + "step": 31278 + }, + { + "epoch": 0.958655142822116, + "grad_norm": 0.7986421894665096, + "learning_rate": 4.4760469003068965e-08, + "loss": 0.3967, + "step": 31279 + }, + { + "epoch": 0.9586857913448572, + "grad_norm": 0.8376987083310761, + "learning_rate": 4.469423164292053e-08, + "loss": 0.4052, + "step": 31280 + }, + { + "epoch": 0.9587164398675984, + "grad_norm": 0.8218228993675248, + "learning_rate": 4.462804310866764e-08, + "loss": 0.4043, + "step": 31281 + }, + { + "epoch": 0.9587470883903396, + "grad_norm": 2.0524383140907845, + "learning_rate": 4.456190340096256e-08, + "loss": 0.589, + "step": 31282 + }, + { + "epoch": 0.9587777369130808, + "grad_norm": 2.0052035565185715, + "learning_rate": 4.449581252045698e-08, + "loss": 0.6163, + "step": 31283 + }, + { + "epoch": 0.958808385435822, + "grad_norm": 1.9097938999793798, + "learning_rate": 4.442977046780206e-08, + "loss": 0.5682, + "step": 31284 + }, + { + "epoch": 0.9588390339585632, + "grad_norm": 1.8402601298399466, + "learning_rate": 4.4363777243648377e-08, + "loss": 0.5415, + "step": 31285 + }, + { + "epoch": 0.9588696824813044, + "grad_norm": 0.7691515531797322, + "learning_rate": 4.4297832848647084e-08, + "loss": 0.3799, + "step": 31286 + }, + { + "epoch": 0.9589003310040456, + "grad_norm": 1.843994181145781, + "learning_rate": 4.4231937283446544e-08, + "loss": 0.5445, + "step": 31287 + }, + { + "epoch": 0.9589309795267869, + "grad_norm": 2.3350317712832522, + "learning_rate": 4.416609054869681e-08, + "loss": 0.5415, + "step": 31288 + }, + { + "epoch": 0.958961628049528, + "grad_norm": 1.8280975212209682, + "learning_rate": 4.410029264504678e-08, + "loss": 0.5216, + "step": 31289 + }, + { + "epoch": 0.9589922765722693, + "grad_norm": 2.1717239503846635, + "learning_rate": 4.4034543573144295e-08, + "loss": 0.4831, + "step": 31290 + }, + { + "epoch": 0.9590229250950104, + "grad_norm": 1.9688551743614326, + "learning_rate": 4.396884333363771e-08, + "loss": 0.5709, + "step": 31291 + }, + { + "epoch": 0.9590535736177517, + "grad_norm": 2.042437738788527, + "learning_rate": 4.3903191927173736e-08, + "loss": 0.6054, + "step": 31292 + }, + { + "epoch": 0.9590842221404928, + "grad_norm": 1.7871023971833968, + "learning_rate": 4.383758935440019e-08, + "loss": 0.6065, + "step": 31293 + }, + { + "epoch": 0.9591148706632341, + "grad_norm": 2.111490095371684, + "learning_rate": 4.377203561596266e-08, + "loss": 0.5533, + "step": 31294 + }, + { + "epoch": 0.9591455191859752, + "grad_norm": 
1.8854034970401155, + "learning_rate": 4.3706530712507854e-08, + "loss": 0.5915, + "step": 31295 + }, + { + "epoch": 0.9591761677087164, + "grad_norm": 1.8571068425782609, + "learning_rate": 4.364107464468026e-08, + "loss": 0.5371, + "step": 31296 + }, + { + "epoch": 0.9592068162314576, + "grad_norm": 2.4271333720975936, + "learning_rate": 4.3575667413125466e-08, + "loss": 0.6262, + "step": 31297 + }, + { + "epoch": 0.9592374647541988, + "grad_norm": 1.8261266102139624, + "learning_rate": 4.351030901848741e-08, + "loss": 0.5489, + "step": 31298 + }, + { + "epoch": 0.9592681132769401, + "grad_norm": 1.9105915947780459, + "learning_rate": 4.344499946141056e-08, + "loss": 0.5478, + "step": 31299 + }, + { + "epoch": 0.9592987617996812, + "grad_norm": 1.875067974203376, + "learning_rate": 4.337973874253887e-08, + "loss": 0.5535, + "step": 31300 + }, + { + "epoch": 0.9593294103224225, + "grad_norm": 0.8090554477455706, + "learning_rate": 4.331452686251458e-08, + "loss": 0.4086, + "step": 31301 + }, + { + "epoch": 0.9593600588451636, + "grad_norm": 1.7902329608286685, + "learning_rate": 4.324936382198053e-08, + "loss": 0.4651, + "step": 31302 + }, + { + "epoch": 0.9593907073679049, + "grad_norm": 2.124513569875996, + "learning_rate": 4.318424962157786e-08, + "loss": 0.499, + "step": 31303 + }, + { + "epoch": 0.959421355890646, + "grad_norm": 0.8142861086782649, + "learning_rate": 4.3119184261949945e-08, + "loss": 0.3919, + "step": 31304 + }, + { + "epoch": 0.9594520044133873, + "grad_norm": 2.243625649007804, + "learning_rate": 4.3054167743737385e-08, + "loss": 0.573, + "step": 31305 + }, + { + "epoch": 0.9594826529361284, + "grad_norm": 1.9046112401696393, + "learning_rate": 4.298920006757967e-08, + "loss": 0.6235, + "step": 31306 + }, + { + "epoch": 0.9595133014588697, + "grad_norm": 1.7734998051958895, + "learning_rate": 4.2924281234117407e-08, + "loss": 0.577, + "step": 31307 + }, + { + "epoch": 0.9595439499816109, + "grad_norm": 1.781485772506605, + "learning_rate": 4.285941124399118e-08, + "loss": 0.5306, + "step": 31308 + }, + { + "epoch": 0.9595745985043521, + "grad_norm": 2.0089523487517424, + "learning_rate": 4.2794590097839375e-08, + "loss": 0.6523, + "step": 31309 + }, + { + "epoch": 0.9596052470270933, + "grad_norm": 0.8143141650426291, + "learning_rate": 4.272981779630036e-08, + "loss": 0.3784, + "step": 31310 + }, + { + "epoch": 0.9596358955498345, + "grad_norm": 1.8996184838967074, + "learning_rate": 4.266509434001309e-08, + "loss": 0.5478, + "step": 31311 + }, + { + "epoch": 0.9596665440725757, + "grad_norm": 0.7985292201151827, + "learning_rate": 4.260041972961537e-08, + "loss": 0.3916, + "step": 31312 + }, + { + "epoch": 0.9596971925953169, + "grad_norm": 2.00385458370692, + "learning_rate": 4.253579396574392e-08, + "loss": 0.6068, + "step": 31313 + }, + { + "epoch": 0.9597278411180581, + "grad_norm": 2.000867174155858, + "learning_rate": 4.247121704903545e-08, + "loss": 0.683, + "step": 31314 + }, + { + "epoch": 0.9597584896407994, + "grad_norm": 1.8928658163125243, + "learning_rate": 4.2406688980126675e-08, + "loss": 0.6123, + "step": 31315 + }, + { + "epoch": 0.9597891381635405, + "grad_norm": 1.942430771209755, + "learning_rate": 4.234220975965375e-08, + "loss": 0.5977, + "step": 31316 + }, + { + "epoch": 0.9598197866862818, + "grad_norm": 1.7655410856889122, + "learning_rate": 4.227777938825117e-08, + "loss": 0.4815, + "step": 31317 + }, + { + "epoch": 0.9598504352090229, + "grad_norm": 1.8062388466333505, + "learning_rate": 4.221339786655343e-08, + "loss": 0.5348, + 
"step": 31318 + }, + { + "epoch": 0.9598810837317642, + "grad_norm": 2.3478010700552625, + "learning_rate": 4.214906519519668e-08, + "loss": 0.5611, + "step": 31319 + }, + { + "epoch": 0.9599117322545053, + "grad_norm": 1.8506109357990748, + "learning_rate": 4.20847813748132e-08, + "loss": 0.5837, + "step": 31320 + }, + { + "epoch": 0.9599423807772466, + "grad_norm": 1.8065367367941958, + "learning_rate": 4.2020546406036364e-08, + "loss": 0.6185, + "step": 31321 + }, + { + "epoch": 0.9599730292999877, + "grad_norm": 2.1917629617534224, + "learning_rate": 4.195636028950012e-08, + "loss": 0.6561, + "step": 31322 + }, + { + "epoch": 0.960003677822729, + "grad_norm": 2.1760070803535743, + "learning_rate": 4.189222302583673e-08, + "loss": 0.5447, + "step": 31323 + }, + { + "epoch": 0.9600343263454701, + "grad_norm": 1.9858524949678285, + "learning_rate": 4.182813461567792e-08, + "loss": 0.495, + "step": 31324 + }, + { + "epoch": 0.9600649748682114, + "grad_norm": 1.9934928791457274, + "learning_rate": 4.176409505965484e-08, + "loss": 0.6507, + "step": 31325 + }, + { + "epoch": 0.9600956233909526, + "grad_norm": 0.8061047399956712, + "learning_rate": 4.1700104358398106e-08, + "loss": 0.3928, + "step": 31326 + }, + { + "epoch": 0.9601262719136937, + "grad_norm": 1.8799559691656131, + "learning_rate": 4.163616251253999e-08, + "loss": 0.4388, + "step": 31327 + }, + { + "epoch": 0.960156920436435, + "grad_norm": 1.9718060036113665, + "learning_rate": 4.1572269522708875e-08, + "loss": 0.4938, + "step": 31328 + }, + { + "epoch": 0.9601875689591761, + "grad_norm": 1.988289741962226, + "learning_rate": 4.150842538953481e-08, + "loss": 0.498, + "step": 31329 + }, + { + "epoch": 0.9602182174819174, + "grad_norm": 1.9225928611376337, + "learning_rate": 4.144463011364675e-08, + "loss": 0.5706, + "step": 31330 + }, + { + "epoch": 0.9602488660046585, + "grad_norm": 1.8947058575008988, + "learning_rate": 4.138088369567361e-08, + "loss": 0.5559, + "step": 31331 + }, + { + "epoch": 0.9602795145273998, + "grad_norm": 1.9499719892755727, + "learning_rate": 4.1317186136243805e-08, + "loss": 0.6391, + "step": 31332 + }, + { + "epoch": 0.9603101630501409, + "grad_norm": 1.738587933860036, + "learning_rate": 4.125353743598348e-08, + "loss": 0.4697, + "step": 31333 + }, + { + "epoch": 0.9603408115728822, + "grad_norm": 1.8711510611671118, + "learning_rate": 4.118993759552159e-08, + "loss": 0.4815, + "step": 31334 + }, + { + "epoch": 0.9603714600956234, + "grad_norm": 1.7543672362562173, + "learning_rate": 4.112638661548429e-08, + "loss": 0.5619, + "step": 31335 + }, + { + "epoch": 0.9604021086183646, + "grad_norm": 0.8003468442344579, + "learning_rate": 4.1062884496496645e-08, + "loss": 0.4046, + "step": 31336 + }, + { + "epoch": 0.9604327571411058, + "grad_norm": 2.100471753046198, + "learning_rate": 4.099943123918593e-08, + "loss": 0.5064, + "step": 31337 + }, + { + "epoch": 0.960463405663847, + "grad_norm": 2.321064635416969, + "learning_rate": 4.093602684417608e-08, + "loss": 0.4514, + "step": 31338 + }, + { + "epoch": 0.9604940541865882, + "grad_norm": 1.945649776932131, + "learning_rate": 4.087267131209271e-08, + "loss": 0.5032, + "step": 31339 + }, + { + "epoch": 0.9605247027093294, + "grad_norm": 2.000892849548619, + "learning_rate": 4.080936464355978e-08, + "loss": 0.5922, + "step": 31340 + }, + { + "epoch": 0.9605553512320706, + "grad_norm": 2.2452838776406154, + "learning_rate": 4.074610683920066e-08, + "loss": 0.626, + "step": 31341 + }, + { + "epoch": 0.9605859997548118, + "grad_norm": 1.8100125992608167, 
+ "learning_rate": 4.068289789963931e-08, + "loss": 0.4871, + "step": 31342 + }, + { + "epoch": 0.960616648277553, + "grad_norm": 1.8235481870119161, + "learning_rate": 4.061973782549855e-08, + "loss": 0.4886, + "step": 31343 + }, + { + "epoch": 0.9606472968002943, + "grad_norm": 0.7694432459618766, + "learning_rate": 4.0556626617399566e-08, + "loss": 0.3784, + "step": 31344 + }, + { + "epoch": 0.9606779453230354, + "grad_norm": 2.3335928104971724, + "learning_rate": 4.0493564275965735e-08, + "loss": 0.4752, + "step": 31345 + }, + { + "epoch": 0.9607085938457767, + "grad_norm": 1.9227126031114978, + "learning_rate": 4.043055080181824e-08, + "loss": 0.636, + "step": 31346 + }, + { + "epoch": 0.9607392423685178, + "grad_norm": 2.0171399586846133, + "learning_rate": 4.036758619557657e-08, + "loss": 0.6292, + "step": 31347 + }, + { + "epoch": 0.9607698908912591, + "grad_norm": 1.8065203377065562, + "learning_rate": 4.030467045786246e-08, + "loss": 0.5421, + "step": 31348 + }, + { + "epoch": 0.9608005394140002, + "grad_norm": 1.8419026653847066, + "learning_rate": 4.024180358929486e-08, + "loss": 0.4728, + "step": 31349 + }, + { + "epoch": 0.9608311879367415, + "grad_norm": 0.7893837211290605, + "learning_rate": 4.017898559049438e-08, + "loss": 0.3817, + "step": 31350 + }, + { + "epoch": 0.9608618364594826, + "grad_norm": 1.7108990299364002, + "learning_rate": 4.011621646207942e-08, + "loss": 0.4471, + "step": 31351 + }, + { + "epoch": 0.9608924849822239, + "grad_norm": 1.7332606153693737, + "learning_rate": 4.005349620466836e-08, + "loss": 0.4834, + "step": 31352 + }, + { + "epoch": 0.960923133504965, + "grad_norm": 1.8497134318765922, + "learning_rate": 3.999082481887906e-08, + "loss": 0.5365, + "step": 31353 + }, + { + "epoch": 0.9609537820277063, + "grad_norm": 2.0308797456813776, + "learning_rate": 3.992820230532934e-08, + "loss": 0.5667, + "step": 31354 + }, + { + "epoch": 0.9609844305504475, + "grad_norm": 1.9680375673029942, + "learning_rate": 3.9865628664635945e-08, + "loss": 0.5873, + "step": 31355 + }, + { + "epoch": 0.9610150790731887, + "grad_norm": 2.05297054996984, + "learning_rate": 3.980310389741615e-08, + "loss": 0.5211, + "step": 31356 + }, + { + "epoch": 0.9610457275959299, + "grad_norm": 1.8804333168844207, + "learning_rate": 3.974062800428502e-08, + "loss": 0.5435, + "step": 31357 + }, + { + "epoch": 0.961076376118671, + "grad_norm": 1.7571645194728986, + "learning_rate": 3.9678200985858726e-08, + "loss": 0.4818, + "step": 31358 + }, + { + "epoch": 0.9611070246414123, + "grad_norm": 0.7513353805283806, + "learning_rate": 3.961582284275234e-08, + "loss": 0.4111, + "step": 31359 + }, + { + "epoch": 0.9611376731641534, + "grad_norm": 1.933987012292599, + "learning_rate": 3.9553493575579804e-08, + "loss": 0.6062, + "step": 31360 + }, + { + "epoch": 0.9611683216868947, + "grad_norm": 2.2988674342884825, + "learning_rate": 3.949121318495674e-08, + "loss": 0.6434, + "step": 31361 + }, + { + "epoch": 0.9611989702096359, + "grad_norm": 2.075040895730147, + "learning_rate": 3.9428981671495446e-08, + "loss": 0.5807, + "step": 31362 + }, + { + "epoch": 0.9612296187323771, + "grad_norm": 2.1189770960405383, + "learning_rate": 3.936679903580986e-08, + "loss": 0.5595, + "step": 31363 + }, + { + "epoch": 0.9612602672551183, + "grad_norm": 1.719705376925939, + "learning_rate": 3.9304665278512846e-08, + "loss": 0.5005, + "step": 31364 + }, + { + "epoch": 0.9612909157778595, + "grad_norm": 1.8044127304997921, + "learning_rate": 3.924258040021556e-08, + "loss": 0.4966, + "step": 31365 + }, + 
{ + "epoch": 0.9613215643006007, + "grad_norm": 1.6624869439568088, + "learning_rate": 3.9180544401530296e-08, + "loss": 0.4774, + "step": 31366 + }, + { + "epoch": 0.9613522128233419, + "grad_norm": 1.7327501667045218, + "learning_rate": 3.911855728306879e-08, + "loss": 0.5291, + "step": 31367 + }, + { + "epoch": 0.9613828613460831, + "grad_norm": 1.7845504953503635, + "learning_rate": 3.905661904544167e-08, + "loss": 0.5701, + "step": 31368 + }, + { + "epoch": 0.9614135098688243, + "grad_norm": 0.7915888684570761, + "learning_rate": 3.899472968925844e-08, + "loss": 0.3944, + "step": 31369 + }, + { + "epoch": 0.9614441583915655, + "grad_norm": 2.0456043050056065, + "learning_rate": 3.893288921512972e-08, + "loss": 0.57, + "step": 31370 + }, + { + "epoch": 0.9614748069143068, + "grad_norm": 2.135312733619914, + "learning_rate": 3.8871097623664475e-08, + "loss": 0.5066, + "step": 31371 + }, + { + "epoch": 0.9615054554370479, + "grad_norm": 1.9540228766124634, + "learning_rate": 3.880935491547222e-08, + "loss": 0.5257, + "step": 31372 + }, + { + "epoch": 0.9615361039597892, + "grad_norm": 2.1467835640940196, + "learning_rate": 3.874766109115968e-08, + "loss": 0.5861, + "step": 31373 + }, + { + "epoch": 0.9615667524825303, + "grad_norm": 1.8513885613856322, + "learning_rate": 3.8686016151336384e-08, + "loss": 0.5566, + "step": 31374 + }, + { + "epoch": 0.9615974010052716, + "grad_norm": 1.8253164623605238, + "learning_rate": 3.8624420096609604e-08, + "loss": 0.6485, + "step": 31375 + }, + { + "epoch": 0.9616280495280127, + "grad_norm": 1.8079707747994378, + "learning_rate": 3.856287292758554e-08, + "loss": 0.569, + "step": 31376 + }, + { + "epoch": 0.961658698050754, + "grad_norm": 1.9603243958376573, + "learning_rate": 3.8501374644870914e-08, + "loss": 0.5251, + "step": 31377 + }, + { + "epoch": 0.9616893465734951, + "grad_norm": 1.7361274764273489, + "learning_rate": 3.8439925249071366e-08, + "loss": 0.572, + "step": 31378 + }, + { + "epoch": 0.9617199950962364, + "grad_norm": 0.8136981348035252, + "learning_rate": 3.837852474079307e-08, + "loss": 0.394, + "step": 31379 + }, + { + "epoch": 0.9617506436189776, + "grad_norm": 2.0054595626454033, + "learning_rate": 3.831717312064054e-08, + "loss": 0.5342, + "step": 31380 + }, + { + "epoch": 0.9617812921417188, + "grad_norm": 1.9822359016977573, + "learning_rate": 3.8255870389218297e-08, + "loss": 0.5602, + "step": 31381 + }, + { + "epoch": 0.96181194066446, + "grad_norm": 0.7985270209598556, + "learning_rate": 3.8194616547130856e-08, + "loss": 0.3879, + "step": 31382 + }, + { + "epoch": 0.9618425891872012, + "grad_norm": 1.8381390157496567, + "learning_rate": 3.813341159498107e-08, + "loss": 0.6462, + "step": 31383 + }, + { + "epoch": 0.9618732377099424, + "grad_norm": 2.04092190471796, + "learning_rate": 3.807225553337235e-08, + "loss": 0.5674, + "step": 31384 + }, + { + "epoch": 0.9619038862326836, + "grad_norm": 1.936105160282342, + "learning_rate": 3.801114836290754e-08, + "loss": 0.5758, + "step": 31385 + }, + { + "epoch": 0.9619345347554248, + "grad_norm": 1.9342319043493799, + "learning_rate": 3.795009008418837e-08, + "loss": 0.5974, + "step": 31386 + }, + { + "epoch": 0.961965183278166, + "grad_norm": 2.001503357467748, + "learning_rate": 3.788908069781605e-08, + "loss": 0.52, + "step": 31387 + }, + { + "epoch": 0.9619958318009072, + "grad_norm": 2.0790202623469134, + "learning_rate": 3.782812020439286e-08, + "loss": 0.674, + "step": 31388 + }, + { + "epoch": 0.9620264803236483, + "grad_norm": 1.975149096819531, + "learning_rate": 
3.7767208604518325e-08, + "loss": 0.5191, + "step": 31389 + }, + { + "epoch": 0.9620571288463896, + "grad_norm": 2.165529403701501, + "learning_rate": 3.770634589879363e-08, + "loss": 0.6017, + "step": 31390 + }, + { + "epoch": 0.9620877773691308, + "grad_norm": 2.021856870221355, + "learning_rate": 3.764553208781774e-08, + "loss": 0.5744, + "step": 31391 + }, + { + "epoch": 0.962118425891872, + "grad_norm": 1.992241132110484, + "learning_rate": 3.7584767172190175e-08, + "loss": 0.52, + "step": 31392 + }, + { + "epoch": 0.9621490744146132, + "grad_norm": 1.9004699543989214, + "learning_rate": 3.7524051152509346e-08, + "loss": 0.5357, + "step": 31393 + }, + { + "epoch": 0.9621797229373544, + "grad_norm": 0.8321379139152727, + "learning_rate": 3.746338402937366e-08, + "loss": 0.4031, + "step": 31394 + }, + { + "epoch": 0.9622103714600956, + "grad_norm": 1.835714526366607, + "learning_rate": 3.740276580338098e-08, + "loss": 0.6085, + "step": 31395 + }, + { + "epoch": 0.9622410199828368, + "grad_norm": 2.003353399594089, + "learning_rate": 3.7342196475129156e-08, + "loss": 0.6529, + "step": 31396 + }, + { + "epoch": 0.962271668505578, + "grad_norm": 0.7826198340830863, + "learning_rate": 3.728167604521382e-08, + "loss": 0.3957, + "step": 31397 + }, + { + "epoch": 0.9623023170283193, + "grad_norm": 1.735353975411378, + "learning_rate": 3.722120451423228e-08, + "loss": 0.521, + "step": 31398 + }, + { + "epoch": 0.9623329655510604, + "grad_norm": 0.8033151442230045, + "learning_rate": 3.7160781882780164e-08, + "loss": 0.3888, + "step": 31399 + }, + { + "epoch": 0.9623636140738017, + "grad_norm": 1.890714950788884, + "learning_rate": 3.710040815145199e-08, + "loss": 0.5939, + "step": 31400 + }, + { + "epoch": 0.9623942625965428, + "grad_norm": 0.7831916695759189, + "learning_rate": 3.704008332084341e-08, + "loss": 0.4122, + "step": 31401 + }, + { + "epoch": 0.9624249111192841, + "grad_norm": 2.024422372761532, + "learning_rate": 3.6979807391549495e-08, + "loss": 0.5611, + "step": 31402 + }, + { + "epoch": 0.9624555596420252, + "grad_norm": 1.8138707171116777, + "learning_rate": 3.6919580364162547e-08, + "loss": 0.589, + "step": 31403 + }, + { + "epoch": 0.9624862081647665, + "grad_norm": 0.7510568060037691, + "learning_rate": 3.685940223927709e-08, + "loss": 0.402, + "step": 31404 + }, + { + "epoch": 0.9625168566875076, + "grad_norm": 2.21716087353525, + "learning_rate": 3.6799273017485985e-08, + "loss": 0.5189, + "step": 31405 + }, + { + "epoch": 0.9625475052102489, + "grad_norm": 0.7833276185145915, + "learning_rate": 3.673919269938153e-08, + "loss": 0.3772, + "step": 31406 + }, + { + "epoch": 0.96257815373299, + "grad_norm": 0.7691089633083004, + "learning_rate": 3.667916128555549e-08, + "loss": 0.3959, + "step": 31407 + }, + { + "epoch": 0.9626088022557313, + "grad_norm": 2.0186008917900256, + "learning_rate": 3.661917877659959e-08, + "loss": 0.5259, + "step": 31408 + }, + { + "epoch": 0.9626394507784725, + "grad_norm": 1.952247957210946, + "learning_rate": 3.655924517310505e-08, + "loss": 0.6227, + "step": 31409 + }, + { + "epoch": 0.9626700993012137, + "grad_norm": 0.9022158369493533, + "learning_rate": 3.649936047566249e-08, + "loss": 0.4103, + "step": 31410 + }, + { + "epoch": 0.9627007478239549, + "grad_norm": 2.0960228315069616, + "learning_rate": 3.6439524684860894e-08, + "loss": 0.589, + "step": 31411 + }, + { + "epoch": 0.9627313963466961, + "grad_norm": 1.8155498609011573, + "learning_rate": 3.6379737801290896e-08, + "loss": 0.5569, + "step": 31412 + }, + { + "epoch": 
0.9627620448694373, + "grad_norm": 1.850578917064874, + "learning_rate": 3.631999982554202e-08, + "loss": 0.5523, + "step": 31413 + }, + { + "epoch": 0.9627926933921785, + "grad_norm": 1.8254579221389662, + "learning_rate": 3.626031075820158e-08, + "loss": 0.4851, + "step": 31414 + }, + { + "epoch": 0.9628233419149197, + "grad_norm": 2.103653317462176, + "learning_rate": 3.6200670599858544e-08, + "loss": 0.4957, + "step": 31415 + }, + { + "epoch": 0.962853990437661, + "grad_norm": 1.8215367944229144, + "learning_rate": 3.614107935110023e-08, + "loss": 0.5518, + "step": 31416 + }, + { + "epoch": 0.9628846389604021, + "grad_norm": 1.9980605380843959, + "learning_rate": 3.608153701251393e-08, + "loss": 0.5618, + "step": 31417 + }, + { + "epoch": 0.9629152874831434, + "grad_norm": 2.1860303911093872, + "learning_rate": 3.6022043584686416e-08, + "loss": 0.596, + "step": 31418 + }, + { + "epoch": 0.9629459360058845, + "grad_norm": 1.9777807445055087, + "learning_rate": 3.596259906820387e-08, + "loss": 0.6373, + "step": 31419 + }, + { + "epoch": 0.9629765845286257, + "grad_norm": 1.9165307021181235, + "learning_rate": 3.590320346365139e-08, + "loss": 0.6071, + "step": 31420 + }, + { + "epoch": 0.9630072330513669, + "grad_norm": 0.8099800267237576, + "learning_rate": 3.584385677161517e-08, + "loss": 0.4185, + "step": 31421 + }, + { + "epoch": 0.9630378815741081, + "grad_norm": 1.9047544241728034, + "learning_rate": 3.5784558992679744e-08, + "loss": 0.5726, + "step": 31422 + }, + { + "epoch": 0.9630685300968493, + "grad_norm": 2.248549772413251, + "learning_rate": 3.572531012742908e-08, + "loss": 0.6708, + "step": 31423 + }, + { + "epoch": 0.9630991786195905, + "grad_norm": 0.7776625391962969, + "learning_rate": 3.5666110176447166e-08, + "loss": 0.3968, + "step": 31424 + }, + { + "epoch": 0.9631298271423318, + "grad_norm": 0.7674493036269067, + "learning_rate": 3.5606959140316863e-08, + "loss": 0.3833, + "step": 31425 + }, + { + "epoch": 0.9631604756650729, + "grad_norm": 0.7755819061495585, + "learning_rate": 3.5547857019621603e-08, + "loss": 0.395, + "step": 31426 + }, + { + "epoch": 0.9631911241878142, + "grad_norm": 1.652849448300206, + "learning_rate": 3.548880381494369e-08, + "loss": 0.4881, + "step": 31427 + }, + { + "epoch": 0.9632217727105553, + "grad_norm": 1.9906979510416816, + "learning_rate": 3.542979952686432e-08, + "loss": 0.5531, + "step": 31428 + }, + { + "epoch": 0.9632524212332966, + "grad_norm": 1.9000034859761354, + "learning_rate": 3.537084415596636e-08, + "loss": 0.5855, + "step": 31429 + }, + { + "epoch": 0.9632830697560377, + "grad_norm": 2.0946698386212885, + "learning_rate": 3.531193770282826e-08, + "loss": 0.6489, + "step": 31430 + }, + { + "epoch": 0.963313718278779, + "grad_norm": 0.7564458680191282, + "learning_rate": 3.5253080168033414e-08, + "loss": 0.3798, + "step": 31431 + }, + { + "epoch": 0.9633443668015201, + "grad_norm": 1.8153450673964353, + "learning_rate": 3.519427155215971e-08, + "loss": 0.545, + "step": 31432 + }, + { + "epoch": 0.9633750153242614, + "grad_norm": 1.8996949575411035, + "learning_rate": 3.513551185578667e-08, + "loss": 0.6358, + "step": 31433 + }, + { + "epoch": 0.9634056638470025, + "grad_norm": 2.191179024721583, + "learning_rate": 3.5076801079493847e-08, + "loss": 0.6203, + "step": 31434 + }, + { + "epoch": 0.9634363123697438, + "grad_norm": 1.9558003626177223, + "learning_rate": 3.501813922386022e-08, + "loss": 0.5448, + "step": 31435 + }, + { + "epoch": 0.963466960892485, + "grad_norm": 1.7895066462813691, + "learning_rate": 
3.4959526289463085e-08, + "loss": 0.5529, + "step": 31436 + }, + { + "epoch": 0.9634976094152262, + "grad_norm": 2.288730705587839, + "learning_rate": 3.490096227687978e-08, + "loss": 0.5507, + "step": 31437 + }, + { + "epoch": 0.9635282579379674, + "grad_norm": 1.8577096458535458, + "learning_rate": 3.484244718668761e-08, + "loss": 0.4834, + "step": 31438 + }, + { + "epoch": 0.9635589064607086, + "grad_norm": 1.9081739514597538, + "learning_rate": 3.478398101946334e-08, + "loss": 0.5743, + "step": 31439 + }, + { + "epoch": 0.9635895549834498, + "grad_norm": 2.3242888217624476, + "learning_rate": 3.4725563775783175e-08, + "loss": 0.692, + "step": 31440 + }, + { + "epoch": 0.963620203506191, + "grad_norm": 2.032487282275462, + "learning_rate": 3.466719545622166e-08, + "loss": 0.5609, + "step": 31441 + }, + { + "epoch": 0.9636508520289322, + "grad_norm": 1.811200747478175, + "learning_rate": 3.460887606135554e-08, + "loss": 0.5526, + "step": 31442 + }, + { + "epoch": 0.9636815005516735, + "grad_norm": 2.3337069687739667, + "learning_rate": 3.455060559175882e-08, + "loss": 0.6151, + "step": 31443 + }, + { + "epoch": 0.9637121490744146, + "grad_norm": 1.9189873651716352, + "learning_rate": 3.449238404800492e-08, + "loss": 0.6229, + "step": 31444 + }, + { + "epoch": 0.9637427975971559, + "grad_norm": 2.0822818259379052, + "learning_rate": 3.443421143066783e-08, + "loss": 0.5869, + "step": 31445 + }, + { + "epoch": 0.963773446119897, + "grad_norm": 2.1018895405664026, + "learning_rate": 3.437608774032153e-08, + "loss": 0.5487, + "step": 31446 + }, + { + "epoch": 0.9638040946426383, + "grad_norm": 1.9101729908505163, + "learning_rate": 3.431801297753778e-08, + "loss": 0.4893, + "step": 31447 + }, + { + "epoch": 0.9638347431653794, + "grad_norm": 1.801931892415407, + "learning_rate": 3.4259987142888916e-08, + "loss": 0.5397, + "step": 31448 + }, + { + "epoch": 0.9638653916881207, + "grad_norm": 1.6830435267278268, + "learning_rate": 3.420201023694725e-08, + "loss": 0.4871, + "step": 31449 + }, + { + "epoch": 0.9638960402108618, + "grad_norm": 2.1012958416488563, + "learning_rate": 3.4144082260283984e-08, + "loss": 0.6854, + "step": 31450 + }, + { + "epoch": 0.963926688733603, + "grad_norm": 1.8811639296980769, + "learning_rate": 3.4086203213469224e-08, + "loss": 0.5554, + "step": 31451 + }, + { + "epoch": 0.9639573372563442, + "grad_norm": 1.7727879323775038, + "learning_rate": 3.402837309707363e-08, + "loss": 0.5913, + "step": 31452 + }, + { + "epoch": 0.9639879857790854, + "grad_norm": 1.9616452624981282, + "learning_rate": 3.39705919116673e-08, + "loss": 0.6139, + "step": 31453 + }, + { + "epoch": 0.9640186343018267, + "grad_norm": 0.8020516865176345, + "learning_rate": 3.391285965781976e-08, + "loss": 0.379, + "step": 31454 + }, + { + "epoch": 0.9640492828245678, + "grad_norm": 1.9432139612013453, + "learning_rate": 3.385517633609892e-08, + "loss": 0.5702, + "step": 31455 + }, + { + "epoch": 0.9640799313473091, + "grad_norm": 1.793000416061456, + "learning_rate": 3.3797541947073744e-08, + "loss": 0.4684, + "step": 31456 + }, + { + "epoch": 0.9641105798700502, + "grad_norm": 1.7302366745880435, + "learning_rate": 3.3739956491311564e-08, + "loss": 0.527, + "step": 31457 + }, + { + "epoch": 0.9641412283927915, + "grad_norm": 2.195405306877065, + "learning_rate": 3.368241996938137e-08, + "loss": 0.593, + "step": 31458 + }, + { + "epoch": 0.9641718769155326, + "grad_norm": 1.963633648728703, + "learning_rate": 3.362493238184827e-08, + "loss": 0.5088, + "step": 31459 + }, + { + "epoch": 
0.9642025254382739, + "grad_norm": 0.7511786265746598, + "learning_rate": 3.356749372927903e-08, + "loss": 0.3749, + "step": 31460 + }, + { + "epoch": 0.964233173961015, + "grad_norm": 1.6624959269529287, + "learning_rate": 3.3510104012240976e-08, + "loss": 0.5062, + "step": 31461 + }, + { + "epoch": 0.9642638224837563, + "grad_norm": 1.8991247675099978, + "learning_rate": 3.345276323129809e-08, + "loss": 0.5524, + "step": 31462 + }, + { + "epoch": 0.9642944710064975, + "grad_norm": 1.883329035236024, + "learning_rate": 3.3395471387015485e-08, + "loss": 0.5576, + "step": 31463 + }, + { + "epoch": 0.9643251195292387, + "grad_norm": 1.8533434403528388, + "learning_rate": 3.333822847995882e-08, + "loss": 0.6314, + "step": 31464 + }, + { + "epoch": 0.9643557680519799, + "grad_norm": 2.0146740301687234, + "learning_rate": 3.3281034510690977e-08, + "loss": 0.5609, + "step": 31465 + }, + { + "epoch": 0.9643864165747211, + "grad_norm": 1.6536162082268724, + "learning_rate": 3.322388947977595e-08, + "loss": 0.4922, + "step": 31466 + }, + { + "epoch": 0.9644170650974623, + "grad_norm": 1.9549016515219781, + "learning_rate": 3.316679338777662e-08, + "loss": 0.6063, + "step": 31467 + }, + { + "epoch": 0.9644477136202035, + "grad_norm": 2.0433809811011328, + "learning_rate": 3.3109746235255316e-08, + "loss": 0.5326, + "step": 31468 + }, + { + "epoch": 0.9644783621429447, + "grad_norm": 1.8689639905442827, + "learning_rate": 3.305274802277547e-08, + "loss": 0.4561, + "step": 31469 + }, + { + "epoch": 0.964509010665686, + "grad_norm": 1.840285936889023, + "learning_rate": 3.2995798750897204e-08, + "loss": 0.5553, + "step": 31470 + }, + { + "epoch": 0.9645396591884271, + "grad_norm": 1.7233999644454332, + "learning_rate": 3.2938898420182276e-08, + "loss": 0.5522, + "step": 31471 + }, + { + "epoch": 0.9645703077111684, + "grad_norm": 1.8571551987946575, + "learning_rate": 3.2882047031191354e-08, + "loss": 0.6392, + "step": 31472 + }, + { + "epoch": 0.9646009562339095, + "grad_norm": 1.912111613840711, + "learning_rate": 3.282524458448455e-08, + "loss": 0.6033, + "step": 31473 + }, + { + "epoch": 0.9646316047566508, + "grad_norm": 2.3782972051589675, + "learning_rate": 3.2768491080620854e-08, + "loss": 0.6618, + "step": 31474 + }, + { + "epoch": 0.9646622532793919, + "grad_norm": 1.8595228902115581, + "learning_rate": 3.2711786520160934e-08, + "loss": 0.4713, + "step": 31475 + }, + { + "epoch": 0.9646929018021332, + "grad_norm": 1.9903870736246856, + "learning_rate": 3.265513090366213e-08, + "loss": 0.6249, + "step": 31476 + }, + { + "epoch": 0.9647235503248743, + "grad_norm": 1.8136506524618192, + "learning_rate": 3.259852423168397e-08, + "loss": 0.6443, + "step": 31477 + }, + { + "epoch": 0.9647541988476156, + "grad_norm": 2.0352278345350414, + "learning_rate": 3.25419665047827e-08, + "loss": 0.6252, + "step": 31478 + }, + { + "epoch": 0.9647848473703567, + "grad_norm": 1.944744638898108, + "learning_rate": 3.248545772351674e-08, + "loss": 0.6973, + "step": 31479 + }, + { + "epoch": 0.964815495893098, + "grad_norm": 1.8557763947829817, + "learning_rate": 3.2428997888442894e-08, + "loss": 0.5881, + "step": 31480 + }, + { + "epoch": 0.9648461444158392, + "grad_norm": 0.8025826487879908, + "learning_rate": 3.237258700011736e-08, + "loss": 0.3931, + "step": 31481 + }, + { + "epoch": 0.9648767929385803, + "grad_norm": 2.0833383721960983, + "learning_rate": 3.231622505909471e-08, + "loss": 0.6536, + "step": 31482 + }, + { + "epoch": 0.9649074414613216, + "grad_norm": 1.8993911244464958, + "learning_rate": 
3.225991206593226e-08, + "loss": 0.493, + "step": 31483 + }, + { + "epoch": 0.9649380899840627, + "grad_norm": 1.8705753133149183, + "learning_rate": 3.2203648021183475e-08, + "loss": 0.5247, + "step": 31484 + }, + { + "epoch": 0.964968738506804, + "grad_norm": 2.1688802689208715, + "learning_rate": 3.2147432925403455e-08, + "loss": 0.5638, + "step": 31485 + }, + { + "epoch": 0.9649993870295451, + "grad_norm": 2.0512445025570982, + "learning_rate": 3.2091266779145643e-08, + "loss": 0.5967, + "step": 31486 + }, + { + "epoch": 0.9650300355522864, + "grad_norm": 2.0191953910872082, + "learning_rate": 3.20351495829635e-08, + "loss": 0.6348, + "step": 31487 + }, + { + "epoch": 0.9650606840750275, + "grad_norm": 1.7919074832627273, + "learning_rate": 3.1979081337410453e-08, + "loss": 0.5352, + "step": 31488 + }, + { + "epoch": 0.9650913325977688, + "grad_norm": 1.786741061577768, + "learning_rate": 3.1923062043038856e-08, + "loss": 0.5569, + "step": 31489 + }, + { + "epoch": 0.96512198112051, + "grad_norm": 1.8587067864187647, + "learning_rate": 3.186709170039992e-08, + "loss": 0.5685, + "step": 31490 + }, + { + "epoch": 0.9651526296432512, + "grad_norm": 0.8419494042591912, + "learning_rate": 3.181117031004599e-08, + "loss": 0.4008, + "step": 31491 + }, + { + "epoch": 0.9651832781659924, + "grad_norm": 2.05710878214068, + "learning_rate": 3.175529787252773e-08, + "loss": 0.4057, + "step": 31492 + }, + { + "epoch": 0.9652139266887336, + "grad_norm": 2.087259204636892, + "learning_rate": 3.1699474388395824e-08, + "loss": 0.5518, + "step": 31493 + }, + { + "epoch": 0.9652445752114748, + "grad_norm": 1.8704585413734467, + "learning_rate": 3.1643699858199815e-08, + "loss": 0.5172, + "step": 31494 + }, + { + "epoch": 0.965275223734216, + "grad_norm": 2.210152829977733, + "learning_rate": 3.158797428248983e-08, + "loss": 0.5757, + "step": 31495 + }, + { + "epoch": 0.9653058722569572, + "grad_norm": 2.2296640095567803, + "learning_rate": 3.153229766181487e-08, + "loss": 0.5385, + "step": 31496 + }, + { + "epoch": 0.9653365207796984, + "grad_norm": 1.746863381355645, + "learning_rate": 3.1476669996723384e-08, + "loss": 0.5387, + "step": 31497 + }, + { + "epoch": 0.9653671693024396, + "grad_norm": 1.8888872556623622, + "learning_rate": 3.1421091287763826e-08, + "loss": 0.5286, + "step": 31498 + }, + { + "epoch": 0.9653978178251809, + "grad_norm": 2.2515643142840815, + "learning_rate": 3.136556153548298e-08, + "loss": 0.6445, + "step": 31499 + }, + { + "epoch": 0.965428466347922, + "grad_norm": 2.226626635908187, + "learning_rate": 3.131008074042874e-08, + "loss": 0.7042, + "step": 31500 + }, + { + "epoch": 0.9654591148706633, + "grad_norm": 1.7230995443892625, + "learning_rate": 3.1254648903147336e-08, + "loss": 0.5395, + "step": 31501 + }, + { + "epoch": 0.9654897633934044, + "grad_norm": 1.8071512030693104, + "learning_rate": 3.119926602418555e-08, + "loss": 0.4945, + "step": 31502 + }, + { + "epoch": 0.9655204119161457, + "grad_norm": 1.8499991345063, + "learning_rate": 3.1143932104088506e-08, + "loss": 0.6439, + "step": 31503 + }, + { + "epoch": 0.9655510604388868, + "grad_norm": 1.771986545769202, + "learning_rate": 3.1088647143401875e-08, + "loss": 0.5124, + "step": 31504 + }, + { + "epoch": 0.9655817089616281, + "grad_norm": 0.7873203592981427, + "learning_rate": 3.1033411142670225e-08, + "loss": 0.4021, + "step": 31505 + }, + { + "epoch": 0.9656123574843692, + "grad_norm": 1.8981068610346783, + "learning_rate": 3.097822410243756e-08, + "loss": 0.4992, + "step": 31506 + }, + { + "epoch": 
0.9656430060071105, + "grad_norm": 0.8311488735732468, + "learning_rate": 3.09230860232479e-08, + "loss": 0.4108, + "step": 31507 + }, + { + "epoch": 0.9656736545298517, + "grad_norm": 0.8131236082247367, + "learning_rate": 3.086799690564413e-08, + "loss": 0.3954, + "step": 31508 + }, + { + "epoch": 0.9657043030525929, + "grad_norm": 2.2049567276788014, + "learning_rate": 3.081295675016971e-08, + "loss": 0.5567, + "step": 31509 + }, + { + "epoch": 0.9657349515753341, + "grad_norm": 1.8323119957602467, + "learning_rate": 3.075796555736643e-08, + "loss": 0.5465, + "step": 31510 + }, + { + "epoch": 0.9657656000980753, + "grad_norm": 0.7988874350354227, + "learning_rate": 3.0703023327776635e-08, + "loss": 0.4236, + "step": 31511 + }, + { + "epoch": 0.9657962486208165, + "grad_norm": 2.2248711491409243, + "learning_rate": 3.0648130061941004e-08, + "loss": 0.524, + "step": 31512 + }, + { + "epoch": 0.9658268971435576, + "grad_norm": 2.019258513819308, + "learning_rate": 3.0593285760401325e-08, + "loss": 0.5927, + "step": 31513 + }, + { + "epoch": 0.9658575456662989, + "grad_norm": 0.7644366190025282, + "learning_rate": 3.0538490423697166e-08, + "loss": 0.3923, + "step": 31514 + }, + { + "epoch": 0.96588819418904, + "grad_norm": 0.809611134896956, + "learning_rate": 3.048374405236865e-08, + "loss": 0.4222, + "step": 31515 + }, + { + "epoch": 0.9659188427117813, + "grad_norm": 1.9460656281031075, + "learning_rate": 3.042904664695534e-08, + "loss": 0.5497, + "step": 31516 + }, + { + "epoch": 0.9659494912345225, + "grad_norm": 1.7321151796738758, + "learning_rate": 3.037439820799626e-08, + "loss": 0.5047, + "step": 31517 + }, + { + "epoch": 0.9659801397572637, + "grad_norm": 1.6890862766954806, + "learning_rate": 3.031979873602986e-08, + "loss": 0.6549, + "step": 31518 + }, + { + "epoch": 0.9660107882800049, + "grad_norm": 1.6898333484141275, + "learning_rate": 3.026524823159405e-08, + "loss": 0.5171, + "step": 31519 + }, + { + "epoch": 0.9660414368027461, + "grad_norm": 1.9195848369368587, + "learning_rate": 3.021074669522617e-08, + "loss": 0.5802, + "step": 31520 + }, + { + "epoch": 0.9660720853254873, + "grad_norm": 2.1072586489430853, + "learning_rate": 3.015629412746357e-08, + "loss": 0.6036, + "step": 31521 + }, + { + "epoch": 0.9661027338482285, + "grad_norm": 1.932350400558947, + "learning_rate": 3.010189052884249e-08, + "loss": 0.6393, + "step": 31522 + }, + { + "epoch": 0.9661333823709697, + "grad_norm": 0.8075014291574936, + "learning_rate": 3.0047535899898616e-08, + "loss": 0.3839, + "step": 31523 + }, + { + "epoch": 0.966164030893711, + "grad_norm": 1.5783472266829515, + "learning_rate": 2.999323024116873e-08, + "loss": 0.4415, + "step": 31524 + }, + { + "epoch": 0.9661946794164521, + "grad_norm": 0.8307456000120635, + "learning_rate": 2.993897355318687e-08, + "loss": 0.4243, + "step": 31525 + }, + { + "epoch": 0.9662253279391934, + "grad_norm": 1.8621649029413514, + "learning_rate": 2.988476583648814e-08, + "loss": 0.4708, + "step": 31526 + }, + { + "epoch": 0.9662559764619345, + "grad_norm": 2.0206422263819883, + "learning_rate": 2.983060709160601e-08, + "loss": 0.5837, + "step": 31527 + }, + { + "epoch": 0.9662866249846758, + "grad_norm": 0.8262147597058059, + "learning_rate": 2.9776497319074503e-08, + "loss": 0.3881, + "step": 31528 + }, + { + "epoch": 0.9663172735074169, + "grad_norm": 1.8630029987451344, + "learning_rate": 2.9722436519427632e-08, + "loss": 0.559, + "step": 31529 + }, + { + "epoch": 0.9663479220301582, + "grad_norm": 1.885153536611853, + "learning_rate": 
2.966842469319664e-08, + "loss": 0.6325, + "step": 31530 + }, + { + "epoch": 0.9663785705528993, + "grad_norm": 2.0252969344362683, + "learning_rate": 2.9614461840914432e-08, + "loss": 0.5642, + "step": 31531 + }, + { + "epoch": 0.9664092190756406, + "grad_norm": 1.9974182153851014, + "learning_rate": 2.9560547963112808e-08, + "loss": 0.6199, + "step": 31532 + }, + { + "epoch": 0.9664398675983817, + "grad_norm": 1.7844961115310733, + "learning_rate": 2.9506683060323005e-08, + "loss": 0.5003, + "step": 31533 + }, + { + "epoch": 0.966470516121123, + "grad_norm": 2.333120996702714, + "learning_rate": 2.945286713307516e-08, + "loss": 0.6293, + "step": 31534 + }, + { + "epoch": 0.9665011646438642, + "grad_norm": 1.8313614552900286, + "learning_rate": 2.9399100181900507e-08, + "loss": 0.6094, + "step": 31535 + }, + { + "epoch": 0.9665318131666054, + "grad_norm": 1.653855321707302, + "learning_rate": 2.9345382207328078e-08, + "loss": 0.4515, + "step": 31536 + }, + { + "epoch": 0.9665624616893466, + "grad_norm": 1.991531513402732, + "learning_rate": 2.9291713209887442e-08, + "loss": 0.5547, + "step": 31537 + }, + { + "epoch": 0.9665931102120878, + "grad_norm": 1.7426987151336524, + "learning_rate": 2.9238093190107066e-08, + "loss": 0.4717, + "step": 31538 + }, + { + "epoch": 0.966623758734829, + "grad_norm": 1.8871132478779173, + "learning_rate": 2.918452214851597e-08, + "loss": 0.5521, + "step": 31539 + }, + { + "epoch": 0.9666544072575702, + "grad_norm": 0.8392348646909842, + "learning_rate": 2.9131000085641514e-08, + "loss": 0.4188, + "step": 31540 + }, + { + "epoch": 0.9666850557803114, + "grad_norm": 2.0565494094375047, + "learning_rate": 2.9077527002011054e-08, + "loss": 0.6424, + "step": 31541 + }, + { + "epoch": 0.9667157043030526, + "grad_norm": 1.7712717260646573, + "learning_rate": 2.902410289815194e-08, + "loss": 0.5688, + "step": 31542 + }, + { + "epoch": 0.9667463528257938, + "grad_norm": 1.6266064820825097, + "learning_rate": 2.897072777458987e-08, + "loss": 0.5016, + "step": 31543 + }, + { + "epoch": 0.966777001348535, + "grad_norm": 0.8094435363072211, + "learning_rate": 2.891740163185108e-08, + "loss": 0.399, + "step": 31544 + }, + { + "epoch": 0.9668076498712762, + "grad_norm": 0.7987073205218556, + "learning_rate": 2.8864124470461276e-08, + "loss": 0.4024, + "step": 31545 + }, + { + "epoch": 0.9668382983940174, + "grad_norm": 0.8135651587143096, + "learning_rate": 2.8810896290945023e-08, + "loss": 0.4091, + "step": 31546 + }, + { + "epoch": 0.9668689469167586, + "grad_norm": 0.8030438085715187, + "learning_rate": 2.875771709382691e-08, + "loss": 0.3805, + "step": 31547 + }, + { + "epoch": 0.9668995954394998, + "grad_norm": 0.8721169579245636, + "learning_rate": 2.870458687963096e-08, + "loss": 0.39, + "step": 31548 + }, + { + "epoch": 0.966930243962241, + "grad_norm": 1.8260584573140666, + "learning_rate": 2.865150564888064e-08, + "loss": 0.5887, + "step": 31549 + }, + { + "epoch": 0.9669608924849822, + "grad_norm": 1.9531791169278143, + "learning_rate": 2.859847340209887e-08, + "loss": 0.4634, + "step": 31550 + }, + { + "epoch": 0.9669915410077234, + "grad_norm": 1.9889724526184527, + "learning_rate": 2.8545490139809117e-08, + "loss": 0.616, + "step": 31551 + }, + { + "epoch": 0.9670221895304646, + "grad_norm": 0.7734974911137978, + "learning_rate": 2.8492555862531522e-08, + "loss": 0.383, + "step": 31552 + }, + { + "epoch": 0.9670528380532059, + "grad_norm": 1.9427266347098229, + "learning_rate": 2.843967057078956e-08, + "loss": 0.5821, + "step": 31553 + }, + { + "epoch": 
0.967083486575947, + "grad_norm": 2.0666172013389583, + "learning_rate": 2.8386834265103357e-08, + "loss": 0.5804, + "step": 31554 + }, + { + "epoch": 0.9671141350986883, + "grad_norm": 0.7936356420837691, + "learning_rate": 2.8334046945993622e-08, + "loss": 0.3851, + "step": 31555 + }, + { + "epoch": 0.9671447836214294, + "grad_norm": 2.1267796570215616, + "learning_rate": 2.8281308613980483e-08, + "loss": 0.6006, + "step": 31556 + }, + { + "epoch": 0.9671754321441707, + "grad_norm": 0.7907657036774621, + "learning_rate": 2.822861926958409e-08, + "loss": 0.391, + "step": 31557 + }, + { + "epoch": 0.9672060806669118, + "grad_norm": 1.7669498671253923, + "learning_rate": 2.817597891332291e-08, + "loss": 0.5092, + "step": 31558 + }, + { + "epoch": 0.9672367291896531, + "grad_norm": 0.8124743347486859, + "learning_rate": 2.8123387545715975e-08, + "loss": 0.4137, + "step": 31559 + }, + { + "epoch": 0.9672673777123942, + "grad_norm": 1.6792816467315586, + "learning_rate": 2.8070845167281202e-08, + "loss": 0.5405, + "step": 31560 + }, + { + "epoch": 0.9672980262351355, + "grad_norm": 1.7214963053365138, + "learning_rate": 2.8018351778537068e-08, + "loss": 0.5658, + "step": 31561 + }, + { + "epoch": 0.9673286747578766, + "grad_norm": 2.0404048021591805, + "learning_rate": 2.7965907379999822e-08, + "loss": 0.6175, + "step": 31562 + }, + { + "epoch": 0.9673593232806179, + "grad_norm": 1.5210486125090268, + "learning_rate": 2.7913511972186834e-08, + "loss": 0.4547, + "step": 31563 + }, + { + "epoch": 0.9673899718033591, + "grad_norm": 1.8149173491241795, + "learning_rate": 2.78611655556138e-08, + "loss": 0.5697, + "step": 31564 + }, + { + "epoch": 0.9674206203261003, + "grad_norm": 1.9236009015268865, + "learning_rate": 2.7808868130796974e-08, + "loss": 0.587, + "step": 31565 + }, + { + "epoch": 0.9674512688488415, + "grad_norm": 1.8116045115297836, + "learning_rate": 2.7756619698252053e-08, + "loss": 0.6425, + "step": 31566 + }, + { + "epoch": 0.9674819173715827, + "grad_norm": 0.8179684873016287, + "learning_rate": 2.7704420258492515e-08, + "loss": 0.4154, + "step": 31567 + }, + { + "epoch": 0.9675125658943239, + "grad_norm": 1.8707799628171031, + "learning_rate": 2.765226981203406e-08, + "loss": 0.4619, + "step": 31568 + }, + { + "epoch": 0.9675432144170651, + "grad_norm": 2.128134206845601, + "learning_rate": 2.7600168359390166e-08, + "loss": 0.5838, + "step": 31569 + }, + { + "epoch": 0.9675738629398063, + "grad_norm": 1.9034384309839298, + "learning_rate": 2.7548115901074314e-08, + "loss": 0.6018, + "step": 31570 + }, + { + "epoch": 0.9676045114625476, + "grad_norm": 1.9743833559999289, + "learning_rate": 2.749611243759831e-08, + "loss": 0.5449, + "step": 31571 + }, + { + "epoch": 0.9676351599852887, + "grad_norm": 1.860885440589818, + "learning_rate": 2.7444157969475637e-08, + "loss": 0.5967, + "step": 31572 + }, + { + "epoch": 0.96766580850803, + "grad_norm": 2.0401800738120826, + "learning_rate": 2.7392252497218662e-08, + "loss": 0.6248, + "step": 31573 + }, + { + "epoch": 0.9676964570307711, + "grad_norm": 0.7721756719446466, + "learning_rate": 2.7340396021336977e-08, + "loss": 0.3777, + "step": 31574 + }, + { + "epoch": 0.9677271055535123, + "grad_norm": 2.0788969797371526, + "learning_rate": 2.7288588542343508e-08, + "loss": 0.5388, + "step": 31575 + }, + { + "epoch": 0.9677577540762535, + "grad_norm": 1.9285214031131392, + "learning_rate": 2.7236830060747287e-08, + "loss": 0.5883, + "step": 31576 + }, + { + "epoch": 0.9677884025989947, + "grad_norm": 0.8284953580565892, + 
"learning_rate": 2.7185120577059575e-08, + "loss": 0.4236, + "step": 31577 + }, + { + "epoch": 0.9678190511217359, + "grad_norm": 1.681926057469386, + "learning_rate": 2.713346009178941e-08, + "loss": 0.5524, + "step": 31578 + }, + { + "epoch": 0.9678496996444771, + "grad_norm": 1.8521961149955235, + "learning_rate": 2.7081848605444716e-08, + "loss": 0.4688, + "step": 31579 + }, + { + "epoch": 0.9678803481672184, + "grad_norm": 0.8368240775432254, + "learning_rate": 2.7030286118535643e-08, + "loss": 0.3657, + "step": 31580 + }, + { + "epoch": 0.9679109966899595, + "grad_norm": 2.010674970793177, + "learning_rate": 2.6978772631569007e-08, + "loss": 0.5923, + "step": 31581 + }, + { + "epoch": 0.9679416452127008, + "grad_norm": 1.9196006807777954, + "learning_rate": 2.692730814505329e-08, + "loss": 0.5877, + "step": 31582 + }, + { + "epoch": 0.9679722937354419, + "grad_norm": 2.177545720301933, + "learning_rate": 2.687589265949475e-08, + "loss": 0.5874, + "step": 31583 + }, + { + "epoch": 0.9680029422581832, + "grad_norm": 1.833086639477431, + "learning_rate": 2.682452617540077e-08, + "loss": 0.5536, + "step": 31584 + }, + { + "epoch": 0.9680335907809243, + "grad_norm": 1.8041853226954854, + "learning_rate": 2.6773208693277595e-08, + "loss": 0.4958, + "step": 31585 + }, + { + "epoch": 0.9680642393036656, + "grad_norm": 2.136535195561452, + "learning_rate": 2.672194021362984e-08, + "loss": 0.6308, + "step": 31586 + }, + { + "epoch": 0.9680948878264067, + "grad_norm": 2.0201420407193957, + "learning_rate": 2.667072073696375e-08, + "loss": 0.6036, + "step": 31587 + }, + { + "epoch": 0.968125536349148, + "grad_norm": 1.9495336060479784, + "learning_rate": 2.661955026378338e-08, + "loss": 0.5908, + "step": 31588 + }, + { + "epoch": 0.9681561848718891, + "grad_norm": 2.0573350943642454, + "learning_rate": 2.6568428794593314e-08, + "loss": 0.6136, + "step": 31589 + }, + { + "epoch": 0.9681868333946304, + "grad_norm": 1.7870775651935704, + "learning_rate": 2.6517356329896492e-08, + "loss": 0.5441, + "step": 31590 + }, + { + "epoch": 0.9682174819173716, + "grad_norm": 2.1764059653391437, + "learning_rate": 2.6466332870196953e-08, + "loss": 0.5716, + "step": 31591 + }, + { + "epoch": 0.9682481304401128, + "grad_norm": 1.8836563021164112, + "learning_rate": 2.641535841599707e-08, + "loss": 0.6241, + "step": 31592 + }, + { + "epoch": 0.968278778962854, + "grad_norm": 2.168269810271015, + "learning_rate": 2.6364432967799225e-08, + "loss": 0.6342, + "step": 31593 + }, + { + "epoch": 0.9683094274855952, + "grad_norm": 1.7209610432009081, + "learning_rate": 2.6313556526105234e-08, + "loss": 0.5974, + "step": 31594 + }, + { + "epoch": 0.9683400760083364, + "grad_norm": 1.9401466440249302, + "learning_rate": 2.626272909141636e-08, + "loss": 0.5811, + "step": 31595 + }, + { + "epoch": 0.9683707245310776, + "grad_norm": 2.146911104157268, + "learning_rate": 2.6211950664233322e-08, + "loss": 0.5926, + "step": 31596 + }, + { + "epoch": 0.9684013730538188, + "grad_norm": 1.8544011831448521, + "learning_rate": 2.6161221245056266e-08, + "loss": 0.5665, + "step": 31597 + }, + { + "epoch": 0.96843202157656, + "grad_norm": 0.7952085673164468, + "learning_rate": 2.6110540834385912e-08, + "loss": 0.4088, + "step": 31598 + }, + { + "epoch": 0.9684626700993012, + "grad_norm": 0.7774524931783084, + "learning_rate": 2.605990943272074e-08, + "loss": 0.3848, + "step": 31599 + }, + { + "epoch": 0.9684933186220425, + "grad_norm": 0.7950984614467173, + "learning_rate": 2.600932704056036e-08, + "loss": 0.3869, + "step": 31600 + }, 
+ { + "epoch": 0.9685239671447836, + "grad_norm": 1.7432035296464168, + "learning_rate": 2.5958793658401593e-08, + "loss": 0.5232, + "step": 31601 + }, + { + "epoch": 0.9685546156675249, + "grad_norm": 1.9130226069527698, + "learning_rate": 2.5908309286744593e-08, + "loss": 0.5639, + "step": 31602 + }, + { + "epoch": 0.968585264190266, + "grad_norm": 2.045994289937715, + "learning_rate": 2.5857873926085075e-08, + "loss": 0.5727, + "step": 31603 + }, + { + "epoch": 0.9686159127130073, + "grad_norm": 1.7753027882791397, + "learning_rate": 2.5807487576920977e-08, + "loss": 0.529, + "step": 31604 + }, + { + "epoch": 0.9686465612357484, + "grad_norm": 1.8507580777005297, + "learning_rate": 2.575715023974801e-08, + "loss": 0.5042, + "step": 31605 + }, + { + "epoch": 0.9686772097584896, + "grad_norm": 1.857346060947962, + "learning_rate": 2.5706861915062996e-08, + "loss": 0.5389, + "step": 31606 + }, + { + "epoch": 0.9687078582812308, + "grad_norm": 0.8060492107758652, + "learning_rate": 2.5656622603361104e-08, + "loss": 0.3961, + "step": 31607 + }, + { + "epoch": 0.968738506803972, + "grad_norm": 2.4581690419325426, + "learning_rate": 2.5606432305136376e-08, + "loss": 0.5643, + "step": 31608 + }, + { + "epoch": 0.9687691553267133, + "grad_norm": 1.7237774715453524, + "learning_rate": 2.5556291020885084e-08, + "loss": 0.5701, + "step": 31609 + }, + { + "epoch": 0.9687998038494544, + "grad_norm": 0.7585619091492547, + "learning_rate": 2.5506198751100163e-08, + "loss": 0.3811, + "step": 31610 + }, + { + "epoch": 0.9688304523721957, + "grad_norm": 1.7846880920330814, + "learning_rate": 2.5456155496275114e-08, + "loss": 0.5246, + "step": 31611 + }, + { + "epoch": 0.9688611008949368, + "grad_norm": 1.9667818983397316, + "learning_rate": 2.5406161256903982e-08, + "loss": 0.547, + "step": 31612 + }, + { + "epoch": 0.9688917494176781, + "grad_norm": 2.140182203850835, + "learning_rate": 2.5356216033478598e-08, + "loss": 0.5637, + "step": 31613 + }, + { + "epoch": 0.9689223979404192, + "grad_norm": 1.9446237340186572, + "learning_rate": 2.5306319826490788e-08, + "loss": 0.5682, + "step": 31614 + }, + { + "epoch": 0.9689530464631605, + "grad_norm": 0.7961103698176147, + "learning_rate": 2.5256472636432938e-08, + "loss": 0.4051, + "step": 31615 + }, + { + "epoch": 0.9689836949859016, + "grad_norm": 2.1382915113686045, + "learning_rate": 2.5206674463795766e-08, + "loss": 0.6486, + "step": 31616 + }, + { + "epoch": 0.9690143435086429, + "grad_norm": 1.9737141860612677, + "learning_rate": 2.5156925309070544e-08, + "loss": 0.4684, + "step": 31617 + }, + { + "epoch": 0.9690449920313841, + "grad_norm": 1.813522577989904, + "learning_rate": 2.510722517274633e-08, + "loss": 0.6172, + "step": 31618 + }, + { + "epoch": 0.9690756405541253, + "grad_norm": 2.078618026637253, + "learning_rate": 2.5057574055313837e-08, + "loss": 0.4721, + "step": 31619 + }, + { + "epoch": 0.9691062890768665, + "grad_norm": 2.3734786858625596, + "learning_rate": 2.500797195726212e-08, + "loss": 0.6571, + "step": 31620 + }, + { + "epoch": 0.9691369375996077, + "grad_norm": 1.8868400678756356, + "learning_rate": 2.4958418879079683e-08, + "loss": 0.5635, + "step": 31621 + }, + { + "epoch": 0.9691675861223489, + "grad_norm": 2.041456042880567, + "learning_rate": 2.4908914821255016e-08, + "loss": 0.6543, + "step": 31622 + }, + { + "epoch": 0.9691982346450901, + "grad_norm": 2.0257488825910133, + "learning_rate": 2.4859459784275507e-08, + "loss": 0.5565, + "step": 31623 + }, + { + "epoch": 0.9692288831678313, + "grad_norm": 1.9673505179103024, + 
"learning_rate": 2.481005376862855e-08, + "loss": 0.558, + "step": 31624 + }, + { + "epoch": 0.9692595316905726, + "grad_norm": 1.9528104547821317, + "learning_rate": 2.4760696774800973e-08, + "loss": 0.5521, + "step": 31625 + }, + { + "epoch": 0.9692901802133137, + "grad_norm": 1.8059067267622644, + "learning_rate": 2.4711388803279613e-08, + "loss": 0.5354, + "step": 31626 + }, + { + "epoch": 0.969320828736055, + "grad_norm": 1.899405594437495, + "learning_rate": 2.466212985454963e-08, + "loss": 0.4698, + "step": 31627 + }, + { + "epoch": 0.9693514772587961, + "grad_norm": 0.8239650137708713, + "learning_rate": 2.4612919929096756e-08, + "loss": 0.3816, + "step": 31628 + }, + { + "epoch": 0.9693821257815374, + "grad_norm": 2.012226812203538, + "learning_rate": 2.4563759027406155e-08, + "loss": 0.6307, + "step": 31629 + }, + { + "epoch": 0.9694127743042785, + "grad_norm": 0.8067682354016605, + "learning_rate": 2.4514647149961878e-08, + "loss": 0.3905, + "step": 31630 + }, + { + "epoch": 0.9694434228270198, + "grad_norm": 1.941320602695435, + "learning_rate": 2.4465584297247434e-08, + "loss": 0.6443, + "step": 31631 + }, + { + "epoch": 0.9694740713497609, + "grad_norm": 1.879302653030023, + "learning_rate": 2.4416570469746326e-08, + "loss": 0.5631, + "step": 31632 + }, + { + "epoch": 0.9695047198725022, + "grad_norm": 2.067333068556759, + "learning_rate": 2.4367605667942607e-08, + "loss": 0.521, + "step": 31633 + }, + { + "epoch": 0.9695353683952433, + "grad_norm": 1.8825654544028845, + "learning_rate": 2.431868989231756e-08, + "loss": 0.5409, + "step": 31634 + }, + { + "epoch": 0.9695660169179846, + "grad_norm": 2.014735684058113, + "learning_rate": 2.4269823143353578e-08, + "loss": 0.6611, + "step": 31635 + }, + { + "epoch": 0.9695966654407258, + "grad_norm": 1.9607213825235612, + "learning_rate": 2.422100542153194e-08, + "loss": 0.661, + "step": 31636 + }, + { + "epoch": 0.9696273139634669, + "grad_norm": 1.7204906138446312, + "learning_rate": 2.4172236727333933e-08, + "loss": 0.5698, + "step": 31637 + }, + { + "epoch": 0.9696579624862082, + "grad_norm": 1.88744793789759, + "learning_rate": 2.412351706123972e-08, + "loss": 0.5271, + "step": 31638 + }, + { + "epoch": 0.9696886110089493, + "grad_norm": 1.7055463717378576, + "learning_rate": 2.4074846423730035e-08, + "loss": 0.5721, + "step": 31639 + }, + { + "epoch": 0.9697192595316906, + "grad_norm": 1.7352670595475561, + "learning_rate": 2.4026224815283937e-08, + "loss": 0.4741, + "step": 31640 + }, + { + "epoch": 0.9697499080544317, + "grad_norm": 1.946082943573191, + "learning_rate": 2.3977652236380488e-08, + "loss": 0.6292, + "step": 31641 + }, + { + "epoch": 0.969780556577173, + "grad_norm": 2.124939488767779, + "learning_rate": 2.3929128687498748e-08, + "loss": 0.5298, + "step": 31642 + }, + { + "epoch": 0.9698112050999141, + "grad_norm": 1.9089508863642957, + "learning_rate": 2.3880654169116113e-08, + "loss": 0.6607, + "step": 31643 + }, + { + "epoch": 0.9698418536226554, + "grad_norm": 1.9087751144010536, + "learning_rate": 2.3832228681710533e-08, + "loss": 0.5251, + "step": 31644 + }, + { + "epoch": 0.9698725021453966, + "grad_norm": 2.0413881929604467, + "learning_rate": 2.378385222575996e-08, + "loss": 0.4984, + "step": 31645 + }, + { + "epoch": 0.9699031506681378, + "grad_norm": 1.8496453294741515, + "learning_rate": 2.3735524801739017e-08, + "loss": 0.6167, + "step": 31646 + }, + { + "epoch": 0.969933799190879, + "grad_norm": 1.8547367080603558, + "learning_rate": 2.3687246410126207e-08, + "loss": 0.5741, + "step": 31647 + }, 
+ { + "epoch": 0.9699644477136202, + "grad_norm": 2.1301375144675636, + "learning_rate": 2.3639017051396152e-08, + "loss": 0.6155, + "step": 31648 + }, + { + "epoch": 0.9699950962363614, + "grad_norm": 1.6593401284646967, + "learning_rate": 2.3590836726024024e-08, + "loss": 0.4479, + "step": 31649 + }, + { + "epoch": 0.9700257447591026, + "grad_norm": 1.8275144145911169, + "learning_rate": 2.3542705434484448e-08, + "loss": 0.5392, + "step": 31650 + }, + { + "epoch": 0.9700563932818438, + "grad_norm": 1.862592525541383, + "learning_rate": 2.3494623177252042e-08, + "loss": 0.5408, + "step": 31651 + }, + { + "epoch": 0.970087041804585, + "grad_norm": 2.247981300084337, + "learning_rate": 2.3446589954799757e-08, + "loss": 0.5707, + "step": 31652 + }, + { + "epoch": 0.9701176903273262, + "grad_norm": 1.8675764025341877, + "learning_rate": 2.3398605767602224e-08, + "loss": 0.598, + "step": 31653 + }, + { + "epoch": 0.9701483388500675, + "grad_norm": 1.864413533279644, + "learning_rate": 2.3350670616131275e-08, + "loss": 0.5989, + "step": 31654 + }, + { + "epoch": 0.9701789873728086, + "grad_norm": 1.8828620310420088, + "learning_rate": 2.3302784500859877e-08, + "loss": 0.5535, + "step": 31655 + }, + { + "epoch": 0.9702096358955499, + "grad_norm": 1.7992617370335449, + "learning_rate": 2.325494742225931e-08, + "loss": 0.5739, + "step": 31656 + }, + { + "epoch": 0.970240284418291, + "grad_norm": 1.7995026012378927, + "learning_rate": 2.320715938080087e-08, + "loss": 0.6145, + "step": 31657 + }, + { + "epoch": 0.9702709329410323, + "grad_norm": 2.0066979568176038, + "learning_rate": 2.3159420376955844e-08, + "loss": 0.552, + "step": 31658 + }, + { + "epoch": 0.9703015814637734, + "grad_norm": 2.1036663751456692, + "learning_rate": 2.311173041119441e-08, + "loss": 0.6001, + "step": 31659 + }, + { + "epoch": 0.9703322299865147, + "grad_norm": 1.924206883866052, + "learning_rate": 2.3064089483986195e-08, + "loss": 0.5295, + "step": 31660 + }, + { + "epoch": 0.9703628785092558, + "grad_norm": 1.9093049545936391, + "learning_rate": 2.301649759580138e-08, + "loss": 0.6102, + "step": 31661 + }, + { + "epoch": 0.9703935270319971, + "grad_norm": 0.7563579869699992, + "learning_rate": 2.2968954747108474e-08, + "loss": 0.3901, + "step": 31662 + }, + { + "epoch": 0.9704241755547383, + "grad_norm": 2.1021731888142643, + "learning_rate": 2.292146093837544e-08, + "loss": 0.5406, + "step": 31663 + }, + { + "epoch": 0.9704548240774795, + "grad_norm": 0.8157244292000065, + "learning_rate": 2.287401617007079e-08, + "loss": 0.39, + "step": 31664 + }, + { + "epoch": 0.9704854726002207, + "grad_norm": 2.1435704339886987, + "learning_rate": 2.2826620442661927e-08, + "loss": 0.6515, + "step": 31665 + }, + { + "epoch": 0.9705161211229619, + "grad_norm": 0.8415595893255705, + "learning_rate": 2.277927375661626e-08, + "loss": 0.4067, + "step": 31666 + }, + { + "epoch": 0.9705467696457031, + "grad_norm": 2.009353300296861, + "learning_rate": 2.2731976112399522e-08, + "loss": 0.5085, + "step": 31667 + }, + { + "epoch": 0.9705774181684442, + "grad_norm": 1.956765048453026, + "learning_rate": 2.268472751047801e-08, + "loss": 0.5031, + "step": 31668 + }, + { + "epoch": 0.9706080666911855, + "grad_norm": 1.9004518044047338, + "learning_rate": 2.2637527951317462e-08, + "loss": 0.6494, + "step": 31669 + }, + { + "epoch": 0.9706387152139266, + "grad_norm": 1.8718155274215154, + "learning_rate": 2.2590377435383058e-08, + "loss": 0.4551, + "step": 31670 + }, + { + "epoch": 0.9706693637366679, + "grad_norm": 1.9647110919476636, + 
"learning_rate": 2.2543275963138877e-08, + "loss": 0.5332, + "step": 31671 + }, + { + "epoch": 0.970700012259409, + "grad_norm": 1.794410064971025, + "learning_rate": 2.2496223535049544e-08, + "loss": 0.572, + "step": 31672 + }, + { + "epoch": 0.9707306607821503, + "grad_norm": 1.806197453758538, + "learning_rate": 2.244922015157802e-08, + "loss": 0.5229, + "step": 31673 + }, + { + "epoch": 0.9707613093048915, + "grad_norm": 0.8233410423819092, + "learning_rate": 2.2402265813188938e-08, + "loss": 0.3981, + "step": 31674 + }, + { + "epoch": 0.9707919578276327, + "grad_norm": 1.8795598418787671, + "learning_rate": 2.235536052034304e-08, + "loss": 0.5741, + "step": 31675 + }, + { + "epoch": 0.9708226063503739, + "grad_norm": 1.8893215757157407, + "learning_rate": 2.2308504273503285e-08, + "loss": 0.4962, + "step": 31676 + }, + { + "epoch": 0.9708532548731151, + "grad_norm": 2.102784819531181, + "learning_rate": 2.2261697073132084e-08, + "loss": 0.5653, + "step": 31677 + }, + { + "epoch": 0.9708839033958563, + "grad_norm": 2.114911185831545, + "learning_rate": 2.221493891968962e-08, + "loss": 0.6348, + "step": 31678 + }, + { + "epoch": 0.9709145519185975, + "grad_norm": 1.8673771697172117, + "learning_rate": 2.2168229813637198e-08, + "loss": 0.5806, + "step": 31679 + }, + { + "epoch": 0.9709452004413387, + "grad_norm": 1.7069869423920643, + "learning_rate": 2.2121569755434446e-08, + "loss": 0.5265, + "step": 31680 + }, + { + "epoch": 0.97097584896408, + "grad_norm": 1.859994102248004, + "learning_rate": 2.2074958745541553e-08, + "loss": 0.5809, + "step": 31681 + }, + { + "epoch": 0.9710064974868211, + "grad_norm": 1.992877461092978, + "learning_rate": 2.202839678441815e-08, + "loss": 0.5642, + "step": 31682 + }, + { + "epoch": 0.9710371460095624, + "grad_norm": 1.8930743345396819, + "learning_rate": 2.1981883872522204e-08, + "loss": 0.6162, + "step": 31683 + }, + { + "epoch": 0.9710677945323035, + "grad_norm": 0.8012953527842986, + "learning_rate": 2.1935420010312235e-08, + "loss": 0.4162, + "step": 31684 + }, + { + "epoch": 0.9710984430550448, + "grad_norm": 1.83304256105555, + "learning_rate": 2.188900519824677e-08, + "loss": 0.5822, + "step": 31685 + }, + { + "epoch": 0.9711290915777859, + "grad_norm": 1.773847865814823, + "learning_rate": 2.1842639436782664e-08, + "loss": 0.5567, + "step": 31686 + }, + { + "epoch": 0.9711597401005272, + "grad_norm": 1.9543389258816495, + "learning_rate": 2.1796322726376772e-08, + "loss": 0.5326, + "step": 31687 + }, + { + "epoch": 0.9711903886232683, + "grad_norm": 1.930669985047454, + "learning_rate": 2.17500550674854e-08, + "loss": 0.6054, + "step": 31688 + }, + { + "epoch": 0.9712210371460096, + "grad_norm": 2.077277109463137, + "learning_rate": 2.1703836460564286e-08, + "loss": 0.5302, + "step": 31689 + }, + { + "epoch": 0.9712516856687508, + "grad_norm": 1.9646379998745216, + "learning_rate": 2.1657666906069185e-08, + "loss": 0.6431, + "step": 31690 + }, + { + "epoch": 0.971282334191492, + "grad_norm": 1.7570750959405563, + "learning_rate": 2.161154640445473e-08, + "loss": 0.5562, + "step": 31691 + }, + { + "epoch": 0.9713129827142332, + "grad_norm": 1.9138223433207835, + "learning_rate": 2.1565474956175002e-08, + "loss": 0.5984, + "step": 31692 + }, + { + "epoch": 0.9713436312369744, + "grad_norm": 2.0152320639284205, + "learning_rate": 2.1519452561685194e-08, + "loss": 0.5813, + "step": 31693 + }, + { + "epoch": 0.9713742797597156, + "grad_norm": 1.9778368039304324, + "learning_rate": 2.147347922143772e-08, + "loss": 0.5888, + "step": 31694 + }, + { 
+ "epoch": 0.9714049282824568, + "grad_norm": 0.7745660225731427, + "learning_rate": 2.1427554935886106e-08, + "loss": 0.4223, + "step": 31695 + }, + { + "epoch": 0.971435576805198, + "grad_norm": 2.1313712868152686, + "learning_rate": 2.1381679705482217e-08, + "loss": 0.4876, + "step": 31696 + }, + { + "epoch": 0.9714662253279392, + "grad_norm": 1.759046324424533, + "learning_rate": 2.133585353067902e-08, + "loss": 0.6256, + "step": 31697 + }, + { + "epoch": 0.9714968738506804, + "grad_norm": 2.1823790504017255, + "learning_rate": 2.129007641192671e-08, + "loss": 0.604, + "step": 31698 + }, + { + "epoch": 0.9715275223734215, + "grad_norm": 1.8685044328209695, + "learning_rate": 2.12443483496777e-08, + "loss": 0.5795, + "step": 31699 + }, + { + "epoch": 0.9715581708961628, + "grad_norm": 1.9378738013823782, + "learning_rate": 2.1198669344382196e-08, + "loss": 0.5776, + "step": 31700 + }, + { + "epoch": 0.971588819418904, + "grad_norm": 1.8131385133098554, + "learning_rate": 2.1153039396489274e-08, + "loss": 0.5353, + "step": 31701 + }, + { + "epoch": 0.9716194679416452, + "grad_norm": 1.7955889281519273, + "learning_rate": 2.110745850645024e-08, + "loss": 0.5609, + "step": 31702 + }, + { + "epoch": 0.9716501164643864, + "grad_norm": 1.6682649507972669, + "learning_rate": 2.1061926674712518e-08, + "loss": 0.4489, + "step": 31703 + }, + { + "epoch": 0.9716807649871276, + "grad_norm": 1.8812673694389672, + "learning_rate": 2.10164439017263e-08, + "loss": 0.5655, + "step": 31704 + }, + { + "epoch": 0.9717114135098688, + "grad_norm": 1.8750859760512695, + "learning_rate": 2.0971010187938456e-08, + "loss": 0.6502, + "step": 31705 + }, + { + "epoch": 0.97174206203261, + "grad_norm": 1.886794259180863, + "learning_rate": 2.092562553379751e-08, + "loss": 0.5187, + "step": 31706 + }, + { + "epoch": 0.9717727105553512, + "grad_norm": 0.8254338680209592, + "learning_rate": 2.088028993975033e-08, + "loss": 0.4247, + "step": 31707 + }, + { + "epoch": 0.9718033590780925, + "grad_norm": 1.883009862812064, + "learning_rate": 2.0835003406243227e-08, + "loss": 0.5501, + "step": 31708 + }, + { + "epoch": 0.9718340076008336, + "grad_norm": 1.8346855279340497, + "learning_rate": 2.078976593372306e-08, + "loss": 0.5633, + "step": 31709 + }, + { + "epoch": 0.9718646561235749, + "grad_norm": 1.861701461056088, + "learning_rate": 2.074457752263559e-08, + "loss": 0.5902, + "step": 31710 + }, + { + "epoch": 0.971895304646316, + "grad_norm": 1.8987942830190705, + "learning_rate": 2.06994381734249e-08, + "loss": 0.513, + "step": 31711 + }, + { + "epoch": 0.9719259531690573, + "grad_norm": 2.131301755434329, + "learning_rate": 2.065434788653786e-08, + "loss": 0.5126, + "step": 31712 + }, + { + "epoch": 0.9719566016917984, + "grad_norm": 1.9778591535418255, + "learning_rate": 2.0609306662416896e-08, + "loss": 0.5541, + "step": 31713 + }, + { + "epoch": 0.9719872502145397, + "grad_norm": 2.122538070025594, + "learning_rate": 2.0564314501506088e-08, + "loss": 0.5538, + "step": 31714 + }, + { + "epoch": 0.9720178987372808, + "grad_norm": 1.8711682544373478, + "learning_rate": 2.051937140425009e-08, + "loss": 0.5384, + "step": 31715 + }, + { + "epoch": 0.9720485472600221, + "grad_norm": 1.8438206765202634, + "learning_rate": 2.0474477371090208e-08, + "loss": 0.5287, + "step": 31716 + }, + { + "epoch": 0.9720791957827633, + "grad_norm": 1.813943177732691, + "learning_rate": 2.042963240246887e-08, + "loss": 0.4996, + "step": 31717 + }, + { + "epoch": 0.9721098443055045, + "grad_norm": 0.8577991228454601, + "learning_rate": 
2.0384836498829608e-08, + "loss": 0.4252, + "step": 31718 + }, + { + "epoch": 0.9721404928282457, + "grad_norm": 1.8206755338984966, + "learning_rate": 2.0340089660611518e-08, + "loss": 0.5397, + "step": 31719 + }, + { + "epoch": 0.9721711413509869, + "grad_norm": 2.114471621727208, + "learning_rate": 2.0295391888257577e-08, + "loss": 0.5648, + "step": 31720 + }, + { + "epoch": 0.9722017898737281, + "grad_norm": 1.880044973212771, + "learning_rate": 2.0250743182206877e-08, + "loss": 0.6168, + "step": 31721 + }, + { + "epoch": 0.9722324383964693, + "grad_norm": 2.1708830790809843, + "learning_rate": 2.0206143542899625e-08, + "loss": 0.5932, + "step": 31722 + }, + { + "epoch": 0.9722630869192105, + "grad_norm": 1.8447174952425214, + "learning_rate": 2.016159297077547e-08, + "loss": 0.6131, + "step": 31723 + }, + { + "epoch": 0.9722937354419517, + "grad_norm": 2.085821798610382, + "learning_rate": 2.0117091466272943e-08, + "loss": 0.5577, + "step": 31724 + }, + { + "epoch": 0.9723243839646929, + "grad_norm": 2.0055023348976633, + "learning_rate": 2.0072639029831142e-08, + "loss": 0.5919, + "step": 31725 + }, + { + "epoch": 0.9723550324874342, + "grad_norm": 1.734509614026842, + "learning_rate": 2.002823566188805e-08, + "loss": 0.5273, + "step": 31726 + }, + { + "epoch": 0.9723856810101753, + "grad_norm": 1.7729707934366776, + "learning_rate": 1.9983881362880542e-08, + "loss": 0.5461, + "step": 31727 + }, + { + "epoch": 0.9724163295329166, + "grad_norm": 2.336259662547532, + "learning_rate": 1.993957613324604e-08, + "loss": 0.5409, + "step": 31728 + }, + { + "epoch": 0.9724469780556577, + "grad_norm": 0.8156128196305433, + "learning_rate": 1.9895319973421423e-08, + "loss": 0.3879, + "step": 31729 + }, + { + "epoch": 0.9724776265783989, + "grad_norm": 1.9857183065014978, + "learning_rate": 1.985111288384245e-08, + "loss": 0.5216, + "step": 31730 + }, + { + "epoch": 0.9725082751011401, + "grad_norm": 2.0653140176406795, + "learning_rate": 1.9806954864944328e-08, + "loss": 0.5534, + "step": 31731 + }, + { + "epoch": 0.9725389236238813, + "grad_norm": 1.9989358474339634, + "learning_rate": 1.976284591716282e-08, + "loss": 0.5394, + "step": 31732 + }, + { + "epoch": 0.9725695721466225, + "grad_norm": 0.8229407761117813, + "learning_rate": 1.9718786040932024e-08, + "loss": 0.385, + "step": 31733 + }, + { + "epoch": 0.9726002206693637, + "grad_norm": 0.8629277136071317, + "learning_rate": 1.9674775236686595e-08, + "loss": 0.398, + "step": 31734 + }, + { + "epoch": 0.972630869192105, + "grad_norm": 1.9596135714795988, + "learning_rate": 1.9630813504859515e-08, + "loss": 0.562, + "step": 31735 + }, + { + "epoch": 0.9726615177148461, + "grad_norm": 1.9012758586017626, + "learning_rate": 1.9586900845884327e-08, + "loss": 0.5608, + "step": 31736 + }, + { + "epoch": 0.9726921662375874, + "grad_norm": 2.012064780113119, + "learning_rate": 1.954303726019402e-08, + "loss": 0.5452, + "step": 31737 + }, + { + "epoch": 0.9727228147603285, + "grad_norm": 1.9294832815700318, + "learning_rate": 1.9499222748219916e-08, + "loss": 0.5147, + "step": 31738 + }, + { + "epoch": 0.9727534632830698, + "grad_norm": 2.0565172155363802, + "learning_rate": 1.9455457310394444e-08, + "loss": 0.6891, + "step": 31739 + }, + { + "epoch": 0.9727841118058109, + "grad_norm": 1.8531471024343475, + "learning_rate": 1.941174094714948e-08, + "loss": 0.5486, + "step": 31740 + }, + { + "epoch": 0.9728147603285522, + "grad_norm": 1.8162914400325947, + "learning_rate": 1.936807365891413e-08, + "loss": 0.4807, + "step": 31741 + }, + { + 
"epoch": 0.9728454088512933, + "grad_norm": 1.8421662862706534, + "learning_rate": 1.9324455446119718e-08, + "loss": 0.4853, + "step": 31742 + }, + { + "epoch": 0.9728760573740346, + "grad_norm": 2.3277750436014872, + "learning_rate": 1.928088630919589e-08, + "loss": 0.6013, + "step": 31743 + }, + { + "epoch": 0.9729067058967757, + "grad_norm": 1.9124974815162183, + "learning_rate": 1.9237366248571754e-08, + "loss": 0.5009, + "step": 31744 + }, + { + "epoch": 0.972937354419517, + "grad_norm": 0.9087435136442549, + "learning_rate": 1.919389526467641e-08, + "loss": 0.413, + "step": 31745 + }, + { + "epoch": 0.9729680029422582, + "grad_norm": 1.8794295787980588, + "learning_rate": 1.9150473357937847e-08, + "loss": 0.4967, + "step": 31746 + }, + { + "epoch": 0.9729986514649994, + "grad_norm": 1.7646167314241479, + "learning_rate": 1.9107100528784063e-08, + "loss": 0.4741, + "step": 31747 + }, + { + "epoch": 0.9730292999877406, + "grad_norm": 2.0031868811527476, + "learning_rate": 1.9063776777642485e-08, + "loss": 0.4735, + "step": 31748 + }, + { + "epoch": 0.9730599485104818, + "grad_norm": 0.840516979562198, + "learning_rate": 1.9020502104939996e-08, + "loss": 0.4001, + "step": 31749 + }, + { + "epoch": 0.973090597033223, + "grad_norm": 1.8618730502961784, + "learning_rate": 1.8977276511102927e-08, + "loss": 0.6095, + "step": 31750 + }, + { + "epoch": 0.9731212455559642, + "grad_norm": 0.7912796016758243, + "learning_rate": 1.8934099996557044e-08, + "loss": 0.3996, + "step": 31751 + }, + { + "epoch": 0.9731518940787054, + "grad_norm": 1.9261470030455556, + "learning_rate": 1.8890972561728115e-08, + "loss": 0.625, + "step": 31752 + }, + { + "epoch": 0.9731825426014467, + "grad_norm": 2.29967521633271, + "learning_rate": 1.8847894207040806e-08, + "loss": 0.6105, + "step": 31753 + }, + { + "epoch": 0.9732131911241878, + "grad_norm": 2.009422833147141, + "learning_rate": 1.8804864932919774e-08, + "loss": 0.6359, + "step": 31754 + }, + { + "epoch": 0.9732438396469291, + "grad_norm": 1.9494433399901892, + "learning_rate": 1.8761884739788573e-08, + "loss": 0.6363, + "step": 31755 + }, + { + "epoch": 0.9732744881696702, + "grad_norm": 2.0846405108605617, + "learning_rate": 1.8718953628071303e-08, + "loss": 0.602, + "step": 31756 + }, + { + "epoch": 0.9733051366924115, + "grad_norm": 1.8826535696512403, + "learning_rate": 1.867607159819096e-08, + "loss": 0.5392, + "step": 31757 + }, + { + "epoch": 0.9733357852151526, + "grad_norm": 2.0353766120304315, + "learning_rate": 1.863323865056943e-08, + "loss": 0.5258, + "step": 31758 + }, + { + "epoch": 0.9733664337378939, + "grad_norm": 1.7770506679099247, + "learning_rate": 1.8590454785629152e-08, + "loss": 0.5677, + "step": 31759 + }, + { + "epoch": 0.973397082260635, + "grad_norm": 1.8230249052310143, + "learning_rate": 1.8547720003792013e-08, + "loss": 0.5576, + "step": 31760 + }, + { + "epoch": 0.9734277307833762, + "grad_norm": 0.8303730231679375, + "learning_rate": 1.850503430547823e-08, + "loss": 0.3831, + "step": 31761 + }, + { + "epoch": 0.9734583793061174, + "grad_norm": 2.1031554000366546, + "learning_rate": 1.8462397691109135e-08, + "loss": 0.5709, + "step": 31762 + }, + { + "epoch": 0.9734890278288586, + "grad_norm": 1.8264785526734444, + "learning_rate": 1.8419810161104946e-08, + "loss": 0.5095, + "step": 31763 + }, + { + "epoch": 0.9735196763515999, + "grad_norm": 1.8099336037944367, + "learning_rate": 1.837727171588477e-08, + "loss": 0.4803, + "step": 31764 + }, + { + "epoch": 0.973550324874341, + "grad_norm": 2.1131556471562436, + 
"learning_rate": 1.833478235586772e-08, + "loss": 0.6185, + "step": 31765 + }, + { + "epoch": 0.9735809733970823, + "grad_norm": 0.7854385290125679, + "learning_rate": 1.8292342081472346e-08, + "loss": 0.3843, + "step": 31766 + }, + { + "epoch": 0.9736116219198234, + "grad_norm": 1.6901166898078996, + "learning_rate": 1.8249950893117762e-08, + "loss": 0.5356, + "step": 31767 + }, + { + "epoch": 0.9736422704425647, + "grad_norm": 1.725879803389061, + "learning_rate": 1.8207608791220855e-08, + "loss": 0.4863, + "step": 31768 + }, + { + "epoch": 0.9736729189653058, + "grad_norm": 1.7964150795381162, + "learning_rate": 1.8165315776199065e-08, + "loss": 0.6052, + "step": 31769 + }, + { + "epoch": 0.9737035674880471, + "grad_norm": 1.989179043641693, + "learning_rate": 1.8123071848469286e-08, + "loss": 0.6485, + "step": 31770 + }, + { + "epoch": 0.9737342160107882, + "grad_norm": 0.7859053513614543, + "learning_rate": 1.8080877008447296e-08, + "loss": 0.4073, + "step": 31771 + }, + { + "epoch": 0.9737648645335295, + "grad_norm": 1.8978838739524884, + "learning_rate": 1.8038731256549426e-08, + "loss": 0.5794, + "step": 31772 + }, + { + "epoch": 0.9737955130562707, + "grad_norm": 1.9908427872997505, + "learning_rate": 1.7996634593189787e-08, + "loss": 0.597, + "step": 31773 + }, + { + "epoch": 0.9738261615790119, + "grad_norm": 1.5967866584919441, + "learning_rate": 1.795458701878472e-08, + "loss": 0.4952, + "step": 31774 + }, + { + "epoch": 0.9738568101017531, + "grad_norm": 1.9643371243200334, + "learning_rate": 1.7912588533747777e-08, + "loss": 0.6437, + "step": 31775 + }, + { + "epoch": 0.9738874586244943, + "grad_norm": 1.9126675166128417, + "learning_rate": 1.787063913849252e-08, + "loss": 0.5316, + "step": 31776 + }, + { + "epoch": 0.9739181071472355, + "grad_norm": 1.9699166702838387, + "learning_rate": 1.7828738833433055e-08, + "loss": 0.6154, + "step": 31777 + }, + { + "epoch": 0.9739487556699767, + "grad_norm": 1.9422766872167272, + "learning_rate": 1.7786887618981287e-08, + "loss": 0.5166, + "step": 31778 + }, + { + "epoch": 0.9739794041927179, + "grad_norm": 1.8524906869774203, + "learning_rate": 1.774508549555021e-08, + "loss": 0.6324, + "step": 31779 + }, + { + "epoch": 0.9740100527154592, + "grad_norm": 1.9705866800271252, + "learning_rate": 1.7703332463551714e-08, + "loss": 0.544, + "step": 31780 + }, + { + "epoch": 0.9740407012382003, + "grad_norm": 1.919469413939203, + "learning_rate": 1.7661628523397146e-08, + "loss": 0.6036, + "step": 31781 + }, + { + "epoch": 0.9740713497609416, + "grad_norm": 2.0112404659581986, + "learning_rate": 1.7619973675496728e-08, + "loss": 0.6189, + "step": 31782 + }, + { + "epoch": 0.9741019982836827, + "grad_norm": 1.914928741094061, + "learning_rate": 1.7578367920262352e-08, + "loss": 0.5851, + "step": 31783 + }, + { + "epoch": 0.974132646806424, + "grad_norm": 2.0480402571180076, + "learning_rate": 1.7536811258102582e-08, + "loss": 0.66, + "step": 31784 + }, + { + "epoch": 0.9741632953291651, + "grad_norm": 1.7753304945172232, + "learning_rate": 1.7495303689427644e-08, + "loss": 0.5894, + "step": 31785 + }, + { + "epoch": 0.9741939438519064, + "grad_norm": 2.197130568063493, + "learning_rate": 1.7453845214646102e-08, + "loss": 0.6239, + "step": 31786 + }, + { + "epoch": 0.9742245923746475, + "grad_norm": 1.852973375201662, + "learning_rate": 1.741243583416652e-08, + "loss": 0.6141, + "step": 31787 + }, + { + "epoch": 0.9742552408973888, + "grad_norm": 2.117679987920416, + "learning_rate": 1.7371075548397454e-08, + "loss": 0.5323, + "step": 31788 
+ }, + { + "epoch": 0.97428588942013, + "grad_norm": 1.8092498399203631, + "learning_rate": 1.7329764357746358e-08, + "loss": 0.6021, + "step": 31789 + }, + { + "epoch": 0.9743165379428712, + "grad_norm": 1.82477653109671, + "learning_rate": 1.7288502262619022e-08, + "loss": 0.597, + "step": 31790 + }, + { + "epoch": 0.9743471864656124, + "grad_norm": 2.0647067792794154, + "learning_rate": 1.724728926342345e-08, + "loss": 0.4964, + "step": 31791 + }, + { + "epoch": 0.9743778349883535, + "grad_norm": 2.1253228310320376, + "learning_rate": 1.7206125360565427e-08, + "loss": 0.5912, + "step": 31792 + }, + { + "epoch": 0.9744084835110948, + "grad_norm": 2.1208712756439936, + "learning_rate": 1.716501055445019e-08, + "loss": 0.608, + "step": 31793 + }, + { + "epoch": 0.9744391320338359, + "grad_norm": 2.1255329598103785, + "learning_rate": 1.7123944845482963e-08, + "loss": 0.5501, + "step": 31794 + }, + { + "epoch": 0.9744697805565772, + "grad_norm": 0.7969459487385565, + "learning_rate": 1.708292823406843e-08, + "loss": 0.4001, + "step": 31795 + }, + { + "epoch": 0.9745004290793183, + "grad_norm": 2.2695723358750906, + "learning_rate": 1.7041960720610708e-08, + "loss": 0.4782, + "step": 31796 + }, + { + "epoch": 0.9745310776020596, + "grad_norm": 1.7966942943838466, + "learning_rate": 1.700104230551336e-08, + "loss": 0.4872, + "step": 31797 + }, + { + "epoch": 0.9745617261248007, + "grad_norm": 1.7706771841212576, + "learning_rate": 1.6960172989179958e-08, + "loss": 0.4957, + "step": 31798 + }, + { + "epoch": 0.974592374647542, + "grad_norm": 1.9538933170741817, + "learning_rate": 1.6919352772012954e-08, + "loss": 0.6488, + "step": 31799 + }, + { + "epoch": 0.9746230231702832, + "grad_norm": 2.082111695367197, + "learning_rate": 1.687858165441425e-08, + "loss": 0.5399, + "step": 31800 + }, + { + "epoch": 0.9746536716930244, + "grad_norm": 1.7049984028793572, + "learning_rate": 1.6837859636786303e-08, + "loss": 0.5632, + "step": 31801 + }, + { + "epoch": 0.9746843202157656, + "grad_norm": 1.8516884406125789, + "learning_rate": 1.6797186719529347e-08, + "loss": 0.5673, + "step": 31802 + }, + { + "epoch": 0.9747149687385068, + "grad_norm": 1.8809970896855077, + "learning_rate": 1.6756562903045282e-08, + "loss": 0.5824, + "step": 31803 + }, + { + "epoch": 0.974745617261248, + "grad_norm": 1.8563824960230093, + "learning_rate": 1.671598818773379e-08, + "loss": 0.5857, + "step": 31804 + }, + { + "epoch": 0.9747762657839892, + "grad_norm": 1.7418916517143348, + "learning_rate": 1.667546257399455e-08, + "loss": 0.4899, + "step": 31805 + }, + { + "epoch": 0.9748069143067304, + "grad_norm": 1.9950178961961116, + "learning_rate": 1.663498606222669e-08, + "loss": 0.5932, + "step": 31806 + }, + { + "epoch": 0.9748375628294716, + "grad_norm": 0.7902598228087562, + "learning_rate": 1.6594558652829884e-08, + "loss": 0.3976, + "step": 31807 + }, + { + "epoch": 0.9748682113522128, + "grad_norm": 1.9626182511875137, + "learning_rate": 1.6554180346201597e-08, + "loss": 0.5979, + "step": 31808 + }, + { + "epoch": 0.9748988598749541, + "grad_norm": 2.010177646507929, + "learning_rate": 1.6513851142739845e-08, + "loss": 0.6174, + "step": 31809 + }, + { + "epoch": 0.9749295083976952, + "grad_norm": 2.154801809410911, + "learning_rate": 1.647357104284264e-08, + "loss": 0.5871, + "step": 31810 + }, + { + "epoch": 0.9749601569204365, + "grad_norm": 1.7801149047409082, + "learning_rate": 1.6433340046906334e-08, + "loss": 0.5465, + "step": 31811 + }, + { + "epoch": 0.9749908054431776, + "grad_norm": 0.8311325480361897, + 
"learning_rate": 1.639315815532727e-08, + "loss": 0.4187, + "step": 31812 + }, + { + "epoch": 0.9750214539659189, + "grad_norm": 1.9389854502456616, + "learning_rate": 1.635302536850181e-08, + "loss": 0.6028, + "step": 31813 + }, + { + "epoch": 0.97505210248866, + "grad_norm": 1.8734629376714331, + "learning_rate": 1.6312941686824622e-08, + "loss": 0.5159, + "step": 31814 + }, + { + "epoch": 0.9750827510114013, + "grad_norm": 1.912781759015803, + "learning_rate": 1.6272907110691516e-08, + "loss": 0.6172, + "step": 31815 + }, + { + "epoch": 0.9751133995341424, + "grad_norm": 2.028138023773924, + "learning_rate": 1.6232921640497167e-08, + "loss": 0.6268, + "step": 31816 + }, + { + "epoch": 0.9751440480568837, + "grad_norm": 2.3696234188935157, + "learning_rate": 1.6192985276634042e-08, + "loss": 0.624, + "step": 31817 + }, + { + "epoch": 0.9751746965796249, + "grad_norm": 0.838105902149964, + "learning_rate": 1.6153098019496826e-08, + "loss": 0.3973, + "step": 31818 + }, + { + "epoch": 0.9752053451023661, + "grad_norm": 1.7749581638307665, + "learning_rate": 1.6113259869478536e-08, + "loss": 0.565, + "step": 31819 + }, + { + "epoch": 0.9752359936251073, + "grad_norm": 1.609906522951461, + "learning_rate": 1.607347082697164e-08, + "loss": 0.476, + "step": 31820 + }, + { + "epoch": 0.9752666421478485, + "grad_norm": 1.9032678378793468, + "learning_rate": 1.6033730892367484e-08, + "loss": 0.5567, + "step": 31821 + }, + { + "epoch": 0.9752972906705897, + "grad_norm": 2.003318210925103, + "learning_rate": 1.5994040066058535e-08, + "loss": 0.5803, + "step": 31822 + }, + { + "epoch": 0.9753279391933308, + "grad_norm": 1.9975021116174776, + "learning_rate": 1.595439834843504e-08, + "loss": 0.6017, + "step": 31823 + }, + { + "epoch": 0.9753585877160721, + "grad_norm": 1.97041137512288, + "learning_rate": 1.5914805739888906e-08, + "loss": 0.5392, + "step": 31824 + }, + { + "epoch": 0.9753892362388132, + "grad_norm": 2.0978638565400174, + "learning_rate": 1.587526224080871e-08, + "loss": 0.7043, + "step": 31825 + }, + { + "epoch": 0.9754198847615545, + "grad_norm": 1.8705695655743395, + "learning_rate": 1.583576785158525e-08, + "loss": 0.5878, + "step": 31826 + }, + { + "epoch": 0.9754505332842957, + "grad_norm": 1.9142064445332958, + "learning_rate": 1.5796322572607105e-08, + "loss": 0.5947, + "step": 31827 + }, + { + "epoch": 0.9754811818070369, + "grad_norm": 1.6983964068440414, + "learning_rate": 1.5756926404262852e-08, + "loss": 0.6266, + "step": 31828 + }, + { + "epoch": 0.9755118303297781, + "grad_norm": 2.067200329868559, + "learning_rate": 1.571757934694107e-08, + "loss": 0.5618, + "step": 31829 + }, + { + "epoch": 0.9755424788525193, + "grad_norm": 2.1634114680880727, + "learning_rate": 1.5678281401029228e-08, + "loss": 0.6126, + "step": 31830 + }, + { + "epoch": 0.9755731273752605, + "grad_norm": 1.8517520555526037, + "learning_rate": 1.5639032566914793e-08, + "loss": 0.5703, + "step": 31831 + }, + { + "epoch": 0.9756037758980017, + "grad_norm": 1.8318532863695012, + "learning_rate": 1.5599832844983564e-08, + "loss": 0.5083, + "step": 31832 + }, + { + "epoch": 0.9756344244207429, + "grad_norm": 1.8934496229498505, + "learning_rate": 1.5560682235623014e-08, + "loss": 0.5504, + "step": 31833 + }, + { + "epoch": 0.9756650729434841, + "grad_norm": 1.916141753634737, + "learning_rate": 1.552158073921839e-08, + "loss": 0.5537, + "step": 31834 + }, + { + "epoch": 0.9756957214662253, + "grad_norm": 1.7233299457571478, + "learning_rate": 1.5482528356154937e-08, + "loss": 0.4902, + "step": 31835 + }, 
+ { + "epoch": 0.9757263699889666, + "grad_norm": 2.0104127512312298, + "learning_rate": 1.5443525086817347e-08, + "loss": 0.5917, + "step": 31836 + }, + { + "epoch": 0.9757570185117077, + "grad_norm": 2.251026920127436, + "learning_rate": 1.5404570931590314e-08, + "loss": 0.5757, + "step": 31837 + }, + { + "epoch": 0.975787667034449, + "grad_norm": 1.8587067898764922, + "learning_rate": 1.536566589085742e-08, + "loss": 0.612, + "step": 31838 + }, + { + "epoch": 0.9758183155571901, + "grad_norm": 1.9998656539877573, + "learning_rate": 1.532680996500169e-08, + "loss": 0.5688, + "step": 31839 + }, + { + "epoch": 0.9758489640799314, + "grad_norm": 1.8478500945962695, + "learning_rate": 1.5288003154406707e-08, + "loss": 0.521, + "step": 31840 + }, + { + "epoch": 0.9758796126026725, + "grad_norm": 1.694313376057823, + "learning_rate": 1.5249245459453833e-08, + "loss": 0.479, + "step": 31841 + }, + { + "epoch": 0.9759102611254138, + "grad_norm": 1.8712384224623908, + "learning_rate": 1.5210536880526093e-08, + "loss": 0.5997, + "step": 31842 + }, + { + "epoch": 0.9759409096481549, + "grad_norm": 1.8197325291760589, + "learning_rate": 1.5171877418003744e-08, + "loss": 0.5653, + "step": 31843 + }, + { + "epoch": 0.9759715581708962, + "grad_norm": 2.0863208097149593, + "learning_rate": 1.51332670722687e-08, + "loss": 0.5595, + "step": 31844 + }, + { + "epoch": 0.9760022066936374, + "grad_norm": 2.0582748639172075, + "learning_rate": 1.509470584370121e-08, + "loss": 0.597, + "step": 31845 + }, + { + "epoch": 0.9760328552163786, + "grad_norm": 0.7990896212853191, + "learning_rate": 1.505619373268097e-08, + "loss": 0.399, + "step": 31846 + }, + { + "epoch": 0.9760635037391198, + "grad_norm": 2.0995105467955333, + "learning_rate": 1.501773073958712e-08, + "loss": 0.601, + "step": 31847 + }, + { + "epoch": 0.976094152261861, + "grad_norm": 1.9127494339555062, + "learning_rate": 1.4979316864799364e-08, + "loss": 0.6561, + "step": 31848 + }, + { + "epoch": 0.9761248007846022, + "grad_norm": 1.7784168432882994, + "learning_rate": 1.4940952108695727e-08, + "loss": 0.5922, + "step": 31849 + }, + { + "epoch": 0.9761554493073434, + "grad_norm": 1.9065979811442446, + "learning_rate": 1.490263647165424e-08, + "loss": 0.5479, + "step": 31850 + }, + { + "epoch": 0.9761860978300846, + "grad_norm": 2.1653766792002114, + "learning_rate": 1.4864369954052938e-08, + "loss": 0.6557, + "step": 31851 + }, + { + "epoch": 0.9762167463528258, + "grad_norm": 1.9210858370923831, + "learning_rate": 1.4826152556268181e-08, + "loss": 0.517, + "step": 31852 + }, + { + "epoch": 0.976247394875567, + "grad_norm": 1.8624679141069673, + "learning_rate": 1.4787984278676892e-08, + "loss": 0.5873, + "step": 31853 + }, + { + "epoch": 0.9762780433983081, + "grad_norm": 2.020034126109676, + "learning_rate": 1.4749865121655438e-08, + "loss": 0.602, + "step": 31854 + }, + { + "epoch": 0.9763086919210494, + "grad_norm": 0.8085930878566514, + "learning_rate": 1.4711795085578517e-08, + "loss": 0.4012, + "step": 31855 + }, + { + "epoch": 0.9763393404437906, + "grad_norm": 1.814412781039921, + "learning_rate": 1.4673774170822496e-08, + "loss": 0.5603, + "step": 31856 + }, + { + "epoch": 0.9763699889665318, + "grad_norm": 1.6702355404075098, + "learning_rate": 1.4635802377760966e-08, + "loss": 0.4844, + "step": 31857 + }, + { + "epoch": 0.976400637489273, + "grad_norm": 1.8037637568186058, + "learning_rate": 1.4597879706768625e-08, + "loss": 0.554, + "step": 31858 + }, + { + "epoch": 0.9764312860120142, + "grad_norm": 1.7086485959007878, + 
"learning_rate": 1.456000615821851e-08, + "loss": 0.5494, + "step": 31859 + }, + { + "epoch": 0.9764619345347554, + "grad_norm": 1.7663277448335903, + "learning_rate": 1.4522181732484209e-08, + "loss": 0.5901, + "step": 31860 + }, + { + "epoch": 0.9764925830574966, + "grad_norm": 0.8457075212267262, + "learning_rate": 1.4484406429938758e-08, + "loss": 0.401, + "step": 31861 + }, + { + "epoch": 0.9765232315802378, + "grad_norm": 1.9145559001210897, + "learning_rate": 1.4446680250954082e-08, + "loss": 0.5242, + "step": 31862 + }, + { + "epoch": 0.976553880102979, + "grad_norm": 1.7183494781512125, + "learning_rate": 1.4409003195902105e-08, + "loss": 0.5356, + "step": 31863 + }, + { + "epoch": 0.9765845286257202, + "grad_norm": 1.6258729962619216, + "learning_rate": 1.4371375265153643e-08, + "loss": 0.544, + "step": 31864 + }, + { + "epoch": 0.9766151771484615, + "grad_norm": 2.0178698547722176, + "learning_rate": 1.4333796459079508e-08, + "loss": 0.5535, + "step": 31865 + }, + { + "epoch": 0.9766458256712026, + "grad_norm": 2.2976689657299376, + "learning_rate": 1.429626677804996e-08, + "loss": 0.5799, + "step": 31866 + }, + { + "epoch": 0.9766764741939439, + "grad_norm": 1.9487985683397668, + "learning_rate": 1.4258786222435261e-08, + "loss": 0.6262, + "step": 31867 + }, + { + "epoch": 0.976707122716685, + "grad_norm": 0.7841234544463233, + "learning_rate": 1.4221354792604004e-08, + "loss": 0.3933, + "step": 31868 + }, + { + "epoch": 0.9767377712394263, + "grad_norm": 1.788490161897443, + "learning_rate": 1.418397248892589e-08, + "loss": 0.512, + "step": 31869 + }, + { + "epoch": 0.9767684197621674, + "grad_norm": 2.1320077949842418, + "learning_rate": 1.4146639311768406e-08, + "loss": 0.5952, + "step": 31870 + }, + { + "epoch": 0.9767990682849087, + "grad_norm": 1.8171341412807838, + "learning_rate": 1.4109355261500146e-08, + "loss": 0.6138, + "step": 31871 + }, + { + "epoch": 0.9768297168076499, + "grad_norm": 0.7732698906120108, + "learning_rate": 1.4072120338488038e-08, + "loss": 0.3881, + "step": 31872 + }, + { + "epoch": 0.9768603653303911, + "grad_norm": 1.9404617177513157, + "learning_rate": 1.4034934543098454e-08, + "loss": 0.5457, + "step": 31873 + }, + { + "epoch": 0.9768910138531323, + "grad_norm": 1.7259473186561074, + "learning_rate": 1.3997797875698882e-08, + "loss": 0.4828, + "step": 31874 + }, + { + "epoch": 0.9769216623758735, + "grad_norm": 2.394299163902907, + "learning_rate": 1.3960710336654582e-08, + "loss": 0.6116, + "step": 31875 + }, + { + "epoch": 0.9769523108986147, + "grad_norm": 0.786583638461665, + "learning_rate": 1.3923671926331373e-08, + "loss": 0.3882, + "step": 31876 + }, + { + "epoch": 0.9769829594213559, + "grad_norm": 1.833583111418267, + "learning_rate": 1.3886682645093407e-08, + "loss": 0.5342, + "step": 31877 + }, + { + "epoch": 0.9770136079440971, + "grad_norm": 0.7873920805289292, + "learning_rate": 1.3849742493306506e-08, + "loss": 0.3875, + "step": 31878 + }, + { + "epoch": 0.9770442564668383, + "grad_norm": 1.7799435754329973, + "learning_rate": 1.3812851471333156e-08, + "loss": 0.5534, + "step": 31879 + }, + { + "epoch": 0.9770749049895795, + "grad_norm": 2.0132499871521192, + "learning_rate": 1.3776009579538063e-08, + "loss": 0.5075, + "step": 31880 + }, + { + "epoch": 0.9771055535123208, + "grad_norm": 1.8228952648720056, + "learning_rate": 1.3739216818283163e-08, + "loss": 0.5627, + "step": 31881 + }, + { + "epoch": 0.9771362020350619, + "grad_norm": 0.7837897654698595, + "learning_rate": 1.3702473187932053e-08, + "loss": 0.3946, + "step": 
31882 + }, + { + "epoch": 0.9771668505578032, + "grad_norm": 0.7825882507590101, + "learning_rate": 1.366577868884611e-08, + "loss": 0.3963, + "step": 31883 + }, + { + "epoch": 0.9771974990805443, + "grad_norm": 2.295853931681736, + "learning_rate": 1.3629133321387266e-08, + "loss": 0.5652, + "step": 31884 + }, + { + "epoch": 0.9772281476032855, + "grad_norm": 1.781082614413763, + "learning_rate": 1.3592537085915792e-08, + "loss": 0.5783, + "step": 31885 + }, + { + "epoch": 0.9772587961260267, + "grad_norm": 1.7099890211703175, + "learning_rate": 1.3555989982793615e-08, + "loss": 0.559, + "step": 31886 + }, + { + "epoch": 0.9772894446487679, + "grad_norm": 1.7056210469667308, + "learning_rate": 1.3519492012379898e-08, + "loss": 0.6017, + "step": 31887 + }, + { + "epoch": 0.9773200931715091, + "grad_norm": 1.705201338048168, + "learning_rate": 1.3483043175033794e-08, + "loss": 0.505, + "step": 31888 + }, + { + "epoch": 0.9773507416942503, + "grad_norm": 1.8376586239348065, + "learning_rate": 1.3446643471116127e-08, + "loss": 0.5719, + "step": 31889 + }, + { + "epoch": 0.9773813902169916, + "grad_norm": 1.8002811393432405, + "learning_rate": 1.3410292900983835e-08, + "loss": 0.5419, + "step": 31890 + }, + { + "epoch": 0.9774120387397327, + "grad_norm": 1.8647755520758869, + "learning_rate": 1.3373991464996072e-08, + "loss": 0.5662, + "step": 31891 + }, + { + "epoch": 0.977442687262474, + "grad_norm": 1.826893093878163, + "learning_rate": 1.3337739163510333e-08, + "loss": 0.5107, + "step": 31892 + }, + { + "epoch": 0.9774733357852151, + "grad_norm": 1.831795888350997, + "learning_rate": 1.3301535996883552e-08, + "loss": 0.5815, + "step": 31893 + }, + { + "epoch": 0.9775039843079564, + "grad_norm": 0.7473496201782, + "learning_rate": 1.3265381965472668e-08, + "loss": 0.3727, + "step": 31894 + }, + { + "epoch": 0.9775346328306975, + "grad_norm": 0.7843474255340667, + "learning_rate": 1.3229277069634062e-08, + "loss": 0.4032, + "step": 31895 + }, + { + "epoch": 0.9775652813534388, + "grad_norm": 0.8403566765196125, + "learning_rate": 1.3193221309723004e-08, + "loss": 0.4106, + "step": 31896 + }, + { + "epoch": 0.9775959298761799, + "grad_norm": 0.8439363826978342, + "learning_rate": 1.3157214686095321e-08, + "loss": 0.3996, + "step": 31897 + }, + { + "epoch": 0.9776265783989212, + "grad_norm": 1.8731311659605938, + "learning_rate": 1.312125719910573e-08, + "loss": 0.5926, + "step": 31898 + }, + { + "epoch": 0.9776572269216623, + "grad_norm": 1.811618178139205, + "learning_rate": 1.3085348849107837e-08, + "loss": 0.5218, + "step": 31899 + }, + { + "epoch": 0.9776878754444036, + "grad_norm": 1.7354293420212186, + "learning_rate": 1.3049489636456358e-08, + "loss": 0.5772, + "step": 31900 + }, + { + "epoch": 0.9777185239671448, + "grad_norm": 1.9774091652710486, + "learning_rate": 1.3013679561503789e-08, + "loss": 0.6365, + "step": 31901 + }, + { + "epoch": 0.977749172489886, + "grad_norm": 1.8815545852725586, + "learning_rate": 1.2977918624603736e-08, + "loss": 0.4771, + "step": 31902 + }, + { + "epoch": 0.9777798210126272, + "grad_norm": 1.9981337797034788, + "learning_rate": 1.2942206826108139e-08, + "loss": 0.5778, + "step": 31903 + }, + { + "epoch": 0.9778104695353684, + "grad_norm": 1.9189680304059709, + "learning_rate": 1.290654416636894e-08, + "loss": 0.5842, + "step": 31904 + }, + { + "epoch": 0.9778411180581096, + "grad_norm": 2.063224174254957, + "learning_rate": 1.2870930645738078e-08, + "loss": 0.6074, + "step": 31905 + }, + { + "epoch": 0.9778717665808508, + "grad_norm": 
1.835256070908092, + "learning_rate": 1.2835366264565275e-08, + "loss": 0.479, + "step": 31906 + }, + { + "epoch": 0.977902415103592, + "grad_norm": 1.6436492903983173, + "learning_rate": 1.2799851023201914e-08, + "loss": 0.5627, + "step": 31907 + }, + { + "epoch": 0.9779330636263333, + "grad_norm": 1.8986326520928547, + "learning_rate": 1.2764384921997718e-08, + "loss": 0.5614, + "step": 31908 + }, + { + "epoch": 0.9779637121490744, + "grad_norm": 2.0604016088432333, + "learning_rate": 1.2728967961301853e-08, + "loss": 0.5938, + "step": 31909 + }, + { + "epoch": 0.9779943606718157, + "grad_norm": 2.0270544282239618, + "learning_rate": 1.269360014146348e-08, + "loss": 0.5611, + "step": 31910 + }, + { + "epoch": 0.9780250091945568, + "grad_norm": 1.842541233764921, + "learning_rate": 1.2658281462831212e-08, + "loss": 0.5334, + "step": 31911 + }, + { + "epoch": 0.9780556577172981, + "grad_norm": 1.7947948163725607, + "learning_rate": 1.2623011925753104e-08, + "loss": 0.6424, + "step": 31912 + }, + { + "epoch": 0.9780863062400392, + "grad_norm": 2.150762475995836, + "learning_rate": 1.2587791530576653e-08, + "loss": 0.5807, + "step": 31913 + }, + { + "epoch": 0.9781169547627805, + "grad_norm": 1.8723556692939702, + "learning_rate": 1.2552620277648253e-08, + "loss": 0.6432, + "step": 31914 + }, + { + "epoch": 0.9781476032855216, + "grad_norm": 1.8893388679551355, + "learning_rate": 1.2517498167315401e-08, + "loss": 0.5566, + "step": 31915 + }, + { + "epoch": 0.9781782518082628, + "grad_norm": 1.7601524692220067, + "learning_rate": 1.2482425199923931e-08, + "loss": 0.5699, + "step": 31916 + }, + { + "epoch": 0.978208900331004, + "grad_norm": 1.8354211610169624, + "learning_rate": 1.2447401375818569e-08, + "loss": 0.5427, + "step": 31917 + }, + { + "epoch": 0.9782395488537452, + "grad_norm": 1.842420814097618, + "learning_rate": 1.2412426695345702e-08, + "loss": 0.5229, + "step": 31918 + }, + { + "epoch": 0.9782701973764865, + "grad_norm": 1.862829934088107, + "learning_rate": 1.2377501158848947e-08, + "loss": 0.5597, + "step": 31919 + }, + { + "epoch": 0.9783008458992276, + "grad_norm": 1.9737795551710935, + "learning_rate": 1.2342624766673028e-08, + "loss": 0.5167, + "step": 31920 + }, + { + "epoch": 0.9783314944219689, + "grad_norm": 1.8469035295481453, + "learning_rate": 1.2307797519161558e-08, + "loss": 0.5574, + "step": 31921 + }, + { + "epoch": 0.97836214294471, + "grad_norm": 1.8179868060474267, + "learning_rate": 1.227301941665704e-08, + "loss": 0.5463, + "step": 31922 + }, + { + "epoch": 0.9783927914674513, + "grad_norm": 2.1472050359721844, + "learning_rate": 1.2238290459502533e-08, + "loss": 0.5882, + "step": 31923 + }, + { + "epoch": 0.9784234399901924, + "grad_norm": 2.0861477668995656, + "learning_rate": 1.2203610648041098e-08, + "loss": 0.6867, + "step": 31924 + }, + { + "epoch": 0.9784540885129337, + "grad_norm": 0.7550695487127996, + "learning_rate": 1.2168979982613016e-08, + "loss": 0.3683, + "step": 31925 + }, + { + "epoch": 0.9784847370356748, + "grad_norm": 2.135681019911945, + "learning_rate": 1.2134398463560238e-08, + "loss": 0.5402, + "step": 31926 + }, + { + "epoch": 0.9785153855584161, + "grad_norm": 1.9316476634752442, + "learning_rate": 1.20998660912236e-08, + "loss": 0.5133, + "step": 31927 + }, + { + "epoch": 0.9785460340811573, + "grad_norm": 2.1568871407044217, + "learning_rate": 1.2065382865942832e-08, + "loss": 0.6472, + "step": 31928 + }, + { + "epoch": 0.9785766826038985, + "grad_norm": 0.7982383536294246, + "learning_rate": 1.2030948788058772e-08, + "loss": 
0.4029, + "step": 31929 + }, + { + "epoch": 0.9786073311266397, + "grad_norm": 1.873059959584648, + "learning_rate": 1.1996563857909482e-08, + "loss": 0.5353, + "step": 31930 + }, + { + "epoch": 0.9786379796493809, + "grad_norm": 1.8270161167884624, + "learning_rate": 1.1962228075834137e-08, + "loss": 0.5264, + "step": 31931 + }, + { + "epoch": 0.9786686281721221, + "grad_norm": 2.0035210308675104, + "learning_rate": 1.1927941442171908e-08, + "loss": 0.5882, + "step": 31932 + }, + { + "epoch": 0.9786992766948633, + "grad_norm": 0.8530117139590689, + "learning_rate": 1.1893703957259194e-08, + "loss": 0.4095, + "step": 31933 + }, + { + "epoch": 0.9787299252176045, + "grad_norm": 2.2252060719468445, + "learning_rate": 1.1859515621434615e-08, + "loss": 0.6557, + "step": 31934 + }, + { + "epoch": 0.9787605737403458, + "grad_norm": 1.8030863735010636, + "learning_rate": 1.1825376435034008e-08, + "loss": 0.4797, + "step": 31935 + }, + { + "epoch": 0.9787912222630869, + "grad_norm": 2.3406413075036063, + "learning_rate": 1.179128639839433e-08, + "loss": 0.5654, + "step": 31936 + }, + { + "epoch": 0.9788218707858282, + "grad_norm": 1.8456915282808701, + "learning_rate": 1.1757245511851423e-08, + "loss": 0.5558, + "step": 31937 + }, + { + "epoch": 0.9788525193085693, + "grad_norm": 1.8208960806401542, + "learning_rate": 1.1723253775741129e-08, + "loss": 0.5115, + "step": 31938 + }, + { + "epoch": 0.9788831678313106, + "grad_norm": 0.764049829879985, + "learning_rate": 1.1689311190397624e-08, + "loss": 0.3873, + "step": 31939 + }, + { + "epoch": 0.9789138163540517, + "grad_norm": 1.8396727722930533, + "learning_rate": 1.165541775615564e-08, + "loss": 0.5551, + "step": 31940 + }, + { + "epoch": 0.978944464876793, + "grad_norm": 2.2472271528017242, + "learning_rate": 1.1621573473348801e-08, + "loss": 0.6678, + "step": 31941 + }, + { + "epoch": 0.9789751133995341, + "grad_norm": 0.8299616352376835, + "learning_rate": 1.1587778342311284e-08, + "loss": 0.4162, + "step": 31942 + }, + { + "epoch": 0.9790057619222754, + "grad_norm": 1.7128234487610028, + "learning_rate": 1.1554032363376156e-08, + "loss": 0.5784, + "step": 31943 + }, + { + "epoch": 0.9790364104450165, + "grad_norm": 1.6973560300227395, + "learning_rate": 1.1520335536874816e-08, + "loss": 0.4651, + "step": 31944 + }, + { + "epoch": 0.9790670589677578, + "grad_norm": 1.725814974818113, + "learning_rate": 1.1486687863139778e-08, + "loss": 0.5859, + "step": 31945 + }, + { + "epoch": 0.979097707490499, + "grad_norm": 1.8345641800476444, + "learning_rate": 1.1453089342503555e-08, + "loss": 0.5591, + "step": 31946 + }, + { + "epoch": 0.9791283560132401, + "grad_norm": 1.6995622213731028, + "learning_rate": 1.1419539975295878e-08, + "loss": 0.5377, + "step": 31947 + }, + { + "epoch": 0.9791590045359814, + "grad_norm": 1.8847205944202197, + "learning_rate": 1.1386039761848155e-08, + "loss": 0.4936, + "step": 31948 + }, + { + "epoch": 0.9791896530587225, + "grad_norm": 1.8774533163047418, + "learning_rate": 1.135258870249012e-08, + "loss": 0.5741, + "step": 31949 + }, + { + "epoch": 0.9792203015814638, + "grad_norm": 2.0481770560766415, + "learning_rate": 1.1319186797550952e-08, + "loss": 0.63, + "step": 31950 + }, + { + "epoch": 0.9792509501042049, + "grad_norm": 2.1366090652104814, + "learning_rate": 1.1285834047360943e-08, + "loss": 0.6605, + "step": 31951 + }, + { + "epoch": 0.9792815986269462, + "grad_norm": 1.8362893825631765, + "learning_rate": 1.1252530452247612e-08, + "loss": 0.511, + "step": 31952 + }, + { + "epoch": 0.9793122471496873, + 
"grad_norm": 1.677369193164723, + "learning_rate": 1.1219276012539581e-08, + "loss": 0.5251, + "step": 31953 + }, + { + "epoch": 0.9793428956724286, + "grad_norm": 2.0183036624823054, + "learning_rate": 1.1186070728564369e-08, + "loss": 0.6543, + "step": 31954 + }, + { + "epoch": 0.9793735441951698, + "grad_norm": 2.068724147358849, + "learning_rate": 1.1152914600649489e-08, + "loss": 0.4995, + "step": 31955 + }, + { + "epoch": 0.979404192717911, + "grad_norm": 1.9109513153725721, + "learning_rate": 1.1119807629121348e-08, + "loss": 0.6399, + "step": 31956 + }, + { + "epoch": 0.9794348412406522, + "grad_norm": 2.1124404875536835, + "learning_rate": 1.1086749814306352e-08, + "loss": 0.5507, + "step": 31957 + }, + { + "epoch": 0.9794654897633934, + "grad_norm": 2.0223195735238817, + "learning_rate": 1.1053741156529795e-08, + "loss": 0.5844, + "step": 31958 + }, + { + "epoch": 0.9794961382861346, + "grad_norm": 1.9259743971039909, + "learning_rate": 1.1020781656116975e-08, + "loss": 0.5772, + "step": 31959 + }, + { + "epoch": 0.9795267868088758, + "grad_norm": 0.7774717593851073, + "learning_rate": 1.0987871313393183e-08, + "loss": 0.396, + "step": 31960 + }, + { + "epoch": 0.979557435331617, + "grad_norm": 1.8505126790320385, + "learning_rate": 1.0955010128682608e-08, + "loss": 0.5615, + "step": 31961 + }, + { + "epoch": 0.9795880838543582, + "grad_norm": 1.8841107910905441, + "learning_rate": 1.0922198102308878e-08, + "loss": 0.5454, + "step": 31962 + }, + { + "epoch": 0.9796187323770994, + "grad_norm": 1.8546057665406201, + "learning_rate": 1.0889435234594514e-08, + "loss": 0.6504, + "step": 31963 + }, + { + "epoch": 0.9796493808998407, + "grad_norm": 1.9571624594655888, + "learning_rate": 1.0856721525863701e-08, + "loss": 0.6011, + "step": 31964 + }, + { + "epoch": 0.9796800294225818, + "grad_norm": 0.7914785260951971, + "learning_rate": 1.0824056976437846e-08, + "loss": 0.3923, + "step": 31965 + }, + { + "epoch": 0.9797106779453231, + "grad_norm": 1.8337540058320454, + "learning_rate": 1.0791441586639472e-08, + "loss": 0.523, + "step": 31966 + }, + { + "epoch": 0.9797413264680642, + "grad_norm": 1.9246109982727395, + "learning_rate": 1.0758875356789434e-08, + "loss": 0.6586, + "step": 31967 + }, + { + "epoch": 0.9797719749908055, + "grad_norm": 1.8395795032472855, + "learning_rate": 1.0726358287208583e-08, + "loss": 0.4691, + "step": 31968 + }, + { + "epoch": 0.9798026235135466, + "grad_norm": 1.8916481664249096, + "learning_rate": 1.069389037821722e-08, + "loss": 0.5879, + "step": 31969 + }, + { + "epoch": 0.9798332720362879, + "grad_norm": 1.9984094009467344, + "learning_rate": 1.0661471630135644e-08, + "loss": 0.5965, + "step": 31970 + }, + { + "epoch": 0.979863920559029, + "grad_norm": 1.9373659739658333, + "learning_rate": 1.0629102043283602e-08, + "loss": 0.6142, + "step": 31971 + }, + { + "epoch": 0.9798945690817703, + "grad_norm": 1.9278199302222796, + "learning_rate": 1.0596781617979168e-08, + "loss": 0.5897, + "step": 31972 + }, + { + "epoch": 0.9799252176045115, + "grad_norm": 0.8088340161358509, + "learning_rate": 1.0564510354541535e-08, + "loss": 0.3933, + "step": 31973 + }, + { + "epoch": 0.9799558661272527, + "grad_norm": 1.8898264480293452, + "learning_rate": 1.0532288253288225e-08, + "loss": 0.5878, + "step": 31974 + }, + { + "epoch": 0.9799865146499939, + "grad_norm": 0.80194322097358, + "learning_rate": 1.0500115314536763e-08, + "loss": 0.3865, + "step": 31975 + }, + { + "epoch": 0.9800171631727351, + "grad_norm": 1.8868163882563185, + "learning_rate": 
1.0467991538604672e-08, + "loss": 0.5057, + "step": 31976 + }, + { + "epoch": 0.9800478116954763, + "grad_norm": 2.1957109911678425, + "learning_rate": 1.043591692580781e-08, + "loss": 0.621, + "step": 31977 + }, + { + "epoch": 0.9800784602182174, + "grad_norm": 2.0409043830423794, + "learning_rate": 1.040389147646259e-08, + "loss": 0.5783, + "step": 31978 + }, + { + "epoch": 0.9801091087409587, + "grad_norm": 1.7657718486447838, + "learning_rate": 1.0371915190884319e-08, + "loss": 0.5053, + "step": 31979 + }, + { + "epoch": 0.9801397572636998, + "grad_norm": 1.9217984803933938, + "learning_rate": 1.0339988069388295e-08, + "loss": 0.6132, + "step": 31980 + }, + { + "epoch": 0.9801704057864411, + "grad_norm": 2.032217031377303, + "learning_rate": 1.0308110112289271e-08, + "loss": 0.5874, + "step": 31981 + }, + { + "epoch": 0.9802010543091823, + "grad_norm": 1.6346896007866465, + "learning_rate": 1.0276281319900883e-08, + "loss": 0.5065, + "step": 31982 + }, + { + "epoch": 0.9802317028319235, + "grad_norm": 1.8483017756172266, + "learning_rate": 1.0244501692536768e-08, + "loss": 0.5809, + "step": 31983 + }, + { + "epoch": 0.9802623513546647, + "grad_norm": 1.9229872184680892, + "learning_rate": 1.0212771230510565e-08, + "loss": 0.5531, + "step": 31984 + }, + { + "epoch": 0.9802929998774059, + "grad_norm": 0.8236928416824567, + "learning_rate": 1.0181089934134247e-08, + "loss": 0.3848, + "step": 31985 + }, + { + "epoch": 0.9803236484001471, + "grad_norm": 2.019003898043624, + "learning_rate": 1.0149457803720897e-08, + "loss": 0.5663, + "step": 31986 + }, + { + "epoch": 0.9803542969228883, + "grad_norm": 1.8459651346374513, + "learning_rate": 1.0117874839581376e-08, + "loss": 0.6318, + "step": 31987 + }, + { + "epoch": 0.9803849454456295, + "grad_norm": 1.9551996703521368, + "learning_rate": 1.0086341042027104e-08, + "loss": 0.5698, + "step": 31988 + }, + { + "epoch": 0.9804155939683707, + "grad_norm": 0.8124863435432201, + "learning_rate": 1.0054856411368941e-08, + "loss": 0.3921, + "step": 31989 + }, + { + "epoch": 0.9804462424911119, + "grad_norm": 2.0590925586661943, + "learning_rate": 1.0023420947917195e-08, + "loss": 0.5513, + "step": 31990 + }, + { + "epoch": 0.9804768910138532, + "grad_norm": 2.2291040719064745, + "learning_rate": 9.992034651981064e-09, + "loss": 0.613, + "step": 31991 + }, + { + "epoch": 0.9805075395365943, + "grad_norm": 2.1074526656071826, + "learning_rate": 9.9606975238703e-09, + "loss": 0.6021, + "step": 31992 + }, + { + "epoch": 0.9805381880593356, + "grad_norm": 0.7807808607320236, + "learning_rate": 9.929409563893544e-09, + "loss": 0.3729, + "step": 31993 + }, + { + "epoch": 0.9805688365820767, + "grad_norm": 1.8474246657196873, + "learning_rate": 9.898170772358883e-09, + "loss": 0.5924, + "step": 31994 + }, + { + "epoch": 0.980599485104818, + "grad_norm": 2.0260794011705, + "learning_rate": 9.866981149574405e-09, + "loss": 0.5365, + "step": 31995 + }, + { + "epoch": 0.9806301336275591, + "grad_norm": 1.8009490017319887, + "learning_rate": 9.835840695847643e-09, + "loss": 0.5835, + "step": 31996 + }, + { + "epoch": 0.9806607821503004, + "grad_norm": 1.8323801591591018, + "learning_rate": 9.804749411485014e-09, + "loss": 0.593, + "step": 31997 + }, + { + "epoch": 0.9806914306730415, + "grad_norm": 1.7678693994760308, + "learning_rate": 9.773707296792944e-09, + "loss": 0.5832, + "step": 31998 + }, + { + "epoch": 0.9807220791957828, + "grad_norm": 1.98199456137436, + "learning_rate": 9.742714352077298e-09, + "loss": 0.6594, + "step": 31999 + }, + { + "epoch": 
0.980752727718524, + "grad_norm": 1.9614080535684848, + "learning_rate": 9.711770577643387e-09, + "loss": 0.5488, + "step": 32000 + }, + { + "epoch": 0.9807833762412652, + "grad_norm": 1.9184793159808053, + "learning_rate": 9.680875973795966e-09, + "loss": 0.5864, + "step": 32001 + }, + { + "epoch": 0.9808140247640064, + "grad_norm": 2.003624381670176, + "learning_rate": 9.650030540840349e-09, + "loss": 0.6331, + "step": 32002 + }, + { + "epoch": 0.9808446732867476, + "grad_norm": 0.8064583925934871, + "learning_rate": 9.619234279079625e-09, + "loss": 0.3839, + "step": 32003 + }, + { + "epoch": 0.9808753218094888, + "grad_norm": 2.1519519889551515, + "learning_rate": 9.588487188816886e-09, + "loss": 0.5805, + "step": 32004 + }, + { + "epoch": 0.98090597033223, + "grad_norm": 1.909206219710678, + "learning_rate": 9.557789270356333e-09, + "loss": 0.5342, + "step": 32005 + }, + { + "epoch": 0.9809366188549712, + "grad_norm": 1.957865054821352, + "learning_rate": 9.52714052399939e-09, + "loss": 0.5988, + "step": 32006 + }, + { + "epoch": 0.9809672673777124, + "grad_norm": 1.8889723672885048, + "learning_rate": 9.496540950048594e-09, + "loss": 0.527, + "step": 32007 + }, + { + "epoch": 0.9809979159004536, + "grad_norm": 0.8131956178953065, + "learning_rate": 9.46599054880537e-09, + "loss": 0.4055, + "step": 32008 + }, + { + "epoch": 0.9810285644231947, + "grad_norm": 0.8180858500670286, + "learning_rate": 9.435489320570035e-09, + "loss": 0.3812, + "step": 32009 + }, + { + "epoch": 0.981059212945936, + "grad_norm": 0.7534990955695939, + "learning_rate": 9.405037265644568e-09, + "loss": 0.3791, + "step": 32010 + }, + { + "epoch": 0.9810898614686772, + "grad_norm": 1.8051291569174583, + "learning_rate": 9.37463438432762e-09, + "loss": 0.5239, + "step": 32011 + }, + { + "epoch": 0.9811205099914184, + "grad_norm": 2.0512921265416186, + "learning_rate": 9.344280676918949e-09, + "loss": 0.562, + "step": 32012 + }, + { + "epoch": 0.9811511585141596, + "grad_norm": 1.9142620282584628, + "learning_rate": 9.313976143718873e-09, + "loss": 0.6085, + "step": 32013 + }, + { + "epoch": 0.9811818070369008, + "grad_norm": 1.815811489239524, + "learning_rate": 9.283720785024376e-09, + "loss": 0.5033, + "step": 32014 + }, + { + "epoch": 0.981212455559642, + "grad_norm": 1.797385905216103, + "learning_rate": 9.25351460113466e-09, + "loss": 0.5342, + "step": 32015 + }, + { + "epoch": 0.9812431040823832, + "grad_norm": 0.808864421194006, + "learning_rate": 9.223357592347272e-09, + "loss": 0.3962, + "step": 32016 + }, + { + "epoch": 0.9812737526051244, + "grad_norm": 2.1440032755217473, + "learning_rate": 9.193249758958633e-09, + "loss": 0.5608, + "step": 32017 + }, + { + "epoch": 0.9813044011278657, + "grad_norm": 1.7083799424177553, + "learning_rate": 9.163191101265734e-09, + "loss": 0.5838, + "step": 32018 + }, + { + "epoch": 0.9813350496506068, + "grad_norm": 2.0735264125069994, + "learning_rate": 9.133181619565002e-09, + "loss": 0.5729, + "step": 32019 + }, + { + "epoch": 0.9813656981733481, + "grad_norm": 1.9068144146968775, + "learning_rate": 9.10322131415231e-09, + "loss": 0.5258, + "step": 32020 + }, + { + "epoch": 0.9813963466960892, + "grad_norm": 1.8931667077873044, + "learning_rate": 9.073310185322425e-09, + "loss": 0.6312, + "step": 32021 + }, + { + "epoch": 0.9814269952188305, + "grad_norm": 1.7584145178854658, + "learning_rate": 9.043448233370111e-09, + "loss": 0.6059, + "step": 32022 + }, + { + "epoch": 0.9814576437415716, + "grad_norm": 0.7759676616239347, + "learning_rate": 9.013635458589575e-09, + 
"loss": 0.3847, + "step": 32023 + }, + { + "epoch": 0.9814882922643129, + "grad_norm": 1.9517171840609335, + "learning_rate": 8.983871861275029e-09, + "loss": 0.5291, + "step": 32024 + }, + { + "epoch": 0.981518940787054, + "grad_norm": 2.108356889826978, + "learning_rate": 8.954157441719014e-09, + "loss": 0.5984, + "step": 32025 + }, + { + "epoch": 0.9815495893097953, + "grad_norm": 2.236236991659414, + "learning_rate": 8.92449220021463e-09, + "loss": 0.5699, + "step": 32026 + }, + { + "epoch": 0.9815802378325365, + "grad_norm": 0.7624748645407461, + "learning_rate": 8.89487613705442e-09, + "loss": 0.3671, + "step": 32027 + }, + { + "epoch": 0.9816108863552777, + "grad_norm": 1.7450841648746453, + "learning_rate": 8.865309252530374e-09, + "loss": 0.5423, + "step": 32028 + }, + { + "epoch": 0.9816415348780189, + "grad_norm": 1.659716261829808, + "learning_rate": 8.83579154693337e-09, + "loss": 0.5094, + "step": 32029 + }, + { + "epoch": 0.9816721834007601, + "grad_norm": 1.9179031460171334, + "learning_rate": 8.806323020553731e-09, + "loss": 0.5093, + "step": 32030 + }, + { + "epoch": 0.9817028319235013, + "grad_norm": 2.0111016894024334, + "learning_rate": 8.776903673683446e-09, + "loss": 0.6167, + "step": 32031 + }, + { + "epoch": 0.9817334804462425, + "grad_norm": 2.064461791649948, + "learning_rate": 8.747533506610618e-09, + "loss": 0.5856, + "step": 32032 + }, + { + "epoch": 0.9817641289689837, + "grad_norm": 0.7953891307708895, + "learning_rate": 8.718212519625569e-09, + "loss": 0.4048, + "step": 32033 + }, + { + "epoch": 0.981794777491725, + "grad_norm": 1.8456653137846164, + "learning_rate": 8.688940713016958e-09, + "loss": 0.513, + "step": 32034 + }, + { + "epoch": 0.9818254260144661, + "grad_norm": 2.0646082224301483, + "learning_rate": 8.659718087073998e-09, + "loss": 0.5967, + "step": 32035 + }, + { + "epoch": 0.9818560745372074, + "grad_norm": 2.038990487222901, + "learning_rate": 8.630544642083128e-09, + "loss": 0.5304, + "step": 32036 + }, + { + "epoch": 0.9818867230599485, + "grad_norm": 0.83940234227609, + "learning_rate": 8.601420378333003e-09, + "loss": 0.4033, + "step": 32037 + }, + { + "epoch": 0.9819173715826898, + "grad_norm": 1.9727868864227052, + "learning_rate": 8.572345296109508e-09, + "loss": 0.6516, + "step": 32038 + }, + { + "epoch": 0.9819480201054309, + "grad_norm": 1.8482120977624652, + "learning_rate": 8.543319395700744e-09, + "loss": 0.473, + "step": 32039 + }, + { + "epoch": 0.9819786686281721, + "grad_norm": 1.836410263845654, + "learning_rate": 8.514342677391486e-09, + "loss": 0.5742, + "step": 32040 + }, + { + "epoch": 0.9820093171509133, + "grad_norm": 2.086075700287257, + "learning_rate": 8.485415141467057e-09, + "loss": 0.6396, + "step": 32041 + }, + { + "epoch": 0.9820399656736545, + "grad_norm": 2.002854052635818, + "learning_rate": 8.456536788213343e-09, + "loss": 0.6121, + "step": 32042 + }, + { + "epoch": 0.9820706141963957, + "grad_norm": 2.2766670224312744, + "learning_rate": 8.42770761791456e-09, + "loss": 0.6854, + "step": 32043 + }, + { + "epoch": 0.9821012627191369, + "grad_norm": 0.8173810355297702, + "learning_rate": 8.398927630854925e-09, + "loss": 0.4132, + "step": 32044 + }, + { + "epoch": 0.9821319112418782, + "grad_norm": 1.75004972227259, + "learning_rate": 8.370196827317545e-09, + "loss": 0.5147, + "step": 32045 + }, + { + "epoch": 0.9821625597646193, + "grad_norm": 1.872961931847651, + "learning_rate": 8.341515207585526e-09, + "loss": 0.6013, + "step": 32046 + }, + { + "epoch": 0.9821932082873606, + "grad_norm": 
1.8258353612499236, + "learning_rate": 8.312882771941976e-09, + "loss": 0.5483, + "step": 32047 + }, + { + "epoch": 0.9822238568101017, + "grad_norm": 1.8962229953185814, + "learning_rate": 8.284299520668892e-09, + "loss": 0.5557, + "step": 32048 + }, + { + "epoch": 0.982254505332843, + "grad_norm": 0.7845979902786705, + "learning_rate": 8.255765454047716e-09, + "loss": 0.3904, + "step": 32049 + }, + { + "epoch": 0.9822851538555841, + "grad_norm": 1.6893446929841256, + "learning_rate": 8.227280572359331e-09, + "loss": 0.5668, + "step": 32050 + }, + { + "epoch": 0.9823158023783254, + "grad_norm": 2.011081175108313, + "learning_rate": 8.198844875885182e-09, + "loss": 0.6, + "step": 32051 + }, + { + "epoch": 0.9823464509010665, + "grad_norm": 1.769806930675884, + "learning_rate": 8.170458364905043e-09, + "loss": 0.5637, + "step": 32052 + }, + { + "epoch": 0.9823770994238078, + "grad_norm": 1.8461914426390449, + "learning_rate": 8.142121039698136e-09, + "loss": 0.5499, + "step": 32053 + }, + { + "epoch": 0.982407747946549, + "grad_norm": 2.1186789941540694, + "learning_rate": 8.113832900544239e-09, + "loss": 0.5195, + "step": 32054 + }, + { + "epoch": 0.9824383964692902, + "grad_norm": 1.8851794612737631, + "learning_rate": 8.085593947722569e-09, + "loss": 0.531, + "step": 32055 + }, + { + "epoch": 0.9824690449920314, + "grad_norm": 1.8264619505200508, + "learning_rate": 8.057404181510131e-09, + "loss": 0.5619, + "step": 32056 + }, + { + "epoch": 0.9824996935147726, + "grad_norm": 2.0008131157654496, + "learning_rate": 8.029263602185588e-09, + "loss": 0.6514, + "step": 32057 + }, + { + "epoch": 0.9825303420375138, + "grad_norm": 1.8104441757622263, + "learning_rate": 8.001172210025942e-09, + "loss": 0.4945, + "step": 32058 + }, + { + "epoch": 0.982560990560255, + "grad_norm": 1.8411826111272667, + "learning_rate": 7.973130005308193e-09, + "loss": 0.6361, + "step": 32059 + }, + { + "epoch": 0.9825916390829962, + "grad_norm": 0.8227007418191568, + "learning_rate": 7.945136988308232e-09, + "loss": 0.4115, + "step": 32060 + }, + { + "epoch": 0.9826222876057374, + "grad_norm": 1.872275505103735, + "learning_rate": 7.91719315930195e-09, + "loss": 0.5826, + "step": 32061 + }, + { + "epoch": 0.9826529361284786, + "grad_norm": 0.7502101885103806, + "learning_rate": 7.889298518565236e-09, + "loss": 0.3735, + "step": 32062 + }, + { + "epoch": 0.9826835846512199, + "grad_norm": 1.8885030631177957, + "learning_rate": 7.861453066372316e-09, + "loss": 0.5635, + "step": 32063 + }, + { + "epoch": 0.982714233173961, + "grad_norm": 1.963163099892888, + "learning_rate": 7.833656802997968e-09, + "loss": 0.567, + "step": 32064 + }, + { + "epoch": 0.9827448816967023, + "grad_norm": 2.176711093288529, + "learning_rate": 7.805909728715866e-09, + "loss": 0.587, + "step": 32065 + }, + { + "epoch": 0.9827755302194434, + "grad_norm": 1.9286608924434867, + "learning_rate": 7.778211843799122e-09, + "loss": 0.6022, + "step": 32066 + }, + { + "epoch": 0.9828061787421847, + "grad_norm": 1.8475393175055588, + "learning_rate": 7.750563148521406e-09, + "loss": 0.5389, + "step": 32067 + }, + { + "epoch": 0.9828368272649258, + "grad_norm": 1.7450833558515164, + "learning_rate": 7.722963643154169e-09, + "loss": 0.6028, + "step": 32068 + }, + { + "epoch": 0.9828674757876671, + "grad_norm": 1.81546661933752, + "learning_rate": 7.695413327970525e-09, + "loss": 0.5996, + "step": 32069 + }, + { + "epoch": 0.9828981243104082, + "grad_norm": 1.7848550767781883, + "learning_rate": 7.667912203240812e-09, + "loss": 0.505, + "step": 32070 + }, 
+ { + "epoch": 0.9829287728331494, + "grad_norm": 1.608312575381778, + "learning_rate": 7.640460269237038e-09, + "loss": 0.5159, + "step": 32071 + }, + { + "epoch": 0.9829594213558907, + "grad_norm": 2.260847731308854, + "learning_rate": 7.613057526228428e-09, + "loss": 0.5175, + "step": 32072 + }, + { + "epoch": 0.9829900698786318, + "grad_norm": 1.9460906587573807, + "learning_rate": 7.585703974486435e-09, + "loss": 0.528, + "step": 32073 + }, + { + "epoch": 0.9830207184013731, + "grad_norm": 0.8059224621895098, + "learning_rate": 7.558399614279732e-09, + "loss": 0.4049, + "step": 32074 + }, + { + "epoch": 0.9830513669241142, + "grad_norm": 1.9188064958298787, + "learning_rate": 7.531144445876993e-09, + "loss": 0.5401, + "step": 32075 + }, + { + "epoch": 0.9830820154468555, + "grad_norm": 0.7704400428127524, + "learning_rate": 7.503938469547444e-09, + "loss": 0.382, + "step": 32076 + }, + { + "epoch": 0.9831126639695966, + "grad_norm": 1.6786535080310283, + "learning_rate": 7.47678168555921e-09, + "loss": 0.5536, + "step": 32077 + }, + { + "epoch": 0.9831433124923379, + "grad_norm": 1.900212206573953, + "learning_rate": 7.449674094179848e-09, + "loss": 0.6077, + "step": 32078 + }, + { + "epoch": 0.983173961015079, + "grad_norm": 1.8441255479639869, + "learning_rate": 7.422615695675817e-09, + "loss": 0.5578, + "step": 32079 + }, + { + "epoch": 0.9832046095378203, + "grad_norm": 1.8946336959134495, + "learning_rate": 7.395606490314122e-09, + "loss": 0.625, + "step": 32080 + }, + { + "epoch": 0.9832352580605614, + "grad_norm": 2.187548534919168, + "learning_rate": 7.3686464783612185e-09, + "loss": 0.6327, + "step": 32081 + }, + { + "epoch": 0.9832659065833027, + "grad_norm": 1.9142993594413114, + "learning_rate": 7.34173566008245e-09, + "loss": 0.5092, + "step": 32082 + }, + { + "epoch": 0.9832965551060439, + "grad_norm": 1.9376258412856266, + "learning_rate": 7.3148740357426025e-09, + "loss": 0.5949, + "step": 32083 + }, + { + "epoch": 0.9833272036287851, + "grad_norm": 2.227633777829901, + "learning_rate": 7.288061605607022e-09, + "loss": 0.6297, + "step": 32084 + }, + { + "epoch": 0.9833578521515263, + "grad_norm": 0.7915539918779755, + "learning_rate": 7.261298369939939e-09, + "loss": 0.4092, + "step": 32085 + }, + { + "epoch": 0.9833885006742675, + "grad_norm": 2.3157230721266933, + "learning_rate": 7.234584329003924e-09, + "loss": 0.7563, + "step": 32086 + }, + { + "epoch": 0.9834191491970087, + "grad_norm": 2.09016037983261, + "learning_rate": 7.207919483063763e-09, + "loss": 0.5462, + "step": 32087 + }, + { + "epoch": 0.9834497977197499, + "grad_norm": 2.0126592238691483, + "learning_rate": 7.181303832380915e-09, + "loss": 0.6309, + "step": 32088 + }, + { + "epoch": 0.9834804462424911, + "grad_norm": 1.853168712974103, + "learning_rate": 7.154737377218501e-09, + "loss": 0.5679, + "step": 32089 + }, + { + "epoch": 0.9835110947652324, + "grad_norm": 0.7540866073230365, + "learning_rate": 7.12822011783798e-09, + "loss": 0.3785, + "step": 32090 + }, + { + "epoch": 0.9835417432879735, + "grad_norm": 1.7951255811127218, + "learning_rate": 7.101752054500255e-09, + "loss": 0.5491, + "step": 32091 + }, + { + "epoch": 0.9835723918107148, + "grad_norm": 1.878776439541849, + "learning_rate": 7.075333187466782e-09, + "loss": 0.5508, + "step": 32092 + }, + { + "epoch": 0.9836030403334559, + "grad_norm": 2.3282919791148395, + "learning_rate": 7.048963516997354e-09, + "loss": 0.5277, + "step": 32093 + }, + { + "epoch": 0.9836336888561972, + "grad_norm": 0.7642656812535754, + "learning_rate": 
7.022643043351762e-09, + "loss": 0.403, + "step": 32094 + }, + { + "epoch": 0.9836643373789383, + "grad_norm": 2.1034629572765544, + "learning_rate": 6.9963717667898e-09, + "loss": 0.5264, + "step": 32095 + }, + { + "epoch": 0.9836949859016796, + "grad_norm": 0.8102528323412399, + "learning_rate": 6.970149687570149e-09, + "loss": 0.3839, + "step": 32096 + }, + { + "epoch": 0.9837256344244207, + "grad_norm": 2.049003035760739, + "learning_rate": 6.943976805950936e-09, + "loss": 0.6175, + "step": 32097 + }, + { + "epoch": 0.983756282947162, + "grad_norm": 1.9148311258387554, + "learning_rate": 6.917853122190843e-09, + "loss": 0.5547, + "step": 32098 + }, + { + "epoch": 0.9837869314699031, + "grad_norm": 1.7783633575719633, + "learning_rate": 6.891778636546331e-09, + "loss": 0.5661, + "step": 32099 + }, + { + "epoch": 0.9838175799926444, + "grad_norm": 1.917440205511402, + "learning_rate": 6.865753349274418e-09, + "loss": 0.6295, + "step": 32100 + }, + { + "epoch": 0.9838482285153856, + "grad_norm": 2.1239229945077205, + "learning_rate": 6.8397772606315635e-09, + "loss": 0.5803, + "step": 32101 + }, + { + "epoch": 0.9838788770381267, + "grad_norm": 1.9828564465780258, + "learning_rate": 6.813850370874786e-09, + "loss": 0.6254, + "step": 32102 + }, + { + "epoch": 0.983909525560868, + "grad_norm": 1.9164695195599697, + "learning_rate": 6.78797268025777e-09, + "loss": 0.4881, + "step": 32103 + }, + { + "epoch": 0.9839401740836091, + "grad_norm": 0.8288615014375638, + "learning_rate": 6.762144189036978e-09, + "loss": 0.3843, + "step": 32104 + }, + { + "epoch": 0.9839708226063504, + "grad_norm": 1.9576406027012268, + "learning_rate": 6.7363648974666514e-09, + "loss": 0.5717, + "step": 32105 + }, + { + "epoch": 0.9840014711290915, + "grad_norm": 1.9675983762993101, + "learning_rate": 6.710634805799921e-09, + "loss": 0.5548, + "step": 32106 + }, + { + "epoch": 0.9840321196518328, + "grad_norm": 1.9821035713678428, + "learning_rate": 6.684953914291026e-09, + "loss": 0.5721, + "step": 32107 + }, + { + "epoch": 0.9840627681745739, + "grad_norm": 2.037490932562613, + "learning_rate": 6.659322223193098e-09, + "loss": 0.683, + "step": 32108 + }, + { + "epoch": 0.9840934166973152, + "grad_norm": 1.8009626955086522, + "learning_rate": 6.63373973275816e-09, + "loss": 0.5198, + "step": 32109 + }, + { + "epoch": 0.9841240652200564, + "grad_norm": 2.0576698754037817, + "learning_rate": 6.608206443238785e-09, + "loss": 0.6509, + "step": 32110 + }, + { + "epoch": 0.9841547137427976, + "grad_norm": 1.7626166457613506, + "learning_rate": 6.58272235488644e-09, + "loss": 0.5426, + "step": 32111 + }, + { + "epoch": 0.9841853622655388, + "grad_norm": 2.017501333228605, + "learning_rate": 6.557287467952034e-09, + "loss": 0.5069, + "step": 32112 + }, + { + "epoch": 0.98421601078828, + "grad_norm": 1.8207007909368162, + "learning_rate": 6.531901782686478e-09, + "loss": 0.5569, + "step": 32113 + }, + { + "epoch": 0.9842466593110212, + "grad_norm": 1.9310399962267562, + "learning_rate": 6.5065652993395736e-09, + "loss": 0.6457, + "step": 32114 + }, + { + "epoch": 0.9842773078337624, + "grad_norm": 1.8913628859625735, + "learning_rate": 6.481278018161119e-09, + "loss": 0.5564, + "step": 32115 + }, + { + "epoch": 0.9843079563565036, + "grad_norm": 1.9059668257479678, + "learning_rate": 6.45603993940036e-09, + "loss": 0.6091, + "step": 32116 + }, + { + "epoch": 0.9843386048792448, + "grad_norm": 1.8394968915530427, + "learning_rate": 6.430851063305432e-09, + "loss": 0.5864, + "step": 32117 + }, + { + "epoch": 
0.984369253401986, + "grad_norm": 1.765680239184946, + "learning_rate": 6.40571139012558e-09, + "loss": 0.5003, + "step": 32118 + }, + { + "epoch": 0.9843999019247273, + "grad_norm": 1.874694871162933, + "learning_rate": 6.380620920107827e-09, + "loss": 0.5733, + "step": 32119 + }, + { + "epoch": 0.9844305504474684, + "grad_norm": 2.0646488986050153, + "learning_rate": 6.3555796534992e-09, + "loss": 0.6085, + "step": 32120 + }, + { + "epoch": 0.9844611989702097, + "grad_norm": 2.178370899113514, + "learning_rate": 6.330587590546722e-09, + "loss": 0.5966, + "step": 32121 + }, + { + "epoch": 0.9844918474929508, + "grad_norm": 2.1521574512247774, + "learning_rate": 6.305644731496863e-09, + "loss": 0.5764, + "step": 32122 + }, + { + "epoch": 0.9845224960156921, + "grad_norm": 2.0071922786927248, + "learning_rate": 6.280751076594982e-09, + "loss": 0.5773, + "step": 32123 + }, + { + "epoch": 0.9845531445384332, + "grad_norm": 2.016206331089607, + "learning_rate": 6.255906626086994e-09, + "loss": 0.5245, + "step": 32124 + }, + { + "epoch": 0.9845837930611745, + "grad_norm": 1.8835427417200508, + "learning_rate": 6.231111380217147e-09, + "loss": 0.6246, + "step": 32125 + }, + { + "epoch": 0.9846144415839156, + "grad_norm": 0.7637162871147954, + "learning_rate": 6.206365339229692e-09, + "loss": 0.3885, + "step": 32126 + }, + { + "epoch": 0.9846450901066569, + "grad_norm": 1.9619666079586087, + "learning_rate": 6.181668503368321e-09, + "loss": 0.6558, + "step": 32127 + }, + { + "epoch": 0.9846757386293981, + "grad_norm": 1.9181336744683208, + "learning_rate": 6.157020872877284e-09, + "loss": 0.6169, + "step": 32128 + }, + { + "epoch": 0.9847063871521393, + "grad_norm": 2.031251411936697, + "learning_rate": 6.13242244799861e-09, + "loss": 0.603, + "step": 32129 + }, + { + "epoch": 0.9847370356748805, + "grad_norm": 2.055838831414465, + "learning_rate": 6.107873228974881e-09, + "loss": 0.5197, + "step": 32130 + }, + { + "epoch": 0.9847676841976217, + "grad_norm": 1.8676322748424878, + "learning_rate": 6.083373216048127e-09, + "loss": 0.5321, + "step": 32131 + }, + { + "epoch": 0.9847983327203629, + "grad_norm": 1.8869173392774625, + "learning_rate": 6.058922409459267e-09, + "loss": 0.5943, + "step": 32132 + }, + { + "epoch": 0.984828981243104, + "grad_norm": 1.8616059360517128, + "learning_rate": 6.034520809449773e-09, + "loss": 0.6827, + "step": 32133 + }, + { + "epoch": 0.9848596297658453, + "grad_norm": 2.3401479587616763, + "learning_rate": 6.01016841626001e-09, + "loss": 0.6086, + "step": 32134 + }, + { + "epoch": 0.9848902782885864, + "grad_norm": 1.7526917834913114, + "learning_rate": 5.985865230129784e-09, + "loss": 0.5569, + "step": 32135 + }, + { + "epoch": 0.9849209268113277, + "grad_norm": 0.7863599701782031, + "learning_rate": 5.961611251298904e-09, + "loss": 0.3944, + "step": 32136 + }, + { + "epoch": 0.9849515753340689, + "grad_norm": 1.597700851730749, + "learning_rate": 5.9374064800060695e-09, + "loss": 0.5221, + "step": 32137 + }, + { + "epoch": 0.9849822238568101, + "grad_norm": 1.9416851198001104, + "learning_rate": 5.9132509164888664e-09, + "loss": 0.5368, + "step": 32138 + }, + { + "epoch": 0.9850128723795513, + "grad_norm": 1.7338769730102026, + "learning_rate": 5.889144560987103e-09, + "loss": 0.554, + "step": 32139 + }, + { + "epoch": 0.9850435209022925, + "grad_norm": 2.000654787603007, + "learning_rate": 5.8650874137372586e-09, + "loss": 0.5657, + "step": 32140 + }, + { + "epoch": 0.9850741694250337, + "grad_norm": 1.8206627709968017, + "learning_rate": 
5.841079474976363e-09, + "loss": 0.5789, + "step": 32141 + }, + { + "epoch": 0.9851048179477749, + "grad_norm": 1.9423160418477605, + "learning_rate": 5.817120744940896e-09, + "loss": 0.564, + "step": 32142 + }, + { + "epoch": 0.9851354664705161, + "grad_norm": 1.751477953167837, + "learning_rate": 5.793211223867334e-09, + "loss": 0.569, + "step": 32143 + }, + { + "epoch": 0.9851661149932573, + "grad_norm": 1.6884098950256825, + "learning_rate": 5.7693509119910455e-09, + "loss": 0.6115, + "step": 32144 + }, + { + "epoch": 0.9851967635159985, + "grad_norm": 1.7879168427286751, + "learning_rate": 5.745539809547396e-09, + "loss": 0.5534, + "step": 32145 + }, + { + "epoch": 0.9852274120387398, + "grad_norm": 1.92737118961795, + "learning_rate": 5.721777916770643e-09, + "loss": 0.5658, + "step": 32146 + }, + { + "epoch": 0.9852580605614809, + "grad_norm": 2.152723209755073, + "learning_rate": 5.698065233895045e-09, + "loss": 0.5892, + "step": 32147 + }, + { + "epoch": 0.9852887090842222, + "grad_norm": 1.8250826780639031, + "learning_rate": 5.674401761154302e-09, + "loss": 0.462, + "step": 32148 + }, + { + "epoch": 0.9853193576069633, + "grad_norm": 0.7683808374996174, + "learning_rate": 5.650787498781563e-09, + "loss": 0.3938, + "step": 32149 + }, + { + "epoch": 0.9853500061297046, + "grad_norm": 0.7897190747097925, + "learning_rate": 5.627222447009417e-09, + "loss": 0.409, + "step": 32150 + }, + { + "epoch": 0.9853806546524457, + "grad_norm": 1.814815050031637, + "learning_rate": 5.603706606069903e-09, + "loss": 0.542, + "step": 32151 + }, + { + "epoch": 0.985411303175187, + "grad_norm": 1.8318769292853074, + "learning_rate": 5.580239976195057e-09, + "loss": 0.5909, + "step": 32152 + }, + { + "epoch": 0.9854419516979281, + "grad_norm": 2.135524915053273, + "learning_rate": 5.556822557615804e-09, + "loss": 0.5119, + "step": 32153 + }, + { + "epoch": 0.9854726002206694, + "grad_norm": 1.9526865604786987, + "learning_rate": 5.5334543505636275e-09, + "loss": 0.5729, + "step": 32154 + }, + { + "epoch": 0.9855032487434106, + "grad_norm": 0.8601601827691663, + "learning_rate": 5.5101353552677876e-09, + "loss": 0.3756, + "step": 32155 + }, + { + "epoch": 0.9855338972661518, + "grad_norm": 1.9516135008344637, + "learning_rate": 5.486865571958655e-09, + "loss": 0.6055, + "step": 32156 + }, + { + "epoch": 0.985564545788893, + "grad_norm": 1.843114779832284, + "learning_rate": 5.463645000864937e-09, + "loss": 0.6545, + "step": 32157 + }, + { + "epoch": 0.9855951943116342, + "grad_norm": 1.9256268350171313, + "learning_rate": 5.440473642216449e-09, + "loss": 0.543, + "step": 32158 + }, + { + "epoch": 0.9856258428343754, + "grad_norm": 0.823210495446886, + "learning_rate": 5.417351496240786e-09, + "loss": 0.3929, + "step": 32159 + }, + { + "epoch": 0.9856564913571166, + "grad_norm": 2.0821418785032537, + "learning_rate": 5.3942785631655444e-09, + "loss": 0.6489, + "step": 32160 + }, + { + "epoch": 0.9856871398798578, + "grad_norm": 0.8114654272372399, + "learning_rate": 5.371254843218321e-09, + "loss": 0.4128, + "step": 32161 + }, + { + "epoch": 0.985717788402599, + "grad_norm": 1.870478169197147, + "learning_rate": 5.34828033662671e-09, + "loss": 0.5324, + "step": 32162 + }, + { + "epoch": 0.9857484369253402, + "grad_norm": 1.9983496152994686, + "learning_rate": 5.325355043615532e-09, + "loss": 0.6408, + "step": 32163 + }, + { + "epoch": 0.9857790854480813, + "grad_norm": 2.02811786070875, + "learning_rate": 5.302478964412383e-09, + "loss": 0.6626, + "step": 32164 + }, + { + "epoch": 0.9858097339708226, + 
"grad_norm": 0.8425484653265515, + "learning_rate": 5.279652099241528e-09, + "loss": 0.4039, + "step": 32165 + }, + { + "epoch": 0.9858403824935638, + "grad_norm": 1.7402283314858409, + "learning_rate": 5.256874448328342e-09, + "loss": 0.5148, + "step": 32166 + }, + { + "epoch": 0.985871031016305, + "grad_norm": 2.1824515427200377, + "learning_rate": 5.23414601189709e-09, + "loss": 0.5608, + "step": 32167 + }, + { + "epoch": 0.9859016795390462, + "grad_norm": 1.8802516674925316, + "learning_rate": 5.211466790171482e-09, + "loss": 0.5672, + "step": 32168 + }, + { + "epoch": 0.9859323280617874, + "grad_norm": 1.974438411456843, + "learning_rate": 5.188836783375228e-09, + "loss": 0.4974, + "step": 32169 + }, + { + "epoch": 0.9859629765845286, + "grad_norm": 1.8956857602609283, + "learning_rate": 5.166255991731484e-09, + "loss": 0.5133, + "step": 32170 + }, + { + "epoch": 0.9859936251072698, + "grad_norm": 1.860621978447314, + "learning_rate": 5.143724415462847e-09, + "loss": 0.507, + "step": 32171 + }, + { + "epoch": 0.986024273630011, + "grad_norm": 1.7526104171923664, + "learning_rate": 5.1212420547908095e-09, + "loss": 0.5225, + "step": 32172 + }, + { + "epoch": 0.9860549221527523, + "grad_norm": 2.157527357259956, + "learning_rate": 5.098808909937414e-09, + "loss": 0.5916, + "step": 32173 + }, + { + "epoch": 0.9860855706754934, + "grad_norm": 1.9719745325863633, + "learning_rate": 5.07642498112304e-09, + "loss": 0.5671, + "step": 32174 + }, + { + "epoch": 0.9861162191982347, + "grad_norm": 1.8587645937059005, + "learning_rate": 5.054090268569178e-09, + "loss": 0.5891, + "step": 32175 + }, + { + "epoch": 0.9861468677209758, + "grad_norm": 0.8284946626324323, + "learning_rate": 5.031804772495097e-09, + "loss": 0.3931, + "step": 32176 + }, + { + "epoch": 0.9861775162437171, + "grad_norm": 1.8605764848143003, + "learning_rate": 5.00956849312062e-09, + "loss": 0.6033, + "step": 32177 + }, + { + "epoch": 0.9862081647664582, + "grad_norm": 0.7819590517040335, + "learning_rate": 4.987381430665017e-09, + "loss": 0.411, + "step": 32178 + }, + { + "epoch": 0.9862388132891995, + "grad_norm": 1.8504504902410337, + "learning_rate": 4.965243585346447e-09, + "loss": 0.5194, + "step": 32179 + }, + { + "epoch": 0.9862694618119406, + "grad_norm": 1.8975039924903372, + "learning_rate": 4.943154957384177e-09, + "loss": 0.5677, + "step": 32180 + }, + { + "epoch": 0.9863001103346819, + "grad_norm": 1.9092605578129695, + "learning_rate": 4.921115546994148e-09, + "loss": 0.5733, + "step": 32181 + }, + { + "epoch": 0.986330758857423, + "grad_norm": 0.7797848914976064, + "learning_rate": 4.899125354395074e-09, + "loss": 0.3769, + "step": 32182 + }, + { + "epoch": 0.9863614073801643, + "grad_norm": 2.0503409407295727, + "learning_rate": 4.877184379802335e-09, + "loss": 0.6076, + "step": 32183 + }, + { + "epoch": 0.9863920559029055, + "grad_norm": 1.6344436633345008, + "learning_rate": 4.855292623432983e-09, + "loss": 0.4878, + "step": 32184 + }, + { + "epoch": 0.9864227044256467, + "grad_norm": 1.788843673450308, + "learning_rate": 4.8334500855029555e-09, + "loss": 0.5557, + "step": 32185 + }, + { + "epoch": 0.9864533529483879, + "grad_norm": 1.932931227417717, + "learning_rate": 4.811656766226524e-09, + "loss": 0.5843, + "step": 32186 + }, + { + "epoch": 0.9864840014711291, + "grad_norm": 1.8632941887458132, + "learning_rate": 4.7899126658190745e-09, + "loss": 0.602, + "step": 32187 + }, + { + "epoch": 0.9865146499938703, + "grad_norm": 1.9471303721456579, + "learning_rate": 4.7682177844948775e-09, + "loss": 
0.6263, + "step": 32188 + }, + { + "epoch": 0.9865452985166115, + "grad_norm": 2.1477313127025974, + "learning_rate": 4.746572122467097e-09, + "loss": 0.5013, + "step": 32189 + }, + { + "epoch": 0.9865759470393527, + "grad_norm": 1.9570904364160888, + "learning_rate": 4.72497567994945e-09, + "loss": 0.5457, + "step": 32190 + }, + { + "epoch": 0.986606595562094, + "grad_norm": 2.2215150925256593, + "learning_rate": 4.703428457155102e-09, + "loss": 0.5239, + "step": 32191 + }, + { + "epoch": 0.9866372440848351, + "grad_norm": 1.850322505438101, + "learning_rate": 4.681930454295547e-09, + "loss": 0.579, + "step": 32192 + }, + { + "epoch": 0.9866678926075764, + "grad_norm": 1.8449529303632448, + "learning_rate": 4.660481671583394e-09, + "loss": 0.5897, + "step": 32193 + }, + { + "epoch": 0.9866985411303175, + "grad_norm": 2.0695083336910955, + "learning_rate": 4.639082109229587e-09, + "loss": 0.6, + "step": 32194 + }, + { + "epoch": 0.9867291896530587, + "grad_norm": 2.0245268423535983, + "learning_rate": 4.617731767445066e-09, + "loss": 0.5491, + "step": 32195 + }, + { + "epoch": 0.9867598381757999, + "grad_norm": 1.8629116248158342, + "learning_rate": 4.596430646439664e-09, + "loss": 0.5178, + "step": 32196 + }, + { + "epoch": 0.9867904866985411, + "grad_norm": 2.0246353052794097, + "learning_rate": 4.575178746424324e-09, + "loss": 0.5724, + "step": 32197 + }, + { + "epoch": 0.9868211352212823, + "grad_norm": 1.822492455233751, + "learning_rate": 4.553976067607768e-09, + "loss": 0.5321, + "step": 32198 + }, + { + "epoch": 0.9868517837440235, + "grad_norm": 1.8283371594830122, + "learning_rate": 4.532822610198717e-09, + "loss": 0.5569, + "step": 32199 + }, + { + "epoch": 0.9868824322667648, + "grad_norm": 1.86182680842588, + "learning_rate": 4.511718374406448e-09, + "loss": 0.5186, + "step": 32200 + }, + { + "epoch": 0.9869130807895059, + "grad_norm": 1.795125767724452, + "learning_rate": 4.490663360438019e-09, + "loss": 0.502, + "step": 32201 + }, + { + "epoch": 0.9869437293122472, + "grad_norm": 1.8729907667626347, + "learning_rate": 4.4696575685010406e-09, + "loss": 0.582, + "step": 32202 + }, + { + "epoch": 0.9869743778349883, + "grad_norm": 1.9859984896924678, + "learning_rate": 4.448700998803124e-09, + "loss": 0.5097, + "step": 32203 + }, + { + "epoch": 0.9870050263577296, + "grad_norm": 2.128585990631028, + "learning_rate": 4.427793651550216e-09, + "loss": 0.6388, + "step": 32204 + }, + { + "epoch": 0.9870356748804707, + "grad_norm": 1.805038700210401, + "learning_rate": 4.406935526948264e-09, + "loss": 0.5265, + "step": 32205 + }, + { + "epoch": 0.987066323403212, + "grad_norm": 2.110387490300948, + "learning_rate": 4.386126625202658e-09, + "loss": 0.6389, + "step": 32206 + }, + { + "epoch": 0.9870969719259531, + "grad_norm": 0.7730748247702898, + "learning_rate": 4.365366946519345e-09, + "loss": 0.3726, + "step": 32207 + }, + { + "epoch": 0.9871276204486944, + "grad_norm": 2.048451452045281, + "learning_rate": 4.34465649110205e-09, + "loss": 0.6265, + "step": 32208 + }, + { + "epoch": 0.9871582689714355, + "grad_norm": 2.1757032719535885, + "learning_rate": 4.323995259155056e-09, + "loss": 0.6229, + "step": 32209 + }, + { + "epoch": 0.9871889174941768, + "grad_norm": 0.7941359928635768, + "learning_rate": 4.3033832508815325e-09, + "loss": 0.4191, + "step": 32210 + }, + { + "epoch": 0.987219566016918, + "grad_norm": 0.7694174650853886, + "learning_rate": 4.28282046648576e-09, + "loss": 0.4072, + "step": 32211 + }, + { + "epoch": 0.9872502145396592, + "grad_norm": 1.7265407166560198, + 
"learning_rate": 4.262306906168689e-09, + "loss": 0.5766, + "step": 32212 + }, + { + "epoch": 0.9872808630624004, + "grad_norm": 2.1932803561001672, + "learning_rate": 4.241842570134047e-09, + "loss": 0.6062, + "step": 32213 + }, + { + "epoch": 0.9873115115851416, + "grad_norm": 1.8117841679690938, + "learning_rate": 4.221427458582228e-09, + "loss": 0.6357, + "step": 32214 + }, + { + "epoch": 0.9873421601078828, + "grad_norm": 1.8173269500741098, + "learning_rate": 4.201061571715292e-09, + "loss": 0.585, + "step": 32215 + }, + { + "epoch": 0.987372808630624, + "grad_norm": 1.9037973565945123, + "learning_rate": 4.180744909733636e-09, + "loss": 0.633, + "step": 32216 + }, + { + "epoch": 0.9874034571533652, + "grad_norm": 1.9612720374069939, + "learning_rate": 4.160477472837099e-09, + "loss": 0.6893, + "step": 32217 + }, + { + "epoch": 0.9874341056761065, + "grad_norm": 1.936639053211424, + "learning_rate": 4.140259261225521e-09, + "loss": 0.6218, + "step": 32218 + }, + { + "epoch": 0.9874647541988476, + "grad_norm": 0.8475129935707264, + "learning_rate": 4.120090275098187e-09, + "loss": 0.4137, + "step": 32219 + }, + { + "epoch": 0.9874954027215889, + "grad_norm": 2.1199289667982555, + "learning_rate": 4.099970514653828e-09, + "loss": 0.5593, + "step": 32220 + }, + { + "epoch": 0.98752605124433, + "grad_norm": 1.8428306815754096, + "learning_rate": 4.079899980091173e-09, + "loss": 0.5747, + "step": 32221 + }, + { + "epoch": 0.9875566997670713, + "grad_norm": 1.7984612543243763, + "learning_rate": 4.059878671607287e-09, + "loss": 0.6101, + "step": 32222 + }, + { + "epoch": 0.9875873482898124, + "grad_norm": 0.763883657444421, + "learning_rate": 4.039906589399234e-09, + "loss": 0.4004, + "step": 32223 + }, + { + "epoch": 0.9876179968125537, + "grad_norm": 2.1004638525357064, + "learning_rate": 4.019983733664634e-09, + "loss": 0.6418, + "step": 32224 + }, + { + "epoch": 0.9876486453352948, + "grad_norm": 2.0379004514972783, + "learning_rate": 4.000110104599442e-09, + "loss": 0.4931, + "step": 32225 + }, + { + "epoch": 0.987679293858036, + "grad_norm": 0.783448930171779, + "learning_rate": 3.980285702399611e-09, + "loss": 0.398, + "step": 32226 + }, + { + "epoch": 0.9877099423807773, + "grad_norm": 1.990666501569811, + "learning_rate": 3.960510527259986e-09, + "loss": 0.546, + "step": 32227 + }, + { + "epoch": 0.9877405909035184, + "grad_norm": 1.778667597230556, + "learning_rate": 3.9407845793759665e-09, + "loss": 0.5701, + "step": 32228 + }, + { + "epoch": 0.9877712394262597, + "grad_norm": 1.931221792540909, + "learning_rate": 3.921107858941287e-09, + "loss": 0.5922, + "step": 32229 + }, + { + "epoch": 0.9878018879490008, + "grad_norm": 1.979304449793369, + "learning_rate": 3.90148036615079e-09, + "loss": 0.6101, + "step": 32230 + }, + { + "epoch": 0.9878325364717421, + "grad_norm": 2.068155465243968, + "learning_rate": 3.881902101197099e-09, + "loss": 0.5447, + "step": 32231 + }, + { + "epoch": 0.9878631849944832, + "grad_norm": 1.8467232784039986, + "learning_rate": 3.862373064273395e-09, + "loss": 0.5331, + "step": 32232 + }, + { + "epoch": 0.9878938335172245, + "grad_norm": 2.280171031500444, + "learning_rate": 3.842893255571745e-09, + "loss": 0.5653, + "step": 32233 + }, + { + "epoch": 0.9879244820399656, + "grad_norm": 1.8540085945340654, + "learning_rate": 3.823462675284772e-09, + "loss": 0.5827, + "step": 32234 + }, + { + "epoch": 0.9879551305627069, + "grad_norm": 2.135988259963244, + "learning_rate": 3.804081323603437e-09, + "loss": 0.5343, + "step": 32235 + }, + { + "epoch": 
0.987985779085448, + "grad_norm": 1.803590683291635, + "learning_rate": 3.784749200718696e-09, + "loss": 0.5851, + "step": 32236 + }, + { + "epoch": 0.9880164276081893, + "grad_norm": 0.8003353686763306, + "learning_rate": 3.765466306820953e-09, + "loss": 0.3845, + "step": 32237 + }, + { + "epoch": 0.9880470761309305, + "grad_norm": 1.6982522956613724, + "learning_rate": 3.746232642100611e-09, + "loss": 0.5453, + "step": 32238 + }, + { + "epoch": 0.9880777246536717, + "grad_norm": 1.9145183490635151, + "learning_rate": 3.727048206746964e-09, + "loss": 0.5286, + "step": 32239 + }, + { + "epoch": 0.9881083731764129, + "grad_norm": 0.7651950476585828, + "learning_rate": 3.7079130009493035e-09, + "loss": 0.3825, + "step": 32240 + }, + { + "epoch": 0.9881390216991541, + "grad_norm": 1.95574396627305, + "learning_rate": 3.6888270248958136e-09, + "loss": 0.547, + "step": 32241 + }, + { + "epoch": 0.9881696702218953, + "grad_norm": 1.750787758012265, + "learning_rate": 3.6697902787746763e-09, + "loss": 0.6026, + "step": 32242 + }, + { + "epoch": 0.9882003187446365, + "grad_norm": 1.841225239114983, + "learning_rate": 3.6508027627735198e-09, + "loss": 0.574, + "step": 32243 + }, + { + "epoch": 0.9882309672673777, + "grad_norm": 0.763162597280135, + "learning_rate": 3.6318644770788613e-09, + "loss": 0.3976, + "step": 32244 + }, + { + "epoch": 0.988261615790119, + "grad_norm": 1.9142805769689704, + "learning_rate": 3.6129754218783286e-09, + "loss": 0.5253, + "step": 32245 + }, + { + "epoch": 0.9882922643128601, + "grad_norm": 1.8516854655785686, + "learning_rate": 3.5941355973573288e-09, + "loss": 0.5351, + "step": 32246 + }, + { + "epoch": 0.9883229128356014, + "grad_norm": 2.0159087513926206, + "learning_rate": 3.5753450037018244e-09, + "loss": 0.5908, + "step": 32247 + }, + { + "epoch": 0.9883535613583425, + "grad_norm": 1.8120194463923358, + "learning_rate": 3.556603641097223e-09, + "loss": 0.5788, + "step": 32248 + }, + { + "epoch": 0.9883842098810838, + "grad_norm": 1.8048947764614445, + "learning_rate": 3.5379115097272655e-09, + "loss": 0.5309, + "step": 32249 + }, + { + "epoch": 0.9884148584038249, + "grad_norm": 1.9394321370093994, + "learning_rate": 3.5192686097768045e-09, + "loss": 0.4979, + "step": 32250 + }, + { + "epoch": 0.9884455069265662, + "grad_norm": 0.8637550157744022, + "learning_rate": 3.5006749414295825e-09, + "loss": 0.3843, + "step": 32251 + }, + { + "epoch": 0.9884761554493073, + "grad_norm": 2.3426402918538076, + "learning_rate": 3.482130504868231e-09, + "loss": 0.6138, + "step": 32252 + }, + { + "epoch": 0.9885068039720486, + "grad_norm": 1.9899286388729285, + "learning_rate": 3.463635300275936e-09, + "loss": 0.5137, + "step": 32253 + }, + { + "epoch": 0.9885374524947897, + "grad_norm": 1.774475995704833, + "learning_rate": 3.445189327834775e-09, + "loss": 0.551, + "step": 32254 + }, + { + "epoch": 0.988568101017531, + "grad_norm": 0.7697518250217043, + "learning_rate": 3.4267925877268238e-09, + "loss": 0.4009, + "step": 32255 + }, + { + "epoch": 0.9885987495402722, + "grad_norm": 2.3643913254875857, + "learning_rate": 3.4084450801330493e-09, + "loss": 0.6988, + "step": 32256 + }, + { + "epoch": 0.9886293980630133, + "grad_norm": 1.9143391676352595, + "learning_rate": 3.3901468052344177e-09, + "loss": 0.5332, + "step": 32257 + }, + { + "epoch": 0.9886600465857546, + "grad_norm": 1.8770483870358863, + "learning_rate": 3.3718977632113404e-09, + "loss": 0.6108, + "step": 32258 + }, + { + "epoch": 0.9886906951084957, + "grad_norm": 1.890287187003058, + "learning_rate": 
3.353697954243118e-09, + "loss": 0.5766, + "step": 32259 + }, + { + "epoch": 0.988721343631237, + "grad_norm": 1.9541312728695222, + "learning_rate": 3.335547378509052e-09, + "loss": 0.6227, + "step": 32260 + }, + { + "epoch": 0.9887519921539781, + "grad_norm": 1.9635470913781583, + "learning_rate": 3.3174460361884432e-09, + "loss": 0.5995, + "step": 32261 + }, + { + "epoch": 0.9887826406767194, + "grad_norm": 1.9824097759397545, + "learning_rate": 3.2993939274594823e-09, + "loss": 0.5479, + "step": 32262 + }, + { + "epoch": 0.9888132891994605, + "grad_norm": 1.6946325778826274, + "learning_rate": 3.28139105250036e-09, + "loss": 0.5798, + "step": 32263 + }, + { + "epoch": 0.9888439377222018, + "grad_norm": 1.735656938267684, + "learning_rate": 3.2634374114881574e-09, + "loss": 0.5829, + "step": 32264 + }, + { + "epoch": 0.988874586244943, + "grad_norm": 2.2283621347997222, + "learning_rate": 3.2455330045993994e-09, + "loss": 0.5784, + "step": 32265 + }, + { + "epoch": 0.9889052347676842, + "grad_norm": 1.9441022850304974, + "learning_rate": 3.2276778320111666e-09, + "loss": 0.5276, + "step": 32266 + }, + { + "epoch": 0.9889358832904254, + "grad_norm": 1.7023372294551942, + "learning_rate": 3.209871893898875e-09, + "loss": 0.5316, + "step": 32267 + }, + { + "epoch": 0.9889665318131666, + "grad_norm": 0.7771233260374095, + "learning_rate": 3.192115190438494e-09, + "loss": 0.3965, + "step": 32268 + }, + { + "epoch": 0.9889971803359078, + "grad_norm": 1.850064781517279, + "learning_rate": 3.174407721804329e-09, + "loss": 0.6092, + "step": 32269 + }, + { + "epoch": 0.989027828858649, + "grad_norm": 1.9024669561725371, + "learning_rate": 3.15674948817124e-09, + "loss": 0.5878, + "step": 32270 + }, + { + "epoch": 0.9890584773813902, + "grad_norm": 1.9714840647295342, + "learning_rate": 3.1391404897135323e-09, + "loss": 0.5802, + "step": 32271 + }, + { + "epoch": 0.9890891259041314, + "grad_norm": 1.8054647553765486, + "learning_rate": 3.121580726604401e-09, + "loss": 0.4758, + "step": 32272 + }, + { + "epoch": 0.9891197744268726, + "grad_norm": 1.7596961999436902, + "learning_rate": 3.1040701990164844e-09, + "loss": 0.4517, + "step": 32273 + }, + { + "epoch": 0.9891504229496139, + "grad_norm": 1.8328337935918229, + "learning_rate": 3.086608907122979e-09, + "loss": 0.6212, + "step": 32274 + }, + { + "epoch": 0.989181071472355, + "grad_norm": 1.9732698676740656, + "learning_rate": 3.069196851095413e-09, + "loss": 0.5799, + "step": 32275 + }, + { + "epoch": 0.9892117199950963, + "grad_norm": 1.76601797804861, + "learning_rate": 3.051834031105316e-09, + "loss": 0.5354, + "step": 32276 + }, + { + "epoch": 0.9892423685178374, + "grad_norm": 1.9544380476330125, + "learning_rate": 3.0345204473247735e-09, + "loss": 0.5322, + "step": 32277 + }, + { + "epoch": 0.9892730170405787, + "grad_norm": 1.8089045690079744, + "learning_rate": 3.0172560999230937e-09, + "loss": 0.4589, + "step": 32278 + }, + { + "epoch": 0.9893036655633198, + "grad_norm": 1.9130724069265286, + "learning_rate": 3.000040989071251e-09, + "loss": 0.4429, + "step": 32279 + }, + { + "epoch": 0.9893343140860611, + "grad_norm": 0.7910041515565472, + "learning_rate": 2.9828751149379997e-09, + "loss": 0.4162, + "step": 32280 + }, + { + "epoch": 0.9893649626088022, + "grad_norm": 2.018214970642845, + "learning_rate": 2.9657584776932035e-09, + "loss": 0.575, + "step": 32281 + }, + { + "epoch": 0.9893956111315435, + "grad_norm": 1.9808614313547368, + "learning_rate": 2.9486910775056165e-09, + "loss": 0.6024, + "step": 32282 + }, + { + "epoch": 
0.9894262596542847, + "grad_norm": 1.7242514193103176, + "learning_rate": 2.9316729145428825e-09, + "loss": 0.4989, + "step": 32283 + }, + { + "epoch": 0.9894569081770259, + "grad_norm": 1.8037851597943726, + "learning_rate": 2.9147039889731997e-09, + "loss": 0.5533, + "step": 32284 + }, + { + "epoch": 0.9894875566997671, + "grad_norm": 0.8117418259629621, + "learning_rate": 2.8977843009631025e-09, + "loss": 0.4018, + "step": 32285 + }, + { + "epoch": 0.9895182052225083, + "grad_norm": 1.7872813320001866, + "learning_rate": 2.8809138506802338e-09, + "loss": 0.5254, + "step": 32286 + }, + { + "epoch": 0.9895488537452495, + "grad_norm": 1.8771045727603848, + "learning_rate": 2.864092638290017e-09, + "loss": 0.5976, + "step": 32287 + }, + { + "epoch": 0.9895795022679906, + "grad_norm": 1.9046268586798567, + "learning_rate": 2.8473206639584307e-09, + "loss": 0.5828, + "step": 32288 + }, + { + "epoch": 0.9896101507907319, + "grad_norm": 1.8584435656437983, + "learning_rate": 2.8305979278508977e-09, + "loss": 0.5862, + "step": 32289 + }, + { + "epoch": 0.989640799313473, + "grad_norm": 2.1937332462833394, + "learning_rate": 2.8139244301317316e-09, + "loss": 0.6171, + "step": 32290 + }, + { + "epoch": 0.9896714478362143, + "grad_norm": 1.7031239252346566, + "learning_rate": 2.7973001709658e-09, + "loss": 0.5366, + "step": 32291 + }, + { + "epoch": 0.9897020963589555, + "grad_norm": 0.8070005581951195, + "learning_rate": 2.7807251505168608e-09, + "loss": 0.3724, + "step": 32292 + }, + { + "epoch": 0.9897327448816967, + "grad_norm": 2.1978367450161964, + "learning_rate": 2.7641993689475623e-09, + "loss": 0.5574, + "step": 32293 + }, + { + "epoch": 0.9897633934044379, + "grad_norm": 1.9507128715783832, + "learning_rate": 2.7477228264216614e-09, + "loss": 0.5572, + "step": 32294 + }, + { + "epoch": 0.9897940419271791, + "grad_norm": 1.7654928441353481, + "learning_rate": 2.7312955231006966e-09, + "loss": 0.5002, + "step": 32295 + }, + { + "epoch": 0.9898246904499203, + "grad_norm": 1.726081073698921, + "learning_rate": 2.7149174591467597e-09, + "loss": 0.6059, + "step": 32296 + }, + { + "epoch": 0.9898553389726615, + "grad_norm": 1.7044705953724408, + "learning_rate": 2.6985886347219438e-09, + "loss": 0.5695, + "step": 32297 + }, + { + "epoch": 0.9898859874954027, + "grad_norm": 0.8130417859817631, + "learning_rate": 2.6823090499861204e-09, + "loss": 0.4015, + "step": 32298 + }, + { + "epoch": 0.989916636018144, + "grad_norm": 0.7874767086928876, + "learning_rate": 2.6660787051002724e-09, + "loss": 0.4038, + "step": 32299 + }, + { + "epoch": 0.9899472845408851, + "grad_norm": 2.13959510227846, + "learning_rate": 2.6498976002237166e-09, + "loss": 0.6184, + "step": 32300 + }, + { + "epoch": 0.9899779330636264, + "grad_norm": 1.9832329048290565, + "learning_rate": 2.63376573551688e-09, + "loss": 0.5479, + "step": 32301 + }, + { + "epoch": 0.9900085815863675, + "grad_norm": 2.128928472824126, + "learning_rate": 2.6176831111379697e-09, + "loss": 0.685, + "step": 32302 + }, + { + "epoch": 0.9900392301091088, + "grad_norm": 1.9930631442622004, + "learning_rate": 2.6016497272457473e-09, + "loss": 0.5667, + "step": 32303 + }, + { + "epoch": 0.9900698786318499, + "grad_norm": 0.7633079144739536, + "learning_rate": 2.5856655839984203e-09, + "loss": 0.3795, + "step": 32304 + }, + { + "epoch": 0.9901005271545912, + "grad_norm": 1.8668627130864, + "learning_rate": 2.5697306815530842e-09, + "loss": 0.5989, + "step": 32305 + }, + { + "epoch": 0.9901311756773323, + "grad_norm": 1.9534994715188156, + "learning_rate": 
2.553845020066281e-09, + "loss": 0.5863, + "step": 32306 + }, + { + "epoch": 0.9901618242000736, + "grad_norm": 1.8537962149663938, + "learning_rate": 2.538008599695663e-09, + "loss": 0.4624, + "step": 32307 + }, + { + "epoch": 0.9901924727228147, + "grad_norm": 1.727414049319096, + "learning_rate": 2.522221420596105e-09, + "loss": 0.4794, + "step": 32308 + }, + { + "epoch": 0.990223121245556, + "grad_norm": 1.9652356865871272, + "learning_rate": 2.5064834829241492e-09, + "loss": 0.5866, + "step": 32309 + }, + { + "epoch": 0.9902537697682972, + "grad_norm": 2.0028786029464203, + "learning_rate": 2.4907947868346717e-09, + "loss": 0.5822, + "step": 32310 + }, + { + "epoch": 0.9902844182910384, + "grad_norm": 1.6968563410431623, + "learning_rate": 2.475155332481438e-09, + "loss": 0.5387, + "step": 32311 + }, + { + "epoch": 0.9903150668137796, + "grad_norm": 2.1893432590352884, + "learning_rate": 2.45956512001988e-09, + "loss": 0.5458, + "step": 32312 + }, + { + "epoch": 0.9903457153365208, + "grad_norm": 2.0217247287745264, + "learning_rate": 2.4440241496026527e-09, + "loss": 0.5504, + "step": 32313 + }, + { + "epoch": 0.990376363859262, + "grad_norm": 0.7973183652562535, + "learning_rate": 2.4285324213829675e-09, + "loss": 0.3891, + "step": 32314 + }, + { + "epoch": 0.9904070123820032, + "grad_norm": 1.850354696338655, + "learning_rate": 2.4130899355140346e-09, + "loss": 0.5924, + "step": 32315 + }, + { + "epoch": 0.9904376609047444, + "grad_norm": 0.7787276853462833, + "learning_rate": 2.3976966921468448e-09, + "loss": 0.3809, + "step": 32316 + }, + { + "epoch": 0.9904683094274856, + "grad_norm": 1.787871277020972, + "learning_rate": 2.3823526914346086e-09, + "loss": 0.5376, + "step": 32317 + }, + { + "epoch": 0.9904989579502268, + "grad_norm": 1.9132058288633633, + "learning_rate": 2.367057933527206e-09, + "loss": 0.5692, + "step": 32318 + }, + { + "epoch": 0.990529606472968, + "grad_norm": 1.8389651399992923, + "learning_rate": 2.3518124185761827e-09, + "loss": 0.5811, + "step": 32319 + }, + { + "epoch": 0.9905602549957092, + "grad_norm": 1.9254825704629697, + "learning_rate": 2.3366161467314187e-09, + "loss": 0.5742, + "step": 32320 + }, + { + "epoch": 0.9905909035184504, + "grad_norm": 2.0796120994600225, + "learning_rate": 2.321469118142794e-09, + "loss": 0.4711, + "step": 32321 + }, + { + "epoch": 0.9906215520411916, + "grad_norm": 1.8646197543004688, + "learning_rate": 2.3063713329590787e-09, + "loss": 0.5447, + "step": 32322 + }, + { + "epoch": 0.9906522005639328, + "grad_norm": 1.8754400405178673, + "learning_rate": 2.291322791330153e-09, + "loss": 0.6077, + "step": 32323 + }, + { + "epoch": 0.990682849086674, + "grad_norm": 1.9937562276109695, + "learning_rate": 2.2763234934025656e-09, + "loss": 0.5491, + "step": 32324 + }, + { + "epoch": 0.9907134976094152, + "grad_norm": 1.8037395744907188, + "learning_rate": 2.2613734393256427e-09, + "loss": 0.5944, + "step": 32325 + }, + { + "epoch": 0.9907441461321564, + "grad_norm": 1.8545556493032525, + "learning_rate": 2.2464726292459326e-09, + "loss": 0.5279, + "step": 32326 + }, + { + "epoch": 0.9907747946548976, + "grad_norm": 1.8260676969163159, + "learning_rate": 2.2316210633105406e-09, + "loss": 0.4928, + "step": 32327 + }, + { + "epoch": 0.9908054431776389, + "grad_norm": 1.9113340096538314, + "learning_rate": 2.2168187416660158e-09, + "loss": 0.6794, + "step": 32328 + }, + { + "epoch": 0.99083609170038, + "grad_norm": 1.90818675465824, + "learning_rate": 2.2020656644577976e-09, + "loss": 0.6112, + "step": 32329 + }, + { + "epoch": 
0.9908667402231213, + "grad_norm": 1.8687595654921185, + "learning_rate": 2.1873618318307698e-09, + "loss": 0.4662, + "step": 32330 + }, + { + "epoch": 0.9908973887458624, + "grad_norm": 2.033680253019711, + "learning_rate": 2.172707243930927e-09, + "loss": 0.5469, + "step": 32331 + }, + { + "epoch": 0.9909280372686037, + "grad_norm": 1.811499829042566, + "learning_rate": 2.1581019009020434e-09, + "loss": 0.5924, + "step": 32332 + }, + { + "epoch": 0.9909586857913448, + "grad_norm": 0.8128461397105116, + "learning_rate": 2.143545802888447e-09, + "loss": 0.3887, + "step": 32333 + }, + { + "epoch": 0.9909893343140861, + "grad_norm": 1.7244266731144327, + "learning_rate": 2.1290389500328023e-09, + "loss": 0.5328, + "step": 32334 + }, + { + "epoch": 0.9910199828368272, + "grad_norm": 1.913145213483092, + "learning_rate": 2.114581342478883e-09, + "loss": 0.5862, + "step": 32335 + }, + { + "epoch": 0.9910506313595685, + "grad_norm": 1.9183772887577866, + "learning_rate": 2.1001729803682424e-09, + "loss": 0.5684, + "step": 32336 + }, + { + "epoch": 0.9910812798823097, + "grad_norm": 2.0997620875951992, + "learning_rate": 2.0858138638440995e-09, + "loss": 0.5168, + "step": 32337 + }, + { + "epoch": 0.9911119284050509, + "grad_norm": 1.6523076352979513, + "learning_rate": 2.071503993046342e-09, + "loss": 0.5491, + "step": 32338 + }, + { + "epoch": 0.9911425769277921, + "grad_norm": 1.877765093625312, + "learning_rate": 2.057243368117634e-09, + "loss": 0.5445, + "step": 32339 + }, + { + "epoch": 0.9911732254505333, + "grad_norm": 1.8201494293176619, + "learning_rate": 2.043031989197308e-09, + "loss": 0.5152, + "step": 32340 + }, + { + "epoch": 0.9912038739732745, + "grad_norm": 1.862486968069916, + "learning_rate": 2.028869856425808e-09, + "loss": 0.5878, + "step": 32341 + }, + { + "epoch": 0.9912345224960157, + "grad_norm": 2.0392747811564784, + "learning_rate": 2.0147569699424664e-09, + "loss": 0.668, + "step": 32342 + }, + { + "epoch": 0.9912651710187569, + "grad_norm": 1.7778946458565557, + "learning_rate": 2.000693329886616e-09, + "loss": 0.5908, + "step": 32343 + }, + { + "epoch": 0.9912958195414981, + "grad_norm": 1.8670087528483288, + "learning_rate": 1.98667893639648e-09, + "loss": 0.5299, + "step": 32344 + }, + { + "epoch": 0.9913264680642393, + "grad_norm": 1.7950451741069617, + "learning_rate": 1.972713789610836e-09, + "loss": 0.5171, + "step": 32345 + }, + { + "epoch": 0.9913571165869806, + "grad_norm": 2.055907930974508, + "learning_rate": 1.958797889666797e-09, + "loss": 0.5661, + "step": 32346 + }, + { + "epoch": 0.9913877651097217, + "grad_norm": 1.938267099128094, + "learning_rate": 1.944931236701475e-09, + "loss": 0.6156, + "step": 32347 + }, + { + "epoch": 0.991418413632463, + "grad_norm": 2.026606896224071, + "learning_rate": 1.9311138308514276e-09, + "loss": 0.5882, + "step": 32348 + }, + { + "epoch": 0.9914490621552041, + "grad_norm": 0.8666064585734491, + "learning_rate": 1.9173456722526574e-09, + "loss": 0.401, + "step": 32349 + }, + { + "epoch": 0.9914797106779453, + "grad_norm": 1.8627516768464893, + "learning_rate": 1.9036267610417215e-09, + "loss": 0.6323, + "step": 32350 + }, + { + "epoch": 0.9915103592006865, + "grad_norm": 1.919088534080887, + "learning_rate": 1.889957097352957e-09, + "loss": 0.4786, + "step": 32351 + }, + { + "epoch": 0.9915410077234277, + "grad_norm": 2.130395710596046, + "learning_rate": 1.876336681321256e-09, + "loss": 0.6126, + "step": 32352 + }, + { + "epoch": 0.9915716562461689, + "grad_norm": 0.8023121195240088, + "learning_rate": 
1.8627655130804e-09, + "loss": 0.3948, + "step": 32353 + }, + { + "epoch": 0.9916023047689101, + "grad_norm": 1.6545009545868972, + "learning_rate": 1.849243592765282e-09, + "loss": 0.5146, + "step": 32354 + }, + { + "epoch": 0.9916329532916514, + "grad_norm": 2.023190288680263, + "learning_rate": 1.8357709205080177e-09, + "loss": 0.58, + "step": 32355 + }, + { + "epoch": 0.9916636018143925, + "grad_norm": 1.744054856676193, + "learning_rate": 1.8223474964418343e-09, + "loss": 0.4798, + "step": 32356 + }, + { + "epoch": 0.9916942503371338, + "grad_norm": 1.9828304142184896, + "learning_rate": 1.808973320698293e-09, + "loss": 0.5821, + "step": 32357 + }, + { + "epoch": 0.9917248988598749, + "grad_norm": 1.7316210617088583, + "learning_rate": 1.7956483934106205e-09, + "loss": 0.5913, + "step": 32358 + }, + { + "epoch": 0.9917555473826162, + "grad_norm": 2.1680754216286946, + "learning_rate": 1.7823727147087132e-09, + "loss": 0.5198, + "step": 32359 + }, + { + "epoch": 0.9917861959053573, + "grad_norm": 1.907460114892345, + "learning_rate": 1.7691462847241325e-09, + "loss": 0.5602, + "step": 32360 + }, + { + "epoch": 0.9918168444280986, + "grad_norm": 1.8599520084832315, + "learning_rate": 1.7559691035873295e-09, + "loss": 0.5241, + "step": 32361 + }, + { + "epoch": 0.9918474929508397, + "grad_norm": 2.3679243428744914, + "learning_rate": 1.742841171427645e-09, + "loss": 0.6275, + "step": 32362 + }, + { + "epoch": 0.991878141473581, + "grad_norm": 2.036859270697984, + "learning_rate": 1.7297624883744203e-09, + "loss": 0.591, + "step": 32363 + }, + { + "epoch": 0.9919087899963221, + "grad_norm": 0.7664919000358935, + "learning_rate": 1.716733054556441e-09, + "loss": 0.4006, + "step": 32364 + }, + { + "epoch": 0.9919394385190634, + "grad_norm": 1.6236200433945298, + "learning_rate": 1.703752870103048e-09, + "loss": 0.5065, + "step": 32365 + }, + { + "epoch": 0.9919700870418046, + "grad_norm": 0.7888277478791679, + "learning_rate": 1.6908219351408072e-09, + "loss": 0.3795, + "step": 32366 + }, + { + "epoch": 0.9920007355645458, + "grad_norm": 2.0248111419979327, + "learning_rate": 1.6779402497979491e-09, + "loss": 0.5415, + "step": 32367 + }, + { + "epoch": 0.992031384087287, + "grad_norm": 1.8815750259545174, + "learning_rate": 1.6651078142015942e-09, + "loss": 0.5651, + "step": 32368 + }, + { + "epoch": 0.9920620326100282, + "grad_norm": 2.116542277151326, + "learning_rate": 1.6523246284777528e-09, + "loss": 0.516, + "step": 32369 + }, + { + "epoch": 0.9920926811327694, + "grad_norm": 1.9905515251945736, + "learning_rate": 1.63959069275188e-09, + "loss": 0.5725, + "step": 32370 + }, + { + "epoch": 0.9921233296555106, + "grad_norm": 1.899027653971234, + "learning_rate": 1.6269060071505416e-09, + "loss": 0.5493, + "step": 32371 + }, + { + "epoch": 0.9921539781782518, + "grad_norm": 0.7917135873031516, + "learning_rate": 1.6142705717980823e-09, + "loss": 0.3962, + "step": 32372 + }, + { + "epoch": 0.992184626700993, + "grad_norm": 2.072392712619704, + "learning_rate": 1.601684386818847e-09, + "loss": 0.618, + "step": 32373 + }, + { + "epoch": 0.9922152752237342, + "grad_norm": 1.9154467161097268, + "learning_rate": 1.5891474523371809e-09, + "loss": 0.4986, + "step": 32374 + }, + { + "epoch": 0.9922459237464755, + "grad_norm": 1.6864190860787498, + "learning_rate": 1.5766597684768737e-09, + "loss": 0.547, + "step": 32375 + }, + { + "epoch": 0.9922765722692166, + "grad_norm": 2.0402753577535857, + "learning_rate": 1.564221335360605e-09, + "loss": 0.6099, + "step": 32376 + }, + { + "epoch": 
0.9923072207919579, + "grad_norm": 0.8103302728392725, + "learning_rate": 1.5518321531104996e-09, + "loss": 0.4048, + "step": 32377 + }, + { + "epoch": 0.992337869314699, + "grad_norm": 1.7062447093467132, + "learning_rate": 1.539492221849237e-09, + "loss": 0.5603, + "step": 32378 + }, + { + "epoch": 0.9923685178374403, + "grad_norm": 2.098390138173315, + "learning_rate": 1.5272015416983866e-09, + "loss": 0.5323, + "step": 32379 + }, + { + "epoch": 0.9923991663601814, + "grad_norm": 1.9090116481382617, + "learning_rate": 1.5149601127789627e-09, + "loss": 0.5261, + "step": 32380 + }, + { + "epoch": 0.9924298148829226, + "grad_norm": 2.385791742427983, + "learning_rate": 1.5027679352119795e-09, + "loss": 0.6526, + "step": 32381 + }, + { + "epoch": 0.9924604634056639, + "grad_norm": 1.8117169198658563, + "learning_rate": 1.490625009116231e-09, + "loss": 0.5456, + "step": 32382 + }, + { + "epoch": 0.992491111928405, + "grad_norm": 1.9412008348660312, + "learning_rate": 1.4785313346132868e-09, + "loss": 0.58, + "step": 32383 + }, + { + "epoch": 0.9925217604511463, + "grad_norm": 1.888139657799969, + "learning_rate": 1.466486911820275e-09, + "loss": 0.5917, + "step": 32384 + }, + { + "epoch": 0.9925524089738874, + "grad_norm": 2.012723001531079, + "learning_rate": 1.4544917408576553e-09, + "loss": 0.4963, + "step": 32385 + }, + { + "epoch": 0.9925830574966287, + "grad_norm": 2.024330568442075, + "learning_rate": 1.442545821842556e-09, + "loss": 0.6014, + "step": 32386 + }, + { + "epoch": 0.9926137060193698, + "grad_norm": 0.806843011558152, + "learning_rate": 1.4306491548932156e-09, + "loss": 0.4004, + "step": 32387 + }, + { + "epoch": 0.9926443545421111, + "grad_norm": 1.8630564129436604, + "learning_rate": 1.4188017401262077e-09, + "loss": 0.5249, + "step": 32388 + }, + { + "epoch": 0.9926750030648522, + "grad_norm": 1.9565992092254143, + "learning_rate": 1.4070035776592162e-09, + "loss": 0.6726, + "step": 32389 + }, + { + "epoch": 0.9927056515875935, + "grad_norm": 1.8553358552473593, + "learning_rate": 1.395254667607704e-09, + "loss": 0.5638, + "step": 32390 + }, + { + "epoch": 0.9927363001103346, + "grad_norm": 1.7618600725059559, + "learning_rate": 1.3835550100876892e-09, + "loss": 0.5474, + "step": 32391 + }, + { + "epoch": 0.9927669486330759, + "grad_norm": 1.920962930205549, + "learning_rate": 1.3719046052140805e-09, + "loss": 0.5224, + "step": 32392 + }, + { + "epoch": 0.9927975971558171, + "grad_norm": 0.8348806576198882, + "learning_rate": 1.3603034531023407e-09, + "loss": 0.3902, + "step": 32393 + }, + { + "epoch": 0.9928282456785583, + "grad_norm": 2.033635904748177, + "learning_rate": 1.3487515538668229e-09, + "loss": 0.6075, + "step": 32394 + }, + { + "epoch": 0.9928588942012995, + "grad_norm": 1.9444144166023891, + "learning_rate": 1.3372489076207695e-09, + "loss": 0.6033, + "step": 32395 + }, + { + "epoch": 0.9928895427240407, + "grad_norm": 1.9199559680265048, + "learning_rate": 1.3257955144774238e-09, + "loss": 0.5451, + "step": 32396 + }, + { + "epoch": 0.9929201912467819, + "grad_norm": 1.8028044986563536, + "learning_rate": 1.3143913745505831e-09, + "loss": 0.5516, + "step": 32397 + }, + { + "epoch": 0.9929508397695231, + "grad_norm": 0.8373252779523769, + "learning_rate": 1.3030364879518253e-09, + "loss": 0.408, + "step": 32398 + }, + { + "epoch": 0.9929814882922643, + "grad_norm": 1.9119705845750166, + "learning_rate": 1.2917308547932828e-09, + "loss": 0.6282, + "step": 32399 + }, + { + "epoch": 0.9930121368150056, + "grad_norm": 2.258582144786885, + "learning_rate": 
1.2804744751859777e-09, + "loss": 0.7292, + "step": 32400 + }, + { + "epoch": 0.9930427853377467, + "grad_norm": 1.772617230266316, + "learning_rate": 1.2692673492414875e-09, + "loss": 0.5165, + "step": 32401 + }, + { + "epoch": 0.993073433860488, + "grad_norm": 1.915660482305068, + "learning_rate": 1.2581094770697243e-09, + "loss": 0.6035, + "step": 32402 + }, + { + "epoch": 0.9931040823832291, + "grad_norm": 1.8270633995876655, + "learning_rate": 1.2470008587806004e-09, + "loss": 0.5394, + "step": 32403 + }, + { + "epoch": 0.9931347309059704, + "grad_norm": 2.0288162021626173, + "learning_rate": 1.2359414944840276e-09, + "loss": 0.5365, + "step": 32404 + }, + { + "epoch": 0.9931653794287115, + "grad_norm": 1.7090416818381298, + "learning_rate": 1.2249313842882527e-09, + "loss": 0.5815, + "step": 32405 + }, + { + "epoch": 0.9931960279514528, + "grad_norm": 2.1115839431066425, + "learning_rate": 1.2139705283026326e-09, + "loss": 0.6343, + "step": 32406 + }, + { + "epoch": 0.9932266764741939, + "grad_norm": 0.8098782063634047, + "learning_rate": 1.203058926634859e-09, + "loss": 0.3847, + "step": 32407 + }, + { + "epoch": 0.9932573249969352, + "grad_norm": 1.6489259175296398, + "learning_rate": 1.1921965793920687e-09, + "loss": 0.5351, + "step": 32408 + }, + { + "epoch": 0.9932879735196763, + "grad_norm": 2.2652814825909138, + "learning_rate": 1.1813834866819529e-09, + "loss": 0.5618, + "step": 32409 + }, + { + "epoch": 0.9933186220424176, + "grad_norm": 3.6247582502436786, + "learning_rate": 1.170619648609983e-09, + "loss": 0.6888, + "step": 32410 + }, + { + "epoch": 0.9933492705651588, + "grad_norm": 0.7868903995385159, + "learning_rate": 1.1599050652832955e-09, + "loss": 0.4, + "step": 32411 + }, + { + "epoch": 0.9933799190878999, + "grad_norm": 1.975984324667343, + "learning_rate": 1.1492397368073615e-09, + "loss": 0.5234, + "step": 32412 + }, + { + "epoch": 0.9934105676106412, + "grad_norm": 1.818983758917286, + "learning_rate": 1.1386236632865421e-09, + "loss": 0.5599, + "step": 32413 + }, + { + "epoch": 0.9934412161333823, + "grad_norm": 1.6643780720420174, + "learning_rate": 1.1280568448263084e-09, + "loss": 0.5291, + "step": 32414 + }, + { + "epoch": 0.9934718646561236, + "grad_norm": 1.8592699114268707, + "learning_rate": 1.1175392815299112e-09, + "loss": 0.5726, + "step": 32415 + }, + { + "epoch": 0.9935025131788647, + "grad_norm": 1.7114869031922368, + "learning_rate": 1.1070709735017115e-09, + "loss": 0.5693, + "step": 32416 + }, + { + "epoch": 0.993533161701606, + "grad_norm": 1.7654414224557602, + "learning_rate": 1.0966519208444048e-09, + "loss": 0.6574, + "step": 32417 + }, + { + "epoch": 0.9935638102243471, + "grad_norm": 2.3153774050921716, + "learning_rate": 1.0862821236606868e-09, + "loss": 0.4556, + "step": 32418 + }, + { + "epoch": 0.9935944587470884, + "grad_norm": 1.8404114285179909, + "learning_rate": 1.0759615820532532e-09, + "loss": 0.5697, + "step": 32419 + }, + { + "epoch": 0.9936251072698296, + "grad_norm": 0.8726328553403891, + "learning_rate": 1.065690296123134e-09, + "loss": 0.3884, + "step": 32420 + }, + { + "epoch": 0.9936557557925708, + "grad_norm": 0.7689584619449756, + "learning_rate": 1.0554682659719152e-09, + "loss": 0.3761, + "step": 32421 + }, + { + "epoch": 0.993686404315312, + "grad_norm": 2.2655048705340657, + "learning_rate": 1.0452954917000713e-09, + "loss": 0.5262, + "step": 32422 + }, + { + "epoch": 0.9937170528380532, + "grad_norm": 2.3430263558618045, + "learning_rate": 1.035171973408078e-09, + "loss": 0.5871, + "step": 32423 + }, + { + 
"epoch": 0.9937477013607944, + "grad_norm": 1.9650198482891699, + "learning_rate": 1.0250977111952998e-09, + "loss": 0.5563, + "step": 32424 + }, + { + "epoch": 0.9937783498835356, + "grad_norm": 1.893484003502866, + "learning_rate": 1.0150727051616572e-09, + "loss": 0.6624, + "step": 32425 + }, + { + "epoch": 0.9938089984062768, + "grad_norm": 1.9188913495664206, + "learning_rate": 1.0050969554054047e-09, + "loss": 0.4929, + "step": 32426 + }, + { + "epoch": 0.993839646929018, + "grad_norm": 2.066759382559697, + "learning_rate": 9.95170462024797e-10, + "loss": 0.5971, + "step": 32427 + }, + { + "epoch": 0.9938702954517592, + "grad_norm": 1.9117894211724775, + "learning_rate": 9.852932251180891e-10, + "loss": 0.5493, + "step": 32428 + }, + { + "epoch": 0.9939009439745005, + "grad_norm": 0.7416758395124246, + "learning_rate": 9.754652447818702e-10, + "loss": 0.377, + "step": 32429 + }, + { + "epoch": 0.9939315924972416, + "grad_norm": 1.7839703024412514, + "learning_rate": 9.6568652111384e-10, + "loss": 0.4911, + "step": 32430 + }, + { + "epoch": 0.9939622410199829, + "grad_norm": 2.0092306375442823, + "learning_rate": 9.559570542100327e-10, + "loss": 0.5881, + "step": 32431 + }, + { + "epoch": 0.993992889542724, + "grad_norm": 2.167592057448729, + "learning_rate": 9.462768441659276e-10, + "loss": 0.5464, + "step": 32432 + }, + { + "epoch": 0.9940235380654653, + "grad_norm": 1.9207917836334187, + "learning_rate": 9.366458910775588e-10, + "loss": 0.6098, + "step": 32433 + }, + { + "epoch": 0.9940541865882064, + "grad_norm": 1.9848592380857635, + "learning_rate": 9.270641950392956e-10, + "loss": 0.6307, + "step": 32434 + }, + { + "epoch": 0.9940848351109477, + "grad_norm": 2.3392064424511236, + "learning_rate": 9.175317561460617e-10, + "loss": 0.5478, + "step": 32435 + }, + { + "epoch": 0.9941154836336888, + "grad_norm": 1.790689152362571, + "learning_rate": 9.08048574491116e-10, + "loss": 0.4661, + "step": 32436 + }, + { + "epoch": 0.9941461321564301, + "grad_norm": 0.7771549005719156, + "learning_rate": 8.986146501682724e-10, + "loss": 0.3969, + "step": 32437 + }, + { + "epoch": 0.9941767806791713, + "grad_norm": 1.610307290942385, + "learning_rate": 8.892299832707896e-10, + "loss": 0.4597, + "step": 32438 + }, + { + "epoch": 0.9942074292019125, + "grad_norm": 2.0102363195185404, + "learning_rate": 8.798945738902609e-10, + "loss": 0.586, + "step": 32439 + }, + { + "epoch": 0.9942380777246537, + "grad_norm": 0.822760531753198, + "learning_rate": 8.7060842211939e-10, + "loss": 0.3952, + "step": 32440 + }, + { + "epoch": 0.9942687262473949, + "grad_norm": 2.098571436738282, + "learning_rate": 8.613715280497703e-10, + "loss": 0.5849, + "step": 32441 + }, + { + "epoch": 0.9942993747701361, + "grad_norm": 1.7748485844351118, + "learning_rate": 8.52183891771885e-10, + "loss": 0.5338, + "step": 32442 + }, + { + "epoch": 0.9943300232928772, + "grad_norm": 1.7140724350549013, + "learning_rate": 8.430455133767723e-10, + "loss": 0.4983, + "step": 32443 + }, + { + "epoch": 0.9943606718156185, + "grad_norm": 2.2050476489290105, + "learning_rate": 8.339563929538052e-10, + "loss": 0.5762, + "step": 32444 + }, + { + "epoch": 0.9943913203383596, + "grad_norm": 1.8782828791279074, + "learning_rate": 8.249165305929119e-10, + "loss": 0.5343, + "step": 32445 + }, + { + "epoch": 0.9944219688611009, + "grad_norm": 1.9092194782102274, + "learning_rate": 8.159259263834651e-10, + "loss": 0.6454, + "step": 32446 + }, + { + "epoch": 0.994452617383842, + "grad_norm": 1.7646827164452838, + "learning_rate": 
8.069845804142828e-10, + "loss": 0.5803, + "step": 32447 + }, + { + "epoch": 0.9944832659065833, + "grad_norm": 1.7564931305162836, + "learning_rate": 7.980924927725175e-10, + "loss": 0.5815, + "step": 32448 + }, + { + "epoch": 0.9945139144293245, + "grad_norm": 2.014022441834454, + "learning_rate": 7.892496635458769e-10, + "loss": 0.556, + "step": 32449 + }, + { + "epoch": 0.9945445629520657, + "grad_norm": 2.038988728431421, + "learning_rate": 7.804560928226234e-10, + "loss": 0.4929, + "step": 32450 + }, + { + "epoch": 0.9945752114748069, + "grad_norm": 1.7898077919081776, + "learning_rate": 7.717117806876895e-10, + "loss": 0.5566, + "step": 32451 + }, + { + "epoch": 0.9946058599975481, + "grad_norm": 2.1786971999917415, + "learning_rate": 7.630167272287825e-10, + "loss": 0.5917, + "step": 32452 + }, + { + "epoch": 0.9946365085202893, + "grad_norm": 1.8240011931062887, + "learning_rate": 7.543709325313897e-10, + "loss": 0.4684, + "step": 32453 + }, + { + "epoch": 0.9946671570430305, + "grad_norm": 1.9719427424998235, + "learning_rate": 7.457743966793329e-10, + "loss": 0.594, + "step": 32454 + }, + { + "epoch": 0.9946978055657717, + "grad_norm": 1.8186113195140907, + "learning_rate": 7.372271197592096e-10, + "loss": 0.5353, + "step": 32455 + }, + { + "epoch": 0.994728454088513, + "grad_norm": 1.814493557143603, + "learning_rate": 7.287291018537312e-10, + "loss": 0.5609, + "step": 32456 + }, + { + "epoch": 0.9947591026112541, + "grad_norm": 1.9313140932140858, + "learning_rate": 7.202803430472749e-10, + "loss": 0.5796, + "step": 32457 + }, + { + "epoch": 0.9947897511339954, + "grad_norm": 1.8598414563501968, + "learning_rate": 7.118808434231073e-10, + "loss": 0.5976, + "step": 32458 + }, + { + "epoch": 0.9948203996567365, + "grad_norm": 2.220743846919311, + "learning_rate": 7.0353060306394e-10, + "loss": 0.5674, + "step": 32459 + }, + { + "epoch": 0.9948510481794778, + "grad_norm": 1.7039612364803436, + "learning_rate": 6.952296220519294e-10, + "loss": 0.5375, + "step": 32460 + }, + { + "epoch": 0.9948816967022189, + "grad_norm": 0.7760243408079823, + "learning_rate": 6.869779004692323e-10, + "loss": 0.3869, + "step": 32461 + }, + { + "epoch": 0.9949123452249602, + "grad_norm": 1.9704454207655802, + "learning_rate": 6.787754383963396e-10, + "loss": 0.5739, + "step": 32462 + }, + { + "epoch": 0.9949429937477013, + "grad_norm": 1.9306476801308432, + "learning_rate": 6.706222359148529e-10, + "loss": 0.5325, + "step": 32463 + }, + { + "epoch": 0.9949736422704426, + "grad_norm": 2.122972407202374, + "learning_rate": 6.62518293104708e-10, + "loss": 0.6418, + "step": 32464 + }, + { + "epoch": 0.9950042907931838, + "grad_norm": 1.7792109215416412, + "learning_rate": 6.544636100463963e-10, + "loss": 0.6902, + "step": 32465 + }, + { + "epoch": 0.995034939315925, + "grad_norm": 1.8453834391839739, + "learning_rate": 6.464581868181885e-10, + "loss": 0.4497, + "step": 32466 + }, + { + "epoch": 0.9950655878386662, + "grad_norm": 1.6808676391088242, + "learning_rate": 6.385020235000206e-10, + "loss": 0.4829, + "step": 32467 + }, + { + "epoch": 0.9950962363614074, + "grad_norm": 0.7593812641611267, + "learning_rate": 6.305951201696081e-10, + "loss": 0.3934, + "step": 32468 + }, + { + "epoch": 0.9951268848841486, + "grad_norm": 2.0131238438843266, + "learning_rate": 6.227374769052219e-10, + "loss": 0.5211, + "step": 32469 + }, + { + "epoch": 0.9951575334068898, + "grad_norm": 1.7145272096717052, + "learning_rate": 6.149290937840224e-10, + "loss": 0.5491, + "step": 32470 + }, + { + "epoch": 
0.995188181929631, + "grad_norm": 1.9189581445620798, + "learning_rate": 6.071699708831702e-10, + "loss": 0.5377, + "step": 32471 + }, + { + "epoch": 0.9952188304523722, + "grad_norm": 1.8108460090133052, + "learning_rate": 5.994601082787155e-10, + "loss": 0.4805, + "step": 32472 + }, + { + "epoch": 0.9952494789751134, + "grad_norm": 1.8326639905729902, + "learning_rate": 5.917995060472636e-10, + "loss": 0.5401, + "step": 32473 + }, + { + "epoch": 0.9952801274978545, + "grad_norm": 1.9592937449826902, + "learning_rate": 5.841881642637548e-10, + "loss": 0.5887, + "step": 32474 + }, + { + "epoch": 0.9953107760205958, + "grad_norm": 1.978130212320474, + "learning_rate": 5.766260830036841e-10, + "loss": 0.5688, + "step": 32475 + }, + { + "epoch": 0.995341424543337, + "grad_norm": 1.7957016773589156, + "learning_rate": 5.691132623414364e-10, + "loss": 0.5762, + "step": 32476 + }, + { + "epoch": 0.9953720730660782, + "grad_norm": 2.1085157316239322, + "learning_rate": 5.616497023502865e-10, + "loss": 0.6096, + "step": 32477 + }, + { + "epoch": 0.9954027215888194, + "grad_norm": 2.1592769750084657, + "learning_rate": 5.542354031046193e-10, + "loss": 0.6149, + "step": 32478 + }, + { + "epoch": 0.9954333701115606, + "grad_norm": 1.9735830595489254, + "learning_rate": 5.468703646771545e-10, + "loss": 0.5348, + "step": 32479 + }, + { + "epoch": 0.9954640186343018, + "grad_norm": 1.9278102871205498, + "learning_rate": 5.395545871406116e-10, + "loss": 0.5877, + "step": 32480 + }, + { + "epoch": 0.995494667157043, + "grad_norm": 1.9919007660814803, + "learning_rate": 5.322880705671552e-10, + "loss": 0.4802, + "step": 32481 + }, + { + "epoch": 0.9955253156797842, + "grad_norm": 1.8385078234402312, + "learning_rate": 5.250708150283946e-10, + "loss": 0.5497, + "step": 32482 + }, + { + "epoch": 0.9955559642025255, + "grad_norm": 1.7691639924249711, + "learning_rate": 5.179028205948289e-10, + "loss": 0.5863, + "step": 32483 + }, + { + "epoch": 0.9955866127252666, + "grad_norm": 2.058815353807029, + "learning_rate": 5.107840873375125e-10, + "loss": 0.5985, + "step": 32484 + }, + { + "epoch": 0.9956172612480079, + "grad_norm": 1.8605298832595476, + "learning_rate": 5.037146153269446e-10, + "loss": 0.5437, + "step": 32485 + }, + { + "epoch": 0.995647909770749, + "grad_norm": 1.5653971700185851, + "learning_rate": 4.96694404632514e-10, + "loss": 0.6142, + "step": 32486 + }, + { + "epoch": 0.9956785582934903, + "grad_norm": 1.769936605877186, + "learning_rate": 4.897234553230546e-10, + "loss": 0.5638, + "step": 32487 + }, + { + "epoch": 0.9957092068162314, + "grad_norm": 1.9098415971114955, + "learning_rate": 4.828017674674002e-10, + "loss": 0.5844, + "step": 32488 + }, + { + "epoch": 0.9957398553389727, + "grad_norm": 1.781580527272616, + "learning_rate": 4.759293411343846e-10, + "loss": 0.6099, + "step": 32489 + }, + { + "epoch": 0.9957705038617138, + "grad_norm": 1.5311510508435056, + "learning_rate": 4.691061763906213e-10, + "loss": 0.4768, + "step": 32490 + }, + { + "epoch": 0.9958011523844551, + "grad_norm": 1.8905640722246262, + "learning_rate": 4.623322733043889e-10, + "loss": 0.6182, + "step": 32491 + }, + { + "epoch": 0.9958318009071963, + "grad_norm": 0.7750294866979908, + "learning_rate": 4.556076319417457e-10, + "loss": 0.3829, + "step": 32492 + }, + { + "epoch": 0.9958624494299375, + "grad_norm": 1.61438288267145, + "learning_rate": 4.489322523693052e-10, + "loss": 0.4033, + "step": 32493 + }, + { + "epoch": 0.9958930979526787, + "grad_norm": 1.9568754858337418, + "learning_rate": 
4.4230613465257033e-10, + "loss": 0.5986, + "step": 32494 + }, + { + "epoch": 0.9959237464754199, + "grad_norm": 1.827548871274276, + "learning_rate": 4.3572927885704443e-10, + "loss": 0.4755, + "step": 32495 + }, + { + "epoch": 0.9959543949981611, + "grad_norm": 1.8344200423820187, + "learning_rate": 4.2920168504767547e-10, + "loss": 0.5403, + "step": 32496 + }, + { + "epoch": 0.9959850435209023, + "grad_norm": 0.7916115336737735, + "learning_rate": 4.2272335328830127e-10, + "loss": 0.3929, + "step": 32497 + }, + { + "epoch": 0.9960156920436435, + "grad_norm": 1.877916550661781, + "learning_rate": 4.162942836433148e-10, + "loss": 0.618, + "step": 32498 + }, + { + "epoch": 0.9960463405663847, + "grad_norm": 1.761994399882847, + "learning_rate": 4.0991447617599876e-10, + "loss": 0.6368, + "step": 32499 + }, + { + "epoch": 0.9960769890891259, + "grad_norm": 2.1864805726522922, + "learning_rate": 4.035839309485257e-10, + "loss": 0.6271, + "step": 32500 + }, + { + "epoch": 0.9961076376118672, + "grad_norm": 1.9242109304767032, + "learning_rate": 3.973026480236231e-10, + "loss": 0.5647, + "step": 32501 + }, + { + "epoch": 0.9961382861346083, + "grad_norm": 1.7474248594530843, + "learning_rate": 3.9107062746346346e-10, + "loss": 0.5437, + "step": 32502 + }, + { + "epoch": 0.9961689346573496, + "grad_norm": 1.871250686035161, + "learning_rate": 3.848878693296643e-10, + "loss": 0.5241, + "step": 32503 + }, + { + "epoch": 0.9961995831800907, + "grad_norm": 2.169042261404359, + "learning_rate": 3.787543736821775e-10, + "loss": 0.5817, + "step": 32504 + }, + { + "epoch": 0.9962302317028319, + "grad_norm": 1.9941710181970609, + "learning_rate": 3.726701405826205e-10, + "loss": 0.6221, + "step": 32505 + }, + { + "epoch": 0.9962608802255731, + "grad_norm": 1.811009300052714, + "learning_rate": 3.666351700898352e-10, + "loss": 0.5443, + "step": 32506 + }, + { + "epoch": 0.9962915287483143, + "grad_norm": 2.119216235217823, + "learning_rate": 3.606494622643286e-10, + "loss": 0.5768, + "step": 32507 + }, + { + "epoch": 0.9963221772710555, + "grad_norm": 1.7283965203299783, + "learning_rate": 3.5471301716383246e-10, + "loss": 0.6466, + "step": 32508 + }, + { + "epoch": 0.9963528257937967, + "grad_norm": 1.7611573809048569, + "learning_rate": 3.4882583484829867e-10, + "loss": 0.5724, + "step": 32509 + }, + { + "epoch": 0.996383474316538, + "grad_norm": 1.9555929088864943, + "learning_rate": 3.4298791537434874e-10, + "loss": 0.5888, + "step": 32510 + }, + { + "epoch": 0.9964141228392791, + "grad_norm": 1.8487075093390417, + "learning_rate": 3.3719925880082437e-10, + "loss": 0.5845, + "step": 32511 + }, + { + "epoch": 0.9964447713620204, + "grad_norm": 0.7935717631851266, + "learning_rate": 3.314598651837919e-10, + "loss": 0.3859, + "step": 32512 + }, + { + "epoch": 0.9964754198847615, + "grad_norm": 1.6845447985862105, + "learning_rate": 3.257697345798727e-10, + "loss": 0.5527, + "step": 32513 + }, + { + "epoch": 0.9965060684075028, + "grad_norm": 1.8503024190808444, + "learning_rate": 3.2012886704568814e-10, + "loss": 0.5589, + "step": 32514 + }, + { + "epoch": 0.9965367169302439, + "grad_norm": 1.7298240914289544, + "learning_rate": 3.1453726263619424e-10, + "loss": 0.5296, + "step": 32515 + }, + { + "epoch": 0.9965673654529852, + "grad_norm": 1.8607150950820026, + "learning_rate": 3.089949214069021e-10, + "loss": 0.4958, + "step": 32516 + }, + { + "epoch": 0.9965980139757263, + "grad_norm": 1.830685069489406, + "learning_rate": 3.035018434127679e-10, + "loss": 0.5308, + "step": 32517 + }, + { + "epoch": 
0.9966286624984676, + "grad_norm": 2.0377088239609846, + "learning_rate": 2.9805802870708224e-10, + "loss": 0.5488, + "step": 32518 + }, + { + "epoch": 0.9966593110212087, + "grad_norm": 1.835817432568892, + "learning_rate": 2.926634773436909e-10, + "loss": 0.499, + "step": 32519 + }, + { + "epoch": 0.99668995954395, + "grad_norm": 0.7847537024371078, + "learning_rate": 2.8731818937588473e-10, + "loss": 0.404, + "step": 32520 + }, + { + "epoch": 0.9967206080666912, + "grad_norm": 2.05898278423979, + "learning_rate": 2.820221648569543e-10, + "loss": 0.5744, + "step": 32521 + }, + { + "epoch": 0.9967512565894324, + "grad_norm": 1.7522798599677787, + "learning_rate": 2.7677540383796996e-10, + "loss": 0.5533, + "step": 32522 + }, + { + "epoch": 0.9967819051121736, + "grad_norm": 1.7881632125964166, + "learning_rate": 2.7157790637111213e-10, + "loss": 0.5987, + "step": 32523 + }, + { + "epoch": 0.9968125536349148, + "grad_norm": 1.9771471248759125, + "learning_rate": 2.664296725080062e-10, + "loss": 0.4941, + "step": 32524 + }, + { + "epoch": 0.996843202157656, + "grad_norm": 0.7931894726426594, + "learning_rate": 2.613307022986122e-10, + "loss": 0.3998, + "step": 32525 + }, + { + "epoch": 0.9968738506803972, + "grad_norm": 2.0505974150982156, + "learning_rate": 2.5628099579344535e-10, + "loss": 0.6116, + "step": 32526 + }, + { + "epoch": 0.9969044992031384, + "grad_norm": 1.973881046920517, + "learning_rate": 2.5128055304302067e-10, + "loss": 0.5981, + "step": 32527 + }, + { + "epoch": 0.9969351477258797, + "grad_norm": 2.0094748913123035, + "learning_rate": 2.4632937409563297e-10, + "loss": 0.5649, + "step": 32528 + }, + { + "epoch": 0.9969657962486208, + "grad_norm": 1.921706027655259, + "learning_rate": 2.4142745900013196e-10, + "loss": 0.6011, + "step": 32529 + }, + { + "epoch": 0.9969964447713621, + "grad_norm": 1.9632121558040196, + "learning_rate": 2.365748078053676e-10, + "loss": 0.5639, + "step": 32530 + }, + { + "epoch": 0.9970270932941032, + "grad_norm": 2.0045312991275113, + "learning_rate": 2.3177142055907931e-10, + "loss": 0.5895, + "step": 32531 + }, + { + "epoch": 0.9970577418168445, + "grad_norm": 1.791420884671684, + "learning_rate": 2.2701729730789657e-10, + "loss": 0.5343, + "step": 32532 + }, + { + "epoch": 0.9970883903395856, + "grad_norm": 1.944889675372813, + "learning_rate": 2.2231243809955895e-10, + "loss": 0.5422, + "step": 32533 + }, + { + "epoch": 0.9971190388623269, + "grad_norm": 1.9385606876561798, + "learning_rate": 2.1765684298014068e-10, + "loss": 0.6482, + "step": 32534 + }, + { + "epoch": 0.997149687385068, + "grad_norm": 1.894128516592279, + "learning_rate": 2.130505119951609e-10, + "loss": 0.555, + "step": 32535 + }, + { + "epoch": 0.9971803359078092, + "grad_norm": 0.8130320957908672, + "learning_rate": 2.084934451901388e-10, + "loss": 0.3847, + "step": 32536 + }, + { + "epoch": 0.9972109844305505, + "grad_norm": 1.8026502288715838, + "learning_rate": 2.039856426100384e-10, + "loss": 0.627, + "step": 32537 + }, + { + "epoch": 0.9972416329532916, + "grad_norm": 1.7484254267240986, + "learning_rate": 1.995271042998237e-10, + "loss": 0.549, + "step": 32538 + }, + { + "epoch": 0.9972722814760329, + "grad_norm": 1.656117963607354, + "learning_rate": 1.951178303022383e-10, + "loss": 0.5425, + "step": 32539 + }, + { + "epoch": 0.997302929998774, + "grad_norm": 1.8658317105577433, + "learning_rate": 1.9075782066169114e-10, + "loss": 0.6168, + "step": 32540 + }, + { + "epoch": 0.9973335785215153, + "grad_norm": 0.7879327651598306, + "learning_rate": 
1.8644707542092578e-10, + "loss": 0.3874, + "step": 32541 + }, + { + "epoch": 0.9973642270442564, + "grad_norm": 0.8048062834726323, + "learning_rate": 1.8218559462268582e-10, + "loss": 0.366, + "step": 32542 + }, + { + "epoch": 0.9973948755669977, + "grad_norm": 1.8921423909914452, + "learning_rate": 1.779733783080495e-10, + "loss": 0.6573, + "step": 32543 + }, + { + "epoch": 0.9974255240897388, + "grad_norm": 1.838703786011133, + "learning_rate": 1.7381042651920531e-10, + "loss": 0.504, + "step": 32544 + }, + { + "epoch": 0.9974561726124801, + "grad_norm": 1.9592946219895004, + "learning_rate": 1.696967392972315e-10, + "loss": 0.4459, + "step": 32545 + }, + { + "epoch": 0.9974868211352212, + "grad_norm": 1.8376329240944225, + "learning_rate": 1.6563231668265124e-10, + "loss": 0.5002, + "step": 32546 + }, + { + "epoch": 0.9975174696579625, + "grad_norm": 2.1468842526543255, + "learning_rate": 1.616171587154325e-10, + "loss": 0.5217, + "step": 32547 + }, + { + "epoch": 0.9975481181807037, + "grad_norm": 2.2149445582291745, + "learning_rate": 1.576512654344331e-10, + "loss": 0.5119, + "step": 32548 + }, + { + "epoch": 0.9975787667034449, + "grad_norm": 1.9000848484734936, + "learning_rate": 1.5373463687962108e-10, + "loss": 0.55, + "step": 32549 + }, + { + "epoch": 0.9976094152261861, + "grad_norm": 0.7824076341561896, + "learning_rate": 1.4986727308985427e-10, + "loss": 0.3898, + "step": 32550 + }, + { + "epoch": 0.9976400637489273, + "grad_norm": 1.7463400386697834, + "learning_rate": 1.4604917410232511e-10, + "loss": 0.4644, + "step": 32551 + }, + { + "epoch": 0.9976707122716685, + "grad_norm": 1.768453109424755, + "learning_rate": 1.4228033995478118e-10, + "loss": 0.5186, + "step": 32552 + }, + { + "epoch": 0.9977013607944097, + "grad_norm": 1.6402206515594036, + "learning_rate": 1.385607706849701e-10, + "loss": 0.4516, + "step": 32553 + }, + { + "epoch": 0.9977320093171509, + "grad_norm": 1.982591144042332, + "learning_rate": 1.3489046632897406e-10, + "loss": 0.6246, + "step": 32554 + }, + { + "epoch": 0.9977626578398922, + "grad_norm": 1.7623203286076565, + "learning_rate": 1.3126942692343046e-10, + "loss": 0.5801, + "step": 32555 + }, + { + "epoch": 0.9977933063626333, + "grad_norm": 1.8865741218455918, + "learning_rate": 1.2769765250331135e-10, + "loss": 0.6302, + "step": 32556 + }, + { + "epoch": 0.9978239548853746, + "grad_norm": 1.906566246983705, + "learning_rate": 1.241751431046989e-10, + "loss": 0.6325, + "step": 32557 + }, + { + "epoch": 0.9978546034081157, + "grad_norm": 2.1113498782520974, + "learning_rate": 1.20701898761455e-10, + "loss": 0.6021, + "step": 32558 + }, + { + "epoch": 0.997885251930857, + "grad_norm": 1.7638103477918858, + "learning_rate": 1.172779195085516e-10, + "loss": 0.51, + "step": 32559 + }, + { + "epoch": 0.9979159004535981, + "grad_norm": 1.6672211490610211, + "learning_rate": 1.1390320537929545e-10, + "loss": 0.5472, + "step": 32560 + }, + { + "epoch": 0.9979465489763394, + "grad_norm": 0.8144430283336112, + "learning_rate": 1.105777564069932e-10, + "loss": 0.3924, + "step": 32561 + }, + { + "epoch": 0.9979771974990805, + "grad_norm": 1.7973461360595844, + "learning_rate": 1.0730157262495156e-10, + "loss": 0.53, + "step": 32562 + }, + { + "epoch": 0.9980078460218218, + "grad_norm": 1.835725515312279, + "learning_rate": 1.0407465406425677e-10, + "loss": 0.5777, + "step": 32563 + }, + { + "epoch": 0.998038494544563, + "grad_norm": 1.9797432259638723, + "learning_rate": 1.0089700075766041e-10, + "loss": 0.6083, + "step": 32564 + }, + { + "epoch": 
0.9980691430673042, + "grad_norm": 0.8112860236056678, + "learning_rate": 9.776861273624871e-11, + "loss": 0.4111, + "step": 32565 + }, + { + "epoch": 0.9980997915900454, + "grad_norm": 1.8006988770795123, + "learning_rate": 9.468949003055283e-11, + "loss": 0.5295, + "step": 32566 + }, + { + "epoch": 0.9981304401127865, + "grad_norm": 1.8721891760530947, + "learning_rate": 9.165963267110389e-11, + "loss": 0.5331, + "step": 32567 + }, + { + "epoch": 0.9981610886355278, + "grad_norm": 1.8160120478779402, + "learning_rate": 8.867904068843303e-11, + "loss": 0.5272, + "step": 32568 + }, + { + "epoch": 0.9981917371582689, + "grad_norm": 2.0404915924022657, + "learning_rate": 8.574771411085093e-11, + "loss": 0.6189, + "step": 32569 + }, + { + "epoch": 0.9982223856810102, + "grad_norm": 1.6987777733419924, + "learning_rate": 8.286565296777848e-11, + "loss": 0.5493, + "step": 32570 + }, + { + "epoch": 0.9982530342037513, + "grad_norm": 1.6562554348964913, + "learning_rate": 8.00328572869713e-11, + "loss": 0.4337, + "step": 32571 + }, + { + "epoch": 0.9982836827264926, + "grad_norm": 1.75103252181554, + "learning_rate": 7.724932709785027e-11, + "loss": 0.4954, + "step": 32572 + }, + { + "epoch": 0.9983143312492337, + "grad_norm": 0.8274116469928839, + "learning_rate": 7.451506242595053e-11, + "loss": 0.4067, + "step": 32573 + }, + { + "epoch": 0.998344979771975, + "grad_norm": 0.7773869842667587, + "learning_rate": 7.183006329958276e-11, + "loss": 0.3968, + "step": 32574 + }, + { + "epoch": 0.9983756282947162, + "grad_norm": 2.1933417995604523, + "learning_rate": 6.919432974483719e-11, + "loss": 0.5705, + "step": 32575 + }, + { + "epoch": 0.9984062768174574, + "grad_norm": 0.7630368916738295, + "learning_rate": 6.660786178780409e-11, + "loss": 0.3691, + "step": 32576 + }, + { + "epoch": 0.9984369253401986, + "grad_norm": 0.8356315473139404, + "learning_rate": 6.407065945346347e-11, + "loss": 0.3842, + "step": 32577 + }, + { + "epoch": 0.9984675738629398, + "grad_norm": 1.9957267900237006, + "learning_rate": 6.158272276679533e-11, + "loss": 0.54, + "step": 32578 + }, + { + "epoch": 0.998498222385681, + "grad_norm": 1.791237934602006, + "learning_rate": 5.914405175333482e-11, + "loss": 0.5352, + "step": 32579 + }, + { + "epoch": 0.9985288709084222, + "grad_norm": 0.7697810364065011, + "learning_rate": 5.67546464358415e-11, + "loss": 0.3979, + "step": 32580 + }, + { + "epoch": 0.9985595194311634, + "grad_norm": 1.9601439638049711, + "learning_rate": 5.441450683874028e-11, + "loss": 0.5475, + "step": 32581 + }, + { + "epoch": 0.9985901679539047, + "grad_norm": 1.8697973056479091, + "learning_rate": 5.212363298479073e-11, + "loss": 0.5868, + "step": 32582 + }, + { + "epoch": 0.9986208164766458, + "grad_norm": 1.7554768179446358, + "learning_rate": 4.9882024896752427e-11, + "loss": 0.5988, + "step": 32583 + }, + { + "epoch": 0.9986514649993871, + "grad_norm": 1.9256891700194616, + "learning_rate": 4.768968259627471e-11, + "loss": 0.4906, + "step": 32584 + }, + { + "epoch": 0.9986821135221282, + "grad_norm": 0.8069172173034996, + "learning_rate": 4.554660610500694e-11, + "loss": 0.3872, + "step": 32585 + }, + { + "epoch": 0.9987127620448695, + "grad_norm": 1.800279804183096, + "learning_rate": 4.3452795444598464e-11, + "loss": 0.5431, + "step": 32586 + }, + { + "epoch": 0.9987434105676106, + "grad_norm": 0.7804391390519072, + "learning_rate": 4.1408250635033284e-11, + "loss": 0.3996, + "step": 32587 + }, + { + "epoch": 0.9987740590903519, + "grad_norm": 0.7670135913754451, + "learning_rate": 
3.9412971696850545e-11, + "loss": 0.3799, + "step": 32588 + }, + { + "epoch": 0.998804707613093, + "grad_norm": 1.8596046987760353, + "learning_rate": 3.7466958649479136e-11, + "loss": 0.6164, + "step": 32589 + }, + { + "epoch": 0.9988353561358343, + "grad_norm": 1.94191581735636, + "learning_rate": 3.5570211512903076e-11, + "loss": 0.6295, + "step": 32590 + }, + { + "epoch": 0.9988660046585754, + "grad_norm": 0.8307885557989587, + "learning_rate": 3.372273030433082e-11, + "loss": 0.3964, + "step": 32591 + }, + { + "epoch": 0.9988966531813167, + "grad_norm": 2.0351339089356077, + "learning_rate": 3.1924515043191275e-11, + "loss": 0.6365, + "step": 32592 + }, + { + "epoch": 0.9989273017040579, + "grad_norm": 1.8829294156151837, + "learning_rate": 3.0175565746692894e-11, + "loss": 0.5292, + "step": 32593 + }, + { + "epoch": 0.9989579502267991, + "grad_norm": 1.9659973085386557, + "learning_rate": 2.8475882432599245e-11, + "loss": 0.5277, + "step": 32594 + }, + { + "epoch": 0.9989885987495403, + "grad_norm": 1.8319911538487135, + "learning_rate": 2.6825465117008564e-11, + "loss": 0.5268, + "step": 32595 + }, + { + "epoch": 0.9990192472722815, + "grad_norm": 1.975662110981465, + "learning_rate": 2.5224313816019086e-11, + "loss": 0.5788, + "step": 32596 + }, + { + "epoch": 0.9990498957950227, + "grad_norm": 1.8158912757278973, + "learning_rate": 2.3672428546284155e-11, + "loss": 0.5873, + "step": 32597 + }, + { + "epoch": 0.9990805443177638, + "grad_norm": 2.051283735207216, + "learning_rate": 2.216980932223667e-11, + "loss": 0.5992, + "step": 32598 + }, + { + "epoch": 0.9991111928405051, + "grad_norm": 2.0106777543267613, + "learning_rate": 2.071645615886464e-11, + "loss": 0.5308, + "step": 32599 + }, + { + "epoch": 0.9991418413632462, + "grad_norm": 1.8694554943372983, + "learning_rate": 1.931236907115608e-11, + "loss": 0.5219, + "step": 32600 + }, + { + "epoch": 0.9991724898859875, + "grad_norm": 1.725994360407251, + "learning_rate": 1.7957548072433662e-11, + "loss": 0.5258, + "step": 32601 + }, + { + "epoch": 0.9992031384087287, + "grad_norm": 1.883498350566362, + "learning_rate": 1.6651993176020065e-11, + "loss": 0.5139, + "step": 32602 + }, + { + "epoch": 0.9992337869314699, + "grad_norm": 1.7182459064135425, + "learning_rate": 1.5395704394682852e-11, + "loss": 0.5656, + "step": 32603 + }, + { + "epoch": 0.9992644354542111, + "grad_norm": 2.2676420074470998, + "learning_rate": 1.418868174063448e-11, + "loss": 0.5744, + "step": 32604 + }, + { + "epoch": 0.9992950839769523, + "grad_norm": 1.7812301206215584, + "learning_rate": 1.303092522664251e-11, + "loss": 0.4687, + "step": 32605 + }, + { + "epoch": 0.9993257324996935, + "grad_norm": 2.03024946813698, + "learning_rate": 1.1922434863254063e-11, + "loss": 0.6514, + "step": 32606 + }, + { + "epoch": 0.9993563810224347, + "grad_norm": 1.813081014164946, + "learning_rate": 1.0863210662126478e-11, + "loss": 0.5773, + "step": 32607 + }, + { + "epoch": 0.9993870295451759, + "grad_norm": 1.840268499913335, + "learning_rate": 9.853252632696652e-12, + "loss": 0.5232, + "step": 32608 + }, + { + "epoch": 0.9994176780679171, + "grad_norm": 0.8219903402021789, + "learning_rate": 8.892560786066817e-12, + "loss": 0.4078, + "step": 32609 + }, + { + "epoch": 0.9994483265906583, + "grad_norm": 2.1520604953272238, + "learning_rate": 7.981135130563644e-12, + "loss": 0.6343, + "step": 32610 + }, + { + "epoch": 0.9994789751133996, + "grad_norm": 1.9505836505811718, + "learning_rate": 7.118975676179141e-12, + "loss": 0.5376, + "step": 32611 + }, + { + "epoch": 
0.9995096236361407, + "grad_norm": 1.7622214207566387, + "learning_rate": 6.306082430684868e-12, + "loss": 0.6196, + "step": 32612 + }, + { + "epoch": 0.999540272158882, + "grad_norm": 1.6711520449176345, + "learning_rate": 5.542455402407499e-12, + "loss": 0.5057, + "step": 32613 + }, + { + "epoch": 0.9995709206816231, + "grad_norm": 0.7759173613169017, + "learning_rate": 4.8280945991185935e-12, + "loss": 0.3878, + "step": 32614 + }, + { + "epoch": 0.9996015692043644, + "grad_norm": 0.8062338101491675, + "learning_rate": 4.16300002692438e-12, + "loss": 0.3921, + "step": 32615 + }, + { + "epoch": 0.9996322177271055, + "grad_norm": 2.1926745469453572, + "learning_rate": 3.5471716935964183e-12, + "loss": 0.59, + "step": 32616 + }, + { + "epoch": 0.9996628662498468, + "grad_norm": 1.7537341652114582, + "learning_rate": 2.980609604130713e-12, + "loss": 0.5844, + "step": 32617 + }, + { + "epoch": 0.9996935147725879, + "grad_norm": 1.8750733788346663, + "learning_rate": 2.463313765188602e-12, + "loss": 0.4933, + "step": 32618 + }, + { + "epoch": 0.9997241632953292, + "grad_norm": 1.8970030599458279, + "learning_rate": 1.9952841806558656e-12, + "loss": 0.6204, + "step": 32619 + }, + { + "epoch": 0.9997548118180704, + "grad_norm": 1.7975500782236078, + "learning_rate": 1.5765208560836187e-12, + "loss": 0.5605, + "step": 32620 + }, + { + "epoch": 0.9997854603408116, + "grad_norm": 1.883584583281274, + "learning_rate": 1.2070237953576425e-12, + "loss": 0.6066, + "step": 32621 + }, + { + "epoch": 0.9998161088635528, + "grad_norm": 2.172016664944165, + "learning_rate": 8.867930018086057e-13, + "loss": 0.571, + "step": 32622 + }, + { + "epoch": 0.999846757386294, + "grad_norm": 1.8493748024547958, + "learning_rate": 6.15828479322289e-13, + "loss": 0.4624, + "step": 32623 + }, + { + "epoch": 0.9998774059090352, + "grad_norm": 1.7905111932006559, + "learning_rate": 3.9413022956402703e-13, + "loss": 0.4968, + "step": 32624 + }, + { + "epoch": 0.9999080544317764, + "grad_norm": 1.5782453500734084, + "learning_rate": 2.2169825530937716e-13, + "loss": 0.4832, + "step": 32625 + }, + { + "epoch": 0.9999387029545176, + "grad_norm": 1.8615298675419594, + "learning_rate": 9.853255822367403e-14, + "loss": 0.5593, + "step": 32626 + }, + { + "epoch": 0.9999693514772588, + "grad_norm": 1.0884508281833924, + "learning_rate": 2.4633139417140626e-14, + "loss": 0.424, + "step": 32627 + }, + { + "epoch": 1.0, + "grad_norm": 1.977194309437267, + "learning_rate": 0.0, + "loss": 0.5395, + "step": 32628 + }, + { + "epoch": 1.0, + "step": 32628, + "total_flos": 1.3205010417123328e+16, + "train_loss": 0.6281804487061687, + "train_runtime": 381666.8813, + "train_samples_per_second": 10.942, + "train_steps_per_second": 0.085 + } + ], + "logging_steps": 1.0, + "max_steps": 32628, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.3205010417123328e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}